1 module General : sig
2 (******************************************************************************)
3 (* *)
4 (* Menhir *)
5 (* *)
6 (* François Pottier, Inria Paris *)
7 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
8 (* *)
9 (* Copyright Inria. All rights reserved. This file is distributed under the *)
10 (* terms of the GNU Library General Public License version 2, with a *)
11 (* special exception on linking, as described in the file LICENSE. *)
12 (* *)
13 (******************************************************************************)
14
15 (* This module offers general-purpose functions on lists and streams. *)
16
17 (* As of 2017/03/31, this module is DEPRECATED. It might be removed in
18 the future. *)
19
20 (* --------------------------------------------------------------------------- *)
21
22 (* Lists. *)
23
24 (* [take n xs] returns the [n] first elements of the list [xs]. It is
25 acceptable for the list [xs] to have length less than [n], in
26 which case [xs] itself is returned. *)
27
28 val take: int -> 'a list -> 'a list
29
30 (* [drop n xs] returns the list [xs], deprived of its [n] first elements.
31 It is acceptable for the list [xs] to have length less than [n], in
32 which case an empty list is returned. *)
33
34 val drop: int -> 'a list -> 'a list
35
36 (* [uniq cmp xs] assumes that the list [xs] is sorted according to the
37 ordering [cmp] and returns the list [xs] deprived of any duplicate
38 elements. *)
39
40 val uniq: ('a -> 'a -> int) -> 'a list -> 'a list
41
42 (* [weed cmp xs] returns the list [xs] deprived of any duplicate elements. *)
43
44 val weed: ('a -> 'a -> int) -> 'a list -> 'a list
45
46 (* --------------------------------------------------------------------------- *)
47
48 (* A stream is a list whose elements are produced on demand. *)
49
50 type 'a stream =
51 'a head Lazy.t
52
53 and 'a head =
54 | Nil
55 | Cons of 'a * 'a stream
56
57 (* The length of a stream. *)
58
59 val length: 'a stream -> int
60
61 (* Folding over a stream. *)
62
63 val foldr: ('a -> 'b -> 'b) -> 'a stream -> 'b -> 'b
64 end
65 module Convert : sig
66 (******************************************************************************)
67 (* *)
68 (* Menhir *)
69 (* *)
70 (* François Pottier, Inria Paris *)
71 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
72 (* *)
73 (* Copyright Inria. All rights reserved. This file is distributed under the *)
74 (* terms of the GNU Library General Public License version 2, with a *)
75 (* special exception on linking, as described in the file LICENSE. *)
76 (* *)
77 (******************************************************************************)
78
79 (* An ocamlyacc-style, or Menhir-style, parser requires access to
80 the lexer, which must be parameterized with a lexing buffer, and
81 to the lexing buffer itself, where it reads position information. *)
82
83 (* This traditional API is convenient when used with ocamllex, but
84 inelegant when used with other lexer generators. *)
85
86 type ('token, 'semantic_value) traditional =
87 (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value
88
89 (* This revised API is independent of any lexer generator. Here, the
90 parser only requires access to the lexer, and the lexer takes no
91 parameters. The tokens returned by the lexer may contain position
92 information. *)
93
94 type ('token, 'semantic_value) revised =
95 (unit -> 'token) -> 'semantic_value
96
97 (* --------------------------------------------------------------------------- *)
98
99 (* Converting a traditional parser, produced by ocamlyacc or Menhir,
100 into a revised parser. *)
101
102 (* A token of the revised lexer is essentially a triple of a token
103 of the traditional lexer (or raw token), a start position, and
104 and end position. The three [get] functions are accessors. *)
105
106 (* We do not require the type ['token] to actually be a triple type.
107 This enables complex applications where it is a record type with
108 more than three fields. It also enables simple applications where
109 positions are of no interest, so ['token] is just ['raw_token]
110 and [get_startp] and [get_endp] return dummy positions. *)
111
112 val traditional2revised:
113 ('token -> 'raw_token) ->
114 ('token -> Lexing.position) ->
115 ('token -> Lexing.position) ->
116 ('raw_token, 'semantic_value) traditional ->
117 ('token, 'semantic_value) revised
118
119 (* --------------------------------------------------------------------------- *)
120
121 (* Converting a revised parser back to a traditional parser. *)
122
123 val revised2traditional:
124 ('raw_token -> Lexing.position -> Lexing.position -> 'token) ->
125 ('token, 'semantic_value) revised ->
126 ('raw_token, 'semantic_value) traditional
127
128 (* --------------------------------------------------------------------------- *)
129
130 (* Simplified versions of the above, where concrete triples are used. *)
131
132 module Simplified : sig
133
134 val traditional2revised:
135 ('token, 'semantic_value) traditional ->
136 ('token * Lexing.position * Lexing.position, 'semantic_value) revised
137
138 val revised2traditional:
139 ('token * Lexing.position * Lexing.position, 'semantic_value) revised ->
140 ('token, 'semantic_value) traditional
141
142 end
143 end
144 module IncrementalEngine : sig
145 (******************************************************************************)
146 (* *)
147 (* Menhir *)
148 (* *)
149 (* François Pottier, Inria Paris *)
150 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
151 (* *)
152 (* Copyright Inria. All rights reserved. This file is distributed under the *)
153 (* terms of the GNU Library General Public License version 2, with a *)
154 (* special exception on linking, as described in the file LICENSE. *)
155 (* *)
156 (******************************************************************************)
157
158 type position = Lexing.position
159
160 open General
161
162 (* This signature describes the incremental LR engine. *)
163
164 (* In this mode, the user controls the lexer, and the parser suspends
165 itself when it needs to read a new token. *)
166
167 module type INCREMENTAL_ENGINE = sig
168
169 type token
170
171 (* A value of type [production] is (an index for) a production. The start
172 productions (which do not exist in an \mly file, but are constructed by
173 Menhir internally) are not part of this type. *)
174
175 type production
176
177 (* The type ['a checkpoint] represents an intermediate or final state of the
178 parser. An intermediate checkpoint is a suspension: it records the parser's
179 current state, and allows parsing to be resumed. The parameter ['a] is
180 the type of the semantic value that will eventually be produced if the
181 parser succeeds. *)
182
183 (* [Accepted] and [Rejected] are final checkpoints. [Accepted] carries a
184 semantic value. *)
185
186 (* [InputNeeded] is an intermediate checkpoint. It means that the parser wishes
187 to read one token before continuing. *)
188
189 (* [Shifting] is an intermediate checkpoint. It means that the parser is taking
190 a shift transition. It exposes the state of the parser before and after
191 the transition. The Boolean parameter tells whether the parser intends to
192 request a new token after this transition. (It always does, except when
193 it is about to accept.) *)
194
195 (* [AboutToReduce] is an intermediate checkpoint. It means that the parser is
196 about to perform a reduction step. It exposes the parser's current
197 state as well as the production that is about to be reduced. *)
198
199 (* [HandlingError] is an intermediate checkpoint. It means that the parser has
200 detected an error and is currently handling it, in several steps. *)
201
202 (* A value of type ['a env] represents a configuration of the automaton:
203 current state, stack, lookahead token, etc. The parameter ['a] is the
204 type of the semantic value that will eventually be produced if the parser
205 succeeds. *)
206
207 (* In normal operation, the parser works with checkpoints: see the functions
208 [offer] and [resume]. However, it is also possible to work directly with
209 environments (see the functions [pop], [force_reduction], and [feed]) and
210 to reconstruct a checkpoint out of an environment (see [input_needed]).
211 This is considered advanced functionality; its purpose is to allow error
212 recovery strategies to be programmed by the user. *)
213
214 type 'a env
215
216 type 'a checkpoint = private
217 | InputNeeded of 'a env
218 | Shifting of 'a env * 'a env * bool
219 | AboutToReduce of 'a env * production
220 | HandlingError of 'a env
221 | Accepted of 'a
222 | Rejected
223
224 (* [offer] allows the user to resume the parser after it has suspended
225 itself with a checkpoint of the form [InputNeeded env]. [offer] expects the
226 old checkpoint as well as a new token and produces a new checkpoint. It does not
227 raise any exception. *)
228
229 val offer:
230 'a checkpoint ->
231 token * position * position ->
232 'a checkpoint
233
234 (* [resume] allows the user to resume the parser after it has suspended
235 itself with a checkpoint of the form [AboutToReduce (env, prod)] or
236 [HandlingError env]. [resume] expects the old checkpoint and produces a new
237 checkpoint. It does not raise any exception. *)
238
239 val resume:
240 'a checkpoint ->
241 'a checkpoint
242
243 (* A token supplier is a function of no arguments which delivers a new token
244 (together with its start and end positions) every time it is called. *)
245
246 type supplier =
247 unit -> token * position * position
248
249 (* A pair of a lexer and a lexing buffer can be easily turned into a supplier. *)
250
251 val lexer_lexbuf_to_supplier:
252 (Lexing.lexbuf -> token) ->
253 Lexing.lexbuf ->
254 supplier
255
256 (* The functions [offer] and [resume] are sufficient to write a parser loop.
257 One can imagine many variations (which is why we expose these functions
258 in the first place!). Here, we expose a few variations of the main loop,
259 ready for use. *)
260
261 (* [loop supplier checkpoint] begins parsing from [checkpoint], reading
262 tokens from [supplier]. It continues parsing until it reaches a
263 checkpoint of the form [Accepted v] or [Rejected]. In the former case, it
264 returns [v]. In the latter case, it raises the exception [Error]. *)
265
266 val loop: supplier -> 'a checkpoint -> 'a
267
268 (* [loop_handle succeed fail supplier checkpoint] begins parsing from
269 [checkpoint], reading tokens from [supplier]. It continues parsing until
270 it reaches a checkpoint of the form [Accepted v] or [HandlingError env]
271 (or [Rejected], but that should not happen, as [HandlingError _] will be
272 observed first). In the former case, it calls [succeed v]. In the latter
273 case, it calls [fail] with this checkpoint. It cannot raise [Error].
274
275 This means that Menhir's traditional error-handling procedure (which pops
276 the stack until a state that can act on the [error] token is found) does
277 not get a chance to run. Instead, the user can implement her own error
278 handling code, in the [fail] continuation. *)
279
280 val loop_handle:
281 ('a -> 'answer) ->
282 ('a checkpoint -> 'answer) ->
283 supplier -> 'a checkpoint -> 'answer
284
285 (* [loop_handle_undo] is analogous to [loop_handle], except it passes a pair
286 of checkpoints to the failure continuation.
287
288 The first (and oldest) checkpoint is the last [InputNeeded] checkpoint that
289 was encountered before the error was detected. The second (and newest)
290 checkpoint is where the error was detected, as in [loop_handle]. Going back
291 to the first checkpoint can be thought of as undoing any reductions that
292 were performed after seeing the problematic token. (These reductions must
293 be default reductions or spurious reductions.)
294
295 [loop_handle_undo] must initially be applied to an [InputNeeded] checkpoint.
296 The parser's initial checkpoints satisfy this constraint. *)
297
298 val loop_handle_undo:
299 ('a -> 'answer) ->
300 ('a checkpoint -> 'a checkpoint -> 'answer) ->
301 supplier -> 'a checkpoint -> 'answer
302
303 (* [shifts checkpoint] assumes that [checkpoint] has been obtained by
304 submitting a token to the parser. It runs the parser from [checkpoint],
305 through an arbitrary number of reductions, until the parser either
306 accepts this token (i.e., shifts) or rejects it (i.e., signals an error).
307 If the parser decides to shift, then [Some env] is returned, where [env]
308 is the parser's state just before shifting. Otherwise, [None] is
309 returned. *)
310
311 (* It is desirable that the semantic actions be side-effect free, or that
312 their side-effects be harmless (replayable). *)
313
314 val shifts: 'a checkpoint -> 'a env option
315
316 (* The function [acceptable] allows testing, after an error has been
317 detected, which tokens would have been accepted at this point. It is
318 implemented using [shifts]. Its argument should be an [InputNeeded]
319 checkpoint. *)
320
321 (* For completeness, one must undo any spurious reductions before carrying out
322 this test -- that is, one must apply [acceptable] to the FIRST checkpoint
323 that is passed by [loop_handle_undo] to its failure continuation. *)
324
325 (* This test causes some semantic actions to be run! The semantic actions
326 should be side-effect free, or their side-effects should be harmless. *)
327
328 (* The position [pos] is used as the start and end positions of the
329 hypothetical token, and may be picked up by the semantic actions. We
330 suggest using the position where the error was detected. *)
331
332 val acceptable: 'a checkpoint -> token -> position -> bool
333
334 (* The abstract type ['a lr1state] describes the non-initial states of the
335 LR(1) automaton. The index ['a] represents the type of the semantic value
336 associated with this state's incoming symbol. *)
337
338 type 'a lr1state
339
340 (* The states of the LR(1) automaton are numbered (from 0 and up). *)
341
342 val number: _ lr1state -> int
343
344 (* Productions are numbered. *)
345
346 (* [find_production i] requires the index [i] to be valid. Use with care. *)
347
348 val production_index: production -> int
349 val find_production: int -> production
350
351 (* An element is a pair of a non-initial state [s] and a semantic value [v]
352 associated with the incoming symbol of this state. The idea is, the value
353 [v] was pushed onto the stack just before the state [s] was entered. Thus,
354 for some type ['a], the state [s] has type ['a lr1state] and the value [v]
355 has type ['a]. In other words, the type [element] is an existential type. *)
356
357 type element =
358 | Element: 'a lr1state * 'a * position * position -> element
359
360 (* The parser's stack is (or, more precisely, can be viewed as) a stream of
361 elements. The type [stream] is defined by the module [General]. *)
362
363 (* As of 2017/03/31, the types [stream] and [stack] and the function [stack]
364 are DEPRECATED. They might be removed in the future. An alternative way
365 of inspecting the stack is via the functions [top] and [pop]. *)
366
367 type stack = (* DEPRECATED *)
368 element stream
369
370 (* This is the parser's stack, a stream of elements. This stream is empty if
371 the parser is in an initial state; otherwise, it is non-empty. The LR(1)
372 automaton's current state is the one found in the top element of the
373 stack. *)
374
375 val stack: 'a env -> stack (* DEPRECATED *)
376
377 (* [top env] returns the parser's top stack element. The state contained in
378 this stack element is the current state of the automaton. If the stack is
379 empty, [None] is returned. In that case, the current state of the
380 automaton must be an initial state. *)
381
382 val top: 'a env -> element option
383
384 (* [pop_many i env] pops [i] cells off the automaton's stack. This is done
385 via [i] successive invocations of [pop]. Thus, [pop_many 1] is [pop]. The
386 index [i] must be nonnegative. The time complexity is O(i). *)
387
388 val pop_many: int -> 'a env -> 'a env option
389
390 (* [get i env] returns the parser's [i]-th stack element. The index [i] is
391 0-based: thus, [get 0] is [top]. If [i] is greater than or equal to the
392 number of elements in the stack, [None] is returned. The time complexity
393 is O(i). *)
394
395 val get: int -> 'a env -> element option
396
397 (* [current_state_number env] is (the integer number of) the automaton's
398 current state. This works even if the automaton's stack is empty, in
399 which case the current state is an initial state. This number can be
400 passed as an argument to a [message] function generated by [menhir
401 --compile-errors]. *)
402
403 val current_state_number: 'a env -> int
404
405 (* [equal env1 env2] tells whether the parser configurations [env1] and
406 [env2] are equal in the sense that the automaton's current state is the
407 same in [env1] and [env2] and the stack is *physically* the same in
408 [env1] and [env2]. If [equal env1 env2] is [true], then the sequence of
409 the stack elements, as observed via [pop] and [top], must be the same in
410 [env1] and [env2]. Also, if [equal env1 env2] holds, then the checkpoints
411 [input_needed env1] and [input_needed env2] must be equivalent. The
412 function [equal] has time complexity O(1). *)
413
414 val equal: 'a env -> 'a env -> bool
415
416 (* These are the start and end positions of the current lookahead token. If
417 invoked in an initial state, this function returns a pair of twice the
418 initial position. *)
419
420 val positions: 'a env -> position * position
421
422 (* When applied to an environment taken from a checkpoint of the form
423 [AboutToReduce (env, prod)], the function [env_has_default_reduction]
424 tells whether the reduction that is about to take place is a default
425 reduction. *)
426
427 val env_has_default_reduction: 'a env -> bool
428
429 (* [state_has_default_reduction s] tells whether the state [s] has a default
430 reduction. This includes the case where [s] is an accepting state. *)
431
432 val state_has_default_reduction: _ lr1state -> bool
433
434 (* [pop env] returns a new environment, where the parser's top stack cell
435 has been popped off. (If the stack is empty, [None] is returned.) This
436 amounts to pretending that the (terminal or nonterminal) symbol that
437 corresponds to this stack cell has not been read. *)
438
439 val pop: 'a env -> 'a env option
440
441 (* [force_reduction prod env] should be called only if in the state [env]
442 the parser is capable of reducing the production [prod]. If this
443 condition is satisfied, then this production is reduced, which means that
444 its semantic action is executed (this can have side effects!) and the
445 automaton makes a goto (nonterminal) transition. If this condition is not
446 satisfied, [Invalid_argument _] is raised. *)
447
448 val force_reduction: production -> 'a env -> 'a env
449
450 (* [input_needed env] returns [InputNeeded env]. That is, out of an [env]
451 that might have been obtained via a series of calls to the functions
452 [pop], [force_reduction], [feed], etc., it produces a checkpoint, which
453 can be used to resume normal parsing, by supplying this checkpoint as an
454 argument to [offer]. *)
455
456 (* This function should be used with some care. It could "mess up the
457 lookahead" in the sense that it allows parsing to resume in an arbitrary
458 state [s] with an arbitrary lookahead symbol [t], even though Menhir's
459 reachability analysis (menhir --list-errors) might well think that it is
460 impossible to reach this particular configuration. If one is using
461 Menhir's new error reporting facility, this could cause the parser to
462 reach an error state for which no error message has been prepared. *)
463
464 val input_needed: 'a env -> 'a checkpoint
465
466 end
467
468 (* This signature is a fragment of the inspection API that is made available
469 to the user when [--inspection] is used. This fragment contains type
470 definitions for symbols. *)
471
472 module type SYMBOLS = sig
473
474 (* The type ['a terminal] represents a terminal symbol. The type ['a
475 nonterminal] represents a nonterminal symbol. In both cases, the index
476 ['a] represents the type of the semantic values associated with this
477 symbol. The concrete definitions of these types are generated. *)
478
479 type 'a terminal
480 type 'a nonterminal
481
482 (* The type ['a symbol] represents a terminal or nonterminal symbol. It is
483 the disjoint union of the types ['a terminal] and ['a nonterminal]. *)
484
485 type 'a symbol =
486 | T : 'a terminal -> 'a symbol
487 | N : 'a nonterminal -> 'a symbol
488
489 (* The type [xsymbol] is an existentially quantified version of the type
490 ['a symbol]. This type is useful in situations where the index ['a]
491 is not statically known. *)
492
493 type xsymbol =
494 | X : 'a symbol -> xsymbol
495
496 end
497
498 (* This signature describes the inspection API that is made available to the
499 user when [--inspection] is used. *)
500
501 module type INSPECTION = sig
502
503 (* The types of symbols are described above. *)
504
505 include SYMBOLS
506
507 (* The type ['a lr1state] is meant to be the same as in [INCREMENTAL_ENGINE]. *)
508
509 type 'a lr1state
510
511 (* The type [production] is meant to be the same as in [INCREMENTAL_ENGINE].
512 It represents a production of the grammar. A production can be examined
513 via the functions [lhs] and [rhs] below. *)
514
515 type production
516
517 (* An LR(0) item is a pair of a production [prod] and a valid index [i] into
518 this production. That is, if the length of [rhs prod] is [n], then [i] is
519 comprised between 0 and [n], inclusive. *)
520
521 type item =
522 production * int
523
524 (* Ordering functions. *)
525
526 val compare_terminals: _ terminal -> _ terminal -> int
527 val compare_nonterminals: _ nonterminal -> _ nonterminal -> int
528 val compare_symbols: xsymbol -> xsymbol -> int
529 val compare_productions: production -> production -> int
530 val compare_items: item -> item -> int
531
532 (* [incoming_symbol s] is the incoming symbol of the state [s], that is,
533 the symbol that the parser must recognize before (has recognized when)
534 it enters the state [s]. This function gives access to the semantic
535 value [v] stored in a stack element [Element (s, v, _, _)]. Indeed,
536 by case analysis on the symbol [incoming_symbol s], one discovers the
537 type ['a] of the value [v]. *)
538
539 val incoming_symbol: 'a lr1state -> 'a symbol
540
541 (* [items s] is the set of the LR(0) items in the LR(0) core of the LR(1)
542 state [s]. This set is not epsilon-closed. This set is presented as a
543 list, in an arbitrary order. *)
544
545 val items: _ lr1state -> item list
546
547 (* [lhs prod] is the left-hand side of the production [prod]. This is
548 always a non-terminal symbol. *)
549
550 val lhs: production -> xsymbol
551
552 (* [rhs prod] is the right-hand side of the production [prod]. This is
553 a (possibly empty) sequence of (terminal or nonterminal) symbols. *)
554
555 val rhs: production -> xsymbol list
556
557 (* [nullable nt] tells whether the non-terminal symbol [nt] is nullable.
558 That is, it is true if and only if this symbol produces the empty
559 word [epsilon]. *)
560
561 val nullable: _ nonterminal -> bool
562
563 (* [first nt t] tells whether the FIRST set of the nonterminal symbol [nt]
564 contains the terminal symbol [t]. That is, it is true if and only if
565 [nt] produces a word that begins with [t]. *)
566
567 val first: _ nonterminal -> _ terminal -> bool
568
569 (* [xfirst] is analogous to [first], but expects a first argument of type
570 [xsymbol] instead of [_ terminal]. *)
571
572 val xfirst: xsymbol -> _ terminal -> bool
573
574 (* [foreach_terminal] enumerates the terminal symbols, including [error].
575 [foreach_terminal_but_error] enumerates the terminal symbols, excluding
576 [error]. *)
577
578 val foreach_terminal: (xsymbol -> 'a -> 'a) -> 'a -> 'a
579 val foreach_terminal_but_error: (xsymbol -> 'a -> 'a) -> 'a -> 'a
580
581 (* The type [env] is meant to be the same as in [INCREMENTAL_ENGINE]. *)
582
583 type 'a env
584
585 (* [feed symbol startp semv endp env] causes the parser to consume the
586 (terminal or nonterminal) symbol [symbol], accompanied with the semantic
587 value [semv] and with the start and end positions [startp] and [endp].
588 Thus, the automaton makes a transition, and reaches a new state. The
589 stack grows by one cell. This operation is permitted only if the current
590 state (as determined by [env]) has an outgoing transition labeled with
591 [symbol]. Otherwise, [Invalid_argument _] is raised. *)
592
593 val feed: 'a symbol -> position -> 'a -> position -> 'b env -> 'b env
594
595 end
596
597 (* This signature combines the incremental API and the inspection API. *)
598
599 module type EVERYTHING = sig
600
601 include INCREMENTAL_ENGINE
602
603 include INSPECTION
604 with type 'a lr1state := 'a lr1state
605 with type production := production
606 with type 'a env := 'a env
607
608 end
609 end
610 module EngineTypes : sig
611 (******************************************************************************)
612 (* *)
613 (* Menhir *)
614 (* *)
615 (* François Pottier, Inria Paris *)
616 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
617 (* *)
618 (* Copyright Inria. All rights reserved. This file is distributed under the *)
619 (* terms of the GNU Library General Public License version 2, with a *)
620 (* special exception on linking, as described in the file LICENSE. *)
621 (* *)
622 (******************************************************************************)
623
624 (* This file defines several types and module types that are used in the
625 specification of module [Engine]. *)
626
627 (* --------------------------------------------------------------------------- *)
628
629 (* It would be nice if we could keep the structure of stacks and environments
630 hidden. However, stacks and environments must be accessible to semantic
631 actions, so the following data structure definitions must be public. *)
632
633 (* --------------------------------------------------------------------------- *)
634
635 (* A stack is a linked list of cells. A sentinel cell -- which is its own
636 successor -- is used to mark the bottom of the stack. The sentinel cell
637 itself is not significant -- it contains dummy values. *)
638
639 type ('state, 'semantic_value) stack = {
640
641 (* The state that we should go back to if we pop this stack cell. *)
642
643 (* This convention means that the state contained in the top stack cell is
644 not the current state [env.current]. It also means that the state found
645 within the sentinel is a dummy -- it is never consulted. This convention
646 is the same as that adopted by the code-based back-end. *)
647
648 state: 'state;
649
650 (* The semantic value associated with the chunk of input that this cell
651 represents. *)
652
653 semv: 'semantic_value;
654
655 (* The start and end positions of the chunk of input that this cell
656 represents. *)
657
658 startp: Lexing.position;
659 endp: Lexing.position;
660
661 (* The next cell down in the stack. If this is a self-pointer, then this
662 cell is the sentinel, and the stack is conceptually empty. *)
663
664 next: ('state, 'semantic_value) stack;
665
666 }
667
668 (* --------------------------------------------------------------------------- *)
669
670 (* A parsing environment contains all of the parser's state (except for the
671 current program point). *)
672
673 type ('state, 'semantic_value, 'token) env = {
674
675 (* If this flag is true, then the first component of [env.triple] should
676 be ignored, as it has been logically overwritten with the [error]
677 pseudo-token. *)
678
679 error: bool;
680
681 (* The last token that was obtained from the lexer, together with its start
682 and end positions. Warning: before the first call to the lexer has taken
683 place, a dummy (and possibly invalid) token is stored here. *)
684
685 triple: 'token * Lexing.position * Lexing.position;
686
687 (* The stack. In [CodeBackend], it is passed around on its own,
688 whereas, here, it is accessed via the environment. *)
689
690 stack: ('state, 'semantic_value) stack;
691
692 (* The current state. In [CodeBackend], it is passed around on its
693 own, whereas, here, it is accessed via the environment. *)
694
695 current: 'state;
696
697 }
698
699 (* --------------------------------------------------------------------------- *)
700
701 (* This signature describes the parameters that must be supplied to the LR
702 engine. *)
703
704 module type TABLE = sig
705
706 (* The type of automaton states. *)
707
708 type state
709
710 (* States are numbered. *)
711
712 val number: state -> int
713
714 (* The type of tokens. These can be thought of as real tokens, that is,
715 tokens returned by the lexer. They carry a semantic value. This type
716 does not include the [error] pseudo-token. *)
717
718 type token
719
720 (* The type of terminal symbols. These can be thought of as integer codes.
721 They do not carry a semantic value. This type does include the [error]
722 pseudo-token. *)
723
724 type terminal
725
726 (* The type of nonterminal symbols. *)
727
728 type nonterminal
729
730 (* The type of semantic values. *)
731
732 type semantic_value
733
734 (* A token is conceptually a pair of a (non-[error]) terminal symbol and
735 a semantic value. The following two functions are the pair projections. *)
736
737 val token2terminal: token -> terminal
738 val token2value: token -> semantic_value
739
740 (* Even though the [error] pseudo-token is not a real token, it is a
741 terminal symbol. Furthermore, for regularity, it must have a semantic
742 value. *)
743
744 val error_terminal: terminal
745 val error_value: semantic_value
746
747 (* [foreach_terminal] allows iterating over all terminal symbols. *)
748
749 val foreach_terminal: (terminal -> 'a -> 'a) -> 'a -> 'a
750
751 (* The type of productions. *)
752
753 type production
754
755 val production_index: production -> int
756 val find_production: int -> production
757
758 (* If a state [s] has a default reduction on production [prod], then, upon
759 entering [s], the automaton should reduce [prod] without consulting the
760 lookahead token. The following function allows determining which states
761 have default reductions. *)
762
763 (* Instead of returning a value of a sum type -- either [DefRed prod], or
764 [NoDefRed] -- it accepts two continuations, and invokes just one of
765 them. This mechanism allows avoiding a memory allocation. *)
766
767 val default_reduction:
768 state ->
769 ('env -> production -> 'answer) ->
770 ('env -> 'answer) ->
771 'env -> 'answer
772
773 (* An LR automaton can normally take three kinds of actions: shift, reduce,
774 or fail. (Acceptance is a particular case of reduction: it consists in
775 reducing a start production.) *)
776
777 (* There are two variants of the shift action. [shift/discard s] instructs
778 the automaton to discard the current token, request a new one from the
779 lexer, and move to state [s]. [shift/nodiscard s] instructs it to move to
780 state [s] without requesting a new token. This instruction should be used
781 when [s] has a default reduction on [#]. See [CodeBackend.gettoken] for
782 details. *)
783
784 (* This is the automaton's action table. It maps a pair of a state and a
785 terminal symbol to an action. *)
786
787 (* Instead of returning a value of a sum type -- one of shift/discard,
788 shift/nodiscard, reduce, or fail -- this function accepts three
789 continuations, and invokes just one them. This mechanism allows avoiding
790 a memory allocation. *)
791
792 (* In summary, the parameters to [action] are as follows:
793
794 - the first two parameters, a state and a terminal symbol, are used to
795 look up the action table;
796
797 - the next parameter is the semantic value associated with the above
798 terminal symbol; it is not used, only passed along to the shift
799 continuation, as explained below;
800
801 - the shift continuation expects an environment; a flag that tells
802 whether to discard the current token; the terminal symbol that
803 is being shifted; its semantic value; and the target state of
804 the transition;
805
806 - the reduce continuation expects an environment and a production;
807
808 - the fail continuation expects an environment;
809
810 - the last parameter is the environment; it is not used, only passed
811 along to the selected continuation. *)
812
813 val action:
814 state ->
815 terminal ->
816 semantic_value ->
817 ('env -> bool -> terminal -> semantic_value -> state -> 'answer) ->
818 ('env -> production -> 'answer) ->
819 ('env -> 'answer) ->
820 'env -> 'answer
821
822 (* This is the automaton's goto table. This table maps a pair of a state
823 and a nonterminal symbol to a new state. By extension, it also maps a
824 pair of a state and a production to a new state. *)
825
826 (* The function [goto_nt] can be applied to [s] and [nt] ONLY if the state
827 [s] has an outgoing transition labeled [nt]. Otherwise, its result is
828 undefined. Similarly, the call [goto_prod prod s] is permitted ONLY if
829 the state [s] has an outgoing transition labeled with the nonterminal
830 symbol [lhs prod]. The function [maybe_goto_nt] involves an additional
831 dynamic check and CAN be called even if there is no outgoing transition. *)
832
833 val goto_nt : state -> nonterminal -> state
834 val goto_prod: state -> production -> state
835 val maybe_goto_nt: state -> nonterminal -> state option
836
837 (* [is_start prod] tells whether the production [prod] is a start production. *)
838
839 val is_start: production -> bool
840
841 (* By convention, a semantic action is responsible for:
842
843 1. fetching whatever semantic values and positions it needs off the stack;
844
845 2. popping an appropriate number of cells off the stack, as dictated
846 by the length of the right-hand side of the production;
847
848 3. computing a new semantic value, as well as new start and end positions;
849
850 4. pushing a new stack cell, which contains the three values
851 computed in step 3;
852
853 5. returning the new stack computed in steps 2 and 4.
854
855 Point 1 is essentially forced upon us: if semantic values were fetched
856 off the stack by this interpreter, then the calling convention for
857 semantic actions would be variadic: not all semantic actions would have
858 the same number of arguments. The rest follows rather naturally. *)
859
860 (* Semantic actions are allowed to raise [Error]. *)
861
862 exception Error
863
864 type semantic_action =
865 (state, semantic_value, token) env -> (state, semantic_value) stack
866
867 val semantic_action: production -> semantic_action
868
869 (* [may_reduce state prod] tests whether the state [state] is capable of
870 reducing the production [prod]. This function is currently costly and
871 is not used by the core LR engine. It is used in the implementation
872 of certain functions, such as [force_reduction], which allow the engine
873 to be driven programmatically. *)
874
875 val may_reduce: state -> production -> bool
876
877 (* The LR engine requires a number of hooks, which are used for logging. *)
878
879 (* The comments below indicate the conventional messages that correspond
880 to these hooks in the code-based back-end; see [CodeBackend]. *)
881
882 (* If the flag [log] is false, then the logging functions are not called.
883 If it is [true], then they are called. *)
884
885 val log : bool
886
887 module Log : sig
888
889 (* State %d: *)
890
891 val state: state -> unit
892
893 (* Shifting (<terminal>) to state <state> *)
894
895 val shift: terminal -> state -> unit
896
897 (* Reducing a production should be logged either as a reduction
898 event (for regular productions) or as an acceptance event (for
899 start productions). *)
900
901 (* Reducing production <production> / Accepting *)
902
903 val reduce_or_accept: production -> unit
904
905 (* Lookahead token is now <terminal> (<pos>-<pos>) *)
906
907 val lookahead_token: terminal -> Lexing.position -> Lexing.position -> unit
908
909 (* Initiating error handling *)
910
911 val initiating_error_handling: unit -> unit
912
913 (* Resuming error handling *)
914
915 val resuming_error_handling: unit -> unit
916
917 (* Handling error in state <state> *)
918
919 val handling_error: state -> unit
920
921 end
922
923 end
924
925 (* --------------------------------------------------------------------------- *)
926
927 (* This signature describes the monolithic (traditional) LR engine. *)
928
929 (* In this interface, the parser controls the lexer. *)
930
931 module type MONOLITHIC_ENGINE = sig
932
933 type state
934
935 type token
936
937 type semantic_value
938
939 (* An entry point to the engine requires a start state, a lexer, and a lexing
940 buffer. It either succeeds and produces a semantic value, or fails and
941 raises [Error]. *)
942
943 exception Error
944
945 val entry:
946 state ->
947 (Lexing.lexbuf -> token) ->
948 Lexing.lexbuf ->
949 semantic_value
950
951 end
952
953 (* --------------------------------------------------------------------------- *)
954
955 (* The following signatures describe the incremental LR engine. *)
956
957 (* First, see [INCREMENTAL_ENGINE] in the file [IncrementalEngine.ml]. *)
958
959 (* The [start] function is set apart because we do not wish to publish
960 it as part of the generated [parser.mli] file. Instead, the table
961 back-end will publish specialized versions of it, with a suitable
962 type cast. *)
963
964 module type INCREMENTAL_ENGINE_START = sig
965
966 (* [start] is an entry point. It requires a start state and a start position
967 and begins the parsing process. If the lexer is based on an OCaml lexing
968 buffer, the start position should be [lexbuf.lex_curr_p]. [start] produces
969 a checkpoint, which usually will be an [InputNeeded] checkpoint. (It could
970 be [Accepted] if this starting state accepts only the empty word. It could
971 be [Rejected] if this starting state accepts no word at all.) It does not
972 raise any exception. *)
973
974 (* [start s pos] should really produce a checkpoint of type ['a checkpoint],
975 for a fixed ['a] that depends on the state [s]. We cannot express this, so
976 we use [semantic_value checkpoint], which is safe. The table back-end uses
977 [Obj.magic] to produce safe specialized versions of [start]. *)
978
979 type state
980 type semantic_value
981 type 'a checkpoint
982
983 val start:
984 state ->
985 Lexing.position ->
986 semantic_value checkpoint
987
988 end
989
990 (* --------------------------------------------------------------------------- *)
991
992 (* This signature describes the LR engine, which combines the monolithic
993 and incremental interfaces. *)
994
995 module type ENGINE = sig
996
997 include MONOLITHIC_ENGINE
998
999 include IncrementalEngine.INCREMENTAL_ENGINE
1000 with type token := token
1001 and type 'a lr1state = state (* useful for us; hidden from the end user *)
1002
1003 include INCREMENTAL_ENGINE_START
1004 with type state := state
1005 and type semantic_value := semantic_value
1006 and type 'a checkpoint := 'a checkpoint
1007
1008 end
1009 end
1010 module Engine : sig
1011 (******************************************************************************)
1012 (* *)
1013 (* Menhir *)
1014 (* *)
1015 (* François Pottier, Inria Paris *)
1016 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1017 (* *)
1018 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1019 (* terms of the GNU Library General Public License version 2, with a *)
1020 (* special exception on linking, as described in the file LICENSE. *)
1021 (* *)
1022 (******************************************************************************)
1023
1024 open EngineTypes
1025
1026 (* The LR parsing engine. *)
1027
1028 module Make (T : TABLE)
1029 : ENGINE
1030 with type state = T.state
1031 and type token = T.token
1032 and type semantic_value = T.semantic_value
1033 and type production = T.production
1034 and type 'a env = (T.state, T.semantic_value, T.token) EngineTypes.env
1035
1036 (* We would prefer not to expose the definition of the type [env].
1037 However, it must be exposed because some of the code in the
1038 inspection API needs access to the engine's internals; see
1039 [InspectionTableInterpreter]. Everything would be simpler if
1040 --inspection was always ON, but that would lead to bigger parse
1041 tables for everybody. *)
1042 end
1043 module ErrorReports : sig
1044 (******************************************************************************)
1045 (* *)
1046 (* Menhir *)
1047 (* *)
1048 (* François Pottier, Inria Paris *)
1049 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1050 (* *)
1051 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1052 (* terms of the GNU Library General Public License version 2, with a *)
1053 (* special exception on linking, as described in the file LICENSE. *)
1054 (* *)
1055 (******************************************************************************)
1056
1057 (* -------------------------------------------------------------------------- *)
1058
1059 (* The following functions help keep track of the start and end positions of
1060 the last two tokens in a two-place buffer. This is used to nicely display
1061 where a syntax error took place. *)
1062
1063 type 'a buffer
1064
1065 (* [wrap lexer] returns a pair of a new (initially empty) buffer and a lexer
1066 which internally relies on [lexer] and updates [buffer] on the fly whenever
1067 a token is demanded. *)
1068
1069 open Lexing
1070
1071 val wrap:
1072 (lexbuf -> 'token) ->
1073 (position * position) buffer * (lexbuf -> 'token)
1074
1075 (* [show f buffer] prints the contents of the buffer, producing a string that
1076 is typically of the form "after '%s' and before '%s'". The function [f] is
1077 used to print an element. The buffer MUST be nonempty. *)
1078
1079 val show: ('a -> string) -> 'a buffer -> string
1080
1081 (* [last buffer] returns the last element of the buffer. The buffer MUST be
1082 nonempty. *)
1083
1084 val last: 'a buffer -> 'a
1085
1086 (* -------------------------------------------------------------------------- *)
1087 end
1088 module Printers : sig
1089 (******************************************************************************)
1090 (* *)
1091 (* Menhir *)
1092 (* *)
1093 (* François Pottier, Inria Paris *)
1094 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1095 (* *)
1096 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1097 (* terms of the GNU Library General Public License version 2, with a *)
1098 (* special exception on linking, as described in the file LICENSE. *)
1099 (* *)
1100 (******************************************************************************)
1101
1102 (* This module is part of MenhirLib. *)
1103
1104 module Make
1105
1106 (I : IncrementalEngine.EVERYTHING)
1107
1108 (User : sig
1109
1110 (* [print s] is supposed to send the string [s] to some output channel. *)
1111
1112 val print: string -> unit
1113
1114 (* [print_symbol s] is supposed to print a representation of the symbol [s]. *)
1115
1116 val print_symbol: I.xsymbol -> unit
1117
1118 (* [print_element e] is supposed to print a representation of the element [e].
1119 This function is optional; if it is not provided, [print_element_as_symbol]
1120 (defined below) is used instead. *)
1121
1122 val print_element: (I.element -> unit) option
1123
1124 end)
1125
1126 : sig
1127
1128 open I
1129
1130 (* Printing a list of symbols. *)
1131
1132 val print_symbols: xsymbol list -> unit
1133
1134 (* Printing an element as a symbol. This prints just the symbol
1135 that this element represents; nothing more. *)
1136
1137 val print_element_as_symbol: element -> unit
1138
1139 (* Printing a stack as a list of elements. This function needs an element
1140 printer. It uses [print_element] if provided by the user; otherwise
1141 it uses [print_element_as_symbol]. (Ending with a newline.) *)
1142
1143 val print_stack: 'a env -> unit
1144
1145 (* Printing an item. (Ending with a newline.) *)
1146
1147 val print_item: item -> unit
1148
1149 (* Printing a production. (Ending with a newline.) *)
1150
1151 val print_production: production -> unit
1152
1153 (* Printing the current LR(1) state. The current state is first displayed
1154 as a number; then the list of its LR(0) items is printed. (Ending with
1155 a newline.) *)
1156
1157 val print_current_state: 'a env -> unit
1158
1159 (* Printing a summary of the stack and current state. This function just
1160 calls [print_stack] and [print_current_state] in succession. *)
1161
1162 val print_env: 'a env -> unit
1163
1164 end
1165 end
1166 module InfiniteArray : sig
1167 (******************************************************************************)
1168 (* *)
1169 (* Menhir *)
1170 (* *)
1171 (* François Pottier, Inria Paris *)
1172 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1173 (* *)
1174 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1175 (* terms of the GNU Library General Public License version 2, with a *)
1176 (* special exception on linking, as described in the file LICENSE. *)
1177 (* *)
1178 (******************************************************************************)
1179
1180 (** This module implements infinite arrays. **)
1181 type 'a t
1182
1183 (** [make x] creates an infinite array, where every slot contains [x]. **)
1184 val make: 'a -> 'a t
1185
1186 (** [get a i] returns the element contained at offset [i] in the array [a].
1187 Slots are numbered 0 and up. **)
1188 val get: 'a t -> int -> 'a
1189
1190 (** [set a i x] sets the element contained at offset [i] in the array
1191 [a] to [x]. Slots are numbered 0 and up. **)
1192 val set: 'a t -> int -> 'a -> unit
1193
1194 (** [extent a] is the length of an initial segment of the array [a]
1195 that is sufficiently large to contain all [set] operations ever
1196 performed. In other words, all elements beyond that segment have
1197 the default value. *)
1198 val extent: 'a t -> int
1199
1200 (** [domain a] is a fresh copy of an initial segment of the array [a]
1201 whose length is [extent a]. *)
1202 val domain: 'a t -> 'a array
1203 end
1204 module PackedIntArray : sig
1205 (******************************************************************************)
1206 (* *)
1207 (* Menhir *)
1208 (* *)
1209 (* François Pottier, Inria Paris *)
1210 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1211 (* *)
1212 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1213 (* terms of the GNU Library General Public License version 2, with a *)
1214 (* special exception on linking, as described in the file LICENSE. *)
1215 (* *)
1216 (******************************************************************************)
1217
1218 (* A packed integer array is represented as a pair of an integer [k] and
1219 a string [s]. The integer [k] is the number of bits per integer that we
1220 use. The string [s] is just an array of bits, which is read in 8-bit
1221 chunks. *)
1222
1223 (* The ocaml programming language treats string literals and array literals
1224 in slightly different ways: the former are statically allocated, while
1225 the latter are dynamically allocated. (This is rather arbitrary.) In the
1226 context of Menhir's table-based back-end, where compact, immutable
1227 integer arrays are needed, ocaml strings are preferable to ocaml arrays. *)
1228
1229 type t =
1230 int * string
1231
1232 (* [pack a] turns an array of integers into a packed integer array. *)
1233
1234 (* Because the sign bit is the most significant bit, the magnitude of
1235 any negative number is the word size. In other words, [pack] does
1236 not achieve any space savings as soon as [a] contains any negative
1237 numbers, even if they are ``small''. *)
1238
1239 val pack: int array -> t
1240
1241 (* [get t i] returns the integer stored in the packed array [t] at index [i]. *)
1242
1243 (* Together, [pack] and [get] satisfy the following property: if the index [i]
1244 is within bounds, then [get (pack a) i] equals [a.(i)]. *)
1245
1246 val get: t -> int -> int
1247
1248 (* [get1 t i] returns the integer stored in the packed array [t] at index [i].
1249 It assumes (and does not check) that the array's bit width is [1]. The
1250 parameter [t] is just a string. *)
1251
1252 val get1: string -> int -> int
1253
1254 (* [unflatten1 (n, data) i j] accesses the two-dimensional bitmap
1255 represented by [(n, data)] at indices [i] and [j]. The integer
1256 [n] is the width of the bitmap; the string [data] is the second
1257 component of the packed array obtained by encoding the table as
1258 a one-dimensional array. *)
1259
1260 val unflatten1: int * string -> int -> int -> int
1261
1262 end
1263 module RowDisplacement : sig
1264 (******************************************************************************)
1265 (* *)
1266 (* Menhir *)
1267 (* *)
1268 (* François Pottier, Inria Paris *)
1269 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1270 (* *)
1271 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1272 (* terms of the GNU Library General Public License version 2, with a *)
1273 (* special exception on linking, as described in the file LICENSE. *)
1274 (* *)
1275 (******************************************************************************)
1276
1277 (* This module compresses a two-dimensional table, where some values
1278 are considered insignificant, via row displacement. *)
1279
1280 (* A compressed table is represented as a pair of arrays. The
1281 displacement array is an array of offsets into the data array. *)
1282
1283 type 'a table =
1284 int array * (* displacement *)
1285 'a array (* data *)
1286
1287 (* [compress equal insignificant dummy m n t] turns the two-dimensional table
1288 [t] into a compressed table. The parameter [equal] is equality of data
1289 values. The parameter [wildcard] tells which data values are insignificant,
1290 and can thus be overwritten with other values. The parameter [dummy] is
1291 used to fill holes in the data array. [m] and [n] are the integer
1292 dimensions of the table [t]. *)
1293
1294 val compress:
1295 ('a -> 'a -> bool) ->
1296 ('a -> bool) ->
1297 'a ->
1298 int -> int ->
1299 'a array array ->
1300 'a table
1301
1302 (* [get ct i j] returns the value found at indices [i] and [j] in the
1303 compressed table [ct]. This function call is permitted only if the
1304 value found at indices [i] and [j] in the original table is
1305 significant -- otherwise, it could fail abruptly. *)
1306
1307 (* Together, [compress] and [get] have the property that, if the value
1308 found at indices [i] and [j] in an uncompressed table [t] is
1309 significant, then [get (compress t) i j] is equal to that value. *)
1310
1311 val get:
1312 'a table ->
1313 int -> int ->
1314 'a
1315
1316 (* [getget] is a variant of [get] which only requires read access,
1317 via accessors, to the two components of the table. *)
1318
1319 val getget:
1320 ('displacement -> int -> int) ->
1321 ('data -> int -> 'a) ->
1322 'displacement * 'data ->
1323 int -> int ->
1324 'a
1325
1326 end
1327 module LinearizedArray : sig
1328 (******************************************************************************)
1329 (* *)
1330 (* Menhir *)
1331 (* *)
1332 (* François Pottier, Inria Paris *)
1333 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1334 (* *)
1335 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1336 (* terms of the GNU Library General Public License version 2, with a *)
1337 (* special exception on linking, as described in the file LICENSE. *)
1338 (* *)
1339 (******************************************************************************)
1340
1341 (* An array of arrays (of possibly different lengths!) can be ``linearized'',
1342 i.e., encoded as a data array (by concatenating all of the little arrays)
1343 and an entry array (which contains offsets into the data array). *)
1344
1345 type 'a t =
1346 (* data: *) 'a array *
1347 (* entry: *) int array
1348
1349 (* [make a] turns the array of arrays [a] into a linearized array. *)
1350
1351 val make: 'a array array -> 'a t
1352
1353 (* [read la i j] reads the linearized array [la] at indices [i] and [j].
1354 Thus, [read (make a) i j] is equivalent to [a.(i).(j)]. *)
1355
1356 val read: 'a t -> int -> int -> 'a
1357
1358 (* [write la i j v] writes the value [v] into the linearized array [la]
1359 at indices [i] and [j]. *)
1360
1361 val write: 'a t -> int -> int -> 'a -> unit
1362
1363 (* [length la] is the number of rows of the array [la]. Thus, [length (make
1364 a)] is equivalent to [Array.length a]. *)
1365
1366 val length: 'a t -> int
1367
1368 (* [row_length la i] is the length of the row at index [i] in the linearized
1369 array [la]. Thus, [row_length (make a) i] is equivalent to [Array.length
1370 a.(i)]. *)
1371
1372 val row_length: 'a t -> int -> int
1373
1374 (* [read_row la i] reads the row at index [i], producing a list. Thus,
1375 [read_row (make a) i] is equivalent to [Array.to_list a.(i)]. *)
1376
1377 val read_row: 'a t -> int -> 'a list
1378
1379 (* The following variants read the linearized array via accessors
1380 [get_data : int -> 'a] and [get_entry : int -> int]. *)
1381
1382 val row_length_via:
1383 (* get_entry: *) (int -> int) ->
1384 (* i: *) int ->
1385 int
1386
1387 val read_via:
1388 (* get_data: *) (int -> 'a) ->
1389 (* get_entry: *) (int -> int) ->
1390 (* i: *) int ->
1391 (* j: *) int ->
1392 'a
1393
1394 val read_row_via:
1395 (* get_data: *) (int -> 'a) ->
1396 (* get_entry: *) (int -> int) ->
1397 (* i: *) int ->
1398 'a list
1399
1400 end
1401 module TableFormat : sig
1402 (******************************************************************************)
1403 (* *)
1404 (* Menhir *)
1405 (* *)
1406 (* François Pottier, Inria Paris *)
1407 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1408 (* *)
1409 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1410 (* terms of the GNU Library General Public License version 2, with a *)
1411 (* special exception on linking, as described in the file LICENSE. *)
1412 (* *)
1413 (******************************************************************************)
1414
1415 (* This signature defines the format of the parse tables. It is used as
1416 an argument to [TableInterpreter.Make]. *)
1417
1418 module type TABLES = sig
1419
1420 (* This is the parser's type of tokens. *)
1421
1422 type token
1423
1424 (* This maps a token to its internal (generation-time) integer code. *)
1425
1426 val token2terminal: token -> int
1427
1428 (* This is the integer code for the error pseudo-token. *)
1429
1430 val error_terminal: int
1431
1432 (* This maps a token to its semantic value. *)
1433
1434 val token2value: token -> Obj.t
1435
1436 (* Traditionally, an LR automaton is described by two tables, namely, an
1437 action table and a goto table. See, for instance, the Dragon book.
1438
1439 The action table is a two-dimensional matrix that maps a state and a
1440 lookahead token to an action. An action is one of: shift to a certain
1441 state, reduce a certain production, accept, or fail.
1442
1443 The goto table is a two-dimensional matrix that maps a state and a
1444 non-terminal symbol to either a state or undefined. By construction, this
1445 table is sparse: its undefined entries are never looked up. A compression
1446 technique is free to overlap them with other entries.
1447
1448 In Menhir, things are slightly different. If a state has a default
1449 reduction on token [#], then that reduction must be performed without
1450 consulting the lookahead token. As a result, we must first determine
1451 whether that is the case, before we can obtain a lookahead token and use it
1452 as an index in the action table.
1453
1454 Thus, Menhir's tables are as follows.
1455
1456 A one-dimensional default reduction table maps a state to either ``no
1457 default reduction'' (encoded as: 0) or ``by default, reduce prod''
1458 (encoded as: 1 + prod). The action table is looked up only when there
1459 is no default reduction. *)
1460
1461 val default_reduction: PackedIntArray.t
1462
1463 (* Menhir follows Dencker, Dürre and Heuft, who point out that, although the
1464 action table is not sparse by nature (i.e., the error entries are
1465 significant), it can be made sparse by first factoring out a binary error
1466 matrix, then replacing the error entries in the action table with undefined
1467 entries. Thus:
1468
1469 A two-dimensional error bitmap maps a state and a terminal to either
1470 ``fail'' (encoded as: 0) or ``do not fail'' (encoded as: 1). The action
1471 table, which is now sparse, is looked up only in the latter case. *)
1472
1473 (* The error bitmap is flattened into a one-dimensional table; its width is
1474 recorded so as to allow indexing. The table is then compressed via
1475 [PackedIntArray]. The bit width of the resulting packed array must be
1476 [1], so it is not explicitly recorded. *)
1477
1478 (* The error bitmap does not contain a column for the [#] pseudo-terminal.
1479 Thus, its width is [Terminal.n - 1]. We exploit the fact that the integer
1480 code assigned to [#] is greatest: the fact that the right-most column
1481 in the bitmap is missing does not affect the code for accessing it. *)
1482
1483 val error: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *)
1484
1485 (* A two-dimensional action table maps a state and a terminal to one of
1486 ``shift to state s and discard the current token'' (encoded as: s | 10),
1487 ``shift to state s without discarding the current token'' (encoded as: s |
1488 11), or ``reduce prod'' (encoded as: prod | 01). *)
1489
1490 (* The action table is first compressed via [RowDisplacement], then packed
1491 via [PackedIntArray]. *)
1492
1493 (* Like the error bitmap, the action table does not contain a column for the
1494 [#] pseudo-terminal. *)
1495
1496 val action: PackedIntArray.t * PackedIntArray.t
1497
1498 (* A one-dimensional lhs table maps a production to its left-hand side (a
1499 non-terminal symbol). *)
1500
1501 val lhs: PackedIntArray.t
1502
1503 (* A two-dimensional goto table maps a state and a non-terminal symbol to
1504 either undefined (encoded as: 0) or a new state s (encoded as: 1 + s). *)
1505
1506 (* The goto table is first compressed via [RowDisplacement], then packed
1507 via [PackedIntArray]. *)
1508
1509 val goto: PackedIntArray.t * PackedIntArray.t
1510
1511 (* The number of start productions. A production [prod] is a start
1512 production if and only if [prod < start] holds. This is also the
1513 number of start symbols. A nonterminal symbol [nt] is a start
1514 symbol if and only if [nt < start] holds. *)
1515
1516 val start: int
1517
1518 (* A one-dimensional semantic action table maps productions to semantic
1519 actions. The calling convention for semantic actions is described in
1520 [EngineTypes]. This table contains ONLY NON-START PRODUCTIONS, so the
1521 indexing is off by [start]. Be careful. *)
1522
1523 val semantic_action: ((int, Obj.t, token) EngineTypes.env ->
1524 (int, Obj.t) EngineTypes.stack) array
1525
1526 (* The parser defines its own [Error] exception. This exception can be
1527 raised by semantic actions and caught by the engine, and raised by the
1528 engine towards the final user. *)
1529
1530 exception Error
1531
1532 (* The parser indicates whether to generate a trace. Generating a
1533 trace requires two extra tables, which respectively map a
1534 terminal symbol and a production to a string. *)
1535
1536 val trace: (string array * string array) option
1537
1538 end
1539 end
1540 module InspectionTableFormat : sig
1541 (******************************************************************************)
1542 (* *)
1543 (* Menhir *)
1544 (* *)
1545 (* François Pottier, Inria Paris *)
1546 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1547 (* *)
1548 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1549 (* terms of the GNU Library General Public License version 2, with a *)
1550 (* special exception on linking, as described in the file LICENSE. *)
1551 (* *)
1552 (******************************************************************************)
1553
1554 (* This signature defines the format of the tables that are produced (in
1555 addition to the tables described in [TableFormat]) when the command line
1556 switch [--inspection] is enabled. It is used as an argument to
1557 [InspectionTableInterpreter.Make]. *)
1558
1559 module type TABLES = sig
1560
1561 (* The types of symbols. *)
1562
1563 include IncrementalEngine.SYMBOLS
1564
1565 (* The type ['a lr1state] describes an LR(1) state. The generated parser defines
1566 it internally as [int]. *)
1567
1568 type 'a lr1state
1569
1570 (* Some of the tables that follow use encodings of (terminal and
1571 nonterminal) symbols as integers. So, we need functions that
1572 map the integer encoding of a symbol to its algebraic encoding. *)
1573
1574 val terminal: int -> xsymbol
1575 val nonterminal: int -> xsymbol
1576
1577 (* The left-hand side of every production already appears in the
1578 signature [TableFormat.TABLES], so we need not repeat it here. *)
1579
1580 (* The right-hand side of every production. This a linearized array
1581 of arrays of integers, whose [data] and [entry] components have
1582 been packed. The encoding of symbols as integers in described in
1583 [TableBackend]. *)
1584
1585 val rhs: PackedIntArray.t * PackedIntArray.t
1586
1587 (* A mapping of every (non-initial) state to its LR(0) core. *)
1588
1589 val lr0_core: PackedIntArray.t
1590
1591 (* A mapping of every LR(0) state to its set of LR(0) items. Each item is
1592 represented in its packed form (see [Item]) as an integer. Thus the
1593 mapping is an array of arrays of integers, which is linearized and
1594 packed, like [rhs]. *)
1595
1596 val lr0_items: PackedIntArray.t * PackedIntArray.t
1597
1598 (* A mapping of every LR(0) state to its incoming symbol, if it has one. *)
1599
1600 val lr0_incoming: PackedIntArray.t
1601
1602 (* A table that tells which non-terminal symbols are nullable. *)
1603
1604 val nullable: string
1605 (* This is a packed int array of bit width 1. It can be read
1606 using [PackedIntArray.get1]. *)
1607
1608 (* A two-table dimensional table, indexed by a nonterminal symbol and
1609 by a terminal symbol (other than [#]), encodes the FIRST sets. *)
1610
1611 val first: int (* width of the bitmap *) * string (* second component of [PackedIntArray.t] *)
1612
1613 end
1614
1615 end
1616 module InspectionTableInterpreter : sig
1617 (******************************************************************************)
1618 (* *)
1619 (* Menhir *)
1620 (* *)
1621 (* François Pottier, Inria Paris *)
1622 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1623 (* *)
1624 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1625 (* terms of the GNU Library General Public License version 2, with a *)
1626 (* special exception on linking, as described in the file LICENSE. *)
1627 (* *)
1628 (******************************************************************************)
1629
1630 (* This functor is invoked inside the generated parser, in [--table] mode. It
1631 produces no code! It simply constructs the types [symbol] and [xsymbol] on
1632 top of the generated types [terminal] and [nonterminal]. *)
1633
1634 module Symbols (T : sig
1635
1636 type 'a terminal
1637 type 'a nonterminal
1638
1639 end)
1640
1641 : IncrementalEngine.SYMBOLS
1642 with type 'a terminal := 'a T.terminal
1643 and type 'a nonterminal := 'a T.nonterminal
1644
1645 (* This functor is invoked inside the generated parser, in [--table] mode. It
1646 constructs the inspection API on top of the inspection tables described in
1647 [InspectionTableFormat]. *)
1648
1649 module Make
1650 (TT : TableFormat.TABLES)
1651 (IT : InspectionTableFormat.TABLES
1652 with type 'a lr1state = int)
1653 (ET : EngineTypes.TABLE
1654 with type terminal = int
1655 and type nonterminal = int
1656 and type semantic_value = Obj.t)
1657 (E : sig
1658 type 'a env = (ET.state, ET.semantic_value, ET.token) EngineTypes.env
1659 end)
1660
1661 : IncrementalEngine.INSPECTION
1662 with type 'a terminal := 'a IT.terminal
1663 and type 'a nonterminal := 'a IT.nonterminal
1664 and type 'a lr1state := 'a IT.lr1state
1665 and type production := int
1666 and type 'a env := 'a E.env
1667 end
1668 module TableInterpreter : sig
1669 (******************************************************************************)
1670 (* *)
1671 (* Menhir *)
1672 (* *)
1673 (* François Pottier, Inria Paris *)
1674 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
1675 (* *)
1676 (* Copyright Inria. All rights reserved. This file is distributed under the *)
1677 (* terms of the GNU Library General Public License version 2, with a *)
1678 (* special exception on linking, as described in the file LICENSE. *)
1679 (* *)
1680 (******************************************************************************)
1681
1682 (* This module provides a thin decoding layer for the generated tables, thus
1683 providing an API that is suitable for use by [Engine.Make]. It is part of
1684 [MenhirLib]. *)
1685
1686 (* The exception [Error] is declared within the generated parser. This is
1687 preferable to pre-declaring it here, as it ensures that each parser gets
1688 its own, distinct [Error] exception. This is consistent with the code-based
1689 back-end. *)
1690
1691 (* This functor is invoked by the generated parser. *)
1692
1693 module MakeEngineTable
1694 (T : TableFormat.TABLES)
1695 : EngineTypes.TABLE
1696 with type state = int
1697 and type token = T.token
1698 and type semantic_value = Obj.t
1699 and type production = int
1700 and type terminal = int
1701 and type nonterminal = int
1702 end
1703 module StaticVersion : sig
1704 val require_20181113 : unit
1705 end
1706