1 /**************************************************************************/
2 /* */
3 /* OCaml */
4 /* */
5 /* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
6 /* */
7 /* Copyright 1996 Institut National de Recherche en Informatique et */
8 /* en Automatique. */
9 /* */
10 /* All rights reserved. This file is distributed under the terms of */
11 /* the GNU Lesser General Public License version 2.1, with the */
12 /* special exception on linking described in the file LICENSE. */
13 /* */
14 /**************************************************************************/
15
16 /* Based on public-domain code from Berkeley Yacc */
17
18 #ifndef DEBUG
19 #define NDEBUG
20 #endif
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #define CAML_INTERNALS
30 #include "caml/config.h"
31 #include "caml/mlvalues.h"
32 #include "caml/osdeps.h"
33
34 #define caml_stat_strdup strdup
35
/* machine-dependent definitions */
/* the numeric limits below are taken from <limits.h>, so they follow  */
/* the machine the program is compiled for                             */

/*  MAXCHAR is the largest unsigned character value             */
/*  MAXSHORT is the largest value of a C short                  */
/*  MINSHORT is the most negative value of a C short            */
/*  MAXTABLE is the maximum table size                          */
/*  BITS_PER_WORD is the number of bits in a C unsigned         */
/*  WORDSIZE computes the number of words needed to             */
/*      store n bits                                            */
/*  BIT returns the value (0 or 1) of the n-th bit of the       */
/*      unsigned array r (0-indexed)                            */
/*  SETBIT sets the n-th bit of the unsigned array r            */
/*      (0-indexed)                                             */
/*  BIT and SETBIT evaluate n twice: do not pass an             */
/*      expression with side effects                            */

#define MAXCHAR         UCHAR_MAX
#define MAXSHORT        SHRT_MAX
#define MINSHORT        SHRT_MIN
#define MAXTABLE        32500

#define BITS_PER_WORD   (CHAR_BIT*sizeof(unsigned))
#define WORDSIZE(n)     (((n)+(BITS_PER_WORD-1))/BITS_PER_WORD)
#define BIT(r, n)       ((((r)[(n)/BITS_PER_WORD])>>((n)%BITS_PER_WORD))&1)
/* The mask is built from 1u: shifting a signed 1 into the top bit of */
/* the word is undefined behavior.                                    */
#define SETBIT(r, n)    ((r)[(n)/BITS_PER_WORD]|=(1u<<((n)%BITS_PER_WORD)))
60
/* named character constants */

#define NUL             '\0'    /* null character */
#define NEWLINE         '\n'    /* line feed */
#define SP              ' '     /* space */
#define BS              '\b'    /* backspace */
#define HT              '\t'    /* horizontal tab */
#define VT              '\v'    /* vertical tab */
#define CR              '\r'    /* carriage return */
#define FF              '\f'    /* form feed */
#define QUOTE           '\''    /* single quote */
#define DOUBLE_QUOTE    '"'     /* double quote */
#define BACKSLASH       '\\'    /* backslash */
74
75
/* defines for constructing filenames */
/* Each suffix literal is wrapped in T(), which is not defined in this  */
/* header.  NOTE(review): presumably T() yields a literal of the same   */
/* character type as char_os -- confirm in the included caml headers.   */

#define OUTPUT_SUFFIX T(".ml")
#define VERBOSE_SUFFIX T(".output")
#define INTERFACE_SUFFIX T(".mli")
81
/* keyword codes */
/* Eight distinct consecutive integer codes, 0 through 7.               */
/* NOTE(review): the names suggest one code per grammar-file directive; */
/* confirm against the code that produces and consumes them.            */

#define TOKEN 0
#define LEFT 1
#define RIGHT 2
#define NONASSOC 3
#define MARK 4
#define TEXT 5
#define TYPE 6
#define START 7
92
/* symbol classes */
/* NOTE(review): presumably the values stored in the `class` field of */
/* struct bucket -- confirm against the users of that field.          */

#define UNKNOWN 0
#define TERM 1
#define NONTERM 2
98
99
/* the undefined value */
/* The expansion is parenthesized so that the negative literal stays a */
/* single operand wherever the macro is used.                          */

#define UNDEFINED (-1)
103
104
/* action codes */
/* NOTE(review): presumably the values stored in the `action_code` */
/* field of struct action -- confirm against the parser builder.   */

#define SHIFT 1
#define REDUCE 2
109
110
/* character macros */

/*  IS_IDENT is nonzero when c is alphanumeric or one of '_', '.', '$'  */
/*      The argument is converted to unsigned char before it reaches    */
/*      isalnum: passing a negative plain char to a <ctype.h> function  */
/*      is undefined behavior.  c is evaluated more than once.          */
/*  IS_OCTAL is nonzero when c is one of the digits '0' to '7'          */
/*  NUMERIC_VALUE converts a digit character to its integer value       */

#define IS_IDENT(c)     (isalnum((unsigned char)(c)) || (c) == '_' || (c) == '.' || (c) == '$')
#define IS_OCTAL(c)     ((c) >= '0' && (c) <= '7')
#define NUMERIC_VALUE(c)        ((c) - '0')
116
117
/* symbol macros */
/* A symbol number below start_symbol is a token; start_symbol and */
/* every number above it is a variable.                            */

#define ISTOKEN(s)      ((s) < start_symbol)
#define ISVAR(s)        (!ISTOKEN(s))
122
123
/* storage allocation macros */

/*  CALLOC, MALLOC and REALLOC forward to the C library and convert  */
/*      their size arguments to size_t, the type those functions     */
/*      take; converting to unsigned would silently truncate sizes   */
/*      above UINT_MAX where size_t is wider than unsigned.          */
/*      They return NULL on failure.                                 */
/*  FREE releases a block obtained from the macros above             */
/*  NEW(t) and NEW2(n,t) obtain room for one t / n objects of type t */
/*      from allocate(), whose parameter is declared unsigned; the   */
/*      cast in NEW2 makes that conversion explicit.                 */

#define CALLOC(k,n)     (calloc((size_t)(k),(size_t)(n)))
#define FREE(x)         (free((void*)(x)))
#define MALLOC(n)       (malloc((size_t)(n)))
#define NEW(t)          ((t*)allocate(sizeof(t)))
#define NEW2(n,t)       ((t*)allocate((unsigned)((n)*sizeof(t))))
#define REALLOC(p,n)    (realloc((void*)(p),(size_t)(n)))
132
133
/* A symbol table entry: two pointers to other entries (link, next), */
/* the name and tag strings, three short attributes and four         */
/* one-byte attributes.                                              */

typedef struct bucket bucket;

struct bucket {
    bucket *link;
    bucket *next;
    char   *name;
    char   *tag;
    short   value;
    short   index;
    short   prec;
    char    class;
    char    assoc;
    char    entry;
    char    true_token;
};
151
/* TABLE_SIZE is the number of entries in the symbol table. */
/* TABLE_SIZE must be a power of two. */
/* NOTE(review): the power-of-two requirement presumably lets a hash  */
/* value be reduced with the mask (TABLE_SIZE - 1) -- confirm in the  */
/* symbol table code before changing this value.                      */

#define TABLE_SIZE 4096
156
/* One state of the LR(0) state machine. */

typedef struct core core;

struct core {
    core  *next;
    core  *link;
    short  number;
    short  accessing_symbol;
    short  nitems;
    /* Declared with a single element.  NOTE(review): presumably the   */
    /* creator over-allocates the struct so that items[] holds nitems  */
    /* entries; keep the [1] form unless every sizeof(core) user is    */
    /* audited, since a flexible array member changes the struct size. */
    short  items[1];
};
169
170
/* Record of shifts; records are chained through next. */

typedef struct shifts shifts;

struct shifts {
    shifts *next;
    short   number;
    short   nshifts;
    /* Declared with a single element.  NOTE(review): presumably   */
    /* over-allocated so that shift[] holds nshifts entries; the   */
    /* [1] form is kept because it determines sizeof(shifts).      */
    short   shift[1];
};
181
182
/* Record of reductions; records are chained through next. */

typedef struct reductions reductions;

struct reductions {
    reductions *next;
    short       number;
    short       nreds;
    /* Declared with a single element.  NOTE(review): presumably     */
    /* over-allocated so that rules[] holds nreds entries; the [1]   */
    /* form is kept because it determines sizeof(reductions).        */
    short       rules[1];
};
193
194
/* One parser action; actions are chained through next. */

typedef struct action action;

struct action {
    action *next;
    short   symbol;
    short   number;
    short   prec;
    char    action_code;
    char    assoc;
    char    suppressed;
};
208
209
210 /* global variables */
211
212 extern char lflag;
213 extern char rflag;
214 extern char tflag;
215 extern char vflag;
216 extern char qflag;
217 extern char sflag;
218 extern char eflag;
219 extern char big_endian;
220
221 /* myname should be UTF-8 encoded */
222 extern char *myname;
223 extern char *cptr;
224 extern char *line;
225 extern int lineno;
226 /* virtual_input_file_name should be UTF-8 encoded */
227 extern char *virtual_input_file_name;
228 extern int outline;
229
230 extern char_os *action_file_name;
231 extern char_os *entry_file_name;
232 extern char_os *code_file_name;
233 extern char_os *input_file_name;
234 extern char_os *output_file_name;
235 extern char_os *text_file_name;
236 extern char_os *verbose_file_name;
237 extern char_os *interface_file_name;
238
239 /* UTF-8 versions of code_file_name and input_file_name */
240 extern char *code_file_name_disp;
241 extern char *input_file_name_disp;
242
243 extern FILE *action_file;
244 extern FILE *entry_file;
245 extern FILE *code_file;
246 extern FILE *input_file;
247 extern FILE *output_file;
248 extern FILE *text_file;
249 extern FILE *verbose_file;
250 extern FILE *interface_file;
251
252 extern int nitems;
253 extern int nrules;
254 extern int ntotalrules;
255 extern int nsyms;
256 extern int ntokens;
257 extern int nvars;
258 extern int ntags;
259
260 #define line_format "# %d \"%s\"\n"
261
262 extern int start_symbol;
263 extern char **symbol_name;
264 extern short *symbol_value;
265 extern short *symbol_prec;
266 extern char *symbol_assoc;
267 extern char **symbol_tag;
268 extern char *symbol_true_token;
269
270 extern short *ritem;
271 extern short *rlhs;
272 extern short *rrhs;
273 extern short *rprec;
274 extern char *rassoc;
275
276 extern short **derives;
277 extern char *nullable;
278
279 extern bucket *first_symbol;
280 extern bucket *last_symbol;
281
282 extern int nstates;
283 extern core *first_state;
284 extern shifts *first_shift;
285 extern reductions *first_reduction;
286 extern short *accessing_symbol;
287 extern core **state_table;
288 extern shifts **shift_table;
289 extern reductions **reduction_table;
290 extern unsigned *LA;
291 extern short *LAruleno;
292 extern short *lookaheads;
293 extern short *goto_map;
294 extern short *from_state;
295 extern short *to_state;
296
297 extern action **parser;
298 extern int SRtotal;
299 extern int RRtotal;
300 extern short *SRconflicts;
301 extern short *RRconflicts;
302 extern short *defred;
303 extern short *rules_used;
304 extern short nunused;
305 extern short final_state;
306
307 /* global functions */
308
309 extern char *allocate(unsigned int n);
310 extern bucket *lookup(char *name);
311 extern bucket *make_bucket(char *name);
312 extern action *parse_actions(register int stateno);
313 extern action *get_shifts(int stateno);
314 extern action *add_reductions(int stateno, register action *actions);
315 extern action *add_reduce(register action *actions, register int ruleno, register int symbol);
316 extern void closure (short int *nucleus, int n);
317 extern void create_symbol_table (void);
318 extern void default_action_error (void) Noreturn;
319 extern void done (int k) Noreturn;
320 extern void entry_without_type (char *s) Noreturn;
321 extern void fatal (char *msg) Noreturn;
322 extern void finalize_closure (void);
323 extern void free_parser (void);
324 extern void free_symbol_table (void);
325 extern void free_symbols (void);
326 extern void illegal_character (char *c_cptr) Noreturn;
327 extern void illegal_token_ref (int i, char *name) Noreturn;
328 extern void lalr (void);
329 extern void lr0 (void);
330 extern void make_parser (void);
331 extern void no_grammar (void) Noreturn;
332 extern void no_space (void) Noreturn;
333 extern void open_error (char_os *filename) Noreturn;
334 extern void output (void);
335 extern void prec_redeclared (void);
336 extern void polymorphic_entry_point(char *s) Noreturn;
337 extern void forbidden_conflicts (void);
338 extern void reader (void);
339 extern void reflexive_transitive_closure (unsigned int *R, int n);
340 extern void reprec_warning (char *s);
341 extern void retyped_warning (char *s);
342 extern void revalued_warning (char *s);
343 extern void set_first_derives (void);
344 extern void syntax_error (int st_lineno, char *st_line, char *st_cptr) Noreturn, terminal_lhs (int s_lineno) Noreturn;
345 extern void terminal_start (char *s) Noreturn;
346 extern void tokenized_start (char *s) Noreturn;
347 extern void too_many_entries (void) Noreturn;
348 extern void undefined_goal (char *s);
349 extern void undefined_symbol (char *s);
350 extern void unexpected_EOF (void) Noreturn;
351 extern void unknown_rhs (int i) Noreturn;
352 extern void unterminated_action (int a_lineno, char *a_line, char *a_cptr) Noreturn;
353 extern void unterminated_comment (int c_lineno, char *c_line, char *c_cptr) Noreturn;
354 extern void unterminated_string (int s_lineno, char *s_line, char *s_cptr) Noreturn;
355 extern void unterminated_text (int t_lineno, char *t_line, char *t_cptr) Noreturn;
356 extern void used_reserved (char *s) Noreturn;
357 extern void verbose (void);
358 extern void write_section (char **section);
359