1 (**************************************************************************)
2 (* *)
3 (* OCaml *)
4 (* *)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
6 (* *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. *)
9 (* *)
10 (* All rights reserved. This file is distributed under the terms of *)
11 (* the GNU Lesser General Public License version 2.1, with the *)
12 (* special exception on linking described in the file LICENSE. *)
13 (* *)
14 (**************************************************************************)
15
16 (* Output the DFA tables and its entry points *)
17
18 open Printf
19 open Lexgen
20 open Common
21
22 type ctx = {
23 oc: out_channel;
24 has_refill: bool;
25 goto_state: (ctx -> string -> int -> unit);
26 last_action: int option;
27 }
28
29 let pr ctx = fprintf ctx.oc
30
31 let output_auto_defs ctx =
32 if ctx.has_refill then begin
33 pr ctx "\n";
34 pr ctx "let rec __ocaml_lex_refill_buf lexbuf _buf _len _curr _last \
35 _last_action state k =\n";
36 pr ctx " if lexbuf.Lexing.lex_eof_reached then\n";
37 pr ctx " state lexbuf _last_action _buf _len _curr _last k 256\n";
38 pr ctx " else begin\n";
39 pr ctx " lexbuf.Lexing.lex_curr_pos <- _curr;\n";
40 pr ctx " lexbuf.Lexing.lex_last_pos <- _last;\n";
41 pr ctx " __ocaml_lex_refill\n";
42 pr ctx " (fun lexbuf ->\n";
43 pr ctx " let _curr = lexbuf.Lexing.lex_curr_pos in\n";
44 pr ctx " let _last = lexbuf.Lexing.lex_last_pos in\n";
45 pr ctx " let _len = lexbuf.Lexing.lex_buffer_len in\n";
46 pr ctx " let _buf = lexbuf.Lexing.lex_buffer in\n";
47 pr ctx " if _curr < _len then\n";
48 pr ctx " state lexbuf _last_action _buf _len (_curr + 1) \
49 _last k\n";
50 pr ctx " (Char.code (Bytes.unsafe_get _buf _curr))\n";
51 pr ctx " else\n";
52 pr ctx " __ocaml_lex_refill_buf lexbuf _buf _len _curr _last \
53 _last_action\n";
54 pr ctx " state k\n";
55 pr ctx " )\n";
56 pr ctx " lexbuf\n";
57 pr ctx " end\n";
58 pr ctx "\n";
59 end else begin
60 pr ctx "\n";
61 pr ctx "let rec __ocaml_lex_refill_buf lexbuf _buf _len _curr _last =\n";
62 pr ctx " if lexbuf.Lexing.lex_eof_reached then\n";
63 pr ctx " 256, _buf, _len, _curr, _last\n";
64 pr ctx " else begin\n";
65 pr ctx " lexbuf.Lexing.lex_curr_pos <- _curr;\n";
66 pr ctx " lexbuf.Lexing.lex_last_pos <- _last;\n";
67 pr ctx " lexbuf.Lexing.refill_buff lexbuf;\n";
68 pr ctx " let _curr = lexbuf.Lexing.lex_curr_pos in\n";
69 pr ctx " let _last = lexbuf.Lexing.lex_last_pos in\n";
70 pr ctx " let _len = lexbuf.Lexing.lex_buffer_len in\n";
71 pr ctx " let _buf = lexbuf.Lexing.lex_buffer in\n";
72 pr ctx " if _curr < _len then\n";
73 pr ctx " Char.code (Bytes.unsafe_get _buf _curr), _buf, _len, \
74 (_curr + 1), _last\n";
75 pr ctx " else\n";
76 pr ctx " __ocaml_lex_refill_buf lexbuf _buf _len _curr _last\n";
77 pr ctx " end\n";
78 pr ctx "\n";
79 end
80
81 let output_memory_actions pref oc = function
82 | [] -> ()
83 | mvs ->
84 output_string oc pref;
85 output_string oc "(* " ;
86 fprintf oc "L=%d " (List.length mvs) ;
87 List.iter
88 (fun mv -> match mv with
89 | Copy (tgt, src) ->
90 fprintf oc "[%d] <- [%d] ;" tgt src
91 | Set tgt ->
92 fprintf oc "[%d] <- p ; " tgt)
93 mvs ;
94 output_string oc " *)\n" ;
95 List.iter
96 (fun mv -> match mv with
97 | Copy (tgt, src) ->
98 fprintf oc
99 "%s%a <- %a ;\n"
100 pref output_mem_access tgt output_mem_access src
101 | Set tgt ->
102 fprintf oc "%s%a <- _curr;\n"
103 pref output_mem_access tgt)
104 mvs
105
106 let output_pats ctx = function
107 | [x] -> pr ctx "| %d" x
108 | pats -> List.iter (fun p -> pr ctx "|%d" p) pats
109
110 let last_action ctx =
111 match ctx.last_action with
112 | None -> "_last_action"
113 | Some i -> Printf.sprintf "%i (* = last_action *)" i
114
115 let output_action ctx pref mems r =
116 output_memory_actions pref ctx.oc mems;
117 match r with
118 | Backtrack ->
119 pr ctx "%slet _curr = _last in\n\
120 %slexbuf.Lexing.lex_curr_pos <- _curr;\n\
121 %slexbuf.Lexing.lex_last_pos <- _last;\n"
122 pref pref pref;
123 if ctx.has_refill then
124 pr ctx "%sk lexbuf %s\n" pref (last_action ctx)
125 else
126 pr ctx "%s%s\n" pref (last_action ctx)
127 | Goto n ->
128 ctx.goto_state ctx pref n
129
130 let output_pat ctx i =
131 if i >= 256 then
132 pr ctx "|eof"
133 else
134 pr ctx "|'%s'" (Char.escaped (Char.chr i))
135
136 let output_clause ctx pref pats mems r =
137 pr ctx "%s(* " pref;
138 List.iter (output_pat ctx) pats;
139 pr ctx " *)\n%s" pref;
140 output_pats ctx pats;
141 pr ctx " ->\n";
142 output_action ctx (" "^pref) mems r
143
144 let output_default_clause ctx pref mems r =
145 pr ctx "%s| _ ->\n" pref;
146 output_action ctx (" "^pref) mems r
147
148 let output_moves ctx pref moves =
149 let t = Hashtbl.create 17 in
150 let add_move i (m,mems) =
151 let mems,r = try Hashtbl.find t m with Not_found -> mems,[] in
152 Hashtbl.replace t m (mems,(i::r)) in
153
154 for i = 0 to 256 do
155 add_move i moves.(i)
156 done ;
157
158 let most_frequent = ref Backtrack
159 and most_mems = ref []
160 and size = ref 0 in
161 Hashtbl.iter
162 (fun m (mems,pats) ->
163 let size_m = List.length pats in
164 if size_m > !size then begin
165 most_frequent := m ;
166 most_mems := mems ;
167 size := size_m
168 end)
169 t ;
170 Hashtbl.iter
171 (fun m (mems,pats) ->
172 if m <> !most_frequent then
173 output_clause ctx pref (List.rev pats) mems m)
174 t ;
175 output_default_clause ctx pref !most_mems !most_frequent
176
177
178 let output_tag_actions pref ctx mvs =
179 pr ctx "%s(*" pref;
180 List.iter
181 (fun i -> match i with
182 | SetTag (t,m) -> pr ctx " t%d <- [%d] ;" t m
183 | EraseTag t -> pr ctx " t%d <- -1 ;" t)
184 mvs ;
185 pr ctx " *)\n" ;
186 List.iter
187 (fun i -> match i with
188 | SetTag (t,m) ->
189 pr ctx "%s%a <- %a ;\n"
190 pref output_mem_access t output_mem_access m
191 | EraseTag t ->
192 pr ctx "%s%a <- -1 ;\n"
193 pref output_mem_access t)
194 mvs
195
196 let output_trans_body pref ctx = function
197 | Perform (n,mvs) ->
198 output_tag_actions pref ctx mvs ;
199 pr ctx "%slexbuf.Lexing.lex_curr_pos <- _curr;\n" pref;
200 pr ctx "%slexbuf.Lexing.lex_last_pos <- _last;\n" pref;
201 pr ctx "%s%s%d\n" pref (if ctx.has_refill then "k lexbuf " else "") n
202 | Shift (trans, move) ->
203 let ctx =
204 match trans with
205 | Remember (n,mvs) ->
206 output_tag_actions pref ctx mvs ;
207 pr ctx "%slet _last = _curr in\n" pref;
208 begin match ctx.last_action with
209 | Some i when i = n ->
210 pr ctx "%s(* let _last_action = %d in*)\n" pref n;
211 ctx
212 | _ ->
213 pr ctx "%slet _last_action = %d in\n" pref n;
214 {ctx with last_action = Some n}
215 end
216 | No_remember ->
217 ctx
218 in
219 if ctx.has_refill then begin
220 (* TODO: bind this 'state' function at toplevel instead *)
221 pr ctx
222 "%slet state lexbuf _last_action _buf _len _curr _last k = function\n"
223 pref;
224 output_moves ctx pref move;
225 pr ctx "%sin\n\
226 %sif _curr >= _len then\n\
227 %s __ocaml_lex_refill_buf lexbuf _buf _len _curr _last \
228 _last_action state k\n\
229 %selse\n\
230 %s state lexbuf _last_action _buf _len (_curr + 1) _last k\n\
231 %s (Char.code (Bytes.unsafe_get _buf _curr))\n"
232 pref pref pref pref pref pref
233 end
234 else begin
235 pr ctx "%slet next_char, _buf, _len, _curr, _last =\n\
236 %s if _curr >= _len then\n\
237 %s __ocaml_lex_refill_buf lexbuf _buf _len _curr _last\n\
238 %s else\n\
239 %s Char.code (Bytes.unsafe_get _buf _curr),\n\
240 %s _buf, _len, (_curr + 1), _last\n\
241 %sin\n\
242 %sbegin match next_char with\n"
243 pref pref pref pref pref pref pref pref;
244 output_moves ctx (pref ^ " ") move;
245 pr ctx "%send\n" pref
246 end
247
248 let output_automata ctx auto inline =
249 output_auto_defs ctx;
250 let n = Array.length auto in
251 let first = ref true in
252 for i = 0 to n-1 do
253 if not inline.(i) then begin
254 pr ctx
255 "%s __ocaml_lex_state%d lexbuf _last_action _buf _len _curr _last %s=\n"
256 (if !first then "let rec" else "\nand")
257 i
258 (if ctx.has_refill then "k " else "");
259 output_trans_body " " ctx auto.(i);
260 first := false;
261 end
262 done;
263 pr ctx "\n\n"
264
265
266 (* Output the entries *)
267
268 let output_init ctx pref e init_moves =
269 if e.auto_mem_size > 0 then
270 pr ctx "%slexbuf.Lexing.lex_mem <- Array.make %d (-1);\n"
271 pref e.auto_mem_size;
272 pr ctx "%slet _curr = lexbuf.Lexing.lex_curr_pos in\n" pref;
273 pr ctx "%slet _last = _curr in\n" pref;
274 pr ctx "%slet _len = lexbuf.Lexing.lex_buffer_len in\n" pref;
275 pr ctx "%slet _buf = lexbuf.Lexing.lex_buffer in\n" pref;
276 pr ctx "%slet _last_action = -1 in\n" pref;
277 pr ctx "%slexbuf.Lexing.lex_start_pos <- _curr;\n" pref;
278 output_memory_actions pref ctx.oc init_moves
279
280 let output_rules ic ctx pref tr e =
281 pr ctx "%sbegin\n" pref;
282 pr ctx "%s let _curr_p = lexbuf.Lexing.lex_curr_p in\n" pref;
283 pr ctx "%s if _curr_p != Lexing.dummy_pos then begin\n" pref;
284 pr ctx "%s lexbuf.Lexing.lex_start_p <- _curr_p;\n" pref;
285 pr ctx "%s lexbuf.Lexing.lex_curr_p <-\n" pref;
286 pr ctx "%s {_curr_p with Lexing.pos_cnum =\n" pref;
287 pr ctx "%s lexbuf.Lexing.lex_abs_pos+lexbuf.Lexing.lex_curr_pos}\n"
288 pref;
289 pr ctx "%s end\n" pref;
290 pr ctx "%send;\n" pref;
291 pr ctx "%smatch __ocaml_lex_result with\n" pref;
292 List.iter
293 (fun (num, env, loc) ->
294 pr ctx "%s| %d ->\n" pref num;
295 output_env ic ctx.oc tr env;
296 copy_chunk ic ctx.oc tr loc true;
297 pr ctx "\n")
298 e.auto_actions;
299 pr ctx "%s| _ -> raise (Failure \"lexing: empty token\")\n" pref
300
301 let output_entry ic ctx tr e =
302 let init_num, init_moves = e.auto_initial_state in
303 pr ctx "%s %alexbuf =\n" e.auto_name output_args e.auto_args;
304
305 if ctx.has_refill then begin
306 pr ctx " let k lexbuf __ocaml_lex_result =\n";
307 output_rules ic ctx " " tr e;
308 pr ctx " in\n";
309 output_init ctx " " e init_moves;
310 ctx.goto_state ctx " " init_num
311 end else begin
312 pr ctx " let __ocaml_lex_result =\n";
313 output_init ctx " " e init_moves;
314 ctx.goto_state ctx " " init_num;
315 pr ctx " in\n";
316 output_rules ic ctx " " tr e
317 end;
318 pr ctx "\n\n"
319
320
321 (* Determine which states to inline *)
322
323 let choose_inlining entry_points transitions =
324 let counters = Array.make (Array.length transitions) 0 in
325 let count i = counters.(i) <- counters.(i) + 1 in
326 List.iter (fun e -> count (fst e.auto_initial_state)) entry_points;
327 Array.iter
328 (function
329 | Shift (_, a) ->
330 let tbl = Hashtbl.create 8 in
331 Array.iter
332 (function
333 | (Goto i, _) when not (Hashtbl.mem tbl i) ->
334 Hashtbl.add tbl i (); count i
335 | _ -> ()
336 )
337 a
338 | Perform _ -> ()
339 )
340 transitions;
341 Array.mapi
342 (fun i -> function
343 | Perform _ -> true
344 | Shift _ -> counters.(i) = 1
345 )
346 transitions
347
348 let goto_state inline transitions ctx pref n =
349 if inline.(n) then
350 output_trans_body pref ctx transitions.(n)
351 else
352 pr ctx "%s__ocaml_lex_state%d lexbuf %s _buf _len _curr _last%s\n"
353 pref n
354 (last_action ctx)
355 (if ctx.has_refill then " k" else "")
356
357 (* Main output function *)
358
359 let output_lexdef ic oc tr header rh
360 entry_points transitions trailer =
361
362 copy_chunk ic oc tr header false;
363 let has_refill = output_refill_handler ic oc tr rh in
364 let inline = choose_inlining entry_points transitions in
365 let ctx =
366 {
367 has_refill;
368 oc;
369 goto_state = goto_state inline transitions;
370 last_action = None;
371 }
372 in
373 output_automata ctx transitions inline;
374 begin match entry_points with
375 [] -> ()
376 | entry1 :: entries ->
377 output_string oc "let rec ";
378 output_entry ic ctx tr entry1;
379 List.iter
380 (fun e -> output_string oc "and ";
381 output_entry ic ctx tr e)
382 entries;
383 output_string oc ";;\n\n";
384 end;
385 copy_chunk ic oc tr trailer false
386