1 (**************************************************************************)
2 (* *)
3 (* OCaml *)
4 (* *)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
6 (* *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. *)
9 (* *)
10 (* All rights reserved. This file is distributed under the terms of *)
11 (* the GNU Lesser General Public License version 2.1, with the *)
12 (* special exception on linking described in the file LICENSE. *)
13 (* *)
14 (**************************************************************************)
15
16 (* Compaction of an automata *)
17
18 open Lexgen
19
20 (* Code for memory actions *)
21 let code = Table.create 0
22
23 (* instructions are 2 8-bits integers, a 0xff byte means return *)
24
25 let emit_int i = Table.emit code i
26
27 let ins_mem i c = match i with
28 | Copy (dst, src) -> dst::src::c
29 | Set dst -> dst::0xff::c
30
31
32 let ins_tag i c = match i with
33 | SetTag (dst, src) -> dst::src::c
34 | EraseTag dst -> dst::0xff::c
35
36
37 let do_emit_code c =
38 let r = Table.size code in
39 List.iter emit_int c ;
40 emit_int 0xff ;
41 r
42
43 let memory = Hashtbl.create 101
44
45 let mem_emit_code c =
46 try Hashtbl.find memory c with
47 | Not_found ->
48 let r = do_emit_code c in
49 Hashtbl.add memory c r ;
50 r
51
52 (* Code address 0 is the empty code (ie do nothing) *)
53 let _ = mem_emit_code []
54
55 let emit_tag_code c = mem_emit_code (List.fold_right ins_tag c [])
56 and emit_mem_code c =mem_emit_code (List.fold_right ins_mem c [])
57
58 (*******************************************)
59 (* Compact the transition and check arrays *)
60 (*******************************************)
61
62
63 (* Determine the integer occurring most frequently in an array *)
64
65 let most_frequent_elt v =
66 let frequencies = Hashtbl.create 17 in
67 let max_freq = ref 0 in
68 let most_freq = ref (v.(0)) in
69 for i = 0 to Array.length v - 1 do
70 let e = v.(i) in
71 let r =
72 try
73 Hashtbl.find frequencies e
74 with Not_found ->
75 let r = ref 1 in Hashtbl.add frequencies e r; r in
76 incr r;
77 if !r > !max_freq then begin max_freq := !r; most_freq := e end
78 done;
79 !most_freq
80
81 (* Transform an array into a list of (position, non-default element) *)
82
83 let non_default_elements def v =
84 let rec nondef i =
85 if i >= Array.length v then [] else begin
86 let e = v.(i) in
87 if e = def then nondef(i+1) else (i, e) :: nondef(i+1)
88 end in
89 nondef 0
90
91
92 type t_compact =
93 {mutable c_trans : int array ;
94 mutable c_check : int array ;
95 mutable c_last_used : int ; }
96
97 let create_compact () =
98 { c_trans = Array.make 1024 0 ;
99 c_check = Array.make 1024 (-1) ;
100 c_last_used = 0 ; }
101
102 let reset_compact c =
103 c.c_trans <- Array.make 1024 0 ;
104 c.c_check <- Array.make 1024 (-1) ;
105 c.c_last_used <- 0
106
107 (* One compacted table for transitions, one other for memory actions *)
108 let trans = create_compact ()
109 and moves = create_compact ()
110
111
112 let grow_compact c =
113 let old_trans = c.c_trans
114 and old_check = c.c_check in
115 let n = Array.length old_trans in
116 c.c_trans <- Array.make (2*n) 0;
117 Array.blit old_trans 0 c.c_trans 0 c.c_last_used;
118 c.c_check <- Array.make (2*n) (-1);
119 Array.blit old_check 0 c.c_check 0 c.c_last_used
120
121 let do_pack state_num orig compact =
122 let default = most_frequent_elt orig in
123 let nondef = non_default_elements default orig in
124 let rec pack_from b =
125 while
126 b + 257 > Array.length compact.c_trans
127 do
128 grow_compact compact
129 done;
130 let rec try_pack = function
131 [] -> b
132 | (pos, _v) :: rem ->
133 if compact.c_check.(b + pos) = -1 then
134 try_pack rem
135 else pack_from (b+1) in
136 try_pack nondef in
137 let base = pack_from 0 in
138 List.iter
139 (fun (pos, v) ->
140 compact.c_trans.(base + pos) <- v;
141 compact.c_check.(base + pos) <- state_num)
142 nondef;
143 if base + 257 > compact.c_last_used then
144 compact.c_last_used <- base + 257;
145 (base, default)
146
147 let pack_moves state_num move_t =
148 let move_v = Array.make 257 0
149 and move_m = Array.make 257 0 in
150 for i = 0 to 256 do
151 let act,c = move_t.(i) in
152 move_v.(i) <- (match act with Backtrack -> -1 | Goto n -> n) ;
153 move_m.(i) <- emit_mem_code c
154 done ;
155 let pk_trans = do_pack state_num move_v trans
156 and pk_moves = do_pack state_num move_m moves in
157 pk_trans, pk_moves
158
159
160 (* Build the tables *)
161
162 type lex_tables =
163 { tbl_base: int array; (* Perform / Shift *)
164 tbl_backtrk: int array; (* No_remember / Remember *)
165 tbl_default: int array; (* Default transition *)
166 tbl_trans: int array; (* Transitions (compacted) *)
167 tbl_check: int array; (* Check (compacted) *)
168 (* code addresses are managed in a similar fashion as transitions *)
169 tbl_base_code : int array; (* code ptr / base for Shift *)
170 tbl_backtrk_code : int array; (* nothing / code when Remember *)
171 (* moves to execute before transitions (compacted) *)
172 tbl_default_code : int array;
173 tbl_trans_code : int array;
174 tbl_check_code : int array;
175 (* byte code itself *)
176 tbl_code: int array;}
177
178
179 let compact_tables state_v =
180 let n = Array.length state_v in
181 let base = Array.make n 0
182 and backtrk = Array.make n (-1)
183 and default = Array.make n 0
184 and base_code = Array.make n 0
185 and backtrk_code = Array.make n 0
186 and default_code = Array.make n 0 in
187 for i = 0 to n - 1 do
188 match state_v.(i) with
189 | Perform (n,c) ->
190 base.(i) <- -(n+1) ;
191 base_code.(i) <- emit_tag_code c
192 | Shift(trans, move) ->
193 begin match trans with
194 | No_remember -> ()
195 | Remember (n,c) ->
196 backtrk.(i) <- n ;
197 backtrk_code.(i) <- emit_tag_code c
198 end;
199 let (b_trans, d_trans),(b_moves,d_moves) = pack_moves i move in
200 base.(i) <- b_trans; default.(i) <- d_trans ;
201 base_code.(i) <- b_moves; default_code.(i) <- d_moves ;
202 done;
203 let code = Table.trim code in
204 let tables =
205 if Array.length code > 1 then
206 { tbl_base = base;
207 tbl_backtrk = backtrk;
208 tbl_default = default;
209 tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
210 tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
211 tbl_base_code = base_code ;
212 tbl_backtrk_code = backtrk_code;
213 tbl_default_code = default_code;
214 tbl_trans_code = Array.sub moves.c_trans 0 moves.c_last_used;
215 tbl_check_code = Array.sub moves.c_check 0 moves.c_last_used;
216 tbl_code = code}
217 else (* when no memory moves, do not emit related tables *)
218 { tbl_base = base;
219 tbl_backtrk = backtrk;
220 tbl_default = default;
221 tbl_trans = Array.sub trans.c_trans 0 trans.c_last_used;
222 tbl_check = Array.sub trans.c_check 0 trans.c_last_used;
223 tbl_base_code = [||] ;
224 tbl_backtrk_code = [||];
225 tbl_default_code = [||];
226 tbl_trans_code = [||];
227 tbl_check_code = [||];
228 tbl_code = [||]}
229 in
230 reset_compact trans ;
231 reset_compact moves ;
232 tables
233