261 lines | 7354 chars
1 | (**************************************************************************) |
2 | (* *) |
3 | (* OCaml *) |
4 | (* *) |
5 | (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *) |
6 | (* *) |
7 | (* Copyright 1996 Institut National de Recherche en Informatique et *) |
8 | (* en Automatique. *) |
9 | (* *) |
10 | (* All rights reserved. This file is distributed under the terms of *) |
11 | (* the GNU Lesser General Public License version 2.1, with the *) |
12 | (* special exception on linking described in the file LICENSE. *) |
13 | (* *) |
14 | (**************************************************************************) |
15 | |
16 | { |
17 | open Parsecmm |
18 | |
19 | type error = |
20 | Illegal_character |
21 | | Unterminated_comment |
22 | | Unterminated_string |
23 | |
24 | exception Error of error |
25 | |
26 | (* For nested comments *) |
27 | |
28 | let comment_depth = ref 0 |
29 | |
30 | (* The table of keywords *) |
31 | |
32 | let keyword_table = |
33 | Misc.create_hashtable 149 [ |
34 | "absf", ABSF; |
35 | "addr", ADDR; |
36 | "align", ALIGN; |
37 | "alloc", ALLOC; |
38 | "and", AND; |
39 | "app", APPLY; |
40 | "assign", ASSIGN; |
41 | "byte", BYTE; |
42 | "case", CASE; |
43 | "catch", CATCH; |
44 | "checkbound", CHECKBOUND; |
45 | "data", DATA; |
46 | "exit", EXIT; |
47 | "extcall", EXTCALL; |
48 | "float", FLOAT; |
49 | "float32", FLOAT32; |
50 | "float64", FLOAT64; |
51 | "floatofint", FLOATOFINT; |
52 | "function", FUNCTION; |
53 | "global", GLOBAL; |
54 | "half", HALF; |
55 | "if", IF; |
56 | "int", INT; |
57 | "int32", INT32; |
58 | "intoffloat", INTOFFLOAT; |
59 | "string", KSTRING; |
60 | "let", LET; |
61 | "load", LOAD; |
62 | "mod", MODI; |
63 | "mulh", MULH; |
64 | "or", OR; |
65 | "proj", PROJ; |
66 | "raise", RAISE Lambda.Raise_regular; |
67 | "reraise", RAISE Lambda.Raise_reraise; |
68 | "raise_notrace", RAISE Lambda.Raise_notrace; |
69 | "seq", SEQ; |
70 | "signed", SIGNED; |
71 | "skip", SKIP; |
72 | "store", STORE; |
73 | "switch", SWITCH; |
74 | "try", TRY; |
75 | "unit", UNIT; |
76 | "unsigned", UNSIGNED; |
77 | "val", VAL; |
78 | "while", WHILE; |
79 | "with", WITH; |
80 | "xor", XOR; |
81 | "addraref", ADDRAREF; |
82 | "intaref", INTAREF; |
83 | "floataref", FLOATAREF; |
84 | "addraset", ADDRASET; |
85 | "intaset", INTASET; |
86 | "floataset", FLOATASET |
87 | ] |
88 | |
89 | (* To buffer string literals *) |
90 | |
91 | let initial_string_buffer = Bytes.create 256 |
92 | let string_buff = ref initial_string_buffer |
93 | let string_index = ref 0 |
94 | |
95 | let reset_string_buffer () = |
96 | string_buff := initial_string_buffer; |
97 | string_index := 0 |
98 | |
99 | let store_string_char c = |
100 | if !string_index >= Bytes.length (!string_buff) then begin |
101 | let new_buff = Bytes.create (Bytes.length (!string_buff) * 2) in |
102 | Bytes.blit (!string_buff) 0 new_buff 0 (Bytes.length (!string_buff)); |
103 | string_buff := new_buff |
104 | end; |
105 | Bytes.unsafe_set (!string_buff) (!string_index) c; |
106 | incr string_index |
107 | |
108 | let get_stored_string () = |
109 | let s = Bytes.sub_string (!string_buff) 0 (!string_index) in |
110 | string_buff := initial_string_buffer; |
111 | s |
112 | |
113 | (* To translate escape sequences *) |
114 | |
115 | let char_for_backslash = function |
116 | 'n' -> '\010' |
117 | | 'r' -> '\013' |
118 | | 'b' -> '\008' |
119 | | 't' -> '\009' |
120 | | c -> c |
121 | |
122 | let char_for_decimal_code lexbuf i = |
123 | Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) + |
124 | 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) + |
125 | (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48)) |
126 | |
127 | (* Error report *) |
128 | |
129 | let report_error lexbuf msg = |
130 | prerr_string "Lexical error around character "; |
131 | prerr_int (Lexing.lexeme_start lexbuf); |
132 | match msg with |
133 | Illegal_character -> |
134 | prerr_string ": illegal character" |
135 | | Unterminated_comment -> |
136 | prerr_string ": unterminated comment" |
137 | | Unterminated_string -> |
138 | prerr_string ": unterminated string" |
139 | |
140 | } |
141 | |
142 | let newline = ('\013'* '\010') |
143 | |
144 | rule token = parse |
145 | newline |
146 | { Lexing.new_line lexbuf; token lexbuf } |
147 | | [' ' '\009' '\012'] + |
148 | { token lexbuf } |
149 | | "+a" { ADDA } |
150 | | "+v" { ADDV } |
151 | | "+f" { ADDF } |
152 | | "+" { ADDI } |
153 | | ">>s" { ASR } |
154 | | ":" { COLON } |
155 | | "/f" { DIVF } |
156 | | "/" { DIVI } |
157 | | eof { EOF } |
158 | | "==a" { EQA } |
159 | | "==f" { EQF } |
160 | | "==" { EQI } |
161 | | ">=a" { GEA } |
162 | | ">=f" { GEF } |
163 | | ">=" { GEI } |
164 | | ">a" { GTA } |
165 | | ">f" { GTF } |
166 | | ">" { GTI } |
167 | | "[" { LBRACKET } |
168 | | "<=a" { LEA } |
169 | | "<=f" { LEF } |
170 | | "<=" { LEI } |
171 | | "(" { LPAREN } |
172 | | "<<" { LSL } |
173 | | ">>u" { LSR } |
174 | | "<a" { LTA } |
175 | | "<f" { LTF } |
176 | | "<" { LTI } |
177 | | "*f" { MULF } |
178 | | "*" { STAR } |
179 | | "!=a" { NEA } |
180 | | "!=f" { NEF } |
181 | | "!=" { NEI } |
182 | | "!>=f" { NGEF } |
183 | | "!>f" { NGTF } |
184 | | "!<=f" { NLEF } |
185 | | "!<f" { NLTF } |
186 | | "]" { RBRACKET } |
187 | | ")" { RPAREN } |
188 | | "-f" { SUBF } |
189 | | "-" { SUBI } |
190 | | '-'? (['0'-'9']+ | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+ |
191 | | "0o" ['0'-'7']+ | "0b" ['0'-'1']+) |
192 | { INTCONST(int_of_string(Lexing.lexeme lexbuf)) } |
193 | | '-'? ['0'-'9']+ 'a' |
194 | { let s = Lexing.lexeme lexbuf in |
195 | POINTER(int_of_string(String.sub s 0 (String.length s - 1))) } |
196 | | '-'? ['0'-'9']+ ('.' ['0'-'9']*)? (['e' 'E'] ['+' '-']? ['0'-'9']+)? |
197 | { FLOATCONST(Lexing.lexeme lexbuf) } |
198 | | ['A'-'Z' 'a'-'z' '\223'-'\246' '\248'-'\255' ] |
199 | (['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' |
200 | '\'' '0'-'9' ]) * '/'? (['0'-'9'] *) |
201 | { let s = Lexing.lexeme lexbuf in |
202 | try |
203 | Hashtbl.find keyword_table s |
204 | with Not_found -> |
205 | IDENT s } |
206 | | "\"" |
207 | { reset_string_buffer(); |
208 | string lexbuf; |
209 | STRING (get_stored_string()) } |
210 | | "(*" |
211 | { comment_depth := 1; |
212 | comment lexbuf; |
213 | token lexbuf } |
214 | | '{' ['A' - 'Z' 'a'-'z' '/' ',' '.' '-' '_' ' ''0'-'9']+ |
215 | ':' [ '0'-'9' ]+ ',' ['0'-'9' ]+ '-' ['0'-'9' ]+ '}' |
216 | { |
217 | let loc_s = Lexing.lexeme lexbuf in |
218 | let pos_fname, pos_lnum, start, end_ = |
219 | Scanf.sscanf loc_s "{%s@:%i,%i-%i}" (fun file line start end_ -> |
220 | (file, line, start, end_)) |
221 | in |
222 | let loc_start = |
223 | Lexing.{ pos_fname; pos_lnum; pos_bol = 0; pos_cnum = start } |
224 | in |
225 | let loc_end = |
226 | Lexing.{ pos_fname; pos_lnum; pos_bol = 0; pos_cnum = end_ } |
227 | in |
228 | let location = Location.{ loc_start; loc_end; loc_ghost = false } in |
229 | LOCATION location } |
230 | | _ { raise(Error(Illegal_character)) } |
231 | |
232 | and comment = parse |
233 | "(*" |
234 | { comment_depth := succ !comment_depth; comment lexbuf } |
235 | | "*)" |
236 | { comment_depth := pred !comment_depth; |
237 | if !comment_depth > 0 then comment lexbuf } |
238 | | eof |
239 | { raise (Error(Unterminated_comment)) } |
240 | | newline |
241 | { Lexing.new_line lexbuf; comment lexbuf } |
242 | | _ |
243 | { comment lexbuf } |
244 | |
245 | and string = parse |
246 | '"' |
247 | { () } |
248 | | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] + |
249 | { string lexbuf } |
250 | | '\\' ['\\' '"' 'n' 't' 'b' 'r'] |
251 | { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)); |
252 | string lexbuf } |
253 | | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] |
254 | { store_string_char(char_for_decimal_code lexbuf 1); |
255 | string lexbuf } |
256 | | eof |
257 | { raise (Error(Unterminated_string)) } |
258 | | _ |
259 | { store_string_char(Lexing.lexeme_char lexbuf 0); |
260 | string lexbuf } |
261 |