1 (**************************************************************************)
2 (* *)
3 (* OCaml *)
4 (* *)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
6 (* *)
7 (* Copyright 1996 Institut National de Recherche en Informatique et *)
8 (* en Automatique. *)
9 (* *)
10 (* All rights reserved. This file is distributed under the terms of *)
11 (* the GNU Lesser General Public License version 2.1, with the *)
12 (* special exception on linking described in the file LICENSE. *)
13 (* *)
14 (**************************************************************************)
15
16 {
17 open Parsecmm
18
19 type error =
20 Illegal_character
21 | Unterminated_comment
22 | Unterminated_string
23
24 exception Error of error
25
26 (* For nested comments *)
27
28 let comment_depth = ref 0
29
30 (* The table of keywords *)
31
32 let keyword_table =
33 Misc.create_hashtable 149 [
34 "absf", ABSF;
35 "addr", ADDR;
36 "align", ALIGN;
37 "alloc", ALLOC;
38 "and", AND;
39 "app", APPLY;
40 "assign", ASSIGN;
41 "byte", BYTE;
42 "case", CASE;
43 "catch", CATCH;
44 "checkbound", CHECKBOUND;
45 "data", DATA;
46 "exit", EXIT;
47 "extcall", EXTCALL;
48 "float", FLOAT;
49 "float32", FLOAT32;
50 "float64", FLOAT64;
51 "floatofint", FLOATOFINT;
52 "function", FUNCTION;
53 "global", GLOBAL;
54 "half", HALF;
55 "if", IF;
56 "int", INT;
57 "int32", INT32;
58 "intoffloat", INTOFFLOAT;
59 "string", KSTRING;
60 "let", LET;
61 "load", LOAD;
62 "mod", MODI;
63 "mulh", MULH;
64 "or", OR;
65 "proj", PROJ;
66 "raise", RAISE Lambda.Raise_regular;
67 "reraise", RAISE Lambda.Raise_reraise;
68 "raise_notrace", RAISE Lambda.Raise_notrace;
69 "seq", SEQ;
70 "signed", SIGNED;
71 "skip", SKIP;
72 "store", STORE;
73 "switch", SWITCH;
74 "try", TRY;
75 "unit", UNIT;
76 "unsigned", UNSIGNED;
77 "val", VAL;
78 "while", WHILE;
79 "with", WITH;
80 "xor", XOR;
81 "addraref", ADDRAREF;
82 "intaref", INTAREF;
83 "floataref", FLOATAREF;
84 "addraset", ADDRASET;
85 "intaset", INTASET;
86 "floataset", FLOATASET
87 ]
88
89 (* To buffer string literals *)
90
91 let initial_string_buffer = Bytes.create 256
92 let string_buff = ref initial_string_buffer
93 let string_index = ref 0
94
95 let reset_string_buffer () =
96 string_buff := initial_string_buffer;
97 string_index := 0
98
99 let store_string_char c =
100 if !string_index >= Bytes.length (!string_buff) then begin
101 let new_buff = Bytes.create (Bytes.length (!string_buff) * 2) in
102 Bytes.blit (!string_buff) 0 new_buff 0 (Bytes.length (!string_buff));
103 string_buff := new_buff
104 end;
105 Bytes.unsafe_set (!string_buff) (!string_index) c;
106 incr string_index
107
108 let get_stored_string () =
109 let s = Bytes.sub_string (!string_buff) 0 (!string_index) in
110 string_buff := initial_string_buffer;
111 s
112
113 (* To translate escape sequences *)
114
115 let char_for_backslash = function
116 'n' -> '\010'
117 | 'r' -> '\013'
118 | 'b' -> '\008'
119 | 't' -> '\009'
120 | c -> c
121
122 let char_for_decimal_code lexbuf i =
123 Char.chr(100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
124 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
125 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48))
126
127 (* Error report *)
128
129 let report_error lexbuf msg =
130 prerr_string "Lexical error around character ";
131 prerr_int (Lexing.lexeme_start lexbuf);
132 match msg with
133 Illegal_character ->
134 prerr_string ": illegal character"
135 | Unterminated_comment ->
136 prerr_string ": unterminated comment"
137 | Unterminated_string ->
138 prerr_string ": unterminated string"
139
140 }
141
142 let newline = ('\013'* '\010')
143
144 rule token = parse
145 newline
146 { Lexing.new_line lexbuf; token lexbuf }
147 | [' ' '\009' '\012'] +
148 { token lexbuf }
149 | "+a" { ADDA }
150 | "+v" { ADDV }
151 | "+f" { ADDF }
152 | "+" { ADDI }
153 | ">>s" { ASR }
154 | ":" { COLON }
155 | "/f" { DIVF }
156 | "/" { DIVI }
157 | eof { EOF }
158 | "==a" { EQA }
159 | "==f" { EQF }
160 | "==" { EQI }
161 | ">=a" { GEA }
162 | ">=f" { GEF }
163 | ">=" { GEI }
164 | ">a" { GTA }
165 | ">f" { GTF }
166 | ">" { GTI }
167 | "[" { LBRACKET }
168 | "<=a" { LEA }
169 | "<=f" { LEF }
170 | "<=" { LEI }
171 | "(" { LPAREN }
172 | "<<" { LSL }
173 | ">>u" { LSR }
174 | "<a" { LTA }
175 | "<f" { LTF }
176 | "<" { LTI }
177 | "*f" { MULF }
178 | "*" { STAR }
179 | "!=a" { NEA }
180 | "!=f" { NEF }
181 | "!=" { NEI }
182 | "!>=f" { NGEF }
183 | "!>f" { NGTF }
184 | "!<=f" { NLEF }
185 | "!<f" { NLTF }
186 | "]" { RBRACKET }
187 | ")" { RPAREN }
188 | "-f" { SUBF }
189 | "-" { SUBI }
190 | '-'? (['0'-'9']+ | "0x" ['0'-'9' 'a'-'f' 'A'-'F']+
191 | "0o" ['0'-'7']+ | "0b" ['0'-'1']+)
192 { INTCONST(int_of_string(Lexing.lexeme lexbuf)) }
193 | '-'? ['0'-'9']+ 'a'
194 { let s = Lexing.lexeme lexbuf in
195 POINTER(int_of_string(String.sub s 0 (String.length s - 1))) }
196 | '-'? ['0'-'9']+ ('.' ['0'-'9']*)? (['e' 'E'] ['+' '-']? ['0'-'9']+)?
197 { FLOATCONST(Lexing.lexeme lexbuf) }
198 | ['A'-'Z' 'a'-'z' '\223'-'\246' '\248'-'\255' ]
199 (['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255'
200 '\'' '0'-'9' ]) * '/'? (['0'-'9'] *)
201 { let s = Lexing.lexeme lexbuf in
202 try
203 Hashtbl.find keyword_table s
204 with Not_found ->
205 IDENT s }
206 | "\""
207 { reset_string_buffer();
208 string lexbuf;
209 STRING (get_stored_string()) }
210 | "(*"
211 { comment_depth := 1;
212 comment lexbuf;
213 token lexbuf }
214 | '{' ['A' - 'Z' 'a'-'z' '/' ',' '.' '-' '_' ' ''0'-'9']+
215 ':' [ '0'-'9' ]+ ',' ['0'-'9' ]+ '-' ['0'-'9' ]+ '}'
216 {
217 let loc_s = Lexing.lexeme lexbuf in
218 let pos_fname, pos_lnum, start, end_ =
219 Scanf.sscanf loc_s "{%s@:%i,%i-%i}" (fun file line start end_ ->
220 (file, line, start, end_))
221 in
222 let loc_start =
223 Lexing.{ pos_fname; pos_lnum; pos_bol = 0; pos_cnum = start }
224 in
225 let loc_end =
226 Lexing.{ pos_fname; pos_lnum; pos_bol = 0; pos_cnum = end_ }
227 in
228 let location = Location.{ loc_start; loc_end; loc_ghost = false } in
229 LOCATION location }
230 | _ { raise(Error(Illegal_character)) }
231
232 and comment = parse
233 "(*"
234 { comment_depth := succ !comment_depth; comment lexbuf }
235 | "*)"
236 { comment_depth := pred !comment_depth;
237 if !comment_depth > 0 then comment lexbuf }
238 | eof
239 { raise (Error(Unterminated_comment)) }
240 | newline
241 { Lexing.new_line lexbuf; comment lexbuf }
242 | _
243 { comment lexbuf }
244
245 and string = parse
246 '"'
247 { () }
248 | '\\' [' ' '\010' '\013' '\009' '\026' '\012'] +
249 { string lexbuf }
250 | '\\' ['\\' '"' 'n' 't' 'b' 'r']
251 { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
252 string lexbuf }
253 | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
254 { store_string_char(char_for_decimal_code lexbuf 1);
255 string lexbuf }
256 | eof
257 { raise (Error(Unterminated_string)) }
258 | _
259 { store_string_char(Lexing.lexeme_char lexbuf 0);
260 string lexbuf }
261