1 # 2 "asmcomp/amd64/proc.ml"
2 (**************************************************************************)
3 (* *)
4 (* OCaml *)
5 (* *)
6 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* *)
8 (* Copyright 2000 Institut National de Recherche en Informatique et *)
9 (* en Automatique. *)
10 (* *)
11 (* All rights reserved. This file is distributed under the terms of *)
12 (* the GNU Lesser General Public License version 2.1, with the *)
13 (* special exception on linking described in the file LICENSE. *)
14 (* *)
15 (**************************************************************************)
16
17 (* Description of the AMD64 processor *)
18
19 open Misc
20 open Arch
21 open Cmm
22 open Reg
23 open Mach
24
25 let fp = Config.with_frame_pointers
26
27 (* Which ABI to use *)
28
29 let win64 = Arch.win64
30
31 (* Registers available for register allocation *)
32
33 (* Register map:
34 rax 0
35 rbx 1
36 rdi 2
37 rsi 3
38 rdx 4
39 rcx 5
40 r8 6
41 r9 7
42 r12 8
43 r13 9
44 r10 10
45 r11 11
46 rbp 12
47 r14 domain state pointer
48 r15 allocation pointer
49
50 xmm0 - xmm15 100 - 115 *)
51
52 (* Conventions:
53 rax - r13: OCaml function arguments
54 rax: OCaml and C function results
55 xmm0 - xmm9: OCaml function arguments
56 xmm0: OCaml and C function results
57 Under Unix:
58 rdi, rsi, rdx, rcx, r8, r9: C function arguments
59 xmm0 - xmm7: C function arguments
60 rbx, rbp, r12-r15 are preserved by C
61 xmm registers are not preserved by C
62 Under Win64:
63 rcx, rdx, r8, r9: C function arguments
64 xmm0 - xmm3: C function arguments
65 rbx, rbp, rsi, rdi r12-r15 are preserved by C
66 xmm6-xmm15 are preserved by C
67 Note (PR#5707, GPR#1304): PLT stubs (used for dynamic resolution of symbols
68 on Unix-like platforms) may clobber any register except those used for:
69 1. C parameter passing;
70 2. C return values;
71 3. C callee-saved registers.
72 This translates to the set { r10, r11 }. These registers hence cannot
73 be used for OCaml parameter passing and must also be marked as
74 destroyed across [Ialloc] (otherwise a call to caml_call_gc@PLT might
75 clobber these two registers before the assembly stub saves them into
76 the GC regs block).
77 *)
78
79 let max_arguments_for_tailcalls = 10
80
81 let int_reg_name =
82 match Config.ccomp_type with
83 | "msvc" ->
84 [| "rax"; "rbx"; "rdi"; "rsi"; "rdx"; "rcx"; "r8"; "r9";
85 "r12"; "r13"; "r10"; "r11"; "rbp" |]
86 | _ ->
87 [| "%rax"; "%rbx"; "%rdi"; "%rsi"; "%rdx"; "%rcx"; "%r8"; "%r9";
88 "%r12"; "%r13"; "%r10"; "%r11"; "%rbp" |]
89
90 let float_reg_name =
91 match Config.ccomp_type with
92 | "msvc" ->
93 [| "xmm0"; "xmm1"; "xmm2"; "xmm3"; "xmm4"; "xmm5"; "xmm6"; "xmm7";
94 "xmm8"; "xmm9"; "xmm10"; "xmm11";
95 "xmm12"; "xmm13"; "xmm14"; "xmm15" |]
96 | _ ->
97 [| "%xmm0"; "%xmm1"; "%xmm2"; "%xmm3"; "%xmm4"; "%xmm5"; "%xmm6"; "%xmm7";
98 "%xmm8"; "%xmm9"; "%xmm10"; "%xmm11";
99 "%xmm12"; "%xmm13"; "%xmm14"; "%xmm15" |]
100
101 let num_register_classes = 2
102
103 let register_class r =
104 match r.typ with
105 | Val | Int | Addr -> 0
106 | Float -> 1
107
108 let num_available_registers = [| 13; 16 |]
109
110 let first_available_register = [| 0; 100 |]
111
112 let register_name r =
113 if r < 100 then int_reg_name.(r) else float_reg_name.(r - 100)
114
115 (* Pack registers starting at %rax so as to reduce the number of REX
116 prefixes and thus improve code density *)
117 let rotate_registers = false
118
119 (* Representation of hard registers by pseudo-registers *)
120
121 let hard_int_reg =
122 let v = Array.make 13 Reg.dummy in
123 for i = 0 to 12 do v.(i) <- Reg.at_location Int (Reg i) done;
124 v
125
126 let hard_float_reg =
127 let v = Array.make 16 Reg.dummy in
128 for i = 0 to 15 do v.(i) <- Reg.at_location Float (Reg (100 + i)) done;
129 v
130
131 let all_phys_regs =
132 Array.append hard_int_reg hard_float_reg
133
134 let phys_reg n =
135 if n < 100 then hard_int_reg.(n) else hard_float_reg.(n - 100)
136
137 let rax = phys_reg 0
138 let rdx = phys_reg 4
139 let r10 = phys_reg 10
140 let r11 = phys_reg 11
141 let r13 = phys_reg 9
142 let rbp = phys_reg 12
143 let rxmm15 = phys_reg 115
144
145 let destroyed_by_plt_stub =
146 if not X86_proc.use_plt then [| |] else [| r10; r11 |]
147
148 let num_destroyed_by_plt_stub = Array.length destroyed_by_plt_stub
149
150 let destroyed_by_plt_stub_set = Reg.set_of_array destroyed_by_plt_stub
151
152 let stack_slot slot ty =
153 Reg.at_location ty (Stack slot)
154
155 (* Instruction selection *)
156
157 let word_addressed = false
158
159 (* Calling conventions *)
160
161 let calling_conventions first_int last_int first_float last_float make_stack
162 arg =
163 let loc = Array.make (Array.length arg) Reg.dummy in
164 let int = ref first_int in
165 let float = ref first_float in
166 let ofs = ref 0 in
167 for i = 0 to Array.length arg - 1 do
168 match arg.(i).typ with
169 | Val | Int | Addr as ty ->
170 if !int <= last_int then begin
171 loc.(i) <- phys_reg !int;
172 incr int
173 end else begin
174 loc.(i) <- stack_slot (make_stack !ofs) ty;
175 ofs := !ofs + size_int
176 end;
177 assert (not (Reg.Set.mem loc.(i) destroyed_by_plt_stub_set))
178 | Float ->
179 if !float <= last_float then begin
180 loc.(i) <- phys_reg !float;
181 incr float
182 end else begin
183 loc.(i) <- stack_slot (make_stack !ofs) Float;
184 ofs := !ofs + size_float
185 end
186 done;
187 (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
188
189 let incoming ofs = Incoming ofs
190 let outgoing ofs = Outgoing ofs
191 let not_supported _ofs = fatal_error "Proc.loc_results: cannot call"
192
193 let max_int_args_in_regs () =
194 if Config.spacetime then 9 else 10
195
196 let loc_arguments arg =
197 calling_conventions 0 ((max_int_args_in_regs ()) - 1) 100 109 outgoing arg
198 let loc_parameters arg =
199 let (loc, _ofs) =
200 calling_conventions 0 ((max_int_args_in_regs ()) - 1) 100 109 incoming arg
201 in
202 loc
203 let loc_results res =
204 let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
205
206 let loc_spacetime_node_hole = r13
207
208 (* C calling conventions under Unix:
209 first integer args in rdi, rsi, rdx, rcx, r8, r9
210 first float args in xmm0 ... xmm7
211 remaining args on stack
212 return value in rax or xmm0.
213 C calling conventions under Win64:
214 first integer args in rcx, rdx, r8, r9
215 first float args in xmm0 ... xmm3
216 each integer arg consumes a float reg, and conversely
217 remaining args on stack
218 always 32 bytes reserved at bottom of stack.
219 Return value in rax or xmm0. *)
220
221 let loc_external_results res =
222 let (loc, _ofs) = calling_conventions 0 0 100 100 not_supported res in loc
223
224 let unix_loc_external_arguments arg =
225 calling_conventions 2 7 100 107 outgoing arg
226
227 let win64_int_external_arguments =
228 [| 5 (*rcx*); 4 (*rdx*); 6 (*r8*); 7 (*r9*) |]
229 let win64_float_external_arguments =
230 [| 100 (*xmm0*); 101 (*xmm1*); 102 (*xmm2*); 103 (*xmm3*) |]
231
232 let win64_loc_external_arguments arg =
233 let loc = Array.make (Array.length arg) Reg.dummy in
234 let reg = ref 0
235 and ofs = ref 32 in
236 for i = 0 to Array.length arg - 1 do
237 match arg.(i).typ with
238 | Val | Int | Addr as ty ->
239 if !reg < 4 then begin
240 loc.(i) <- phys_reg win64_int_external_arguments.(!reg);
241 incr reg
242 end else begin
243 loc.(i) <- stack_slot (Outgoing !ofs) ty;
244 ofs := !ofs + size_int
245 end
246 | Float ->
247 if !reg < 4 then begin
248 loc.(i) <- phys_reg win64_float_external_arguments.(!reg);
249 incr reg
250 end else begin
251 loc.(i) <- stack_slot (Outgoing !ofs) Float;
252 ofs := !ofs + size_float
253 end
254 done;
255 (loc, Misc.align !ofs 16) (* keep stack 16-aligned *)
256
257 let loc_external_arguments arg =
258 let arg =
259 Array.map (fun regs -> assert (Array.length regs = 1); regs.(0)) arg
260 in
261 let loc, alignment =
262 if win64 then win64_loc_external_arguments arg
263 else unix_loc_external_arguments arg
264 in
265 Array.map (fun reg -> [|reg|]) loc, alignment
266
267 let loc_exn_bucket = rax
268
269 (** See "System V Application Binary Interface, AMD64 Architecture Processor
270 Supplement" (www.x86-64.org/documentation/abi.pdf) page 57, fig. 3.36. *)
271 let int_dwarf_reg_numbers =
272 [| 0; 3; 5; 4; 1; 2; 8; 9; 12; 13; 10; 11; 6 |]
273
274 let float_dwarf_reg_numbers =
275 [| 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32 |]
276
277 let dwarf_register_numbers ~reg_class =
278 match reg_class with
279 | 0 -> int_dwarf_reg_numbers
280 | 1 -> float_dwarf_reg_numbers
281 | _ -> Misc.fatal_errorf "Bad register class %d" reg_class
282
283 let stack_ptr_dwarf_register_number = 7
284
285 (* Volatile registers: none *)
286
287 let regs_are_volatile _rs = false
288
289 (* Registers destroyed by operations *)
290
291 let destroyed_at_c_call =
292 if win64 then
293 (* Win64: rbx, rbp, rsi, rdi, r12-r15, xmm6-xmm15 preserved *)
294 Array.of_list(List.map phys_reg
295 [0;4;5;6;7;10;11;
296 100;101;102;103;104;105])
297 else
298 (* Unix: rbp, rbx, r12-r15 preserved *)
299 Array.of_list(List.map phys_reg
300 [0;2;3;4;5;6;7;10;11;
301 100;101;102;103;104;105;106;107;
302 108;109;110;111;112;113;114;115])
303
304 let destroyed_at_alloc =
305 let regs =
306 if Config.spacetime then
307 [| rax; loc_spacetime_node_hole |]
308 else
309 [| rax |]
310 in
311 Array.concat [regs; destroyed_by_plt_stub]
312
313 let destroyed_at_oper = function
314 Iop(Icall_ind _ | Icall_imm _ | Iextcall { alloc = true; }) ->
315 all_phys_regs
316 | Iop(Iextcall { alloc = false; }) -> destroyed_at_c_call
317 | Iop(Iintop(Idiv | Imod)) | Iop(Iintop_imm((Idiv | Imod), _))
318 -> [| rax; rdx |]
319 | Iop(Istore(Single, _, _)) -> [| rxmm15 |]
320 | Iop(Ialloc _) -> destroyed_at_alloc
321 | Iop(Iintop(Imulh | Icomp _) | Iintop_imm((Icomp _), _))
322 -> [| rax |]
323 | Iop (Iintop (Icheckbound _)) when Config.spacetime ->
324 [| loc_spacetime_node_hole |]
325 | Iop (Iintop_imm(Icheckbound _, _)) when Config.spacetime ->
326 [| loc_spacetime_node_hole |]
327 | Iswitch(_, _) -> [| rax; rdx |]
328 | Itrywith _ -> [| r11 |]
329 | _ ->
330 if fp then
331 (* prevent any use of the frame pointer ! *)
332 [| rbp |]
333 else
334 [||]
335
336
337 let destroyed_at_raise = all_phys_regs
338
339 let destroyed_at_reloadretaddr = [| |]
340
341 (* Maximal register pressure *)
342
343
344 let safe_register_pressure = function
345 Iextcall _ -> if win64 then if fp then 7 else 8 else 0
346 | _ -> if fp then 10 else 11
347
348 let max_register_pressure = function
349 Iextcall _ ->
350 if win64 then
351 if fp then [| 7; 10 |] else [| 8; 10 |]
352 else
353 if fp then [| 3; 0 |] else [| 4; 0 |]
354 | Iintop(Idiv | Imod) | Iintop_imm((Idiv | Imod), _) ->
355 if fp then [| 10; 16 |] else [| 11; 16 |]
356 | Ialloc _ ->
357 if fp then [| 11 - num_destroyed_by_plt_stub; 16 |]
358 else [| 12 - num_destroyed_by_plt_stub; 16 |]
359 | Iintop(Icomp _) | Iintop_imm((Icomp _), _) ->
360 if fp then [| 11; 16 |] else [| 12; 16 |]
361 | Istore(Single, _, _) ->
362 if fp then [| 12; 15 |] else [| 13; 15 |]
363 | _ -> if fp then [| 12; 16 |] else [| 13; 16 |]
364
365 (* Pure operations (without any side effect besides updating their result
366 registers). *)
367
368 let op_is_pure = function
369 | Icall_ind _ | Icall_imm _ | Itailcall_ind _ | Itailcall_imm _
370 | Iextcall _ | Istackoffset _ | Istore _ | Ialloc _
371 | Iintop(Icheckbound _) | Iintop_imm(Icheckbound _, _) -> false
372 | Ispecific(Ilea _|Isextend32|Izextend32) -> true
373 | Ispecific _ -> false
374 | _ -> true
375
376 (* Layout of the stack frame *)
377
378 let frame_required fd =
379 fp || fd.fun_contains_calls ||
380 fd.fun_num_stack_slots.(0) > 0 || fd.fun_num_stack_slots.(1) > 0
381
382 let prologue_required fd =
383 frame_required fd
384
385 (* Calling the assembler *)
386
387 let assemble_file infile outfile =
388 X86_proc.assemble_file infile outfile
389
390 let init () =
391 if fp then begin
392 num_available_registers.(0) <- 12
393 end else
394 num_available_registers.(0) <- 13
395