1 # 2 "asmcomp/amd64/emit.mlp"
2 (**************************************************************************)
3 (* *)
4 (* OCaml *)
5 (* *)
6 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
7 (* *)
8 (* Copyright 1996 Institut National de Recherche en Informatique et *)
9 (* en Automatique. *)
10 (* *)
11 (* All rights reserved. This file is distributed under the terms of *)
12 (* the GNU Lesser General Public License version 2.1, with the *)
13 (* special exception on linking described in the file LICENSE. *)
14 (* *)
15 (**************************************************************************)
16
17 (* Emission of Intel x86_64 assembly code *)
18
19 open Cmm
20 open Arch
21 open Proc
22 open Reg
23 open Mach
24 open Linear
25 open Emitaux
26
27 open X86_ast
28 open X86_proc
29 open X86_dsl
30 module String = Misc.Stdlib.String
31
32 (* [Branch_relaxation] is not used in this file, but is required by
33 emit.mlp files for certain other targets; the reference here ensures
34 that when releases are being prepared the .depend files are correct
35 for all targets. *)
36 [@@@ocaml.warning "-66"]
37 open! Branch_relaxation
38
39 let _label s = D.label ~typ:QWORD s
40
41 (* Override proc.ml *)
42
43 let int_reg_name =
44 [| RAX; RBX; RDI; RSI; RDX; RCX; R8; R9;
45 R12; R13; R10; R11; RBP; |]
46
47 let float_reg_name = Array.init 16 (fun i -> XMM i)
48
49 let register_name r =
50 if r < 100 then Reg64 (int_reg_name.(r))
51 else Regf (float_reg_name.(r - 100))
52
53 (* CFI directives *)
54
55 let cfi_startproc () =
56 if Config.asm_cfi_supported then D.cfi_startproc ()
57
58 let cfi_endproc () =
59 if Config.asm_cfi_supported then D.cfi_endproc ()
60
61 let cfi_adjust_cfa_offset n =
62 if Config.asm_cfi_supported then D.cfi_adjust_cfa_offset n
63
64 let emit_debug_info dbg =
65 emit_debug_info_gen dbg D.file D.loc
66
67 let fp = Config.with_frame_pointers
68
69 (* Tradeoff between code size and code speed *)
70
71 let fastcode_flag = ref true
72
73 (* Layout of the stack frame *)
74 let stack_offset = ref 0
75
76 let num_stack_slots = Array.make Proc.num_register_classes 0
77
78 let prologue_required = ref false
79
80 let frame_required = ref false
81
82 let frame_size () = (* includes return address *)
83 if !frame_required then begin
84 let sz =
85 (!stack_offset + 8 * (num_stack_slots.(0) + num_stack_slots.(1)) + 8
86 + (if fp then 8 else 0))
87 in Misc.align sz 16
88 end else
89 !stack_offset + 8
90
91 let slot_offset loc cl =
92 match loc with
93 | Incoming n -> frame_size() + n
94 | Local n ->
95 if cl = 0
96 then !stack_offset + n * 8
97 else !stack_offset + (num_stack_slots.(0) + n) * 8
98 | Outgoing n -> n
99
100 (* Symbols *)
101
102 let symbol_prefix = if system = S_macosx then "_" else ""
103
104 let emit_symbol s = string_of_symbol symbol_prefix s
105
106 (* Record symbols used and defined - at the end generate extern for those
107 used but not defined *)
108
109 let symbols_defined = ref String.Set.empty
110 let symbols_used = ref String.Set.empty
111
112 let add_def_symbol s = symbols_defined := String.Set.add s !symbols_defined
113 let add_used_symbol s = symbols_used := String.Set.add s !symbols_used
114
115 let imp_table = Hashtbl.create 16
116
117 let reset_imp_table () = Hashtbl.clear imp_table
118
119 let get_imp_symbol s =
120 match Hashtbl.find imp_table s with
121 | exception Not_found ->
122 let imps = "__caml_imp_" ^ s in
123 Hashtbl.add imp_table s imps;
124 imps
125 | imps -> imps
126
127 let emit_imp_table () =
128 let f s imps =
129 _label (emit_symbol imps);
130 D.qword (ConstLabel (emit_symbol s))
131 in
132 D.data();
133 D.comment "relocation table start";
134 D.align 8;
135 Hashtbl.iter f imp_table;
136 D.comment "relocation table end"
137
138 let mem__imp s =
139 let imp_s = get_imp_symbol s in
140 mem64_rip QWORD (emit_symbol imp_s)
141
142 let rel_plt s =
143 if windows && !Clflags.dlcode then mem__imp s
144 else
145 sym (if use_plt then emit_symbol s ^ "@PLT" else emit_symbol s)
146
147 let emit_call s = I.call (rel_plt s)
148
149 let emit_jump s = I.jmp (rel_plt s)
150
151 let load_symbol_addr s arg =
152 if !Clflags.dlcode then
153 if windows then begin
154 (* I.mov (mem__imp s) arg (\* mov __caml_imp_foo(%rip), ... *\) *)
155 I.mov (sym (emit_symbol s)) arg (* movabsq $foo, ... *)
156 end else I.mov (mem64_rip QWORD (emit_symbol s ^ "@GOTPCREL")) arg
157 else if !Clflags.pic_code then
158 I.lea (mem64_rip NONE (emit_symbol s)) arg
159 else
160 I.mov (sym (emit_symbol s)) arg
161
162 let domain_field f =
163 mem64 QWORD (Domainstate.idx_of_field f * 8) R14
164
165 (* Output a label *)
166
167 let emit_label lbl =
168 match system with
169 | S_macosx | S_win64 -> "L" ^ Int.to_string lbl
170 | _ -> ".L" ^ Int.to_string lbl
171
172 let label s = sym (emit_label s)
173
174 let def_label s = D.label (emit_label s)
175
176 let emit_Llabel fallthrough lbl =
177 if not fallthrough && !fastcode_flag then D.align 4;
178 def_label lbl
179
180 (* Output a pseudo-register *)
181
182 let reg = function
183 | { loc = Reg.Reg r } -> register_name r
184 | { loc = Stack s; typ = Float } as r ->
185 let ofs = slot_offset s (register_class r) in
186 mem64 REAL8 ofs RSP
187 | { loc = Stack s } as r ->
188 let ofs = slot_offset s (register_class r) in
189 mem64 QWORD ofs RSP
190 | { loc = Unknown } ->
191 assert false
192
193 let reg64 = function
194 | { loc = Reg.Reg r } -> int_reg_name.(r)
195 | _ -> assert false
196
197
198 let res i n = reg i.res.(n)
199
200 let arg i n = reg i.arg.(n)
201
202 (* Output a reference to the lower 8, 16 or 32 bits of a register *)
203
204 let reg_low_8_name = Array.map (fun r -> Reg8L r) int_reg_name
205 let reg_low_16_name = Array.map (fun r -> Reg16 r) int_reg_name
206 let reg_low_32_name = Array.map (fun r -> Reg32 r) int_reg_name
207
208 let emit_subreg tbl typ r =
209 match r.loc with
210 | Reg.Reg r when r < 13 -> tbl.(r)
211 | Stack s -> mem64 typ (slot_offset s (register_class r)) RSP
212 | _ -> assert false
213
214 let arg8 i n = emit_subreg reg_low_8_name BYTE i.arg.(n)
215 let arg16 i n = emit_subreg reg_low_16_name WORD i.arg.(n)
216 let arg32 i n = emit_subreg reg_low_32_name DWORD i.arg.(n)
217 let arg64 i n = reg64 i.arg.(n)
218
219 let res16 i n = emit_subreg reg_low_16_name WORD i.res.(n)
220 let res32 i n = emit_subreg reg_low_32_name DWORD i.res.(n)
221
222 (* Output an addressing mode *)
223
224 let addressing addr typ i n =
225 match addr with
226 | Ibased(s, ofs) ->
227 add_used_symbol s;
228 mem64_rip typ (emit_symbol s) ~ofs
229 | Iindexed d ->
230 mem64 typ d (arg64 i n)
231 | Iindexed2 d ->
232 mem64 typ ~base:(arg64 i n) d (arg64 i (n+1))
233 | Iscaled(2, d) ->
234 mem64 typ ~base:(arg64 i n) d (arg64 i n)
235 | Iscaled(scale, d) ->
236 mem64 typ ~scale d (arg64 i n)
237 | Iindexed2scaled(scale, d) ->
238 mem64 typ ~scale ~base:(arg64 i n) d (arg64 i (n+1))
239
240 (* Record live pointers at call points -- see Emitaux *)
241
242 let record_frame_label ?label live raise_ dbg =
243 let lbl =
244 match label with
245 | None -> new_label()
246 | Some label -> label
247 in
248 let live_offset = ref [] in
249 Reg.Set.iter
250 (function
251 | {typ = Val; loc = Reg r} ->
252 live_offset := ((r lsl 1) + 1) :: !live_offset
253 | {typ = Val; loc = Stack s} as reg ->
254 live_offset := slot_offset s (register_class reg) :: !live_offset
255 | {typ = Addr} as r ->
256 Misc.fatal_error ("bad GC root " ^ Reg.name r)
257 | _ -> ()
258 )
259 live;
260 record_frame_descr ~label:lbl ~frame_size:(frame_size())
261 ~live_offset:!live_offset ~raise_frame:raise_ dbg;
262 lbl
263
264 let record_frame ?label live raise_ dbg =
265 let lbl = record_frame_label ?label live raise_ dbg in
266 def_label lbl
267
268 (* Spacetime instrumentation *)
269
270 let spacetime_before_uninstrumented_call ~node_ptr ~index =
271 (* At the moment, [node_ptr] is pointing at the node for the current
272 OCaml function. Get hold of the node itself and move the pointer
273 forwards, saving it into the distinguished register. This is used
274 for instrumentation of function calls (e.g. caml_call_gc and bounds
275 check failures) not inserted until this stage of the compiler
276 pipeline. *)
277 I.mov node_ptr (reg Proc.loc_spacetime_node_hole);
278 assert (index >= 2);
279 I.add (int (index * 8)) (reg Proc.loc_spacetime_node_hole)
280
281 (* Record calls to the GC -- we've moved them out of the way *)
282
283 type gc_call =
284 { gc_size: int; (* Allocation size, in bytes *)
285 gc_lbl: label; (* Entry label *)
286 gc_return_lbl: label; (* Where to branch after GC *)
287 gc_frame: label; (* Label of frame descriptor *)
288 gc_spacetime : (X86_ast.arg * int) option;
289 (* Spacetime node hole pointer and index *)
290 }
291
292 let call_gc_sites = ref ([] : gc_call list)
293
294 let emit_call_gc gc =
295 def_label gc.gc_lbl;
296 begin match gc.gc_spacetime with
297 | None -> assert (not Config.spacetime)
298 | Some (node_ptr, index) ->
299 assert Config.spacetime;
300 spacetime_before_uninstrumented_call ~node_ptr ~index
301 end;
302 begin match gc.gc_size with
303 | 16 -> emit_call "caml_call_gc1"
304 | 24 -> emit_call "caml_call_gc2"
305 | 32 -> emit_call "caml_call_gc3"
306 | n -> I.add (int n) r15;
307 emit_call "caml_call_gc"
308 end;
309 def_label gc.gc_frame;
310 I.jmp (label gc.gc_return_lbl)
311
312 (* Record calls to caml_ml_array_bound_error.
313 In -g mode, or when using Spacetime profiling, we maintain one call to
314 caml_ml_array_bound_error per bound check site. Without -g, we can share
315 a single call. *)
316
317 type bound_error_call =
318 { bd_lbl: label; (* Entry label *)
319 bd_frame: label; (* Label of frame descriptor *)
320 bd_spacetime : (X86_ast.arg * int) option;
321 (* As for [gc_call]. *)
322 }
323
324 let bound_error_sites = ref ([] : bound_error_call list)
325 let bound_error_call = ref 0
326
327 let bound_error_label ?label dbg ~spacetime =
328 if !Clflags.debug || Config.spacetime then begin
329 let lbl_bound_error = new_label() in
330 let lbl_frame = record_frame_label ?label Reg.Set.empty false dbg in
331 bound_error_sites :=
332 { bd_lbl = lbl_bound_error; bd_frame = lbl_frame;
333 bd_spacetime = spacetime; } :: !bound_error_sites;
334 lbl_bound_error
335 end else begin
336 if !bound_error_call = 0 then bound_error_call := new_label();
337 !bound_error_call
338 end
339
340 let emit_call_bound_error bd =
341 def_label bd.bd_lbl;
342 begin match bd.bd_spacetime with
343 | None -> ()
344 | Some (node_ptr, index) ->
345 spacetime_before_uninstrumented_call ~node_ptr ~index
346 end;
347 emit_call "caml_ml_array_bound_error";
348 def_label bd.bd_frame
349
350 let emit_call_bound_errors () =
351 List.iter emit_call_bound_error !bound_error_sites;
352 if !bound_error_call > 0 then begin
353 def_label !bound_error_call;
354 emit_call "caml_ml_array_bound_error"
355 end
356
357 (* Names for instructions *)
358
359 let instr_for_intop = function
360 | Iadd -> I.add
361 | Isub -> I.sub
362 | Imul -> (fun arg1 arg2 -> I.imul arg1 (Some arg2))
363 | Iand -> I.and_
364 | Ior -> I.or_
365 | Ixor -> I.xor
366 | Ilsl -> I.sal
367 | Ilsr -> I.shr
368 | Iasr -> I.sar
369 | _ -> assert false
370
371 let instr_for_floatop = function
372 | Iaddf -> I.addsd
373 | Isubf -> I.subsd
374 | Imulf -> I.mulsd
375 | Idivf -> I.divsd
376 | _ -> assert false
377
378 let instr_for_floatarithmem = function
379 | Ifloatadd -> I.addsd
380 | Ifloatsub -> I.subsd
381 | Ifloatmul -> I.mulsd
382 | Ifloatdiv -> I.divsd
383
384 let cond = function
385 | Isigned Ceq -> E | Isigned Cne -> NE
386 | Isigned Cle -> LE | Isigned Cgt -> G
387 | Isigned Clt -> L | Isigned Cge -> GE
388 | Iunsigned Ceq -> E | Iunsigned Cne -> NE
389 | Iunsigned Cle -> BE | Iunsigned Cgt -> A
390 | Iunsigned Clt -> B | Iunsigned Cge -> AE
391
392 (* Output an = 0 or <> 0 test. *)
393
394 let output_test_zero arg =
395 match arg.loc with
396 | Reg.Reg _ -> I.test (reg arg) (reg arg)
397 | _ -> I.cmp (int 0) (reg arg)
398
399 (* Output a floating-point compare and branch *)
400
401 let emit_float_test cmp i lbl =
402 (* Effect of comisd on flags and conditional branches:
403 ZF PF CF cond. branches taken
404 unordered 1 1 1 je, jb, jbe, jp
405 > 0 0 0 jne, jae, ja
406 < 0 0 1 jne, jbe, jb
407 = 1 0 0 je, jae, jbe.
408 If FP traps are on (they are off by default),
409 comisd traps on QNaN and SNaN but ucomisd traps on SNaN only.
410 *)
411 match cmp with
412 | CFeq ->
413 let next = new_label() in
414 I.ucomisd (arg i 1) (arg i 0);
415 I.jp (label next); (* skip if unordered *)
416 I.je lbl; (* branch taken if x=y *)
417 def_label next
418 | CFneq ->
419 I.ucomisd (arg i 1) (arg i 0);
420 I.jp lbl; (* branch taken if unordered *)
421 I.jne lbl (* branch taken if x<y or x>y *)
422 | CFlt ->
423 I.comisd (arg i 0) (arg i 1);
424 I.ja lbl (* branch taken if y>x i.e. x<y *)
425 | CFnlt ->
426 I.comisd (arg i 0) (arg i 1);
427 I.jbe lbl (* taken if unordered or y<=x i.e. !(x<y) *)
428 | CFle ->
429 I.comisd (arg i 0) (arg i 1);(* swap compare *)
430 I.jae lbl (* branch taken if y>=x i.e. x<=y *)
431 | CFnle ->
432 I.comisd (arg i 0) (arg i 1);(* swap compare *)
433 I.jb lbl (* taken if unordered or y<x i.e. !(x<=y) *)
434 | CFgt ->
435 I.comisd (arg i 1) (arg i 0);
436 I.ja lbl (* branch taken if x>y *)
437 | CFngt ->
438 I.comisd (arg i 1) (arg i 0);
439 I.jbe lbl (* taken if unordered or x<=y i.e. !(x>y) *)
440 | CFge ->
441 I.comisd (arg i 1) (arg i 0);(* swap compare *)
442 I.jae lbl (* branch taken if x>=y *)
443 | CFnge ->
444 I.comisd (arg i 1) (arg i 0);(* swap compare *)
445 I.jb lbl (* taken if unordered or x<y i.e. !(x>=y) *)
446
447 (* Deallocate the stack frame before a return or tail call *)
448
449 let output_epilogue f =
450 if !frame_required then begin
451 let n = frame_size() - 8 - (if fp then 8 else 0) in
452 if n <> 0
453 then begin
454 I.add (int n) rsp;
455 cfi_adjust_cfa_offset (-n);
456 end;
457 if fp then I.pop rbp;
458 f ();
459 (* reset CFA back cause function body may continue *)
460 if n <> 0
461 then cfi_adjust_cfa_offset n
462 end
463 else
464 f ()
465
466 (* Floating-point constants *)
467
468 let float_constants = ref ([] : (int64 * int) list)
469
470 let add_float_constant cst =
471 try
472 List.assoc cst !float_constants
473 with Not_found ->
474 let lbl = new_label() in
475 float_constants := (cst, lbl) :: !float_constants;
476 lbl
477
478 let emit_float_constant f lbl =
479 _label (emit_label lbl);
480 D.qword (Const f)
481
482 let emit_global_label s =
483 let lbl = Compilenv.make_symbol (Some s) in
484 add_def_symbol lbl;
485 let lbl = emit_symbol lbl in
486 D.global lbl;
487 _label lbl
488
489 (* Output .text section directive, or named .text.caml.<name> if enabled and
490 supported on the target system. *)
491
492 let emit_named_text_section func_name =
493 if !Clflags.function_sections then
494 begin match system with
495 | S_macosx
496 (* Names of section segments in macosx are restricted to 16 characters,
497 but function names are often longer, especially anonymous functions. *)
498 | S_win64 | S_mingw64 | S_cygwin
499 (* Win systems provide named text sections, but configure on these
500 systems does not support function sections. *)
501 -> assert false
502 | _ -> D.section
503 [ ".text.caml."^(emit_symbol func_name) ]
504 (Some "ax")
505 ["@progbits"]
506 end
507 else D.text ()
508
509 (* Output the assembly code for an instruction *)
510
511 (* Name of current function *)
512 let function_name = ref ""
513 (* Entry point for tail recursive calls *)
514 let tailrec_entry_point = ref 0
515
516 (* Emit an instruction *)
517 let emit_instr fallthrough i =
518 emit_debug_info i.dbg;
519 match i.desc with
520 | Lend -> ()
521 | Lprologue ->
522 assert (!prologue_required);
523 if fp then begin
524 I.push rbp;
525 cfi_adjust_cfa_offset 8;
526 I.mov rsp rbp;
527 end;
528 if !frame_required then begin
529 let n = frame_size() - 8 - (if fp then 8 else 0) in
530 if n <> 0
531 then begin
532 I.sub (int n) rsp;
533 cfi_adjust_cfa_offset n;
534 end;
535 end
536 | Lop(Imove | Ispill | Ireload) ->
537 let src = i.arg.(0) and dst = i.res.(0) in
538 if src.loc <> dst.loc then
539 begin match src.typ, src.loc, dst.loc with
540 | Float, Reg.Reg _, Reg.Reg _ -> I.movapd (reg src) (reg dst)
541 | Float, _, _ -> I.movsd (reg src) (reg dst)
542 | _ -> I.mov (reg src) (reg dst)
543 end
544 | Lop(Iconst_int n) ->
545 if n = 0n then begin
546 match i.res.(0).loc with
547 | Reg _ ->
548 (* Clearing the bottom half also clears the top half (except for
549 64-bit-only registers where the behaviour is as if the operands
550 were 64 bit). *)
551 I.xor (res32 i 0) (res32 i 0)
552 | _ ->
553 I.mov (int 0) (res i 0)
554 end else if n > 0n && n <= 0xFFFF_FFFFn then begin
555 match i.res.(0).loc with
556 | Reg _ ->
557 (* Similarly, setting only the bottom half clears the top half. *)
558 I.mov (nat n) (res32 i 0)
559 | _ ->
560 I.mov (nat n) (res i 0)
561 end else
562 I.mov (nat n) (res i 0)
563 | Lop(Iconst_float f) ->
564 begin match f with
565 | 0x0000_0000_0000_0000L -> (* +0.0 *)
566 I.xorpd (res i 0) (res i 0)
567 | _ ->
568 let lbl = add_float_constant f in
569 I.movsd (mem64_rip NONE (emit_label lbl)) (res i 0)
570 end
571 | Lop(Iconst_symbol s) ->
572 add_used_symbol s;
573 load_symbol_addr s (res i 0)
574 | Lop(Icall_ind { label_after; }) ->
575 I.call (arg i 0);
576 record_frame i.live false i.dbg ~label:label_after
577 | Lop(Icall_imm { func; label_after; }) ->
578 add_used_symbol func;
579 emit_call func;
580 record_frame i.live false i.dbg ~label:label_after
581 | Lop(Itailcall_ind { label_after; }) ->
582 output_epilogue begin fun () ->
583 I.jmp (arg i 0);
584 if Config.spacetime then begin
585 record_frame Reg.Set.empty false i.dbg ~label:label_after
586 end
587 end
588 | Lop(Itailcall_imm { func; label_after; }) ->
589 begin
590 if func = !function_name then
591 I.jmp (label !tailrec_entry_point)
592 else begin
593 output_epilogue begin fun () ->
594 add_used_symbol func;
595 emit_jump func
596 end
597 end
598 end;
599 if Config.spacetime then begin
600 record_frame Reg.Set.empty false i.dbg ~label:label_after
601 end
602 | Lop(Iextcall { func; alloc; label_after; }) ->
603 add_used_symbol func;
604 if alloc then begin
605 load_symbol_addr func rax;
606 emit_call "caml_c_call";
607 record_frame i.live false i.dbg ~label:label_after;
608 if system <> S_win64 then begin
609 (* TODO: investigate why such a diff.
610 This comes from:
611 http://caml.inria.fr/cgi-bin/viewvc.cgi?view=revision&revision=12664
612
613 If we do the same for Win64, we probably need to change
614 amd64nt.asm accordingly.
615 *)
616 I.mov (domain_field Domainstate.Domain_young_ptr) r15
617 end
618 end else begin
619 emit_call func;
620 if Config.spacetime then begin
621 record_frame Reg.Set.empty false i.dbg ~label:label_after
622 end
623 end
624 | Lop(Istackoffset n) ->
625 if n < 0
626 then I.add (int (-n)) rsp
627 else if n > 0
628 then I.sub (int n) rsp;
629 if n <> 0
630 then cfi_adjust_cfa_offset n;
631 stack_offset := !stack_offset + n
632 | Lop(Iload(chunk, addr)) ->
633 let dest = res i 0 in
634 begin match chunk with
635 | Word_int | Word_val ->
636 I.mov (addressing addr QWORD i 0) dest
637 | Byte_unsigned ->
638 I.movzx (addressing addr BYTE i 0) dest
639 | Byte_signed ->
640 I.movsx (addressing addr BYTE i 0) dest
641 | Sixteen_unsigned ->
642 I.movzx (addressing addr WORD i 0) dest
643 | Sixteen_signed ->
644 I.movsx (addressing addr WORD i 0) dest;
645 | Thirtytwo_unsigned ->
646 I.mov (addressing addr DWORD i 0) (res32 i 0)
647 | Thirtytwo_signed ->
648 I.movsxd (addressing addr DWORD i 0) dest
649 | Single ->
650 I.cvtss2sd (addressing addr REAL4 i 0) dest
651 | Double | Double_u ->
652 I.movsd (addressing addr REAL8 i 0) dest
653 end
654 | Lop(Istore(chunk, addr, _)) ->
655 begin match chunk with
656 | Word_int | Word_val ->
657 I.mov (arg i 0) (addressing addr QWORD i 1)
658 | Byte_unsigned | Byte_signed ->
659 I.mov (arg8 i 0) (addressing addr BYTE i 1)
660 | Sixteen_unsigned | Sixteen_signed ->
661 I.mov (arg16 i 0) (addressing addr WORD i 1)
662 | Thirtytwo_signed | Thirtytwo_unsigned ->
663 I.mov (arg32 i 0) (addressing addr DWORD i 1)
664 | Single ->
665 I.cvtsd2ss (arg i 0) xmm15;
666 I.movss xmm15 (addressing addr REAL4 i 1)
667 | Double | Double_u ->
668 I.movsd (arg i 0) (addressing addr REAL8 i 1)
669 end
670 | Lop(Ialloc { bytes = n; label_after_call_gc; spacetime_index; }) ->
671 if !fastcode_flag then begin
672 let lbl_redo = new_label() in
673 def_label lbl_redo;
674 I.sub (int n) r15;
675 I.cmp (domain_field Domainstate.Domain_young_limit) r15;
676 let lbl_call_gc = new_label() in
677 let dbg =
678 if not Config.spacetime then Debuginfo.none
679 else i.dbg
680 in
681 let lbl_frame =
682 record_frame_label ?label:label_after_call_gc i.live false dbg
683 in
684 I.jb (label lbl_call_gc);
685 I.lea (mem64 NONE 8 R15) (res i 0);
686 let gc_spacetime =
687 if not Config.spacetime then None
688 else Some (arg i 0, spacetime_index)
689 in
690 call_gc_sites :=
691 { gc_size = n;
692 gc_lbl = lbl_call_gc;
693 gc_return_lbl = lbl_redo;
694 gc_frame = lbl_frame;
695 gc_spacetime; } :: !call_gc_sites
696 end else begin
697 if Config.spacetime then begin
698 spacetime_before_uninstrumented_call ~node_ptr:(arg i 0)
699 ~index:spacetime_index;
700 end;
701 begin match n with
702 | 16 -> emit_call "caml_alloc1"
703 | 24 -> emit_call "caml_alloc2"
704 | 32 -> emit_call "caml_alloc3"
705 | _ ->
706 I.mov (int n) rax;
707 emit_call "caml_allocN"
708 end;
709 let label =
710 record_frame_label ?label:label_after_call_gc i.live false
711 Debuginfo.none
712 in
713 def_label label;
714 I.lea (mem64 NONE 8 R15) (res i 0)
715 end
716 | Lop(Iintop(Icomp cmp)) ->
717 I.cmp (arg i 1) (arg i 0);
718 I.set (cond cmp) al;
719 I.movzx al (res i 0)
720 | Lop(Iintop_imm(Icomp cmp, n)) ->
721 I.cmp (int n) (arg i 0);
722 I.set (cond cmp) al;
723 I.movzx al (res i 0)
724 | Lop(Iintop (Icheckbound { label_after_error; spacetime_index; } )) ->
725 let spacetime =
726 if not Config.spacetime then None
727 else Some (arg i 2, spacetime_index)
728 in
729 let lbl = bound_error_label ?label:label_after_error i.dbg ~spacetime in
730 I.cmp (arg i 1) (arg i 0);
731 I.jbe (label lbl)
732 | Lop(Iintop_imm(Icheckbound { label_after_error; spacetime_index; }, n)) ->
733 let spacetime =
734 if not Config.spacetime then None
735 else Some (arg i 1, spacetime_index)
736 in
737 let lbl = bound_error_label ?label:label_after_error i.dbg ~spacetime in
738 I.cmp (int n) (arg i 0);
739 I.jbe (label lbl)
740 | Lop(Iintop(Idiv | Imod)) ->
741 I.cqo ();
742 I.idiv (arg i 1)
743 | Lop(Iintop(Ilsl | Ilsr | Iasr as op)) ->
744 (* We have i.arg.(0) = i.res.(0) and i.arg.(1) = %rcx *)
745 instr_for_intop op cl (res i 0)
746 | Lop(Iintop Imulh) ->
747 I.imul (arg i 1) None
748 | Lop(Iintop op) ->
749 (* We have i.arg.(0) = i.res.(0) *)
750 instr_for_intop op (arg i 1) (res i 0)
751 | Lop(Iintop_imm(Iadd, n)) when i.arg.(0).loc <> i.res.(0).loc ->
752 I.lea (mem64 NONE n (arg64 i 0)) (res i 0)
753 | Lop(Iintop_imm(Iadd, 1) | Iintop_imm(Isub, -1)) ->
754 I.inc (res i 0)
755 | Lop(Iintop_imm(Iadd, -1) | Iintop_imm(Isub, 1)) ->
756 I.dec (res i 0)
757 | Lop(Iintop_imm(op, n)) ->
758 (* We have i.arg.(0) = i.res.(0) *)
759 instr_for_intop op (int n) (res i 0)
760 | Lop(Inegf) ->
761 I.xorpd (mem64_rip OWORD (emit_symbol "caml_negf_mask")) (res i 0)
762 | Lop(Iabsf) ->
763 I.andpd (mem64_rip OWORD (emit_symbol "caml_absf_mask")) (res i 0)
764 | Lop(Iaddf | Isubf | Imulf | Idivf as floatop) ->
765 instr_for_floatop floatop (arg i 1) (res i 0)
766 | Lop(Ifloatofint) ->
767 I.cvtsi2sd (arg i 0) (res i 0)
768 | Lop(Iintoffloat) ->
769 I.cvttsd2si (arg i 0) (res i 0)
770 | Lop(Ispecific(Ilea addr)) ->
771 I.lea (addressing addr NONE i 0) (res i 0)
772 | Lop(Ispecific(Istore_int(n, addr, _))) ->
773 I.mov (nat n) (addressing addr QWORD i 0)
774 | Lop(Ispecific(Ioffset_loc(n, addr))) ->
775 I.add (int n) (addressing addr QWORD i 0)
776 | Lop(Ispecific(Ifloatarithmem(op, addr))) ->
777 instr_for_floatarithmem op (addressing addr REAL8 i 1) (res i 0)
778 | Lop(Ispecific(Ibswap 16)) ->
779 I.xchg ah al;
780 I.movzx (res16 i 0) (res i 0)
781 | Lop(Ispecific(Ibswap 32)) ->
782 I.bswap (res32 i 0);
783 I.movsxd (res32 i 0) (res i 0)
784 | Lop(Ispecific(Ibswap 64)) ->
785 I.bswap (res i 0)
786 | Lop(Ispecific(Ibswap _)) ->
787 assert false
788 | Lop(Ispecific Isqrtf) ->
789 if arg i 0 <> res i 0 then
790 I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
791 I.sqrtsd (arg i 0) (res i 0)
792 | Lop(Ispecific(Ifloatsqrtf addr)) ->
793 I.xorpd (res i 0) (res i 0); (* avoid partial register stall *)
794 I.sqrtsd (addressing addr REAL8 i 0) (res i 0)
795 | Lop(Ispecific(Isextend32)) ->
796 I.movsxd (arg32 i 0) (res i 0)
797 | Lop(Ispecific(Izextend32)) ->
798 I.mov (arg32 i 0) (res32 i 0)
799 | Lop (Iname_for_debugger _) -> ()
800 | Lreloadretaddr ->
801 ()
802 | Lreturn ->
803 output_epilogue begin fun () ->
804 I.ret ()
805 end
806 | Llabel lbl ->
807 emit_Llabel fallthrough lbl
808 | Lbranch lbl ->
809 I.jmp (label lbl)
810 | Lcondbranch(tst, lbl) ->
811 let lbl = label lbl in
812 begin match tst with
813 | Itruetest ->
814 output_test_zero i.arg.(0);
815 I.jne lbl
816 | Ifalsetest ->
817 output_test_zero i.arg.(0);
818 I.je lbl
819 | Iinttest cmp ->
820 I.cmp (arg i 1) (arg i 0);
821 I.j (cond cmp) lbl
822 | Iinttest_imm((Isigned Ceq | Isigned Cne |
823 Iunsigned Ceq | Iunsigned Cne) as cmp, 0) ->
824 output_test_zero i.arg.(0);
825 I.j (cond cmp) lbl
826 | Iinttest_imm(cmp, n) ->
827 I.cmp (int n) (arg i 0);
828 I.j (cond cmp) lbl
829 | Ifloattest cmp ->
830 emit_float_test cmp i lbl
831 | Ioddtest ->
832 I.test (int 1) (arg8 i 0);
833 I.jne lbl
834 | Ieventest ->
835 I.test (int 1) (arg8 i 0);
836 I.je lbl
837 end
838 | Lcondbranch3(lbl0, lbl1, lbl2) ->
839 I.cmp (int 1) (arg i 0);
840 begin match lbl0 with
841 | None -> ()
842 | Some lbl -> I.jb (label lbl)
843 end;
844 begin match lbl1 with
845 | None -> ()
846 | Some lbl -> I.je (label lbl)
847 end;
848 begin match lbl2 with
849 | None -> ()
850 | Some lbl -> I.ja (label lbl)
851 end
852 | Lswitch jumptbl ->
853 let lbl = emit_label (new_label()) in
854 (* rax and rdx are clobbered by the Lswitch,
855 meaning that no variable that is live across the Lswitch
856 is assigned to rax or rdx. However, the argument to Lswitch
857 can still be assigned to one of these two registers, so
858 we must be careful not to clobber it before use. *)
859 let (tmp1, tmp2) =
860 if i.arg.(0).loc = Reg 0 (* rax *)
861 then (phys_reg 4 (*rdx*), phys_reg 0 (*rax*))
862 else (phys_reg 0 (*rax*), phys_reg 4 (*rdx*)) in
863
864 I.lea (mem64_rip NONE lbl) (reg tmp1);
865 I.movsxd (mem64 DWORD 0 (arg64 i 0) ~scale:4 ~base:(reg64 tmp1))
866 (reg tmp2);
867 I.add (reg tmp2) (reg tmp1);
868 I.jmp (reg tmp1);
869
870 begin match system with
871 | S_mingw64 | S_cygwin -> D.section [".rdata"] (Some "dr") []
872 | S_macosx | S_win64 -> () (* with LLVM/OS X and MASM, use the text segment *)
873 | _ -> D.section [".rodata"] None []
874 end;
875 D.align 4;
876 _label lbl;
877 for i = 0 to Array.length jumptbl - 1 do
878 D.long (ConstSub (ConstLabel(emit_label jumptbl.(i)),
879 ConstLabel lbl))
880 done;
881 emit_named_text_section !function_name
882 | Lentertrap ->
883 ()
884 | Ladjust_trap_depth { delta_traps; } ->
885 (* each trap occupies 16 bytes on the stack *)
886 let delta = 16 * delta_traps in
887 cfi_adjust_cfa_offset delta;
888 stack_offset := !stack_offset + delta
889 | Lpushtrap { lbl_handler; } ->
890 let load_label_addr s arg =
891 if !Clflags.pic_code then
892 I.lea (mem64_rip NONE (emit_label s)) arg
893 else
894 I.mov (sym (emit_label s)) arg
895 in
896 load_label_addr lbl_handler r11;
897 I.push r11;
898 cfi_adjust_cfa_offset 8;
899 I.push (domain_field Domainstate.Domain_exception_pointer);
900 cfi_adjust_cfa_offset 8;
901 I.mov rsp (domain_field Domainstate.Domain_exception_pointer);
902 stack_offset := !stack_offset + 16;
903 | Lpoptrap ->
904 I.pop (domain_field Domainstate.Domain_exception_pointer);
905 cfi_adjust_cfa_offset (-8);
906 I.add (int 8) rsp;
907 cfi_adjust_cfa_offset (-8);
908 stack_offset := !stack_offset - 16
909 | Lraise k ->
910 (* No Spacetime instrumentation is required for [caml_raise_exn] and
911 [caml_reraise_exn]. The only function called that might affect the
912 trie is [caml_stash_backtrace], and it does not. *)
913 begin match k with
914 | Lambda.Raise_regular ->
915 I.mov (int 0) (domain_field Domainstate.Domain_backtrace_pos);
916 emit_call "caml_raise_exn";
917 record_frame Reg.Set.empty true i.dbg
918 | Lambda.Raise_reraise ->
919 emit_call "caml_raise_exn";
920 record_frame Reg.Set.empty true i.dbg
921 | Lambda.Raise_notrace ->
922 I.mov (domain_field Domainstate.Domain_exception_pointer) rsp;
923 I.pop (domain_field Domainstate.Domain_exception_pointer);
924 I.pop r11;
925 I.jmp r11
926 end
927
928 let rec emit_all fallthrough i =
929 match i.desc with
930 | Lend -> ()
931 | _ ->
932 emit_instr fallthrough i;
933 emit_all (Linear.has_fallthrough i.desc) i.next
934
935 let all_functions = ref []
936
937 (* Emission of a function declaration *)
938
939 let fundecl fundecl =
940 function_name := fundecl.fun_name;
941 fastcode_flag := fundecl.fun_fast;
942 tailrec_entry_point := fundecl.fun_tailrec_entry_point_label;
943 stack_offset := 0;
944 call_gc_sites := [];
945 bound_error_sites := [];
946 bound_error_call := 0;
947 for i = 0 to Proc.num_register_classes - 1 do
948 num_stack_slots.(i) <- fundecl.fun_num_stack_slots.(i);
949 done;
950 prologue_required := fundecl.fun_prologue_required;
951 frame_required := fundecl.fun_frame_required;
952 all_functions := fundecl :: !all_functions;
953 emit_named_text_section !function_name;
954 D.align 16;
955 add_def_symbol fundecl.fun_name;
956 if system = S_macosx
957 && not !Clflags.output_c_object
958 && is_generic_function fundecl.fun_name
959 then (* PR#4690 *)
960 D.private_extern (emit_symbol fundecl.fun_name)
961 else
962 D.global (emit_symbol fundecl.fun_name);
963 D.label (emit_symbol fundecl.fun_name);
964 emit_debug_info fundecl.fun_dbg;
965 cfi_startproc ();
966 emit_all true fundecl.fun_body;
967 List.iter emit_call_gc !call_gc_sites;
968 emit_call_bound_errors ();
969 if !frame_required then begin
970 let n = frame_size() - 8 - (if fp then 8 else 0) in
971 if n <> 0
972 then begin
973 cfi_adjust_cfa_offset (-n);
974 end;
975 end;
976 cfi_endproc ();
977 begin match system with
978 | S_gnu | S_linux ->
979 D.type_ (emit_symbol fundecl.fun_name) "@function";
980 D.size (emit_symbol fundecl.fun_name)
981 (ConstSub (
982 ConstThis,
983 ConstLabel (emit_symbol fundecl.fun_name)))
984 | _ -> ()
985 end
986
987 (* Emission of data *)
988
989 let emit_item = function
990 | Cglobal_symbol s -> D.global (emit_symbol s)
991 | Cdefine_symbol s -> add_def_symbol s; _label (emit_symbol s)
992 | Cint8 n -> D.byte (const n)
993 | Cint16 n -> D.word (const n)
994 | Cint32 n -> D.long (const_nat n)
995 | Cint n -> D.qword (const_nat n)
996 | Csingle f -> D.long (Const (Int64.of_int32 (Int32.bits_of_float f)))
997 | Cdouble f -> D.qword (Const (Int64.bits_of_float f))
998 | Csymbol_address s -> add_used_symbol s; D.qword (ConstLabel (emit_symbol s))
999 | Cstring s -> D.bytes s
1000 | Cskip n -> if n > 0 then D.space n
1001 | Calign n -> D.align n
1002
1003 let data l =
1004 D.data ();
1005 D.align 8;
1006 List.iter emit_item l
1007
1008 (* Beginning / end of an assembly file *)
1009
1010 let begin_assembly() =
1011 X86_proc.reset_asm_code ();
1012 reset_debug_info(); (* PR#5603 *)
1013 reset_imp_table();
1014 float_constants := [];
1015 all_functions := [];
1016 if system = S_win64 then begin
1017 D.extrn "caml_call_gc" NEAR;
1018 D.extrn "caml_call_gc1" NEAR;
1019 D.extrn "caml_call_gc2" NEAR;
1020 D.extrn "caml_call_gc3" NEAR;
1021 D.extrn "caml_c_call" NEAR;
1022 D.extrn "caml_allocN" NEAR;
1023 D.extrn "caml_alloc1" NEAR;
1024 D.extrn "caml_alloc2" NEAR;
1025 D.extrn "caml_alloc3" NEAR;
1026 D.extrn "caml_ml_array_bound_error" NEAR;
1027 D.extrn "caml_raise_exn" NEAR;
1028 end;
1029
1030
1031 if !Clflags.dlcode || Arch.win64 then begin
1032 (* from amd64.S; could emit these constants on demand *)
1033 begin match system with
1034 | S_macosx -> D.section ["__TEXT";"__literal16"] None ["16byte_literals"]
1035 | S_mingw64 | S_cygwin -> D.section [".rdata"] (Some "dr") []
1036 | S_win64 -> D.data ()
1037 | _ -> D.section [".rodata.cst8"] (Some "a") ["@progbits"]
1038 end;
1039 D.align 16;
1040 _label (emit_symbol "caml_negf_mask");
1041 D.qword (Const 0x8000000000000000L);
1042 D.qword (Const 0L);
1043 D.align 16;
1044 _label (emit_symbol "caml_absf_mask");
1045 D.qword (Const 0x7FFFFFFFFFFFFFFFL);
1046 D.qword (Const 0xFFFFFFFFFFFFFFFFL);
1047 end;
1048
1049 D.data ();
1050 emit_global_label "data_begin";
1051
1052 emit_named_text_section (Compilenv.make_symbol (Some "code_begin"));
1053 emit_global_label "code_begin";
1054 if system = S_macosx then I.nop (); (* PR#4690 *)
1055 ()
1056
1057 let emit_spacetime_shapes () =
1058 D.data ();
1059 D.align 8;
1060 emit_global_label "spacetime_shapes";
1061 List.iter (fun fundecl ->
1062 (* CR-someday mshinwell: some of this should be platform independent *)
1063 begin match fundecl.fun_spacetime_shape with
1064 | None -> ()
1065 | Some shape ->
1066 let funsym = emit_symbol fundecl.fun_name in
1067 D.comment ("Shape for " ^ funsym ^ ":");
1068 D.qword (ConstLabel funsym);
1069 List.iter (fun (part_of_shape, label) ->
1070 let tag =
1071 match part_of_shape with
1072 | Direct_call_point _ -> 1
1073 | Indirect_call_point -> 2
1074 | Allocation_point -> 3
1075 in
1076 D.qword (Const (Int64.of_int tag));
1077 D.qword (ConstLabel (emit_label label));
1078 begin match part_of_shape with
1079 | Direct_call_point { callee; } ->
1080 D.qword (ConstLabel (emit_symbol callee))
1081 | Indirect_call_point -> ()
1082 | Allocation_point -> ()
1083 end)
1084 shape;
1085 D.qword (Const 0L)
1086 end)
1087 !all_functions;
1088 D.qword (Const 0L);
1089 D.comment "End of Spacetime shapes."
1090
1091 let end_assembly() =
1092 if !float_constants <> [] then begin
1093 begin match system with
1094 | S_macosx -> D.section ["__TEXT";"__literal8"] None ["8byte_literals"]
1095 | S_mingw64 | S_cygwin -> D.section [".rdata"] (Some "dr") []
1096 | S_win64 -> D.data ()
1097 | _ -> D.section [".rodata.cst8"] (Some "a") ["@progbits"]
1098 end;
1099 List.iter (fun (cst,lbl) -> emit_float_constant cst lbl) !float_constants
1100 end;
1101
1102 emit_named_text_section (Compilenv.make_symbol (Some "code_end"));
1103 if system = S_macosx then I.nop ();
1104 (* suppress "ld warning: atom sorting error" *)
1105
1106 emit_global_label "code_end";
1107
1108 emit_imp_table();
1109
1110 D.data ();
1111 D.qword (const 0); (* PR#6329 *)
1112 emit_global_label "data_end";
1113 D.qword (const 0);
1114
1115 D.align 8; (* PR#7591 *)
1116 emit_global_label "frametable";
1117
1118 let setcnt = ref 0 in
1119 emit_frames
1120 { efa_code_label = (fun l -> D.qword (ConstLabel (emit_label l)));
1121 efa_data_label = (fun l -> D.qword (ConstLabel (emit_label l)));
1122 efa_16 = (fun n -> D.word (const n));
1123 efa_32 = (fun n -> D.long (const_32 n));
1124 efa_word = (fun n -> D.qword (const n));
1125 efa_align = D.align;
1126 efa_label_rel =
1127 (fun lbl ofs ->
1128 let c =
1129 ConstAdd (
1130 ConstSub(ConstLabel(emit_label lbl), ConstThis),
1131 const_32 ofs
1132 ) in
1133 if system = S_macosx then begin
1134 incr setcnt;
1135 let s = Printf.sprintf "L$set$%d" !setcnt in
1136 D.setvar (s, c);
1137 D.long (ConstLabel s)
1138 end else
1139 D.long c
1140 );
1141 efa_def_label = (fun l -> _label (emit_label l));
1142 efa_string = (fun s -> D.bytes (s ^ "\000"))
1143 };
1144
1145 if Config.spacetime then begin
1146 emit_spacetime_shapes ()
1147 end;
1148
1149 if system = S_linux then
1150 (* Mark stack as non-executable, PR#4564 *)
1151 D.section [".note.GNU-stack"] (Some "") [ "%progbits" ];
1152
1153 if system = S_win64 then begin
1154 D.comment "External functions";
1155 String.Set.iter
1156 (fun s ->
1157 if not (String.Set.mem s !symbols_defined) then
1158 D.extrn (emit_symbol s) NEAR)
1159 !symbols_used;
1160 symbols_used := String.Set.empty;
1161 symbols_defined := String.Set.empty;
1162 end;
1163
1164 let asm =
1165 if !Emitaux.create_asm_file then
1166 Some
1167 (
1168 (if X86_proc.masm then X86_masm.generate_asm
1169 else X86_gas.generate_asm) !Emitaux.output_channel
1170 )
1171 else
1172 None
1173 in
1174 X86_proc.generate_code asm
1175