1 (**************************************************************************)
2 (* *)
3 (* OCaml *)
4 (* *)
5 (* Xavier Leroy, projet Cristal, INRIA Rocquencourt *)
6 (* *)
7 (* Copyright 2000 Institut National de Recherche en Informatique et *)
8 (* en Automatique. *)
9 (* *)
10 (* All rights reserved. This file is distributed under the terms of *)
11 (* the GNU Lesser General Public License version 2.1, with the *)
12 (* special exception on linking described in the file LICENSE. *)
13 (* *)
14 (**************************************************************************)
15
16 (* Instruction selection for the AMD64 *)
17
18 open Arch
19 open Proc
20 open Cmm
21 open Mach
22
23 (* Auxiliary for recognizing addressing modes *)
24
25 type addressing_expr =
26 Asymbol of string
27 | Alinear of expression
28 | Aadd of expression * expression
29 | Ascale of expression * int
30 | Ascaledadd of expression * expression * int
31
32 let rec select_addr exp =
33 match exp with
34 Cconst_symbol (s, _) when not !Clflags.dlcode ->
35 (Asymbol s, 0)
36 | Cop((Caddi | Caddv | Cadda), [arg; Cconst_int (m, _)], _) ->
37 let (a, n) = select_addr arg in (a, n + m)
38 | Cop(Csubi, [arg; Cconst_int (m, _)], _) ->
39 let (a, n) = select_addr arg in (a, n - m)
40 | Cop((Caddi | Caddv | Cadda), [Cconst_int (m, _); arg], _) ->
41 let (a, n) = select_addr arg in (a, n + m)
42 | Cop(Clsl, [arg; Cconst_int((1|2|3 as shift), _)], _) ->
43 begin match select_addr arg with
44 (Alinear e, n) -> (Ascale(e, 1 lsl shift), n lsl shift)
45 | _ -> (Alinear exp, 0)
46 end
47 | Cop(Cmuli, [arg; Cconst_int((2|4|8 as mult), _)], _) ->
48 begin match select_addr arg with
49 (Alinear e, n) -> (Ascale(e, mult), n * mult)
50 | _ -> (Alinear exp, 0)
51 end
52 | Cop(Cmuli, [Cconst_int((2|4|8 as mult), _); arg], _) ->
53 begin match select_addr arg with
54 (Alinear e, n) -> (Ascale(e, mult), n * mult)
55 | _ -> (Alinear exp, 0)
56 end
57 | Cop((Caddi | Caddv | Cadda), [arg1; arg2], _) ->
58 begin match (select_addr arg1, select_addr arg2) with
59 ((Alinear e1, n1), (Alinear e2, n2)) ->
60 (Aadd(e1, e2), n1 + n2)
61 | ((Alinear e1, n1), (Ascale(e2, scale), n2)) ->
62 (Ascaledadd(e1, e2, scale), n1 + n2)
63 | ((Ascale(e1, scale), n1), (Alinear e2, n2)) ->
64 (Ascaledadd(e2, e1, scale), n1 + n2)
65 | (_, (Ascale(e2, scale), n2)) ->
66 (Ascaledadd(arg1, e2, scale), n2)
67 | ((Ascale(e1, scale), n1), _) ->
68 (Ascaledadd(arg2, e1, scale), n1)
69 | _ ->
70 (Aadd(arg1, arg2), 0)
71 end
72 | arg ->
73 (Alinear arg, 0)
74
75 (* Special constraints on operand and result registers *)
76
77 exception Use_default
78
79 let rax = phys_reg 0
80 let rcx = phys_reg 5
81 let rdx = phys_reg 4
82
83 let pseudoregs_for_operation op arg res =
84 match op with
85 (* Two-address binary operations: arg.(0) and res.(0) must be the same *)
86 Iintop(Iadd|Isub|Imul|Iand|Ior|Ixor) | Iaddf|Isubf|Imulf|Idivf ->
87 ([|res.(0); arg.(1)|], res)
88 (* One-address unary operations: arg.(0) and res.(0) must be the same *)
89 | Iintop_imm((Iadd|Isub|Imul|Iand|Ior|Ixor|Ilsl|Ilsr|Iasr), _)
90 | Iabsf | Inegf
91 | Ispecific(Ibswap (32|64)) ->
92 (res, res)
93 (* For xchg, args must be a register allowing access to high 8 bit register
94 (rax, rbx, rcx or rdx). Keep it simple, just force the argument in rax. *)
95 | Ispecific(Ibswap 16) ->
96 ([| rax |], [| rax |])
97 (* For imulq, first arg must be in rax, rax is clobbered, and result is in
98 rdx. *)
99 | Iintop(Imulh) ->
100 ([| rax; arg.(1) |], [| rdx |])
101 | Ispecific(Ifloatarithmem(_,_)) ->
102 let arg' = Array.copy arg in
103 arg'.(0) <- res.(0);
104 (arg', res)
105 (* For shifts with variable shift count, second arg must be in rcx *)
106 | Iintop(Ilsl|Ilsr|Iasr) ->
107 ([|res.(0); rcx|], res)
108 (* For div and mod, first arg must be in rax, rdx is clobbered,
109 and result is in rax or rdx respectively.
110 Keep it simple, just force second argument in rcx. *)
111 | Iintop(Idiv) ->
112 ([| rax; rcx |], [| rax |])
113 | Iintop(Imod) ->
114 ([| rax; rcx |], [| rdx |])
115 (* Other instructions are regular *)
116 | _ -> raise Use_default
117
118 (* If you update [inline_ops], you may need to update [is_simple_expr] and/or
119 [effects_of], below. *)
120 let inline_ops =
121 [ "sqrt"; "caml_bswap16_direct"; "caml_int32_direct_bswap";
122 "caml_int64_direct_bswap"; "caml_nativeint_direct_bswap" ]
123
124 (* The selector class *)
125
126 class selector = object (self)
127
128 inherit Spacetime_profiling.instruction_selection as super
129
130 method is_immediate n = n <= 0x7FFF_FFFF && n >= (-1-0x7FFF_FFFF)
131 (* -1-.... : hack so that this can be compiled on 32-bit
132 (cf 'make check_all_arches') *)
133
134 method is_immediate_natint n = n <= 0x7FFFFFFFn && n >= -0x80000000n
135
136 method! is_simple_expr e =
137 match e with
138 | Cop(Cextcall (fn, _, _, _), args, _)
139 when List.mem fn inline_ops ->
140 (* inlined ops are simple if their arguments are *)
141 List.for_all self#is_simple_expr args
142 | _ ->
143 super#is_simple_expr e
144
145 method! effects_of e =
146 match e with
147 | Cop(Cextcall(fn, _, _, _), args, _)
148 when List.mem fn inline_ops ->
149 Selectgen.Effect_and_coeffect.join_list_map args self#effects_of
150 | _ ->
151 super#effects_of e
152
153 method select_addressing _chunk exp =
154 let (a, d) = select_addr exp in
155 (* PR#4625: displacement must be a signed 32-bit immediate *)
156 if not (self # is_immediate d)
157 then (Iindexed 0, exp)
158 else match a with
159 | Asymbol s ->
160 (Ibased(s, d), Ctuple [])
161 | Alinear e ->
162 (Iindexed d, e)
163 | Aadd(e1, e2) ->
164 (Iindexed2 d, Ctuple[e1; e2])
165 | Ascale(e, scale) ->
166 (Iscaled(scale, d), e)
167 | Ascaledadd(e1, e2, scale) ->
168 (Iindexed2scaled(scale, d), Ctuple[e1; e2])
169
170 method! select_store is_assign addr exp =
171 match exp with
172 Cconst_int (n, _dbg) when self#is_immediate n ->
173 (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
174 | (Cconst_natint (n, _dbg)) when self#is_immediate_natint n ->
175 (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
176 | (Cblockheader(n, _dbg))
177 when self#is_immediate_natint n && not Config.spacetime ->
178 (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
179 | Cconst_pointer (n, _dbg) when self#is_immediate n ->
180 (Ispecific(Istore_int(Nativeint.of_int n, addr, is_assign)), Ctuple [])
181 | Cconst_natpointer (n, _dbg) when self#is_immediate_natint n ->
182 (Ispecific(Istore_int(n, addr, is_assign)), Ctuple [])
183 | _ ->
184 super#select_store is_assign addr exp
185
186 method! select_operation op args dbg =
187 match op with
188 (* Recognize the LEA instruction *)
189 Caddi | Caddv | Cadda | Csubi ->
190 begin match self#select_addressing Word_int (Cop(op, args, dbg)) with
191 (Iindexed _, _)
192 | (Iindexed2 0, _) -> super#select_operation op args dbg
193 | (addr, arg) -> (Ispecific(Ilea addr), [arg])
194 end
195 (* Recognize float arithmetic with memory. *)
196 | Caddf ->
197 self#select_floatarith true Iaddf Ifloatadd args
198 | Csubf ->
199 self#select_floatarith false Isubf Ifloatsub args
200 | Cmulf ->
201 self#select_floatarith true Imulf Ifloatmul args
202 | Cdivf ->
203 self#select_floatarith false Idivf Ifloatdiv args
204 | Cextcall("sqrt", _, false, _) ->
205 begin match args with
206 [Cop(Cload ((Double|Double_u as chunk), _), [loc], _dbg)] ->
207 let (addr, arg) = self#select_addressing chunk loc in
208 (Ispecific(Ifloatsqrtf addr), [arg])
209 | [arg] ->
210 (Ispecific Isqrtf, [arg])
211 | _ ->
212 assert false
213 end
214 (* Recognize store instructions *)
215 | Cstore ((Word_int|Word_val as chunk), _init) ->
216 begin match args with
217 [loc; Cop(Caddi, [Cop(Cload _, [loc'], _); Cconst_int (n, _dbg)], _)]
218 when loc = loc' && self#is_immediate n ->
219 let (addr, arg) = self#select_addressing chunk loc in
220 (Ispecific(Ioffset_loc(n, addr)), [arg])
221 | _ ->
222 super#select_operation op args dbg
223 end
224 | Cextcall("caml_bswap16_direct", _, _, _) ->
225 (Ispecific (Ibswap 16), args)
226 | Cextcall("caml_int32_direct_bswap", _, _, _) ->
227 (Ispecific (Ibswap 32), args)
228 | Cextcall("caml_int64_direct_bswap", _, _, _)
229 | Cextcall("caml_nativeint_direct_bswap", _, _, _) ->
230 (Ispecific (Ibswap 64), args)
231 (* AMD64 does not support immediate operands for multiply high signed *)
232 | Cmulhi ->
233 (Iintop Imulh, args)
234 | Casr ->
235 begin match args with
236 (* Recognize sign extension *)
237 [Cop(Clsl, [k; Cconst_int (32, _)], _); Cconst_int (32, _)] ->
238 (Ispecific Isextend32, [k])
239 | _ -> super#select_operation op args dbg
240 end
241 (* Recognize zero extension *)
242 | Cand ->
243 begin match args with
244 | [arg; Cconst_int (0xffff_ffff, _)]
245 | [arg; Cconst_natint (0xffff_ffffn, _)]
246 | [Cconst_int (0xffff_ffff, _); arg]
247 | [Cconst_natint (0xffff_ffffn, _); arg] ->
248 Ispecific Izextend32, [arg]
249 | _ -> super#select_operation op args dbg
250 end
251 | _ -> super#select_operation op args dbg
252
253 (* Recognize float arithmetic with mem *)
254
255 method select_floatarith commutative regular_op mem_op args =
256 match args with
257 [arg1; Cop(Cload ((Double|Double_u as chunk), _), [loc2], _)] ->
258 let (addr, arg2) = self#select_addressing chunk loc2 in
259 (Ispecific(Ifloatarithmem(mem_op, addr)),
260 [arg1; arg2])
261 | [Cop(Cload ((Double|Double_u as chunk), _), [loc1], _); arg2]
262 when commutative ->
263 let (addr, arg1) = self#select_addressing chunk loc1 in
264 (Ispecific(Ifloatarithmem(mem_op, addr)),
265 [arg2; arg1])
266 | [arg1; arg2] ->
267 (regular_op, [arg1; arg2])
268 | _ ->
269 assert false
270
271 method! mark_c_tailcall =
272 contains_calls := true
273
274 (* Deal with register constraints *)
275
276 method! insert_op_debug env op dbg rs rd =
277 try
278 let (rsrc, rdst) = pseudoregs_for_operation op rs rd in
279 self#insert_moves env rs rsrc;
280 self#insert_debug env (Iop op) dbg rsrc rdst;
281 self#insert_moves env rdst rd;
282 rd
283 with Use_default ->
284 super#insert_op_debug env op dbg rs rd
285
286 end
287
288 let fundecl f = (new selector)#emit_fundecl f
289