package / ocaml-base-compiler.4.10.0 / asmcomp / spacetime_profiling.ml
1 (**************************************************************************)
2 (* *)
3 (* OCaml *)
4 (* *)
5 (* Mark Shinwell and Leo White, Jane Street Europe *)
6 (* *)
7 (* Copyright 2015--2018 Jane Street Group LLC *)
8 (* *)
9 (* All rights reserved. This file is distributed under the terms of *)
10 (* the GNU Lesser General Public License version 2.1, with the *)
11 (* special exception on linking described in the file LICENSE. *)
12 (* *)
13 (**************************************************************************)
14
15 [@@@ocaml.warning "+a-4-30-40-41-42"]
16
17 module V = Backend_var
18 module VP = Backend_var.With_provenance
19
20 let node_num_header_words = 2 (* [Node_num_header_words] in the runtime. *)
21 let index_within_node = ref node_num_header_words
22 (* The [lazy]s are to ensure that we don't create [V.t]s at toplevel
23 when not using Spacetime profiling. (This could cause stamps to differ
24 between bytecode and native .cmis when no .mli is present, e.g.
25 arch.ml.) *)
26 let spacetime_node = ref (lazy (Cmm.Cvar (V.create_local "dummy")))
27 let spacetime_node_ident = ref (lazy (V.create_local "dummy"))
28 let current_function_label = ref None
29 let direct_tail_call_point_indexes = ref []
30
31 let reverse_shape = ref ([] : Mach.spacetime_shape)
32
33 (* CR-someday mshinwell: This code could be updated to use [placeholder_dbg] as
34 in [Cmmgen]. *)
35 let cconst_int i = Cmm.Cconst_int (i, Debuginfo.none)
36 let cconst_natint i = Cmm.Cconst_natint (i, Debuginfo.none)
37 let cconst_symbol s = Cmm.Cconst_symbol (s, Debuginfo.none)
38
39 let something_was_instrumented () =
40 !index_within_node > node_num_header_words
41
42 let next_index_within_node ~part_of_shape ~label =
43 let index = !index_within_node in
44 begin match part_of_shape with
45 | Mach.Direct_call_point _ ->
46 incr index_within_node;
47 if Config.enable_call_counts then begin
48 incr index_within_node
49 end
50 | Mach.Indirect_call_point ->
51 incr index_within_node
52 | Mach.Allocation_point ->
53 incr index_within_node;
54 incr index_within_node;
55 incr index_within_node
56 end;
57 reverse_shape := (part_of_shape, label) :: !reverse_shape;
58 index
59
60 let reset ~spacetime_node_ident:ident ~function_label =
61 index_within_node := node_num_header_words;
62 spacetime_node := lazy (Cmm.Cvar ident);
63 spacetime_node_ident := lazy ident;
64 direct_tail_call_point_indexes := [];
65 current_function_label := Some function_label;
66 reverse_shape := []
67
68 let code_for_function_prologue ~function_name ~fun_dbg:dbg ~node_hole =
69 let node = V.create_local "node" in
70 let new_node = V.create_local "new_node" in
71 let must_allocate_node = V.create_local "must_allocate_node" in
72 let is_new_node = V.create_local "is_new_node" in
73 let no_tail_calls = List.length !direct_tail_call_point_indexes < 1 in
74 let open Cmm in
75 let initialize_direct_tail_call_points_and_return_node =
76 let new_node_encoded = V.create_local "new_node_encoded" in
77 (* The callee node pointers within direct tail call points must initially
78 point back at the start of the current node and be marked as per
79 [Encode_tail_caller_node] in the runtime. *)
80 let indexes = !direct_tail_call_point_indexes in
81 let body =
82 List.fold_left (fun init_code index ->
83 (* Cf. [Direct_callee_node] in the runtime. *)
84 let offset_in_bytes = index * Arch.size_addr in
85 Csequence (
86 Cop (Cstore (Word_int, Lambda.Assignment),
87 [Cop (Caddi, [Cvar new_node; cconst_int offset_in_bytes], dbg);
88 Cvar new_node_encoded], dbg),
89 init_code))
90 (Cvar new_node)
91 indexes
92 in
93 match indexes with
94 | [] -> body
95 | _ ->
96 Clet (VP.create new_node_encoded,
97 (* Cf. [Encode_tail_caller_node] in the runtime. *)
98 Cop (Cor, [Cvar new_node; cconst_int 1], dbg),
99 body)
100 in
101 let pc = V.create_local "pc" in
102 Clet (VP.create node,
103 Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg),
104 Clet (VP.create must_allocate_node,
105 Cop (Cand, [Cvar node; cconst_int 1], dbg),
106 Cifthenelse (
107 Cop (Ccmpi Cne, [Cvar must_allocate_node; cconst_int 1], dbg),
108 dbg,
109 Cvar node,
110 dbg,
111 Clet (VP.create is_new_node,
112 Clet (VP.create pc, cconst_symbol function_name,
113 Cop (Cextcall ("caml_spacetime_allocate_node",
114 [| Int |], false, None),
115 [cconst_int (1 (* header *) + !index_within_node);
116 Cvar pc;
117 Cvar node_hole;
118 ],
119 dbg)),
120 Clet (VP.create new_node,
121 Cop (Cload (Word_int, Asttypes.Mutable), [Cvar node_hole], dbg),
122 if no_tail_calls then Cvar new_node
123 else
124 Cifthenelse (
125 Cop (Ccmpi Ceq, [Cvar is_new_node; cconst_int 0], dbg),
126 dbg,
127 Cvar new_node,
128 dbg,
129 initialize_direct_tail_call_points_and_return_node,
130 dbg))),
131 dbg)))
132
133 let code_for_blockheader ~value's_header ~node ~dbg =
134 let num_words = Nativeint.shift_right_logical value's_header 10 in
135 let existing_profinfo = V.create_local "existing_profinfo" in
136 let existing_count = V.create_local "existing_count" in
137 let profinfo = V.create_local "profinfo" in
138 let address_of_profinfo = V.create_local "address_of_profinfo" in
139 let label = Cmm.new_label () in
140 let index_within_node =
141 next_index_within_node ~part_of_shape:Mach.Allocation_point ~label
142 in
143 let offset_into_node = Arch.size_addr * index_within_node in
144 let open Cmm in
145 let generate_new_profinfo =
146 (* This will generate a static branch to a function that should usually
147 be in the cache, which hopefully gives a good code size/performance
148 balance.
149 The "Some label" is important: it provides the link between the shape
150 table, the allocation point, and the frame descriptor table---enabling
151 the latter table to be used for resolving a program counter at such
152 a point to a location.
153 *)
154 Cop (Cextcall ("caml_spacetime_generate_profinfo", [| Int |],
155 false, Some label),
156 [Cvar address_of_profinfo;
157 cconst_int (index_within_node + 1)],
158 dbg)
159 in
160 (* Check if we have already allocated a profinfo value for this allocation
161 point with the current backtrace. If so, use that value; if not,
162 allocate a new one. *)
163 Clet (VP.create address_of_profinfo,
164 Cop (Caddi, [
165 Cvar node;
166 cconst_int offset_into_node;
167 ], dbg),
168 Clet (VP.create existing_profinfo,
169 Cop (Cload (Word_int, Asttypes.Mutable), [Cvar address_of_profinfo],
170 dbg),
171 Clet (VP.create profinfo,
172 Cifthenelse (
173 Cop (Ccmpi Cne, [Cvar existing_profinfo; cconst_int 1 (* () *)], dbg),
174 dbg,
175 Cvar existing_profinfo,
176 dbg,
177 generate_new_profinfo,
178 dbg),
179 Clet (VP.create existing_count,
180 Cop (Cload (Word_int, Asttypes.Mutable), [
181 Cop (Caddi,
182 [Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg)
183 ], dbg),
184 Csequence (
185 Cop (Cstore (Word_int, Lambda.Assignment),
186 [Cop (Caddi,
187 [Cvar address_of_profinfo; cconst_int Arch.size_addr], dbg);
188 Cop (Caddi, [
189 Cvar existing_count;
190 (* N.B. "*2" since the count is an OCaml integer.
191 The "1 +" is to count the value's header. *)
192 cconst_int (2 * (1 + Nativeint.to_int num_words));
193 ], dbg);
194 ], dbg),
195 (* [profinfo] looks like a black [Infix_tag] header. Instead of
196 having to mask [profinfo] before ORing it with the desired
197 header, we can use an XOR trick, to keep code size down. *)
198 let value's_header =
199 Nativeint.logxor value's_header
200 (Nativeint.logor
201 ((Nativeint.logor (Nativeint.of_int Obj.infix_tag)
202 (Nativeint.shift_left 3n (* <- Caml_black *) 8)))
203 (Nativeint.shift_left
204 (* The following is the [Infix_offset_val], in words. *)
205 (Nativeint.of_int (index_within_node + 1)) 10))
206 in
207 Cop (Cxor, [Cvar profinfo; cconst_natint value's_header], dbg))))))
208
209 type callee =
210 | Direct of string
211 | Indirect of Cmm.expression
212
213 let code_for_call ~node ~callee ~is_tail ~label dbg =
214 (* We treat self recursive calls as tail calls to avoid blow-ups in the
215 graph. *)
216 let is_self_recursive_call =
217 match callee with
218 | Direct callee ->
219 begin match !current_function_label with
220 | None -> Misc.fatal_error "[current_function_label] not set"
221 | Some label -> String.equal callee label
222 end
223 | Indirect _ -> false
224 in
225 let is_tail = is_tail || is_self_recursive_call in
226 let index_within_node =
227 match callee with
228 | Direct callee ->
229 next_index_within_node
230 ~part_of_shape:(Mach.Direct_call_point { callee; })
231 ~label
232 | Indirect _ ->
233 next_index_within_node ~part_of_shape:Mach.Indirect_call_point ~label
234 in
235 begin match callee with
236 (* If this is a direct tail call point, we need to note down its index,
237 so the correct initialization code can be emitted in the prologue. *)
238 | Direct _ when is_tail ->
239 direct_tail_call_point_indexes :=
240 index_within_node::!direct_tail_call_point_indexes
241 | Direct _ | Indirect _ -> ()
242 end;
243 let place_within_node = V.create_local "place_within_node" in
244 let open Cmm in
245 Clet (VP.create place_within_node,
246 Cop (Caddi, [node; cconst_int (index_within_node * Arch.size_addr)], dbg),
247 (* The following code returns the address that is to be moved into the
248 (hard) node hole pointer register immediately before the call.
249 (That move is inserted in [Selectgen].) *)
250 match callee with
251 | Direct _callee ->
252 if Config.enable_call_counts then begin
253 let count_addr = V.create_local "call_count_addr" in
254 let count = V.create_local "call_count" in
255 Clet (VP.create count_addr,
256 Cop (Caddi, [Cvar place_within_node; cconst_int Arch.size_addr], dbg),
257 Clet (VP.create count,
258 Cop (Cload (Word_int, Asttypes.Mutable), [Cvar count_addr], dbg),
259 Csequence (
260 Cop (Cstore (Word_int, Lambda.Assignment),
261 (* Adding 2 really means adding 1; the count is encoded
262 as an OCaml integer. *)
263 [Cvar count_addr; Cop (Caddi, [Cvar count; cconst_int 2], dbg)],
264 dbg),
265 Cvar place_within_node)))
266 end else begin
267 Cvar place_within_node
268 end
269 | Indirect callee ->
270 let caller_node =
271 if is_tail then node
272 else cconst_int 1 (* [Val_unit] *)
273 in
274 Cop (Cextcall ("caml_spacetime_indirect_node_hole_ptr",
275 [| Int |], false, None),
276 [callee; Cvar place_within_node; caller_node],
277 dbg))
278
279 class virtual instruction_selection = object (self)
280 inherit Selectgen.selector_generic as super
281
282 (* [disable_instrumentation] ensures that we don't try to instrument the
283 instrumentation... *)
284 val mutable disable_instrumentation = false
285
286 method private instrument_direct_call ~env ~func ~is_tail ~label_after dbg =
287 let instrumentation =
288 code_for_call
289 ~node:(Lazy.force !spacetime_node)
290 ~callee:(Direct func)
291 ~is_tail
292 ~label:label_after
293 dbg
294 in
295 match self#emit_expr env instrumentation with
296 | None -> assert false
297 | Some reg -> Some reg
298
299 method private instrument_indirect_call ~env ~callee ~is_tail
300 ~label_after dbg =
301 (* [callee] is a pseudoregister, so we have to bind it in the environment
302 and reference the variable to which it is bound. *)
303 let callee_ident = V.create_local "callee" in
304 let env = Selectgen.env_add (VP.create callee_ident) [| callee |] env in
305 let instrumentation =
306 code_for_call
307 ~node:(Lazy.force !spacetime_node)
308 ~callee:(Indirect (Cmm.Cvar callee_ident))
309 ~is_tail
310 ~label:label_after
311 dbg
312 in
313 match self#emit_expr env instrumentation with
314 | None -> assert false
315 | Some reg -> Some reg
316
317 method private can_instrument () =
318 Config.spacetime && not disable_instrumentation
319
320 method! about_to_emit_call env desc arg dbg =
321 if not (self#can_instrument ()) then None
322 else
323 let module M = Mach in
324 match desc with
325 | M.Iop (M.Icall_imm { func; label_after; }) ->
326 assert (Array.length arg = 0);
327 self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg
328 | M.Iop (M.Icall_ind { label_after; }) ->
329 assert (Array.length arg = 1);
330 self#instrument_indirect_call ~env ~callee:arg.(0)
331 ~is_tail:false ~label_after dbg
332 | M.Iop (M.Itailcall_imm { func; label_after; }) ->
333 assert (Array.length arg = 0);
334 self#instrument_direct_call ~env ~func ~is_tail:true ~label_after dbg
335 | M.Iop (M.Itailcall_ind { label_after; }) ->
336 assert (Array.length arg = 1);
337 self#instrument_indirect_call ~env ~callee:arg.(0)
338 ~is_tail:true ~label_after dbg
339 | M.Iop (M.Iextcall { func; alloc = true; label_after; }) ->
340 (* N.B. No need to instrument "noalloc" external calls. *)
341 assert (Array.length arg = 0);
342 self#instrument_direct_call ~env ~func ~is_tail:false ~label_after dbg
343 | _ -> None
344
345 method private instrument_blockheader ~env ~value's_header ~dbg =
346 let instrumentation =
347 code_for_blockheader
348 ~node:(Lazy.force !spacetime_node_ident)
349 ~value's_header ~dbg
350 in
351 self#emit_expr env instrumentation
352
353 method private emit_prologue f ~node_hole ~env =
354 (* We don't need the prologue unless we inserted some instrumentation.
355 This corresponds to adding the prologue if the function contains one
356 or more call or allocation points. *)
357 if something_was_instrumented () then begin
358 let prologue_cmm =
359 code_for_function_prologue ~function_name:f.Cmm.fun_name ~node_hole
360 ~fun_dbg:f.Cmm.fun_dbg
361 in
362 disable_instrumentation <- true;
363 let node_temp_reg =
364 match self#emit_expr env prologue_cmm with
365 | None ->
366 Misc.fatal_error "Spacetime prologue instruction \
367 selection did not yield a destination register"
368 | Some node_temp_reg -> node_temp_reg
369 in
370 disable_instrumentation <- false;
371 let node = Lazy.force !spacetime_node_ident in
372 let node_reg = Selectgen.env_find node env in
373 self#insert_moves env node_temp_reg node_reg
374 end
375
376 method! emit_blockheader env n dbg =
377 if self#can_instrument () then begin
378 disable_instrumentation <- true;
379 let result = self#instrument_blockheader ~env ~value's_header:n ~dbg in
380 disable_instrumentation <- false;
381 result
382 end else begin
383 super#emit_blockheader env n dbg
384 end
385
386 method! select_allocation bytes =
387 if self#can_instrument () then begin
388 (* Leave space for a direct call point. We cannot easily insert any
389 instrumentation code, so the fields are filled in instead by
390 [caml_spacetime_caml_garbage_collection]. *)
391 let label = Cmm.new_label () in
392 let index =
393 next_index_within_node
394 ~part_of_shape:(Mach.Direct_call_point { callee = "caml_call_gc"; })
395 ~label
396 in
397 Mach.Ialloc {
398 bytes;
399 label_after_call_gc = Some label;
400 spacetime_index = index;
401 }
402 end else begin
403 super#select_allocation bytes
404 end
405
406 method! select_allocation_args env =
407 if self#can_instrument () then begin
408 let regs = Selectgen.env_find (Lazy.force !spacetime_node_ident) env in
409 match regs with
410 | [| reg |] -> [| reg |]
411 | _ -> failwith "Expected one register only for spacetime_node_ident"
412 end else begin
413 super#select_allocation_args env
414 end
415
416 method! select_checkbound () =
417 (* This follows [select_allocation], above. *)
418 if self#can_instrument () then begin
419 let label = Cmm.new_label () in
420 let index =
421 next_index_within_node
422 ~part_of_shape:(
423 Mach.Direct_call_point { callee = "caml_ml_array_bound_error"; })
424 ~label
425 in
426 Mach.Icheckbound {
427 label_after_error = Some label;
428 spacetime_index = index;
429 }
430 end else begin
431 super#select_checkbound ()
432 end
433
434 method! select_checkbound_extra_args () =
435 if self#can_instrument () then begin
436 (* This follows [select_allocation_args], above. *)
437 [Cmm.Cvar (Lazy.force !spacetime_node_ident)]
438 end else begin
439 super#select_checkbound_extra_args ()
440 end
441
442 method! initial_env () =
443 let env = super#initial_env () in
444 if Config.spacetime then
445 Selectgen.env_add (VP.create (Lazy.force !spacetime_node_ident))
446 (self#regs_for Cmm.typ_int) env
447 else
448 env
449
450 method! emit_fundecl f =
451 if Config.spacetime then begin
452 disable_instrumentation <- false;
453 let node = V.create_local "spacetime_node" in
454 reset ~spacetime_node_ident:node ~function_label:f.Cmm.fun_name
455 end;
456 super#emit_fundecl f
457
458 method! insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env =
459 let fun_spacetime_shape =
460 super#insert_prologue f ~loc_arg ~rarg ~spacetime_node_hole ~env
461 in
462 (* CR-soon mshinwell: add check to make sure the node size doesn't exceed
463 the chunk size of the allocator *)
464 if not Config.spacetime then fun_spacetime_shape
465 else begin
466 let node_hole, node_hole_reg =
467 match spacetime_node_hole with
468 | None -> assert false
469 | Some (node_hole, reg) -> node_hole, reg
470 in
471 self#insert_moves env [| Proc.loc_spacetime_node_hole |] node_hole_reg;
472 self#emit_prologue f ~node_hole ~env;
473 match !reverse_shape with
474 | [] -> None
475 (* N.B. We do not reverse the shape list, since the function that
476 reconstructs it (caml_spacetime_shape_table) reverses it again. *)
477 | reverse_shape -> Some reverse_shape
478 end
479 end
480