/**************************************************************************/
/*                                                                        */
/*                                 OCaml                                  */
/*                                                                        */
/*            Mark Shinwell and Leo White, Jane Street Europe             */
/*                                                                        */
/*   Copyright 2013--2016, Jane Street Group, LLC                         */
/*                                                                        */
/*   All rights reserved. This file is distributed under the terms of     */
/*   the GNU Lesser General Public License version 2.1, with the          */
/*   special exception on linking described in the file LICENSE.          */
/*                                                                        */
/**************************************************************************/

#define CAML_INTERNALS

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
#include "caml/config.h"
#ifdef HAS_UNISTD
#include <unistd.h>
#endif
#ifdef _WIN32
#include <process.h> /* for _getpid */
#include <direct.h>  /* for _wgetcwd */
#endif

#include "caml/alloc.h"
#include "caml/backtrace_prim.h"
#include "caml/fail.h"
#include "caml/gc.h"
#include "caml/intext.h"
#include "caml/major_gc.h"
#include "caml/memory.h"
#include "caml/minor_gc.h"
#include "caml/misc.h"
#include "caml/mlvalues.h"
#include "caml/osdeps.h"
#include "caml/roots.h"
#include "caml/signals.h"
#include "caml/stack.h"
#include "caml/sys.h"
#include "caml/spacetime.h"

#ifdef WITH_SPACETIME

/* We force "noinline" in certain places to be sure we know how many
   frames there will be on the stack. */
#ifdef _MSC_VER
#define NOINLINE __declspec(noinline)
#else
#define NOINLINE __attribute__((noinline))
#endif

#ifdef HAS_LIBUNWIND
#define UNW_LOCAL_ONLY
#include "libunwind.h"
#endif
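
/* Defining [UNW_LOCAL_ONLY] before including libunwind's header restricts
   libunwind to unwinding within the current process, which is all we need
   and selects its faster code paths. */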

static int automatic_snapshots = 0;
static double snapshot_interval = 0.0;
static double next_snapshot_time = 0.0;
static struct channel *snapshot_channel;
static int pid_when_snapshot_channel_opened;

extern value caml_spacetime_debug(value);

static char* start_of_free_node_block;
static char* end_of_free_node_block;

typedef struct per_thread {
  value* trie_node_root;
  value* finaliser_trie_node_root;
  struct per_thread* next;
} per_thread;

/* List of tries corresponding to threads that have been created. */
/* CR-soon mshinwell: just include the main trie in this list. */
static per_thread* per_threads = NULL;
static int num_per_threads = 0;

/* [caml_spacetime_shapes] is defined in the startup file. */
extern uint64_t* caml_spacetime_shapes;

uint64_t** caml_spacetime_static_shape_tables = NULL;
shape_table* caml_spacetime_dynamic_shape_tables = NULL;

static uintnat caml_spacetime_profinfo = (uintnat) 0;

value caml_spacetime_trie_root = Val_unit;
value* caml_spacetime_trie_node_ptr = &caml_spacetime_trie_root;

static value caml_spacetime_finaliser_trie_root_main_thread = Val_unit;
value* caml_spacetime_finaliser_trie_root
  = &caml_spacetime_finaliser_trie_root_main_thread;

/* CR-someday mshinwell: think about thread safety of the manipulation of
   this list for multicore */
allocation_point* caml_all_allocation_points = NULL;

static const uintnat chunk_size = 1024 * 1024;

#ifdef _WIN32
#define strdup_os wcsdup
#define snprintf_os _snwprintf
#else
#define strdup_os strdup
#define snprintf_os snprintf
#endif
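
/* Trie nodes are carved out of [chunk_size]-byte blocks with a simple bump
   allocator: [start_of_free_node_block] advances towards
   [end_of_free_node_block], and when the current block cannot satisfy a
   request a fresh block is allocated (the unused tail of the old block is
   simply abandoned). Blocks are pre-filled with [Val_unit] so that new
   nodes start out with all of their fields initialised. */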

static void reinitialise_free_node_block(void)
{
  size_t index;

  start_of_free_node_block = (char*) caml_stat_alloc_noexc(chunk_size);
  if (start_of_free_node_block == NULL) {
    caml_fatal_error("not enough memory for Spacetime node block");
  }
  end_of_free_node_block = start_of_free_node_block + chunk_size;

  for (index = 0; index < chunk_size / sizeof(value); index++) {
    ((value*) start_of_free_node_block)[index] = Val_unit;
  }
}

#ifndef O_BINARY
#define O_BINARY 0
#endif

enum {
  FEATURE_CALL_COUNTS = 1,
};

static uint16_t version_number = 0;
static uint32_t magic_number_base = 0xace00ace;
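
/* The magic number is marshalled as a single OCaml integer packing, from
   the least significant bit upwards: [magic_number_base] in bits 0-31,
   [version_number] in bits 32-47 and the feature bitmask (see [FEATURE_*]
   above) from bit 48 upwards. */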

static void caml_spacetime_write_magic_number_internal(struct channel* chan)
{
  value magic_number;
  uint16_t features = 0;

#ifdef ENABLE_CALL_COUNTS
  features |= FEATURE_CALL_COUNTS;
#endif

  magic_number =
    Val_long(((uint64_t) magic_number_base)
             | (((uint64_t) version_number) << 32)
             | (((uint64_t) features) << 48));

  Lock(chan);
  caml_output_val(chan, magic_number, Val_long(0));
  Unlock(chan);
}

CAMLprim value caml_spacetime_write_magic_number(value v_channel)
{
  caml_spacetime_write_magic_number_internal(Channel(v_channel));
  return Val_unit;
}

static char_os* automatic_snapshot_dir;

static void open_snapshot_channel(void)
{
  int fd;
  char_os filename[8192];
  int pid;
  int filename_len = sizeof(filename)/sizeof(char_os);
#ifdef _WIN32
  pid = _getpid();
#else
  pid = getpid();
#endif
  snprintf_os(filename, filename_len, T("%s/spacetime-%d"),
              automatic_snapshot_dir, pid);
  filename[filename_len-1] = '\0';
  fd = open_os(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
  if (fd == -1) {
    automatic_snapshots = 0;
  }
  else {
    snapshot_channel = caml_open_descriptor_out(fd);
    snapshot_channel->flags |= CHANNEL_FLAG_BLOCKING_WRITE;
    pid_when_snapshot_channel_opened = pid;
    caml_spacetime_write_magic_number_internal(snapshot_channel);
  }
}

static void maybe_reopen_snapshot_channel(void)
{
  /* This function should be used before writing to the automatic snapshot
     channel. It detects whether we have forked since the channel was
     opened. If so, we close the old channel (ignoring any errors just in
     case the old fd has been closed, e.g. in a double-fork situation where
     the middle process has a loop to manually close all fds and no
     Spacetime snapshot was written during that time) and then open a new
     one. */

  int pid;
#ifdef _WIN32
  pid = _getpid();
#else
  pid = getpid();
#endif

  if (pid != pid_when_snapshot_channel_opened) {
    caml_close_channel(snapshot_channel);
    open_snapshot_channel();
  }
}

extern void caml_spacetime_automatic_save(void);

void caml_spacetime_initialize(void)
{
  /* Note that this is called very early (even prior to GC
     initialisation). */

  char_os *ap_interval;

  reinitialise_free_node_block();

  caml_spacetime_static_shape_tables = &caml_spacetime_shapes;

  ap_interval = caml_secure_getenv (T("OCAML_SPACETIME_INTERVAL"));
  if (ap_interval != NULL) {
    unsigned int interval = 0;
    sscanf_os(ap_interval, T("%u"), &interval);
    if (interval != 0) {
      double time;
      char_os cwd[4096];
      char_os* user_specified_automatic_snapshot_dir;
      int dir_ok = 1;

      user_specified_automatic_snapshot_dir =
        caml_secure_getenv(T("OCAML_SPACETIME_SNAPSHOT_DIR"));

      if (user_specified_automatic_snapshot_dir == NULL) {
#if defined(HAS_GETCWD)
        if (getcwd_os(cwd, sizeof(cwd)/sizeof(char_os)) == NULL) {
          dir_ok = 0;
        }
#else
        dir_ok = 0;
#endif
        if (dir_ok) {
          automatic_snapshot_dir = strdup_os(cwd);
        }
      }
      else {
        automatic_snapshot_dir =
          strdup_os(user_specified_automatic_snapshot_dir);
      }

      if (dir_ok) {
        automatic_snapshots = 1;
        open_snapshot_channel();
        if (automatic_snapshots) {
#ifdef SIGINT
          /* Catch interrupt so that the profile can be completed.
             We do this by marking the signal as handled without
             specifying an actual handler. This causes the signal
             to be handled by a call to exit. */
          caml_set_signal_action(SIGINT, 2);
#endif
          snapshot_interval = interval / 1e3;
          time = caml_sys_time_unboxed(Val_unit);
          next_snapshot_time = time + snapshot_interval;
          atexit(&caml_spacetime_automatic_save);
        }
      }
    }
  }
}
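
/* As an example (the values here are illustrative, not defaults), running
   a program as:

     OCAML_SPACETIME_INTERVAL=1000 OCAML_SPACETIME_SNAPSHOT_DIR=/tmp ./prog

   requests an automatic snapshot approximately once per second (the
   interval is given in milliseconds), written to /tmp/spacetime-<pid>. */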

void caml_spacetime_register_shapes(void* dynlinked_table)
{
  shape_table* table;
  table = (shape_table*) caml_stat_alloc_noexc(sizeof(shape_table));
  if (table == NULL) {
    fprintf(stderr, "Out of memory whilst registering shape table\n");
    abort();
  }
  table->table = (uint64_t*) dynlinked_table;
  table->next = caml_spacetime_dynamic_shape_tables;
  caml_spacetime_dynamic_shape_tables = table;
}

CAMLprim value caml_spacetime_trie_is_initialized (value v_unit)
{
  return (caml_spacetime_trie_root == Val_unit) ? Val_false : Val_true;
}

CAMLprim value caml_spacetime_get_trie_root (value v_unit)
{
  return caml_spacetime_trie_root;
}

void caml_spacetime_register_thread(
  value* trie_node_root, value* finaliser_trie_node_root)
{
  per_thread* thr;

  thr = (per_thread*) caml_stat_alloc_noexc(sizeof(per_thread));
  if (thr == NULL) {
    fprintf(stderr, "Out of memory while registering thread for profiling\n");
    abort();
  }
  thr->next = per_threads;
  per_threads = thr;

  thr->trie_node_root = trie_node_root;
  thr->finaliser_trie_node_root = finaliser_trie_node_root;

  /* CR-soon mshinwell: record thread ID (and for the main thread too) */

  num_per_threads++;
}

static void caml_spacetime_save_event_internal (value v_time_opt,
                                                struct channel* chan,
                                                value v_event_name)
{
  value v_time;
  double time_override = 0.0;
  int use_time_override = 0;

  if (Is_block(v_time_opt)) {
    time_override = Double_field(Field(v_time_opt, 0), 0);
    use_time_override = 1;
  }
  v_time = caml_spacetime_timestamp(time_override, use_time_override);

  Lock(chan);
  caml_output_val(chan, Val_long(2), Val_long(0));
  caml_output_val(chan, v_event_name, Val_long(0));
  caml_extern_allow_out_of_heap = 1;
  caml_output_val(chan, v_time, Val_long(0));
  caml_extern_allow_out_of_heap = 0;
  Unlock(chan);

  caml_stat_free(Hp_val(v_time));
}
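
/* Records written to a profiling channel are preceded by a marshalled
   integer identifying their kind: events (above) are tagged with 2 and the
   final trie (see [save_trie] below) with 1. Snapshots themselves are
   written by [caml_spacetime_save_snapshot] (see spacetime_snapshot.c)
   with their own tag, allowing readers of profile files to dispatch on
   this integer. */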

CAMLprim value caml_spacetime_save_event (value v_time_opt,
                                          value v_channel,
                                          value v_event_name)
{
  struct channel* chan = Channel(v_channel);

  caml_spacetime_save_event_internal(v_time_opt, chan, v_event_name);

  return Val_unit;
}


void save_trie (struct channel *chan, double time_override,
                int use_time_override)
{
  value v_time, v_frames, v_shapes;
  /* CR-someday mshinwell: The commented-out changes here are for multicore,
     where we think we should have one trie per domain. */
  /* int num_marshalled = 0;
  per_thread* thr = per_threads; */

  Lock(chan);

  caml_output_val(chan, Val_long(1), Val_long(0));

  v_time = caml_spacetime_timestamp(time_override, use_time_override);
  v_frames = caml_spacetime_frame_table();
  v_shapes = caml_spacetime_shape_table();

  caml_extern_allow_out_of_heap = 1;
  caml_output_val(chan, v_time, Val_long(0));
  caml_output_val(chan, v_frames, Val_long(0));
  caml_output_val(chan, v_shapes, Val_long(0));
  caml_extern_allow_out_of_heap = 0;

  caml_output_val(chan, Val_long(1) /* Val_long(num_per_threads + 1) */,
                  Val_long(0));

  /* Marshal both the main and finaliser tries, for all threads that have
     been created, to an [out_channel]. This can be done by using the
     extern.c code as usual, since the trie looks like standard OCaml
     values; but we must allow it to traverse outside the heap. */

  caml_extern_allow_out_of_heap = 1;
  caml_output_val(chan, caml_spacetime_trie_root, Val_long(0));
  caml_output_val(chan,
    caml_spacetime_finaliser_trie_root_main_thread, Val_long(0));
  /* while (thr != NULL) {
    caml_output_val(chan, *(thr->trie_node_root), Val_long(0));
    caml_output_val(chan, *(thr->finaliser_trie_node_root),
      Val_long(0));
    thr = thr->next;
    num_marshalled++;
  }
  CAMLassert(num_marshalled == num_per_threads); */
  caml_extern_allow_out_of_heap = 0;

  Unlock(chan);
}

CAMLprim value caml_spacetime_save_trie (value v_time_opt, value v_channel)
{
  struct channel* channel = Channel(v_channel);
  double time_override = 0.0;
  int use_time_override = 0;

  if (Is_block(v_time_opt)) {
    time_override = Double_field(Field(v_time_opt, 0), 0);
    use_time_override = 1;
  }

  save_trie(channel, time_override, use_time_override);

  return Val_unit;
}

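/* The [pc] field of a [c_node] carries a tag in its low bits (see
   [Encode_c_node_pc_for_call] and [Encode_c_node_pc_for_alloc_point],
   whose results [Decode_c_node_pc] strips): the bit tested below is set
   for call points and clear for allocation points. */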
c_node_type caml_spacetime_classify_c_node(c_node* node)
{
  return (node->pc & 2) ? CALL : ALLOCATION;
}

c_node* caml_spacetime_c_node_of_stored_pointer(value node_stored)
{
  CAMLassert(node_stored == Val_unit || Is_c_node(node_stored));
  return (node_stored == Val_unit) ? NULL : (c_node*) Hp_val(node_stored);
}

c_node* caml_spacetime_c_node_of_stored_pointer_not_null(
      value node_stored)
{
  CAMLassert(Is_c_node(node_stored));
  return (c_node*) Hp_val(node_stored);
}

value caml_spacetime_stored_pointer_of_c_node(c_node* c_node)
{
  value node;
  CAMLassert(c_node != NULL);
  node = Val_hp(c_node);
  CAMLassert(Is_c_node(node));
  return node;
}

#ifdef HAS_LIBUNWIND
static int pc_inside_c_node_matches(c_node* node, void* pc)
{
  return Decode_c_node_pc(node->pc) == pc;
}
#endif

static value allocate_uninitialized_ocaml_node(int size_including_header)
{
  void* node;
  uintnat size;

  CAMLassert(size_including_header >= 3);

  size = size_including_header * sizeof(value);

  node = (void*) start_of_free_node_block;
  if (end_of_free_node_block - start_of_free_node_block < size) {
    reinitialise_free_node_block();
    node = (void*) start_of_free_node_block;
    CAMLassert(end_of_free_node_block - start_of_free_node_block >= size);
  }

  start_of_free_node_block += size;

  /* We don't currently rely on [uintnat] alignment, but we do need some
     alignment, so just be sure. */
  CAMLassert (((uintnat) node) % sizeof(uintnat) == 0);
  return Val_hp(node);
}
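
/* Note that [allocate_uninitialized_ocaml_node] does not write the GC
   header; its callers do that (see [caml_spacetime_allocate_node] and
   [caml_spacetime_c_to_ocaml] below). The remaining fields are already
   [Val_unit], courtesy of [reinitialise_free_node_block]. */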

static value find_tail_node(value node, void* callee)
{
  /* Search the tail chain within [node] (which corresponds to an invocation
     of a caller of [callee]) to determine whether it contains a tail node
     corresponding to [callee]. Returns any such node, or [Val_unit] if no
     such node exists. */

  value starting_node;
  value pc;
  value found = Val_unit;

  starting_node = node;
  pc = Encode_node_pc(callee);

  do {
    CAMLassert(Is_ocaml_node(node));
    if (Node_pc(node) == pc) {
      found = node;
    }
    else {
      node = Tail_link(node);
    }
  } while (found == Val_unit && starting_node != node);

  return found;
}
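
/* Tail chains are circular lists threaded through the [Tail_link] fields;
   a node that is in no chain (or alone in its chain) has [Tail_link(node)]
   equal to [node] itself. The loop above therefore terminates either when
   [callee]'s node is found or when the traversal arrives back at
   [starting_node]. */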

CAMLprim value caml_spacetime_allocate_node(
      int size_including_header, void* pc, value* node_hole)
{
  value node;
  value caller_node = Val_unit;

  node = *node_hole;
  /* The node hole should either contain [Val_unit], indicating that this
     function was not tail called and we have not been to this point in the
     trie before; or it should contain a value encoded using
     [Encoded_tail_caller_node] that points at the node of a caller
     that tail called the current function. (Such a value is necessary to
     be able to find the start of the caller's node, and hence its tail
     chain, so we as a tail-called callee can link ourselves in.) */
  CAMLassert(Is_tail_caller_node_encoded(node));

  if (node != Val_unit) {
    value tail_node;
    /* The callee was tail called. Find whether there already exists a node
       for it in the tail call chain within the caller's node. The caller's
       node must always be an OCaml node. */
    caller_node = Decode_tail_caller_node(node);
    tail_node = find_tail_node(caller_node, pc);
    if (tail_node != Val_unit) {
      /* This tail calling sequence has happened before; just fill the hole
         with the existing node and return. */
      *node_hole = tail_node;
      return 0;  /* indicates an existing node was returned */
    }
  }

  node = allocate_uninitialized_ocaml_node(size_including_header);
  Hd_val(node) =
    Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
  CAMLassert((((uintnat) pc) % 1) == 0);
  Node_pc(node) = Encode_node_pc(pc);
  /* If the callee was tail called, then the tail link field will link this
     new node into an existing tail chain. Otherwise, it is initialised with
     the empty tail chain, i.e. the one pointing directly at [node]. */
  if (caller_node == Val_unit) {
    Tail_link(node) = node;
  }
  else {
    Tail_link(node) = Tail_link(caller_node);
    Tail_link(caller_node) = node;
  }

  /* The callee node pointers for direct tail call points are
     initialised from code emitted by the OCaml compiler. This is done to
     avoid having to pass this function a description of which nodes are
     direct tail call points. (We cannot just count them and put them at the
     beginning of the node because we need the indexes of elements within the
     node during instruction selection before we have found all call points.)

     All other fields have already been initialised by
     [reinitialise_free_node_block].
  */

  *node_hole = node;

  return 1;  /* indicates a new node was created */
}

static c_node* allocate_c_node(void)
{
  c_node* node;
  size_t index;

  node = (c_node*) start_of_free_node_block;
  if (end_of_free_node_block - start_of_free_node_block < sizeof(c_node)) {
    reinitialise_free_node_block();
    node = (c_node*) start_of_free_node_block;
    CAMLassert(end_of_free_node_block - start_of_free_node_block
      >= sizeof(c_node));
  }
  start_of_free_node_block += sizeof(c_node);

  CAMLassert((sizeof(c_node) % sizeof(uintnat)) == 0);

  /* CR-soon mshinwell: remove this and pad the structure properly */
  for (index = 0; index < sizeof(c_node) / sizeof(value); index++) {
    ((value*) node)[index] = Val_unit;
  }

  node->gc_header =
    Make_header(sizeof(c_node)/sizeof(uintnat) - 1, C_node_tag, Caml_black);
  node->data.call.callee_node = Val_unit;
  node->data.call.call_count = Val_long(0);
  node->next = Val_unit;

  return node;
}
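
/* Like OCaml nodes, [c_node]s are laid out as valid OCaml blocks (note the
   [C_node_tag] header written above) so that the marshaller in extern.c
   can traverse them when the trie is saved. */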

/* Since a given indirect call site either always yields tail calls or
   always yields non-tail calls, the output of
   [caml_spacetime_indirect_node_hole_ptr] is uniquely determined by its
   first two arguments (the callee and the node hole). We cache these
   to increase performance of recursive functions containing an indirect
   call (e.g. [List.map] when not inlined). */
static void* last_indirect_node_hole_ptr_callee;
static value* last_indirect_node_hole_ptr_node_hole;
static call_point* last_indirect_node_hole_ptr_result;

CAMLprim value* caml_spacetime_indirect_node_hole_ptr
      (void* callee, value* node_hole, value caller_node)
{
  /* Find the address of the node hole for an indirect call to [callee].
     If [caller_node] is not [Val_unit], it is a pointer to the caller's
     node, and indicates that this is a tail call site. */

  c_node* c_node;
  value encoded_callee;

  if (callee == last_indirect_node_hole_ptr_callee
      && node_hole == last_indirect_node_hole_ptr_node_hole) {
#ifdef ENABLE_CALL_COUNTS
    last_indirect_node_hole_ptr_result->call_count =
      Val_long (Long_val (last_indirect_node_hole_ptr_result->call_count) + 1);
#endif
    return &(last_indirect_node_hole_ptr_result->callee_node);
  }

  last_indirect_node_hole_ptr_callee = callee;
  last_indirect_node_hole_ptr_node_hole = node_hole;

  encoded_callee = Encode_c_node_pc_for_call(callee);

  while (*node_hole != Val_unit) {
    CAMLassert(((uintnat) *node_hole) % sizeof(value) == 0);

    c_node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);

    CAMLassert(c_node != NULL);
    CAMLassert(caml_spacetime_classify_c_node(c_node) == CALL);

    if (c_node->pc == encoded_callee) {
#ifdef ENABLE_CALL_COUNTS
      c_node->data.call.call_count =
        Val_long (Long_val(c_node->data.call.call_count) + 1);
#endif
      last_indirect_node_hole_ptr_result = &(c_node->data.call);
      return &(last_indirect_node_hole_ptr_result->callee_node);
    }
    else {
      node_hole = &c_node->next;
    }
  }

  c_node = allocate_c_node();
  c_node->pc = encoded_callee;

  if (caller_node != Val_unit) {
    /* This is a tail call site.
       Perform the initialisation equivalent to that emitted by
       [Spacetime.code_for_function_prologue] for direct tail call
       sites. */
    c_node->data.call.callee_node = Encode_tail_caller_node(caller_node);
  }

  *node_hole = caml_spacetime_stored_pointer_of_c_node(c_node);

  CAMLassert(((uintnat) *node_hole) % sizeof(value) == 0);
  CAMLassert(*node_hole != Val_unit);

#ifdef ENABLE_CALL_COUNTS
  c_node->data.call.call_count =
    Val_long (Long_val(c_node->data.call.call_count) + 1);
#endif
  last_indirect_node_hole_ptr_result = &(c_node->data.call);

  return &(last_indirect_node_hole_ptr_result->callee_node);
}

/* Some notes on why caml_call_gc doesn't need a distinguished node.
   (Remember that thread switches are irrelevant here because each thread
   has its own trie.)

   caml_call_gc only invokes OCaml functions in the following circumstances:
   1. running an OCaml finaliser;
   2. executing an OCaml signal handler;
   3. executing memprof callbacks.
   All of these are done on the finaliser trie. Furthermore, all of
   these invocations start via caml_callback; the code in this file for
   handling that (caml_spacetime_c_to_ocaml) correctly copes with that by
   attaching a single "caml_start_program" node that can cope with any
   number of indirect OCaml calls from that point.

   caml_call_gc may also invoke C functions that cause allocation. All of
   these (assuming libunwind support is present) will cause a chain of
   c_node structures to be attached to the trie, starting at the node hole
   passed to caml_call_gc from OCaml code. These structures are extensible
   and can thus accommodate any number of C backtraces leading from
   caml_call_gc.
*/
/* CR-soon mshinwell: it might in fact be the case now that nothing called
   from caml_call_gc will do any allocation that ends up on the trie. We
   can revisit this after the first release. */

static NOINLINE void* find_trie_node_from_libunwind(int for_allocation,
    uintnat wosize, struct ext_table** cached_frames)
{
#ifdef HAS_LIBUNWIND
  /* Given that [Caml_state->last_return_address] is the most recent call
     site in OCaml code, and that we are now in C (or other) code called
     from that site, obtain a backtrace using libunwind and graft the most
     recent portion (everything back to but not including
     [last_return_address]) onto the trie. See the important comment below
     regarding the fact that call site, and not callee, addresses are
     recorded during this process.

     If [for_allocation] is non-zero, the final node recorded will be for
     an allocation, and the returned pointer is to the allocation node.
     Otherwise, no node is recorded for the innermost frame, and the
     returned pointer is a pointer to the *node hole* where a node for that
     frame should be attached.

     If [for_allocation] is non-zero then [wosize] must give the size in
     words, excluding the header, of the value being allocated.

     If [cached_frames != NULL] then:
     1. If [*cached_frames] is NULL then save the captured backtrace in a
        newly-allocated table and store the pointer to that table in
        [*cached_frames];
     2. Otherwise use [*cached_frames] as the unwinding information.
     The intention is that when the context is known (e.g. a function such
     as [caml_make_vect] known to have been directly invoked from OCaml),
     we can avoid expensive calls to libunwind.
  */

  unw_cursor_t cur;
  unw_context_t ctx;
  int ret;
  int innermost_frame;
  int frame;
  static struct ext_table frames_local;
  struct ext_table* frames;
  static int ext_table_initialised = 0;
  int have_frames_already = 0;
  value* node_hole;
  c_node* node = NULL;
  int initial_table_size = 1000;
  int must_initialise_node_for_allocation = 0;

  if (!cached_frames) {
    if (!ext_table_initialised) {
      caml_ext_table_init(&frames_local, initial_table_size);
      ext_table_initialised = 1;
    }
    else {
      caml_ext_table_clear(&frames_local, 0);
    }
    frames = &frames_local;
  } else {
    if (*cached_frames) {
      frames = *cached_frames;
      have_frames_already = 1;
    }
    else {
      frames =
        (struct ext_table*) caml_stat_alloc_noexc(sizeof(struct ext_table));
      if (!frames) {
        caml_fatal_error("not enough memory for ext_table allocation");
      }
      caml_ext_table_init(frames, initial_table_size);
      *cached_frames = frames;
    }
  }

  if (!have_frames_already) {
    /* Get the stack backtrace as far as
       [Caml_state->last_return_address]. */

    ret = unw_getcontext(&ctx);
    if (ret != UNW_ESUCCESS) {
      return NULL;
    }

    ret = unw_init_local(&cur, &ctx);
    if (ret != UNW_ESUCCESS) {
      return NULL;
    }

    while ((ret = unw_step(&cur)) > 0) {
      unw_word_t ip;
      unw_get_reg(&cur, UNW_REG_IP, &ip);
      if (Caml_state->last_return_address == (uintnat) ip) {
        break;
      }
      else {
        /* Inlined some of [caml_ext_table_add] for speed. */
        if (frames->size < frames->capacity) {
          frames->contents[frames->size++] = (void*) ip;
        } else {
          caml_ext_table_add(frames, (void*) ip);
        }
      }
    }
  }

  /* We always need to ignore the frames for:
      #0 find_trie_node_from_libunwind
      #1 caml_spacetime_c_to_ocaml
     Further, if this is not an allocation point, we should not create the
     node for the current C function that triggered us (i.e. frame #2). */
  innermost_frame = for_allocation ? 1 : 2;

  if (frames->size - 1 < innermost_frame) {
    /* Insufficiently many frames (maybe no frames) returned from
       libunwind; just don't do anything. */
    return NULL;
  }

  node_hole = caml_spacetime_trie_node_ptr;
  /* Note that if [node_hole] is filled, then it must point to a C node,
     since it is not possible for there to be a call point in an OCaml
     function that sometimes calls C and sometimes calls OCaml. */

  for (frame = frames->size - 1; frame >= innermost_frame; frame--) {
    c_node_type expected_type;
    void* pc = frames->contents[frame];
    CAMLassert (pc != (void*) Caml_state->last_return_address);

    if (!for_allocation) {
      expected_type = CALL;
    }
    else {
      expected_type = (frame > innermost_frame ? CALL : ALLOCATION);
    }

    if (*node_hole == Val_unit) {
      node = allocate_c_node();
      /* Note: for CALL nodes, the PC is the program counter at each call
         site. We do not store program counter addresses of the start of
         callees, unlike for OCaml nodes. This means that some trie nodes
         will become conflated. These can be split during post-processing by
         working out which function each call site was in. */
      node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
        : Encode_c_node_pc_for_alloc_point(pc));
      *node_hole = caml_spacetime_stored_pointer_of_c_node(node);
      if (expected_type == ALLOCATION) {
        must_initialise_node_for_allocation = 1;
      }
    }
    else {
      c_node* prev;
      int found = 0;

      node = caml_spacetime_c_node_of_stored_pointer_not_null(*node_hole);
      CAMLassert(node != NULL);
      CAMLassert(node->next == Val_unit
        || (((uintnat) (node->next)) % sizeof(value) == 0));

      prev = NULL;

      while (!found && node != NULL) {
        if (caml_spacetime_classify_c_node(node) == expected_type
            && pc_inside_c_node_matches(node, pc)) {
          found = 1;
        }
        else {
          prev = node;
          node = caml_spacetime_c_node_of_stored_pointer(node->next);
        }
      }
      if (!found) {
        CAMLassert(prev != NULL);
        node = allocate_c_node();
        node->pc = (expected_type == CALL ? Encode_c_node_pc_for_call(pc)
          : Encode_c_node_pc_for_alloc_point(pc));
        if (expected_type == ALLOCATION) {
          must_initialise_node_for_allocation = 1;
        }
        prev->next = caml_spacetime_stored_pointer_of_c_node(node);
      }
    }

    CAMLassert(node != NULL);

    CAMLassert(caml_spacetime_classify_c_node(node) == expected_type);
    CAMLassert(pc_inside_c_node_matches(node, pc));
    node_hole = &node->data.call.callee_node;
  }

  if (must_initialise_node_for_allocation) {
    caml_spacetime_profinfo++;
    if (caml_spacetime_profinfo > PROFINFO_MASK) {
      /* Profiling counter overflow. */
      caml_spacetime_profinfo = PROFINFO_MASK;
    }
    node->data.allocation.profinfo =
      Make_header_with_profinfo(
        /* "-1" because [c_node] has the GC header as its first
           element. */
        offsetof(c_node, data.allocation.count)/sizeof(value) - 1,
        Infix_tag,
        Caml_black,
        caml_spacetime_profinfo);
    node->data.allocation.count = Val_long(0);

    /* Add the new allocation point into the linked list of all allocation
       points. */
    if (caml_all_allocation_points != NULL) {
      node->data.allocation.next =
        (value) &caml_all_allocation_points->count;
    } else {
      node->data.allocation.next = Val_unit;
    }
    caml_all_allocation_points = &node->data.allocation;
  }

  if (for_allocation) {
    CAMLassert(caml_spacetime_classify_c_node(node) == ALLOCATION);
    CAMLassert(caml_spacetime_c_node_of_stored_pointer(node->next) != node);
    CAMLassert(Profinfo_hd(node->data.allocation.profinfo) > 0);
    node->data.allocation.count =
      Val_long(Long_val(node->data.allocation.count) + (1 + wosize));
  }

  CAMLassert(node->next != (value) NULL);

  return for_allocation ? (void*) node : (void*) node_hole;
#else
  return NULL;
#endif
}

void caml_spacetime_c_to_ocaml(void* ocaml_entry_point,
    void* identifying_pc_for_caml_start_program)
{
  /* Called in [caml_start_program] and [caml_callback*] when we are about
     to cross from C into OCaml. [ocaml_entry_point] is the branch target.
     This situation is handled by ensuring the presence of a new OCaml node
     for the callback veneer; the node contains a single indirect call point
     which accumulates the [ocaml_entry_point]s.

     The layout of the node is described in the "system shape table"; see
     amd64.S.
  */

  value node;

  /* Update the trie with the current backtrace, as far back as
     [Caml_state->last_return_address], and leave the node hole pointer at
     the correct place for attachment of a [caml_start_program] node. */

#ifdef HAS_LIBUNWIND
  value* node_temp;
  node_temp = (value*) find_trie_node_from_libunwind(0, 0, NULL);
  if (node_temp != NULL) {
    caml_spacetime_trie_node_ptr = node_temp;
  }
#endif

  if (*caml_spacetime_trie_node_ptr == Val_unit) {
    uintnat size_including_header;

    size_including_header =
      1 /* GC header */ + Node_num_header_words + Indirect_num_fields;

    node = allocate_uninitialized_ocaml_node(size_including_header);
    Hd_val(node) =
      Make_header(size_including_header - 1, OCaml_node_tag, Caml_black);
    CAMLassert((((uintnat) identifying_pc_for_caml_start_program) % 1) == 0);
    Node_pc(node) = Encode_node_pc(identifying_pc_for_caml_start_program);
    Tail_link(node) = node;
    Indirect_pc_linked_list(node, Node_num_header_words) = Val_unit;
    *caml_spacetime_trie_node_ptr = node;
  }
  else {
    node = *caml_spacetime_trie_node_ptr;
    /* If there is a node here already, it should never be an initialised
       (but as yet unused) tail call point, since calls from OCaml into C
       are never tail calls (and no C -> C call is marked as tail). */
    CAMLassert(!Is_tail_caller_node_encoded(node));
  }

  CAMLassert(Is_ocaml_node(node));
  CAMLassert(Decode_node_pc(Node_pc(node))
    == identifying_pc_for_caml_start_program);
  CAMLassert(Tail_link(node) == node);
  CAMLassert(Wosize_val(node) == Node_num_header_words + Indirect_num_fields);

  /* Search the node to find the node hole corresponding to the indirect
     call to the OCaml function. */
  caml_spacetime_trie_node_ptr =
    caml_spacetime_indirect_node_hole_ptr(
      ocaml_entry_point,
      &Indirect_pc_linked_list(node, Node_num_header_words),
      Val_unit);
  CAMLassert(*caml_spacetime_trie_node_ptr == Val_unit
    || Is_ocaml_node(*caml_spacetime_trie_node_ptr));
}

extern void caml_garbage_collection(void); /* signals_nat.c */
extern void caml_array_bound_error(void); /* fail.c */

CAMLprim uintnat caml_spacetime_generate_profinfo (void* profinfo_words,
                                                   uintnat index_within_node)
{
  /* Called from code that creates a value's header inside an OCaml
     function. */

  value node;
  uintnat profinfo;

  caml_spacetime_profinfo++;
  if (caml_spacetime_profinfo > PROFINFO_MASK) {
    /* Profiling counter overflow. */
    caml_spacetime_profinfo = PROFINFO_MASK;
  }
  profinfo = caml_spacetime_profinfo;

  /* CR-someday mshinwell: we could always use the [struct allocation_point]
     overlay instead of the macros now. */

  /* [node] isn't really a node; it points into the middle of
     one---specifically to the "profinfo" word of an allocation point.
     It's done like this to avoid re-calculating the place in the node
     (which already has to be done in the OCaml-generated code run before
     this function). */
  node = (value) profinfo_words;
  CAMLassert(Alloc_point_profinfo(node, 0) == Val_unit);

  /* The profinfo value is stored shifted to reduce the number of
     instructions required on the OCaml side. It also enables us to use
     [Infix_tag] to obtain valid value pointers into the middle of nodes,
     which is used for the linked list of all allocation points. */
  profinfo = Make_header_with_profinfo(
    index_within_node, Infix_tag, Caml_black, profinfo);

  CAMLassert(!Is_block(profinfo));
  Alloc_point_profinfo(node, 0) = profinfo;
  /* The count is set to zero by the initialisation when the node was
     created (see above). */
  CAMLassert(Alloc_point_count(node, 0) == Val_long(0));

  /* Add the new allocation point into the linked list of all allocation
     points. */
  if (caml_all_allocation_points != NULL) {
    Alloc_point_next_ptr(node, 0) = (value) &caml_all_allocation_points->count;
  }
  else {
    CAMLassert(Alloc_point_next_ptr(node, 0) == Val_unit);
  }
  caml_all_allocation_points = (allocation_point*) node;

  return profinfo;
}

uintnat caml_spacetime_my_profinfo (struct ext_table** cached_frames,
                                    uintnat wosize)
{
  /* Return the profinfo value that should be written into a value's header
     during an allocation from C. This may necessitate extending the trie
     with information obtained from libunwind. */

  c_node* node;
  uintnat profinfo = 0;

  node = find_trie_node_from_libunwind(1, wosize, cached_frames);
  if (node != NULL) {
    profinfo = ((uintnat) (node->data.allocation.profinfo)) >> PROFINFO_SHIFT;
  }

  return profinfo; /* N.B. not shifted by PROFINFO_SHIFT */
}

void caml_spacetime_automatic_snapshot (void)
{
  if (automatic_snapshots) {
    double start_time, end_time;
    start_time = caml_sys_time_unboxed(Val_unit);
    if (start_time >= next_snapshot_time) {
      maybe_reopen_snapshot_channel();
      caml_spacetime_save_snapshot(snapshot_channel, 0.0, 0);
      end_time = caml_sys_time_unboxed(Val_unit);
      next_snapshot_time = end_time + snapshot_interval;
    }
  }
}

CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
      (value v_event_name)
{
  if (automatic_snapshots) {
    maybe_reopen_snapshot_channel();
    caml_spacetime_save_event_internal (Val_unit, snapshot_channel,
                                        v_event_name);
  }
  return Val_unit;
}

void caml_spacetime_automatic_save (void)
{
  /* Called from [atexit]. */

  if (automatic_snapshots) {
    automatic_snapshots = 0;
    maybe_reopen_snapshot_channel();
    save_trie(snapshot_channel, 0.0, 0);
    caml_flush(snapshot_channel);
    caml_close_channel(snapshot_channel);
  }
}

CAMLprim value caml_spacetime_enabled (value v_unit)
{
  return Val_true;
}

CAMLprim value caml_register_channel_for_spacetime (value v_channel)
{
  struct channel* channel = Channel(v_channel);
  channel->flags |= CHANNEL_FLAG_BLOCKING_WRITE;
  return Val_unit;
}

#else

/* Functions for when the compiler was not configured with "-spacetime". */

CAMLprim value caml_spacetime_write_magic_number(value v_channel)
{
  return Val_unit;
}

CAMLprim value caml_spacetime_enabled (value v_unit)
{
  return Val_false;
}

CAMLprim value caml_spacetime_save_event (value v_time_opt,
                                          value v_channel,
                                          value v_event_name)
{
  return Val_unit;
}

CAMLprim value caml_spacetime_save_event_for_automatic_snapshots
      (value v_event_name)
{
  return Val_unit;
}

CAMLprim value caml_spacetime_save_trie (value ignored)
{
  return Val_unit;
}

CAMLprim value caml_register_channel_for_spacetime (value v_channel)
{
  return Val_unit;
}

#endif