Skip to content

Commit 7b739d7

Browse files
kpamnany authored and RAI CI (GitHub Action Automation) committed
RAI: Add heartbeat capability
1 parent 54943bb commit 7b739d7

File tree

9 files changed

+327
-1
lines changed

9 files changed

+327
-1
lines changed

src/gc-stock.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3395,6 +3395,9 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
33953395
return recollect;
33963396
}
33973397

3398+
extern int jl_heartbeat_pause(void);
3399+
extern int jl_heartbeat_resume(void);
3400+
33983401
JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
33993402
{
34003403
JL_PROBE_GC_BEGIN(collection);
@@ -3437,6 +3440,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34373440
// existence of the thread in the jl_n_threads count.
34383441
//
34393442
// TODO: concurrently queue objects
3443+
jl_heartbeat_pause();
34403444
jl_fence();
34413445
gc_n_threads = jl_atomic_load_acquire(&jl_n_threads);
34423446
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
@@ -3468,6 +3472,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34683472

34693473
gc_n_threads = 0;
34703474
gc_all_tls_states = NULL;
3475+
jl_heartbeat_resume();
34713476
jl_safepoint_end_gc();
34723477
jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
34733478
JL_PROBE_GC_END();

src/gc-stock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,9 @@ extern uv_sem_t gc_sweep_assists_needed;
499499
extern _Atomic(int) gc_n_threads_marking;
500500
extern _Atomic(int) gc_n_threads_sweeping_pools;
501501
extern _Atomic(int) n_threads_running;
502+
extern _Atomic(int) gc_n_threads_sweeping_stacks;
503+
extern _Atomic(int) gc_ptls_sweep_idx;
504+
extern _Atomic(int) gc_stack_free_idx;
502505
extern uv_barrier_t thread_init_done;
503506
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
504507
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;

src/init.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,11 @@ static NOINLINE void _finish_jl_init_(jl_image_buf_t sysimage, jl_ptls_t ptls, j
614614
jl_start_gc_threads();
615615
uv_barrier_wait(&thread_init_done);
616616

617+
if (jl_base_module != NULL) {
618+
// requires code in Base
619+
jl_init_heartbeat();
620+
}
621+
617622
jl_gc_enable(1);
618623

619624
if ((sysimage.kind != JL_IMAGE_KIND_NONE) &&

src/julia_internal.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ extern volatile size_t profile_bt_size_max;
228228
extern volatile size_t profile_bt_size_cur;
229229
extern volatile int profile_running;
230230
extern volatile int profile_all_tasks;
231+
extern int heartbeat_tid; // Mostly used to ensure we skip this thread in the CPU profiler. XXX: not implemented on Windows
231232
// Ensures that we can safely read the `live_tasks` field of every TLS when profiling.
232233
// We want to avoid the case that a GC gets interleaved with `jl_profile_task` and shrinks
233234
// the `live_tasks` array while we are reading it or frees tasks that are being profiled.
@@ -243,6 +244,7 @@ extern uv_mutex_t bt_data_prof_lock;
243244
#define PROFILE_STATE_THREAD_NOT_SLEEPING (1)
244245
#define PROFILE_STATE_THREAD_SLEEPING (2)
245246
#define PROFILE_STATE_WALL_TIME_PROFILING (3)
247+
extern _Atomic(int) n_threads_running;
246248
void jl_profile_task(void);
247249

248250
// number of cycles since power-on

src/options.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@
144144
#define MACHINE_EXCLUSIVE_NAME "JULIA_EXCLUSIVE"
145145
#define DEFAULT_MACHINE_EXCLUSIVE 0
146146

147+
// heartbeats
148+
#define JL_HEARTBEAT_THREAD
149+
147150
// sanitizer defaults ---------------------------------------------------------
148151

149152
// Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers

src/signals-mach.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,10 @@ void *mach_profile_listener(void *arg)
826826
for (int idx = nthreads; idx-- > 0; ) {
827827
// Stop the threads in random order.
828828
int i = randperm[idx];
829+
// skip heartbeat thread
830+
if (i == heartbeat_tid) {
831+
continue;
832+
}
829833
jl_profile_thread_mach(i);
830834
}
831835
}

src/signals-unix.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,10 @@ static void do_profile(void *ctx)
873873
for (int idx = nthreads; idx-- > 0; ) {
874874
// Stop the threads in random order.
875875
int tid = randperm[idx];
876+
// Skip the heartbeat thread, but keep sampling the remaining threads in
// this pass: `continue` moves on to the next entry of the random
// permutation (same as the mach profile listener), whereas `return`
// would drop every thread that happens to come after the heartbeat thread.
877+
if (tid == heartbeat_tid) {
878+
continue;
879+
}
876880
// do backtrace for profiler
877881
if (!profile_running)
878882
return;

src/stackwalk.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1402,9 +1402,22 @@ JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
14021402
jlbacktrace();
14031403
}
14041404

1405-
// Print backtraces for all live tasks, for all threads, to jl_safe_printf stderr
1405+
extern int jl_inside_heartbeat_thread(void);
1406+
extern int jl_heartbeat_pause(void);
1407+
extern int jl_heartbeat_resume(void);
1408+
1409+
// Print backtraces for all live tasks, for all threads, to jl_safe_printf
1410+
// stderr. This can take a _long_ time!
14061411
JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14071412
{
1413+
// disable heartbeats to prevent heartbeat loss while running this,
1414+
// unless this is called from the heartbeat thread itself; in that
1415+
// situation, the thread is busy running this and it will not be
1416+
// updating the missed heartbeats counter
1417+
if (!jl_inside_heartbeat_thread()) {
1418+
jl_heartbeat_pause();
1419+
}
1420+
14081421
size_t nthreads = jl_atomic_load_acquire(&jl_n_threads);
14091422
jl_ptls_t *allstates = jl_atomic_load_relaxed(&jl_all_tls_states);
14101423
int ctid = jl_threadid() + 1;
@@ -1463,6 +1476,10 @@ JL_DLLEXPORT void jl_print_task_backtraces(int show_done) JL_NOTSAFEPOINT
14631476
jl_safe_printf("thread (%d) ==== End thread %d\n", ctid, ptls2->tid + 1);
14641477
}
14651478
jl_safe_printf("thread (%d) ++++ Done\n", ctid);
1479+
1480+
if (!jl_inside_heartbeat_thread()) {
1481+
jl_heartbeat_resume();
1482+
}
14661483
}
14671484

14681485
#ifdef __cplusplus

0 commit comments

Comments
 (0)