Skip to content

Commit bb35dc9

Browse files
authored
optimize remset marking (#52476)
Tag the lowest bit of a pointer to indicate that the object is in the remset, and enqueue remset objects for later processing once the GC threads have woken up, instead of marking them all sequentially up front. In principle, this should allow for more parallelism in the mark phase, though I haven't benchmarked it yet.
1 parent e618369 commit bb35dc9

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

src/gc.c

+10-11
Original file line numberDiff line numberDiff line change
@@ -2644,13 +2644,12 @@ JL_DLLEXPORT void jl_gc_mark_queue_objarray(jl_ptls_t ptls, jl_value_t *parent,
26442644
gc_mark_objarray(ptls, parent, objs, objs + nobjs, 1, nptr);
26452645
}
26462646

2647-
// Enqueue and mark all outgoing references from `new_obj` which have not been marked
2648-
// yet. `meta_updated` is mostly used to make sure we don't update metadata twice for
2649-
// objects which have been enqueued into the `remset`
2650-
FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj,
2651-
int meta_updated)
2647+
// Enqueue and mark all outgoing references from `new_obj` which have not been marked yet.
2648+
// `_new_obj` has its lowest bit tagged if it's in the remset (in which case we shouldn't update page metadata)
2649+
FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_new_obj)
26522650
{
2653-
jl_value_t *new_obj = (jl_value_t *)_new_obj;
2651+
int meta_updated = (uintptr_t)_new_obj & GC_REMSET_PTR_TAG;
2652+
jl_value_t *new_obj = (jl_value_t *)((uintptr_t)_new_obj & ~(uintptr_t)GC_REMSET_PTR_TAG);
26542653
mark_obj: {
26552654
jl_taggedvalue_t *o = jl_astaggedvalue(new_obj);
26562655
uintptr_t vtag = o->header & ~(uintptr_t)0xf;
@@ -2948,7 +2947,7 @@ void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq)
29482947
if (__unlikely(new_obj == NULL)) {
29492948
return;
29502949
}
2951-
gc_mark_outrefs(ptls, mq, new_obj, 0);
2950+
gc_mark_outrefs(ptls, mq, new_obj);
29522951
}
29532952
}
29542953

@@ -2997,7 +2996,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
29972996
goto steal;
29982997
}
29992998
mark : {
3000-
gc_mark_outrefs(ptls, mq, new_obj, 0);
2999+
gc_mark_outrefs(ptls, mq, new_obj);
30013000
goto pop;
30023001
}
30033002
// Note that for the stealing heuristics, we try to
@@ -3260,9 +3259,9 @@ static void gc_queue_remset(jl_ptls_t ptls, jl_ptls_t ptls2)
32603259
size_t len = ptls2->heap.last_remset->len;
32613260
void **items = ptls2->heap.last_remset->items;
32623261
for (size_t i = 0; i < len; i++) {
3263-
// Objects in the `remset` are already marked,
3264-
// so a `gc_try_claim_and_push` wouldn't work here
3265-
gc_mark_outrefs(ptls, &ptls->mark_queue, (jl_value_t *)items[i], 1);
3262+
// Tag the pointer to indicate it's in the remset
3263+
jl_value_t *v = (jl_value_t *)((uintptr_t)items[i] | GC_REMSET_PTR_TAG);
3264+
gc_ptr_queue_push(&ptls->mark_queue, v);
32663265
}
32673266
}
32683267

src/gc.h

+2
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ typedef struct _jl_gc_chunk_t {
118118
#define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *`
119119
#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue
120120

121+
#define GC_REMSET_PTR_TAG (0x1) // lowest bit of `jl_value_t *` is tagged if it's in the remset
122+
121123
// layout for big (>2k) objects
122124

123125
JL_EXTENSION typedef struct _bigval_t {

0 commit comments

Comments
 (0)