Skip to content

Commit 7ea2484

Browse files
committed
Apply refactorings
1 parent e8ebb0f commit 7ea2484

9 files changed

+288
-76
lines changed

src/gc-interface.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,10 @@ struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAF
216216
// The GC may use that information to, for instance, determine that such objects should
217217
// be treated as marked and as belonging to the old generation in nursery collections.
218218
void jl_gc_notify_image_load(const char* img_data, size_t len);
219+
// This function notifies the GC about memory addresses that are set when allocating the boot image.
220+
// The GC may use that information to, for instance, determine that all objects in that chunk of memory should
221+
// be treated as marked and as belonging to the old generation in nursery collections.
222+
void jl_gc_notify_image_alloc(const char* img_data, size_t len);
219223

220224
// ========================================================================= //
221225
// Runtime Write-Barriers
@@ -254,13 +258,11 @@ STATIC_INLINE void jl_gc_wb_knownold(const void *parent, const void *ptr) JL_NOT
254258
// per field of the object being copied, but may be special-cased for performance reasons.
255259
STATIC_INLINE void jl_gc_multi_wb(const void *parent,
256260
const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
257-
258261
// Write-barrier function that must be used after copying fields of elements of genericmemory objects
259262
// into another. It should be semantically equivalent to triggering multiple write barriers – one
260263
// per field of the object being copied, but may be special-cased for performance reasons.
261264
STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const struct _jl_value_t *owner, struct _jl_genericmemory_t *src, char* src_p,
262265
size_t n, struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
263-
264266
// Similar to jl_gc_wb_genericmemory_copy_ptr but must be used when copying *boxed* elements of a genericmemory
265267
// object. Note that this barrier also performs the copying unlike jl_gc_wb_genericmemory_copy_ptr.
266268
// The parameters src_p, dest_p and n will be modified and will contain information about

src/gc-mmtk.c

+95-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "gc-common.h"
22
#include "gc-tls-mmtk.h"
3+
#include "gc-wb-mmtk.h"
34
#include "mmtkMutator.h"
45
#include "threading.h"
56

@@ -861,10 +862,22 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t
861862
return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1);
862863
}
863864

865+
// Atomically sets the side-metadata bit that corresponds to `obj` in the table
// starting at `side_metadata_base`. The byte is selected by (addr >> 6) and the
// bit within that byte by ((addr >> 3) & 7).
inline void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) {
    const intptr_t obj_addr = (intptr_t) obj;
    uint8_t* const meta_byte = (uint8_t*) side_metadata_base + (obj_addr >> 6);
    const intptr_t bit_idx = (obj_addr >> 3) & 0b111;
    // Retry the compare-and-swap until the updated byte has been published.
    for (;;) {
        uint8_t expected = *meta_byte;
        uint8_t desired = expected | (1 << bit_idx);
        if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_byte, &expected, desired))
            return;
    }
}
877+
864878
// Post-allocation hook for the immortal fast path. When the plan uses the
// object barrier, sets the bit for `obj` in the side log-bit table; otherwise
// does nothing. `mutator` and `size` are currently unused.
STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
        mmtk_set_side_metadata(MMTK_SIDE_LOG_BIT_BASE_ADDRESS, obj);
    }
}
869882

870883
JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty)
@@ -1128,7 +1141,9 @@ _Atomic(int) gc_stack_free_idx = 0;
11281141

11291142
// Slow-path write barrier entry point: hands `ptr` to MMTk's
// mmtk_object_reference_write_slow using the current task's mutator.
// No target object is supplied (a null pointer is passed in its place).
JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
{
    jl_task_t *ct = jl_current_task;
    jl_ptls_t ptls = ct->ptls;
    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, ptr, (const void*) 0);
}
11331148

11341149
JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
@@ -1210,6 +1225,82 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
12101225
return NULL;
12111226
}
12121227

1228+
// Copies `n` elements from `src` (at `srcdata`) into `dest` (at `destdata`).
// Both memories must carry the same type tag, otherwise an argument error is
// reported through jl_exceptionf. Three layouts are handled:
//   - boxed elements: reference-by-reference copy with write-barrier handling;
//   - inline elements containing references (first_ptr != -1): memmove_refs
//     followed by per-element jl_gc_queue_multiroot calls when needed;
//   - everything else: plain memmove (plus the type-tag bytes for bits unions).
// NOTE(review): the bits.gc / GC_OLD_MARKED / GC_MARKED checks below read
// object header bits; confirm that these are meaningful under the MMTk build.
JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata,
1229+
jl_genericmemory_t *src, char* srcdata,
1230+
size_t n) JL_NOTSAFEPOINT
1231+
{
1232+
jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(dest);
1233+
if (dt != (jl_datatype_t*)jl_typetagof(src))
1234+
jl_exceptionf(jl_argumenterror_type, "jl_genericmemory_copyto requires source and dest to have same type");
1235+
const jl_datatype_layout_t *layout = dt->layout;
1236+
if (layout->flags.arrayelem_isboxed) {
1237+
_Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
1238+
_Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
1239+
jl_value_t *owner = jl_genericmemory_owner(dest);
1240+
// Fire the write barrier on the destination owner before any reference is stored.
jl_gc_wb(owner, NULL);
1241+
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
1242+
jl_value_t *src_owner = jl_genericmemory_owner(src);
1243+
ssize_t done = 0;
1244+
if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
1245+
// Pick the copy direction from the relative position of the two ranges.
if (dest_p < src_p || dest_p > src_p + n) {
1246+
for (; done < n; done++) { // copy forwards
1247+
void *val = jl_atomic_load_relaxed(src_p + done);
1248+
jl_atomic_store_release(dest_p + done, val);
1249+
// `val` is young or old-unmarked
1250+
// Queue the owner once and stop scanning; the rest is copied by memmove_refs below.
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
1251+
jl_gc_queue_root(owner);
1252+
break;
1253+
}
1254+
}
1255+
// Skip the prefix that has already been copied.
src_p += done;
1256+
dest_p += done;
1257+
} else {
1258+
for (; done < n; done++) { // copy backwards
1259+
void *val = jl_atomic_load_relaxed(src_p + n - done - 1);
1260+
jl_atomic_store_release(dest_p + n - done - 1, val);
1261+
// `val` is young or old-unmarked
1262+
if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
1263+
jl_gc_queue_root(owner);
1264+
break;
1265+
}
1266+
}
1267+
}
1268+
// Only the elements not yet copied remain (the front part when copying backwards).
n -= done;
1269+
}
1270+
}
1271+
return memmove_refs(dest_p, src_p, n);
1272+
}
1273+
size_t elsz = layout->size;
1274+
// Captured before the bits-union branch below rewrites `srcdata`.
char *src_p = srcdata;
1275+
int isbitsunion = layout->flags.arrayelem_isunion;
1276+
if (isbitsunion) {
1277+
char *sourcetypetagdata = jl_genericmemory_typetagdata(src);
1278+
char *desttypetagdata = jl_genericmemory_typetagdata(dest);
1279+
// In this branch `srcdata`/`destdata` are used as element indices (cast to size_t):
// first to move the type-tag bytes, then to compute byte pointers into the data.
memmove(desttypetagdata+(size_t)destdata, sourcetypetagdata+(size_t)srcdata, n);
1280+
srcdata = (char*)src->ptr + elsz*(size_t)srcdata;
1281+
destdata = (char*)dest->ptr + elsz*(size_t)destdata;
1282+
}
1283+
if (layout->first_ptr != -1) {
1284+
// Element payload is moved in pointer-sized units.
memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*));
1285+
jl_value_t *owner = jl_genericmemory_owner(dest);
1286+
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
1287+
jl_value_t *src_owner = jl_genericmemory_owner(src);
1288+
if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
1289+
// From here on `dt` is the element type (second type parameter of the memory type).
dt = (jl_datatype_t*)jl_tparam1(dt);
1290+
for (size_t done = 0; done < n; done++) { // copy forwards
1291+
char* s = (char*)src_p+done*elsz;
1292+
// Elements whose first pointer field is NULL are skipped.
if (*((jl_value_t**)s+layout->first_ptr) != NULL)
1293+
jl_gc_queue_multiroot(owner, s, dt);
1294+
}
1295+
}
1296+
}
1297+
}
1298+
else {
1299+
memmove(destdata, srcdata, n * elsz);
1300+
}
1301+
}
1302+
1303+
12131304
#ifdef __cplusplus
12141305
}
12151306
#endif

src/gc-stock.c

+5
Original file line numberDiff line numberDiff line change
@@ -4071,6 +4071,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len)
40714071
// Do nothing
40724072
}
40734073

4074+
// Notification hook for memory allocated for the boot image.
// Intentionally a no-op in this GC implementation.
void jl_gc_notify_image_alloc(const char* img_data, size_t len)
{
    (void)img_data;
    (void)len;
}
4078+
40744079
JL_DLLEXPORT const char* jl_gc_active_impl(void) {
40754080
return "Built with stock GC";
40764081
}

src/gc-wb-mmtk.h

+39-1
Original file line numberDiff line numberDiff line change
@@ -11,19 +11,57 @@
1111
extern "C" {
1212
#endif
1313

14+
// MMTk write-barrier entry points; only declared here, defined outside this header.
extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr);
15+
extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr);
16+
17+
// Value of MMTK_NEEDS_WRITE_BARRIER that selects the object barrier.
#define MMTK_OBJECT_BARRIER (1)
18+
// Stickyimmix needs write barrier. Immix does not need write barrier.
// NOTE(review): when neither MMTK_PLAN_IMMIX nor MMTK_PLAN_STICKYIMMIX is defined,
// MMTK_NEEDS_WRITE_BARRIER is left undefined by the lines below.
19+
#ifdef MMTK_PLAN_IMMIX
20+
#define MMTK_NEEDS_WRITE_BARRIER (0)
21+
#endif
22+
#ifdef MMTK_PLAN_STICKYIMMIX
23+
#define MMTK_NEEDS_WRITE_BARRIER (1)
24+
#endif
25+
1426
// GC write barriers
1527

16-
// TODO: implement these functions for MMTk
28+
// Directly call into MMTk for write barrier (debugging only)
29+
STATIC_INLINE void mmtk_gc_wb_full(const void *parent, const void *ptr) JL_NOTSAFEPOINT
30+
{
31+
jl_task_t *ct = jl_current_task;
32+
jl_ptls_t ptls = ct->ptls;
33+
mmtk_object_reference_write_post(&ptls->gc_tls.mmtk_mutator, parent, ptr);
34+
}
35+
36+
// Inlined fast path of the object write barrier. Reads the side log bit that
// belongs to `parent` and calls MMTk's slow path only when that bit is set.
STATIC_INLINE void mmtk_gc_wb_fast(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    // Nothing to do unless the plan uses the object barrier.
    if (MMTK_NEEDS_WRITE_BARRIER != MMTK_OBJECT_BARRIER)
        return;

    // Byte index into the log-bit table is addr >> 6; bit index is (addr >> 3) & 7.
    const intptr_t parent_addr = (intptr_t) (void*) parent;
    const uint8_t *log_byte = (uint8_t*) (MMTK_SIDE_LOG_BIT_BASE_ADDRESS) + (parent_addr >> 6);
    const intptr_t bit_idx = (parent_addr >> 3) & 0b111;
    if (((*log_byte >> bit_idx) & 1) != 1)
        return;

    jl_ptls_t ptls = jl_current_task->ptls;
    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, parent, ptr);
}
51+
1752
// Write-barrier entry point: hands `parent` and `ptr` to the inlined MMTk fast path.
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    mmtk_gc_wb_fast(parent, ptr);
}
2056

2157
// Write barrier taking only the object itself (`ptr` isa jl_value_t*): runs the
// MMTk fast path with `ptr` as the parent and a null target.
STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT
{
    void *const no_target = (void*)0;
    mmtk_gc_wb_fast(ptr, no_target);
}
2461

2562
// Multi-field write barrier: runs the MMTk fast path once for `parent` with a
// null target. The `ptr` argument is not consulted.
STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
{
    void *const no_target = (void*)0;
    mmtk_gc_wb_fast(parent, no_target);
}
2866

2967

src/julia.h

+1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666

6767
typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
6868
typedef struct _jl_tls_states_t *jl_ptls_t;
69+
typedef struct _jl_genericmemory_t jl_genericmemory_t;
6970

7071
#ifdef JL_LIBRARY_EXPORTS
7172
#include "uv.h"

src/llvm-gc-interface-passes.h

+25
Original file line numberDiff line numberDiff line change
@@ -413,4 +413,29 @@ struct FinalLowerGC: private JuliaPassContext {
413413
void lowerSafepoint(CallInst *target, Function &F);
414414
};
415415

416+
// Enable this optimization only on LLVM 4.0+ since this causes LLVM to optimize
417+
// constant store loop to produce a `memset_pattern16` with a global variable
418+
// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
419+
// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
420+
// for NI pointers.
421+
// Looks up the refinement list recorded for `V` in the pass state `S`.
// Returns nullptr when `S` is null, when `V` has no entry in AllPtrNumbering,
// or when its number has no refinement list or an empty one.
// Declared `static inline` because this definition lives in a header: a plain
// `static` function triggers unused-function warnings in every translation
// unit that includes the header without calling it.
static inline SmallVector<int, 1> *FindRefinements(Value *V, State *S)
{
    if (!S)
        return nullptr;
    auto it = S->AllPtrNumbering.find(V);
    if (it == S->AllPtrNumbering.end())
        return nullptr;
    auto rit = S->Refinements.find(it->second);
    if (rit == S->Refinements.end() || rit->second.empty())
        return nullptr;
    return &rit->second;
}
431+
432+
// Returns true when `V` is an LLVM constant, or when its refinement list in `S`
// consists of exactly one entry equal to -2.
// NOTE(review): -2 presumably is the sentinel for "permanently rooted" — confirm
// against the code that populates State::Refinements.
inline bool IsPermRooted(Value *V, State *S)
{
    if (isa<Constant>(V))
        return true;
    auto *Refined = FindRefinements(V, S);
    if (!Refined)
        return false;
    return Refined->size() == 1 && (*Refined)[0] == -2;
}
440+
416441
#endif // LLVM_GC_PASSES_H

src/llvm-late-gc-lowering-mmtk.cpp

+75
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,78 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
9494
}
9595
return target;
9696
}
97+
98+
// MMTk lowering of the collected write-barrier calls in `WriteBarriers`.
// A barrier whose children are all either the parent itself or permanently
// rooted (per IsPermRooted) is erased outright. Every other barrier is replaced,
// when the plan uses the object barrier, by an inline test of the parent's side
// log bit that guards a call to the queueGCRoot intrinsic; the original call is
// then erased. `*CFGModified` (if non-null) is set for every barrier that is not
// erased by the redundancy check.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
99+
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
100+
for (auto CI : WriteBarriers) {
101+
auto parent = CI->getArgOperand(0);
102+
// Redundant barrier: every stored value is the parent itself or permanently rooted.
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
103+
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
104+
CI->eraseFromParent();
105+
continue;
106+
}
107+
// NOTE(review): the flag is raised here even on paths below that do not split a block.
if (CFGModified) {
108+
*CFGModified = true;
109+
}
110+
111+
IRBuilder<> builder(CI);
112+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
113+
114+
// FIXME: Currently we call write barrier with the src object (parent).
115+
// This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all.
116+
// But for other MMTk plans, we need to be careful.
117+
const bool INLINE_WRITE_BARRIER = true;
118+
if (CI->getCalledOperand() == write_barrier_func) {
119+
// When the plan does not use the object barrier nothing is emitted here and
// the call is simply erased at the end of the loop body.
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
120+
if (INLINE_WRITE_BARRIER) {
121+
auto i8_ty = Type::getInt8Ty(F.getContext());
122+
auto intptr_ty = T_size;
123+
124+
// intptr_t addr = (intptr_t) (void*) src;
125+
// uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
126+
intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
127+
auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
128+
auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
129+
130+
auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
131+
auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
132+
auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
133+
134+
// intptr_t shift = (addr >> 3) & 0b111;
135+
auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
136+
auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
137+
138+
// uint8_t byte_val = *meta_addr;
139+
auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
140+
141+
// if (((byte_val >> shift) & 1) == 1) {
142+
auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
143+
auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
144+
auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
145+
146+
// object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
147+
MDBuilder MDB(F.getContext());
148+
SmallVector<uint32_t, 2> Weights{1, 9};
149+
// Split the block so the queueGCRoot call only runs when the log bit is set;
// when pass state is available the dominator tree is updated through `dtu`.
if (S) {
150+
if (!S->DT) {
151+
S->DT = &GetDT();
152+
}
153+
DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
154+
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
155+
builder.SetInsertPoint(mayTriggerSlowpath);
156+
} else {
157+
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights));
158+
builder.SetInsertPoint(mayTriggerSlowpath);
159+
}
160+
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
161+
} else {
162+
// Non-inlined variant: call queueGCRoot unconditionally (unused while
// INLINE_WRITE_BARRIER is hard-coded to true).
Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
163+
builder.CreateCall(wb_func, { parent });
164+
}
165+
}
166+
} else {
167+
// Only calls to write_barrier_func are expected in `WriteBarriers`.
assert(false);
168+
}
169+
CI->eraseFromParent();
170+
}
171+
}

src/llvm-late-gc-lowering-stock.cpp

+44
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,47 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
77
// Do nothing for the stock GC
88
return target;
99
}
10+
11+
// Stock-GC lowering of the collected write-barrier calls in `WriteBarriers`.
// A barrier whose children are all either the parent itself or permanently
// rooted (per IsPermRooted) is erased outright. Every other barrier becomes two
// nested runtime checks on the tag bits (parent is GC_OLD_MARKED, then at least
// one child lacks GC_MARKED) guarding a call to the queueGCRoot intrinsic; the
// original call is then erased. `*CFGModified` (if non-null) is set whenever a
// barrier is expanded this way.
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
12+
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
13+
for (auto CI : WriteBarriers) {
14+
auto parent = CI->getArgOperand(0);
15+
// Redundant barrier: every stored value is the parent itself or permanently rooted.
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
16+
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
17+
CI->eraseFromParent();
18+
continue;
19+
}
20+
if (CFGModified) {
21+
*CFGModified = true;
22+
}
23+
24+
IRBuilder<> builder(CI);
25+
builder.SetCurrentDebugLocation(CI->getDebugLoc());
26+
// Outer check: continue only when the parent's tag bits equal GC_OLD_MARKED.
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
27+
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
28+
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
29+
builder.SetInsertPoint(mayTrigTerm);
30+
mayTrigTerm->getParent()->setName("may_trigger_wb");
31+
// Inner check: OR together "GC_MARKED bit is clear" over every child operand.
Value *anyChldNotMarked = NULL;
32+
for (unsigned i = 1; i < CI->arg_size(); i++) {
33+
Value *child = CI->getArgOperand(i);
34+
Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
35+
Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
36+
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
37+
}
38+
assert(anyChldNotMarked); // handled by all_of test above
39+
MDBuilder MDB(parent->getContext());
40+
SmallVector<uint32_t, 2> Weights{1, 9};
41+
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
42+
MDB.createBranchWeights(Weights));
43+
trigTerm->getParent()->setName("trigger_wb");
44+
builder.SetInsertPoint(trigTerm);
45+
// Both checks passed at runtime: queue the parent as a GC root.
if (CI->getCalledOperand() == write_barrier_func) {
46+
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
47+
}
48+
else {
49+
// Only calls to write_barrier_func are expected in `WriteBarriers`.
assert(false);
50+
}
51+
CI->eraseFromParent();
52+
}
53+
}
}

0 commit comments

Comments
 (0)