
Commit 6eec92d

Apply refactorings
1 parent 2daf212 commit 6eec92d

8 files changed, +249 −75 lines changed

src/gc-interface.h (+4 −2)
@@ -214,6 +214,10 @@ struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty, unsigned align) JL_NOTSAF
 // The GC may use that information to, for instance, determine that such objects should
 // be treated as marked and belonged to the old generation in nursery collections.
 void jl_gc_notify_image_load(const char* img_data, size_t len);
+// This function notifies the GC about memory addresses that are set when allocating the boot image.
+// The GC may use that information to, for instance, determine that all objects in that chunk of memory should
+// be treated as marked and belong to the old generation in nursery collections.
+void jl_gc_notify_image_alloc(const char* img_data, size_t len);

 // ========================================================================= //
 // Runtime Write-Barriers
@@ -252,13 +256,11 @@ STATIC_INLINE void jl_gc_wb_knownold(const void *parent JL_UNUSED, const void *p
 // per field of the object being copied, but may be special-cased for performance reasons.
 STATIC_INLINE void jl_gc_multi_wb(const void *parent,
                                   const struct _jl_value_t *ptr) JL_NOTSAFEPOINT;
-
 // Write-barrier function that must be used after copying fields of elements of genericmemory objects
 // into another. It should be semantically equivalent to triggering multiple write barriers – one
 // per field of the object being copied, but may be special-cased for performance reasons.
 STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const struct _jl_value_t *owner, struct _jl_genericmemory_t *src, char* src_p,
                                                    size_t n, struct _jl_datatype_t *dt) JL_NOTSAFEPOINT;
-
 // Similar to jl_gc_wb_genericmemory_copy but must be used when copying *boxed* elements of a genericmemory
 // object. Note that this barrier also performs the copying unlike jl_gc_wb_genericmemory_copy_ptr.
 // The parameters src_p, dest_p and n will be modified and will contain information about
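The two image-notification hooks added above are intentionally GC-agnostic: the stock collector stubs them out (see src/gc-stock.c below), while a collector that wants boot-image objects treated as old and already marked can simply record the notified range. The following is a minimal, illustrative sketch of such a consumer, not part of this commit; `example_notify_image_alloc`, `example_in_boot_image`, and `boot_image_range` are hypothetical names.

#include <stddef.h>

// Hypothetical consumer of jl_gc_notify_image_alloc: remember the chunk so the
// marking code can treat every object inside it as old/marked and skip tracing it.
typedef struct { const char *start; size_t len; } image_range_t;
static image_range_t boot_image_range;

static void example_notify_image_alloc(const char *img_data, size_t len)
{
    boot_image_range.start = img_data;
    boot_image_range.len = len;
}

static int example_in_boot_image(const void *p)
{
    const char *c = (const char*)p;
    return boot_image_range.start != NULL &&
           c >= boot_image_range.start &&
           c < boot_image_range.start + boot_image_range.len;
}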

src/gc-mmtk.c (+95 −4)
@@ -1,5 +1,6 @@
 #include "gc-common.h"
 #include "gc-tls-mmtk.h"
+#include "gc-wb-mmtk.h"
 #include "mmtkMutator.h"
 #include "threading.h"

@@ -861,10 +862,22 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t
     return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1);
 }

+inline void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) {
+    intptr_t addr = (intptr_t) obj;
+    uint8_t* meta_addr = (uint8_t*) side_metadata_base + (addr >> 6);
+    intptr_t shift = (addr >> 3) & 0b111;
+    while(1) {
+        uint8_t old_val = *meta_addr;
+        uint8_t new_val = old_val | (1 << shift);
+        if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) {
+            break;
+        }
+    }
+}
+
 STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) {
-    // FIXME: Similarly, for now, we do nothing
-    // but when supporting moving, this is where we set the valid object (VO) bit
-    // and log (old gen) bit
+    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+        mmtk_set_side_metadata(MMTK_SIDE_LOG_BIT_BASE_ADDRESS, obj);
+    }
 }

 JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty)
@@ -1128,7 +1141,9 @@ _Atomic(int) gc_stack_free_idx = 0;

 JL_DLLEXPORT void jl_gc_queue_root(const struct _jl_value_t *ptr) JL_NOTSAFEPOINT
 {
-    mmtk_unreachable();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    mmtk_object_reference_write_slow(&ptls->gc_tls.mmtk_mutator, ptr, (const void*) 0);
 }

 JL_DLLEXPORT void jl_gc_queue_multiroot(const struct _jl_value_t *root, const void *stored,
@@ -1210,6 +1225,82 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p)
     return NULL;
 }

+JL_DLLEXPORT void jl_genericmemory_copyto(jl_genericmemory_t *dest, char* destdata,
+                                          jl_genericmemory_t *src, char* srcdata,
+                                          size_t n) JL_NOTSAFEPOINT
+{
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typetagof(dest);
+    if (dt != (jl_datatype_t*)jl_typetagof(src))
+        jl_exceptionf(jl_argumenterror_type, "jl_genericmemory_copyto requires source and dest to have same type");
+    const jl_datatype_layout_t *layout = dt->layout;
+    if (layout->flags.arrayelem_isboxed) {
+        _Atomic(void*) * dest_p = (_Atomic(void*)*)destdata;
+        _Atomic(void*) * src_p = (_Atomic(void*)*)srcdata;
+        jl_value_t *owner = jl_genericmemory_owner(dest);
+        jl_gc_wb(owner, NULL);
+        if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
+            jl_value_t *src_owner = jl_genericmemory_owner(src);
+            ssize_t done = 0;
+            if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
+                if (dest_p < src_p || dest_p > src_p + n) {
+                    for (; done < n; done++) { // copy forwards
+                        void *val = jl_atomic_load_relaxed(src_p + done);
+                        jl_atomic_store_release(dest_p + done, val);
+                        // `val` is young or old-unmarked
+                        if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
+                            jl_gc_queue_root(owner);
+                            break;
+                        }
+                    }
+                    src_p += done;
+                    dest_p += done;
+                } else {
+                    for (; done < n; done++) { // copy backwards
+                        void *val = jl_atomic_load_relaxed(src_p + n - done - 1);
+                        jl_atomic_store_release(dest_p + n - done - 1, val);
+                        // `val` is young or old-unmarked
+                        if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
+                            jl_gc_queue_root(owner);
+                            break;
+                        }
+                    }
+                }
+                n -= done;
+            }
+        }
+        return memmove_refs(dest_p, src_p, n);
+    }
+    size_t elsz = layout->size;
+    char *src_p = srcdata;
+    int isbitsunion = layout->flags.arrayelem_isunion;
+    if (isbitsunion) {
+        char *sourcetypetagdata = jl_genericmemory_typetagdata(src);
+        char *desttypetagdata = jl_genericmemory_typetagdata(dest);
+        memmove(desttypetagdata+(size_t)destdata, sourcetypetagdata+(size_t)srcdata, n);
+        srcdata = (char*)src->ptr + elsz*(size_t)srcdata;
+        destdata = (char*)dest->ptr + elsz*(size_t)destdata;
+    }
+    if (layout->first_ptr != -1) {
+        memmove_refs((_Atomic(void*)*)destdata, (_Atomic(void*)*)srcdata, n * elsz / sizeof(void*));
+        jl_value_t *owner = jl_genericmemory_owner(dest);
+        if (__unlikely(jl_astaggedvalue(owner)->bits.gc == GC_OLD_MARKED)) {
+            jl_value_t *src_owner = jl_genericmemory_owner(src);
+            if (jl_astaggedvalue(src_owner)->bits.gc != GC_OLD_MARKED) {
+                dt = (jl_datatype_t*)jl_tparam1(dt);
+                for (size_t done = 0; done < n; done++) { // copy forwards
+                    char* s = (char*)src_p+done*elsz;
+                    if (*((jl_value_t**)s+layout->first_ptr) != NULL)
+                        jl_gc_queue_multiroot(owner, s, dt);
+                }
+            }
+        }
+    }
+    else {
+        memmove(destdata, srcdata, n * elsz);
+    }
+}
+
 #ifdef __cplusplus
 }
 #endif
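For reference, `mmtk_set_side_metadata` above encodes MMTk's side-metadata layout of one log bit per 8 bytes of heap: `addr >> 6` selects the metadata byte (each byte covers 64 bytes of heap) and `(addr >> 3) & 0b111` selects the bit inside it. A standalone sketch of the matching read side is shown below; `mmtk_log_bit_is_set` is illustrative only and not part of this commit.

#include <stdint.h>

// Illustrative reader for the side-metadata layout written by mmtk_set_side_metadata.
static int mmtk_log_bit_is_set(const void *side_metadata_base, void *obj)
{
    intptr_t addr = (intptr_t)obj;
    const uint8_t *meta_addr = (const uint8_t*)side_metadata_base + (addr >> 6); // metadata byte
    int shift = (int)((addr >> 3) & 0b111);                                      // bit within that byte
    return (*meta_addr >> shift) & 1;
}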

src/gc-stock.c (+5)
@@ -4074,6 +4074,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len)
     // Do nothing
 }

+void jl_gc_notify_image_alloc(const char* img_data, size_t len)
+{
+    // Do nothing
+}
+
 JL_DLLEXPORT const char* jl_gc_active_impl(void) {
     return "Built with stock GC";
 }

src/julia.h (+1)
@@ -66,6 +66,7 @@

 typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
 typedef struct _jl_tls_states_t *jl_ptls_t;
+typedef struct _jl_genericmemory_t jl_genericmemory_t;

 #ifdef JL_LIBRARY_EXPORTS
 #include "uv.h"

src/llvm-gc-interface-passes.h (+25)
@@ -417,4 +417,29 @@ struct FinalLowerGC: private JuliaPassContext {
     bool shouldRunFinalGC();
 };

+// Enable this optimization only on LLVM 4.0+ since this causes LLVM to optimize
+// constant store loop to produce a `memset_pattern16` with a global variable
+// that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
+// This is not a problem on 4.0+ since that transformation (in loop-idiom) is disabled
+// for NI pointers.
+static SmallVector<int, 1> *FindRefinements(Value *V, State *S)
+{
+    if (!S)
+        return nullptr;
+    auto it = S->AllPtrNumbering.find(V);
+    if (it == S->AllPtrNumbering.end())
+        return nullptr;
+    auto rit = S->Refinements.find(it->second);
+    return rit != S->Refinements.end() && !rit->second.empty() ? &rit->second : nullptr;
+}
+
+inline bool IsPermRooted(Value *V, State *S)
+{
+    if (isa<Constant>(V))
+        return true;
+    if (auto *RefinePtr = FindRefinements(V, S))
+        return RefinePtr->size() == 1 && (*RefinePtr)[0] == -2;
+    return false;
+}
+
 #endif // LLVM_GC_PASSES_H
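`CleanupWriteBarriers` (both backends below) uses `IsPermRooted` to delete a write-barrier call whose children are all either the parent itself or permanently rooted (the -2 refinement sentinel). Restated in runtime terms, the barrier only needs to fire when an old, marked parent may point to an unmarked child, and permanently rooted objects carry the old/marked tag bits, as the gc-interface.h comments above describe. The sketch below is illustrative only (`barrier_needed_sketch` is not in this commit) and assumes the usual Julia internal headers.

// Illustrative: when does a generational write barrier actually need to fire?
static int barrier_needed_sketch(jl_value_t *parent, jl_value_t *child)
{
    int parent_old_marked = (jl_astaggedvalue(parent)->bits.gc & GC_OLD_MARKED) == GC_OLD_MARKED;
    int child_unmarked    = (jl_astaggedvalue(child)->bits.gc & GC_MARKED) == 0;
    // A permanently rooted child is treated as old/marked, so child_unmarked is 0
    // for it and the barrier can be dropped at compile time.
    return parent_old_marked && child_unmarked;
}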

src/llvm-late-gc-lowering-mmtk.cpp (+75)
@@ -94,3 +94,78 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
     }
     return target;
 }
+
+void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
+    auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
+    for (auto CI : WriteBarriers) {
+        auto parent = CI->getArgOperand(0);
+        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
+                        [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
+            CI->eraseFromParent();
+            continue;
+        }
+        if (CFGModified) {
+            *CFGModified = true;
+        }
+
+        IRBuilder<> builder(CI);
+        builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+        // FIXME: Currently we call the write barrier with the src object (parent).
+        // This works fine for the object barrier of generational plans (such as stickyimmix), which does not use the target object at all.
+        // But for other MMTk plans, we need to be careful.
+        const bool INLINE_WRITE_BARRIER = true;
+        if (CI->getCalledOperand() == write_barrier_func) {
+            if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+                if (INLINE_WRITE_BARRIER) {
+                    auto i8_ty = Type::getInt8Ty(F.getContext());
+                    auto intptr_ty = T_size;
+
+                    // intptr_t addr = (intptr_t) (void*) src;
+                    // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
+                    intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
+                    auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
+                    auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
+
+                    auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
+                    auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
+                    auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
+
+                    // intptr_t shift = (addr >> 3) & 0b111;
+                    auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
+                    auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
+
+                    // uint8_t byte_val = *meta_addr;
+                    auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
+
+                    // if (((byte_val >> shift) & 1) == 1) {
+                    auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
+                    auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
+                    auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
+
+                    // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
+                    MDBuilder MDB(F.getContext());
+                    SmallVector<uint32_t, 2> Weights{1, 9};
+                    if (S) {
+                        if (!S->DT) {
+                            S->DT = &GetDT();
+                        }
+                        DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
+                        auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
+                        builder.SetInsertPoint(mayTriggerSlowpath);
+                    } else {
+                        auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights));
+                        builder.SetInsertPoint(mayTriggerSlowpath);
+                    }
+                    builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
+                } else {
+                    Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
+                    builder.CreateCall(wb_func, { parent });
+                }
+            }
+        } else {
+            assert(false);
+        }
+        CI->eraseFromParent();
+    }
+}
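Read back as C, the IR inlined above for MMTK_OBJECT_BARRIER is the fast path sketched below: test the parent's log bit and, only in the unlikely case that it is set (the {1, 9} branch weights mark the slow path as the cold edge), fall through to queueGCRoot, which lowers to jl_gc_queue_root and hence to mmtk_object_reference_write_slow (see src/gc-mmtk.c above). This is an illustrative rendering that assumes the Julia/MMTk headers used elsewhere in this commit, not code emitted by the pass.

// Illustrative C equivalent of the inlined write-barrier fast path.
static void mmtk_write_barrier_fast_path_sketch(jl_value_t *parent)
{
    intptr_t addr = (intptr_t)parent;
    const uint8_t *meta_addr = (const uint8_t*)MMTK_SIDE_LOG_BIT_BASE_ADDRESS + (addr >> 6);
    int shift = (int)((addr >> 3) & 0b111);
    if (((*meta_addr >> shift) & 1) == 1) {
        // cold path: the parent is unlogged, hand it to the slow path
        jl_gc_queue_root(parent);
    }
}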

src/llvm-late-gc-lowering-stock.cpp (+44)
@@ -7,3 +7,47 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
     // Do nothing for the stock GC
     return target;
 }
+
+void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
+    auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
+    for (auto CI : WriteBarriers) {
+        auto parent = CI->getArgOperand(0);
+        if (std::all_of(CI->op_begin() + 1, CI->op_end(),
+                        [parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
+            CI->eraseFromParent();
+            continue;
+        }
+        if (CFGModified) {
+            *CFGModified = true;
+        }
+
+        IRBuilder<> builder(CI);
+        builder.SetCurrentDebugLocation(CI->getDebugLoc());
+        auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
+        auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
+        auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
+        builder.SetInsertPoint(mayTrigTerm);
+        mayTrigTerm->getParent()->setName("may_trigger_wb");
+        Value *anyChldNotMarked = NULL;
+        for (unsigned i = 1; i < CI->arg_size(); i++) {
+            Value *child = CI->getArgOperand(i);
+            Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
+            Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
+            anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
+        }
+        assert(anyChldNotMarked); // handled by all_of test above
+        MDBuilder MDB(parent->getContext());
+        SmallVector<uint32_t, 2> Weights{1, 9};
+        auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
+                                                  MDB.createBranchWeights(Weights));
+        trigTerm->getParent()->setName("trigger_wb");
+        builder.SetInsertPoint(trigTerm);
+        if (CI->getCalledOperand() == write_barrier_func) {
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+        }
+        else {
+            assert(false);
+        }
+        CI->eraseFromParent();
+    }
+}
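The stock lowering above emits, in IR, the same generational check that the runtime's jl_gc_wb performs: queue the parent only when it is old and marked and at least one stored child is not marked. The sketch below restates that logic in C for readability; `stock_write_barrier_sketch` is illustrative only (the generated code folds all child checks into one branch and names the blocks "may_trigger_wb" and "trigger_wb") and assumes the usual Julia internal headers.

// Illustrative C equivalent of the lowered multi-argument write barrier.
static void stock_write_barrier_sketch(jl_value_t *parent, jl_value_t **children, size_t nchildren)
{
    // "may_trigger_wb": only an old, marked parent can need the barrier
    if ((jl_astaggedvalue(parent)->bits.gc & GC_OLD_MARKED) == GC_OLD_MARKED) {
        for (size_t i = 0; i < nchildren; i++) {
            // "trigger_wb": some child is not yet marked, so remember the parent
            if ((jl_astaggedvalue(children[i])->bits.gc & GC_MARKED) == 0) {
                jl_gc_queue_root(parent);
                break;
            }
        }
    }
}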
