Skip to content

Commit 133e48b

Browse files
committed
Refactoring passes that lower write barrier and allocation
1 parent 500c935 commit 133e48b

7 files changed

+241
-233
lines changed

src/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ ifeq ($(JULIACODEGEN),LLVM)
7272
GC_CODEGEN_SRCS := llvm-final-gc-lowering llvm-late-gc-lowering llvm-gc-invariant-verifier
7373
ifeq (${USE_THIRD_PARTY_GC},mmtk)
7474
FLAGS += -I$(MMTK_API_INC)
75-
GC_CODEGEN_SRCS += llvm-late-gc-lowering-mmtk
75+
GC_CODEGEN_SRCS += llvm-final-gc-lowering-mmtk
7676
else
77-
GC_CODEGEN_SRCS += llvm-late-gc-lowering-stock
77+
GC_CODEGEN_SRCS += llvm-final-gc-lowering-stock
7878
endif
7979
CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \
8080
llvm-pass-helpers llvm-ptls llvm-propagate-addrspaces null_sysimage \

src/llvm-late-gc-lowering-mmtk.cpp src/llvm-final-gc-lowering-mmtk.cpp

+74-67
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,31 @@
22

33
#include "llvm-gc-interface-passes.h"
44

5-
Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
5+
#define DEBUG_TYPE "mmtk_final_gc_lowering"
6+
STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
7+
8+
Value* FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
69
{
7-
assert(target->arg_size() == 3);
10+
++GCAllocBytesCount;
11+
CallInst *newI;
812

913
IRBuilder<> builder(target);
1014
auto ptls = target->getArgOperand(0);
1115
auto type = target->getArgOperand(2);
16+
uint64_t derefBytes = 0;
1217
if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
1318
size_t sz = (size_t)CI->getZExtValue();
1419
// This is strongly architecture and OS dependent
1520
int osize;
1621
int offset = jl_gc_classify_pools(sz, &osize);
17-
if (offset >= 0) {
22+
if (offset < 0) {
23+
newI = builder.CreateCall(
24+
bigAllocFunc,
25+
{ ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type });
26+
if (sz > 0)
27+
derefBytes = sz;
28+
}
29+
else {
1830
// In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc
1931
// We do a slowpath/fastpath check and lower it only on the slowpath, returning
2032
// the cursor and updating it in the fastpath.
@@ -91,81 +103,76 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
91103
return phiNode;
92104
}
93105
}
106+
} else {
107+
auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
108+
// allocTypedFunc does not include the type tag in the allocation size!
109+
newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
110+
derefBytes = sizeof(void*);
94111
}
95-
return target;
112+
newI->setAttributes(newI->getCalledFunction()->getAttributes());
113+
unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*));
114+
newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));
115+
if (derefBytes > 0)
116+
newI->addDereferenceableRetAttr(derefBytes);
117+
newI->takeName(target);
118+
return newI;
96119
}
97120

98-
void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVector<CallInst*, 0> &WriteBarriers, bool *CFGModified) {
99-
auto T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
100-
for (auto CI : WriteBarriers) {
101-
auto parent = CI->getArgOperand(0);
102-
if (std::all_of(CI->op_begin() + 1, CI->op_end(),
103-
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, S); })) {
104-
CI->eraseFromParent();
105-
continue;
106-
}
107-
if (CFGModified) {
108-
*CFGModified = true;
121+
122+
void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
123+
State S(F);
124+
auto parent = target->getArgOperand(0);
125+
if (std::all_of(target->op_begin() + 1, target->op_end(),
126+
[parent, &S](Value *child) { return parent == child || IsPermRooted(child, &S); })) {
127+
return;
109128
}
110129

111-
IRBuilder<> builder(CI);
112-
builder.SetCurrentDebugLocation(CI->getDebugLoc());
130+
IRBuilder<> builder(target);
131+
builder.SetCurrentDebugLocation(target->getDebugLoc());
113132

114133
// FIXME: Currently we call write barrier with the src object (parent).
115134
// This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all.
116135
// But for other MMTk plans, we need to be careful.
117136
const bool INLINE_WRITE_BARRIER = true;
118-
if (CI->getCalledOperand() == write_barrier_func) {
119-
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
120-
if (INLINE_WRITE_BARRIER) {
121-
auto i8_ty = Type::getInt8Ty(F.getContext());
122-
auto intptr_ty = T_size;
123-
124-
// intptr_t addr = (intptr_t) (void*) src;
125-
// uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
126-
intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
127-
auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
128-
auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
129-
130-
auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
131-
auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
132-
auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
133-
134-
// intptr_t shift = (addr >> 3) & 0b111;
135-
auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
136-
auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
137-
138-
// uint8_t byte_val = *meta_addr;
139-
auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
140-
141-
// if (((byte_val >> shift) & 1) == 1) {
142-
auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
143-
auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
144-
auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
145-
146-
// object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
147-
MDBuilder MDB(F.getContext());
148-
SmallVector<uint32_t, 2> Weights{1, 9};
149-
if (S) {
150-
if (!S->DT) {
151-
S->DT = &GetDT();
152-
}
153-
DomTreeUpdater dtu = DomTreeUpdater(S->DT, llvm::DomTreeUpdater::UpdateStrategy::Lazy);
154-
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights), &dtu);
155-
builder.SetInsertPoint(mayTriggerSlowpath);
156-
} else {
157-
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, CI, false, MDB.createBranchWeights(Weights));
158-
builder.SetInsertPoint(mayTriggerSlowpath);
159-
}
160-
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
161-
} else {
162-
Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
163-
builder.CreateCall(wb_func, { parent });
164-
}
137+
if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
138+
if (INLINE_WRITE_BARRIER) {
139+
auto i8_ty = Type::getInt8Ty(F.getContext());
140+
auto intptr_ty = T_size;
141+
142+
// intptr_t addr = (intptr_t) (void*) src;
143+
// uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
144+
intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
145+
auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
146+
auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
147+
148+
auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
149+
auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
150+
auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
151+
152+
// intptr_t shift = (addr >> 3) & 0b111;
153+
auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
154+
auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
155+
156+
// uint8_t byte_val = *meta_addr;
157+
auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
158+
159+
// if (((byte_val >> shift) & 1) == 1) {
160+
auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
161+
auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
162+
auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
163+
164+
// object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
165+
MDBuilder MDB(F.getContext());
166+
SmallVector<uint32_t, 2> Weights{1, 9};
167+
168+
auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, target, false, MDB.createBranchWeights(Weights));
169+
builder.SetInsertPoint(mayTriggerSlowpath);
170+
builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
171+
} else {
172+
Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
173+
builder.CreateCall(wb_func, { parent });
165174
}
166175
} else {
167-
assert(false);
176+
// Using a plan that does not need write barriers
168177
}
169-
CI->eraseFromParent();
170-
}
171178
}

src/llvm-final-gc-lowering-stock.cpp

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
// This file is a part of Julia. License is MIT: https://julialang.org/license
2+
3+
#include "llvm-gc-interface-passes.h"
4+
5+
#define DEBUG_TYPE "stock_final_gc_lowering"
6+
STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
7+
8+
// Lower one GC alloc-bytes intrinsic call (`target`) to a call to a concrete
// allocation function, built with an IRBuilder positioned at `target`.
//
// Operands read from `target`: 0 = ptls, 1 = requested size, 2 = type.
//   - constant size for which jl_gc_classify_pools() yields a pool offset
//     (offset >= 0): call smallAllocFunc with that offset and object size;
//   - constant size with no pool (offset < 0): call bigAllocFunc with the
//     size plus one pointer-sized word (presumably room for the type tag);
//   - non-constant size: call allocTypedFunc with the size converted to T_size.
//
// The new call takes the callee's attribute list, a return alignment of at
// least sizeof(void*), a dereferenceable-bytes return attribute when that
// count is non-zero, and the name of `target`.
//
// Returns the new call. `target` is neither replaced nor erased here.
Value* FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
{
    ++GCAllocBytesCount;

    IRBuilder<> builder(target);
    auto ptls = target->getArgOperand(0);
    auto type = target->getArgOperand(2);
    auto requestedSize = target->getArgOperand(1);

    CallInst *newI = nullptr;
    uint64_t derefBytes = 0;
    if (auto constSize = dyn_cast<ConstantInt>(requestedSize)) {
        const size_t sz = (size_t)constSize->getZExtValue();
        // This is strongly architecture and OS dependent
        int osize;
        const int offset = jl_gc_classify_pools(sz, &osize);
        if (offset >= 0) {
            // The size fits one of the pools.
            auto T_int32 = Type::getInt32Ty(F.getContext());
            auto pool_offs = ConstantInt::get(T_int32, offset);
            auto pool_osize = ConstantInt::get(T_int32, osize);
            newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type });
        }
        else {
            // No pool for this size: use the big-object allocator.
            auto allocSize = ConstantInt::get(T_size, sz + sizeof(void*));
            newI = builder.CreateCall(bigAllocFunc, { ptls, allocSize, type });
        }
        // Both constant-size paths advertise the full payload; a zero size
        // keeps derefBytes at 0, so no attribute is added below.
        derefBytes = sz;
    }
    else {
        auto size = builder.CreateZExtOrTrunc(requestedSize, T_size);
        // allocTypedFunc does not include the type tag in the allocation size!
        newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
        derefBytes = sizeof(void*);
    }

    newI->setAttributes(newI->getCalledFunction()->getAttributes());

    // Never advertise less than pointer alignment on the returned object.
    const unsigned requestedAlign = (unsigned)target->getRetAlign().valueOrOne().value();
    const unsigned align = std::max(requestedAlign, (unsigned)sizeof(void*));
    newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));

    if (derefBytes > 0)
        newI->addDereferenceableRetAttr(derefBytes);
    newI->takeName(target);
    return newI;
}
50+
51+
// Expand one write-barrier call (`target`) into explicit tag checks emitted
// in front of it. Argument 0 of `target` is the parent object; the remaining
// arguments are the children.
//
// Nothing is emitted when every operand after the first is either the parent
// itself or is reported as permanently rooted by IsPermRooted().
//
// Otherwise the generated code is:
//   if ((tag(parent) & GC_OLD_MARKED) == GC_OLD_MARKED)     // "may_trigger_wb"
//       if (any child has (tag(child) & GC_MARKED) == 0)    // "trigger_wb"
//           queueGCRoot(parent);
// The inner branch carries 1:9 branch weights.
//
// `target` itself is not erased here.
void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
    State S(F);
    auto parent = target->getArgOperand(0);

    // An operand needs no barrier if it is the parent or is permanently rooted.
    auto needsNoBarrier = [parent, &S](Value *child) {
        return parent == child || IsPermRooted(child, &S);
    };
    if (std::all_of(target->op_begin() + 1, target->op_end(), needsNoBarrier))
        return;

    IRBuilder<> builder(target);
    builder.SetCurrentDebugLocation(target->getDebugLoc());

    // Outer check: does the parent's tag have all GC_OLD_MARKED bits set?
    auto parentTag = EmitLoadTag(builder, T_size, parent, tbaa_tag);
    auto parentBits = builder.CreateAnd(parentTag, GC_OLD_MARKED, "parent_bits");
    auto oldMarkedConst = ConstantInt::get(T_size, GC_OLD_MARKED);
    auto parentOldMarked = builder.CreateICmpEQ(parentBits, oldMarkedConst, "parent_old_marked");
    auto mayTrigTerm = SplitBlockAndInsertIfThen(parentOldMarked, target, false);
    builder.SetInsertPoint(mayTrigTerm);
    mayTrigTerm->getParent()->setName("may_trigger_wb");

    // Inner check: OR together "GC_MARKED bit is clear" over all children.
    Value *anyChildUnmarked = nullptr;
    for (unsigned idx = 1; idx < target->arg_size(); ++idx) {
        Value *child = target->getArgOperand(idx);
        auto childTag = EmitLoadTag(builder, T_size, child, tbaa_tag);
        Value *childBit = builder.CreateAnd(childTag, GC_MARKED, "child_bit");
        Value *childUnmarked = builder.CreateICmpEQ(childBit, ConstantInt::get(T_size, 0), "child_not_marked");
        if (anyChildUnmarked)
            anyChildUnmarked = builder.CreateOr(anyChildUnmarked, childUnmarked);
        else
            anyChildUnmarked = childUnmarked;
    }
    assert(anyChildUnmarked); // handled by all_of test above

    MDBuilder MDB(parent->getContext());
    SmallVector<uint32_t, 2> Weights{1, 9};
    auto trigTerm = SplitBlockAndInsertIfThen(anyChildUnmarked, mayTrigTerm, false,
                                              MDB.createBranchWeights(Weights));
    trigTerm->getParent()->setName("trigger_wb");
    builder.SetInsertPoint(trigTerm);

    // Only the write_barrier_func intrinsic is expected to reach this point.
    if (target->getCalledOperand() != write_barrier_func) {
        assert(false);
        return;
    }
    builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
}

0 commit comments

Comments
 (0)