|
2 | 2 |
|
3 | 3 | #include "llvm-gc-interface-passes.h"
|
4 | 4 |
|
5 |
| -Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) |
| 5 | +#define DEBUG_TYPE "mmtk_final_gc_lowering" |
| 6 | +STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics"); |
| 7 | + |
| 8 | +Value* FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) |
6 | 9 | {
|
7 |
| - assert(target->arg_size() == 3); |
| 10 | + ++GCAllocBytesCount; |
| 11 | + CallInst *newI; |
8 | 12 |
|
9 | 13 | IRBuilder<> builder(target);
|
10 | 14 | auto ptls = target->getArgOperand(0);
|
11 | 15 | auto type = target->getArgOperand(2);
|
| 16 | + uint64_t derefBytes = 0; |
12 | 17 | if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
|
13 | 18 | size_t sz = (size_t)CI->getZExtValue();
|
14 | 19 | // This is strongly architecture and OS dependent
|
15 | 20 | int osize;
|
16 | 21 | int offset = jl_gc_classify_pools(sz, &osize);
|
17 |
| - if (offset >= 0) { |
| 22 | + if (offset < 0) { |
| 23 | + newI = builder.CreateCall( |
| 24 | + bigAllocFunc, |
| 25 | + { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type }); |
| 26 | + if (sz > 0) |
| 27 | + derefBytes = sz; |
| 28 | + } |
| 29 | + else { |
18 | 30 | // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc
|
19 | 31 | // We do a slowpath/fastpath check and lower it only on the slowpath, returning
|
20 | 32 | // the cursor and updating it in the fastpath.
|
@@ -91,81 +103,76 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
|
91 | 103 | return phiNode;
|
92 | 104 | }
|
93 | 105 | }
|
| 106 | + } else { |
| 107 | + auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size); |
| 108 | + // allocTypedFunc does not include the type tag in the allocation size! |
| 109 | + newI = builder.CreateCall(allocTypedFunc, { ptls, size, type }); |
| 110 | + derefBytes = sizeof(void*); |
94 | 111 | }
|
95 |
| - return target; |
| 112 | + newI->setAttributes(newI->getCalledFunction()->getAttributes()); |
| 113 | + unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*)); |
| 114 | + newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align))); |
| 115 | + if (derefBytes > 0) |
| 116 | + newI->addDereferenceableRetAttr(derefBytes); |
| 117 | + newI->takeName(target); |
| 118 | + return newI; |
96 | 119 | }
|
97 | 120 |
|
98 |
// Lower a julia write-barrier intrinsic call for MMTk.
// - If every stored child operand is either the parent object itself or
//   permanently rooted, the barrier is unnecessary and nothing is emitted.
// - Otherwise, for an MMTK_OBJECT_BARRIER plan, emit either an inline
//   side-metadata (log-bit) fast-path check that branches to the GC-queue
//   slowpath, or a direct call to the slowpath, depending on
//   INLINE_WRITE_BARRIER.
// - For plans that need no write barrier, emit nothing.
// NOTE(review): this function does not erase `target`; presumably the caller
// removes the lowered intrinsic call afterwards — confirm at the call site.
void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
    // Fresh per-function analysis state, used here only by IsPermRooted below.
    State S(F);
    auto parent = target->getArgOperand(0);
    // Skip operand 0 (the parent); if all remaining operands are the parent or
    // perm-rooted, no barrier is needed.
    // NOTE(review): op_end() on a CallInst also covers the callee operand —
    // confirm IsPermRooted tolerates the intrinsic Function itself.
    if (std::all_of(target->op_begin() + 1, target->op_end(),
            [parent, &S](Value *child) { return parent == child || IsPermRooted(child, &S); })) {
        return;
    }

    // Emit replacement IR immediately before the intrinsic call, keeping its
    // debug location so the barrier is attributed to the original store site.
    IRBuilder<> builder(target);
    builder.SetCurrentDebugLocation(target->getDebugLoc());

    // FIXME: Currently we call write barrier with the src object (parent).
    // This works fine for object barrier for generational plans (such as stickyimmix), which does not use the target object at all.
    // But for other MMTk plans, we need to be careful.
    const bool INLINE_WRITE_BARRIER = true;
    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
        if (INLINE_WRITE_BARRIER) {
            auto i8_ty = Type::getInt8Ty(F.getContext());
            auto intptr_ty = T_size;

            // intptr_t addr = (intptr_t) (void*) src;
            // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
            intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
            auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
            auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));

            // Compute the address of the metadata byte holding this object's
            // log bit: one bit per 8-byte granule, one byte per 64 bytes.
            auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
            auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
            auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);

            // intptr_t shift = (addr >> 3) & 0b111;
            auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
            auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);

            // uint8_t byte_val = *meta_addr;
            // Align() == unknown/default alignment for the single metadata byte.
            auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());

            // if (((byte_val >> shift) & 1) == 1) {
            auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
            auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
            auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));

            // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
            // Branch weights 1:9 mark the slowpath (unlogged object) as the
            // unlikely side of the split.
            MDBuilder MDB(F.getContext());
            SmallVector<uint32_t, 2> Weights{1, 9};

            // Split the block before `target` and insert the conditional
            // slowpath call that pushes the parent onto the GC queue.
            auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, target, false, MDB.createBranchWeights(Weights));
            builder.SetInsertPoint(mayTriggerSlowpath);
            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
        } else {
            // Out-of-line variant: unconditionally call the queueing slowpath.
            Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
            builder.CreateCall(wb_func, { parent });
        }
    } else {
        // Using a plan that does not need write barriers
    }
}
|
0 commit comments