diff --git a/src/Makefile b/src/Makefile
index c605d6c70573b..d46f5fb767ca3 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -72,9 +72,9 @@ ifeq ($(JULIACODEGEN),LLVM)
 GC_CODEGEN_SRCS := llvm-final-gc-lowering llvm-late-gc-lowering llvm-gc-invariant-verifier
 ifeq (${USE_THIRD_PARTY_GC},mmtk)
 FLAGS += -I$(MMTK_API_INC)
-GC_CODEGEN_SRCS += llvm-late-gc-lowering-mmtk
+GC_CODEGEN_SRCS += llvm-final-gc-lowering-mmtk
 else
-GC_CODEGEN_SRCS += llvm-late-gc-lowering-stock
+GC_CODEGEN_SRCS += llvm-final-gc-lowering-stock
 endif
 CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop \
 	llvm-pass-helpers llvm-ptls llvm-propagate-addrspaces null_sysimage \
diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-final-gc-lowering-mmtk.cpp
similarity index 57%
rename from src/llvm-late-gc-lowering-mmtk.cpp
rename to src/llvm-final-gc-lowering-mmtk.cpp
index 5539c8dbcf153..a8b32ce91f807 100644
--- a/src/llvm-late-gc-lowering-mmtk.cpp
+++ b/src/llvm-final-gc-lowering-mmtk.cpp
@@ -2,19 +2,31 @@
 
 #include "llvm-gc-interface-passes.h"
 
-Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
+#define DEBUG_TYPE "mmtk_final_gc_lowering"
+STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
+
+Value* FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 {
-    assert(target->arg_size() == 3);
+    ++GCAllocBytesCount;
+    CallInst *newI;
 
     IRBuilder<> builder(target);
     auto ptls = target->getArgOperand(0);
     auto type = target->getArgOperand(2);
+    uint64_t derefBytes = 0;
     if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
         size_t sz = (size_t)CI->getZExtValue();
         // This is strongly architecture and OS dependent
         int osize;
         int offset = jl_gc_classify_pools(sz, &osize);
-        if (offset >= 0) {
+        if (offset < 0) {
+            newI = builder.CreateCall(
+                bigAllocFunc,
+                { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type });
+            if (sz > 0)
+                derefBytes = sz;
+        }
+        else {
             // In this case instead of lowering julia.gc_alloc_bytes to jl_gc_small_alloc
             // We do a slowpath/fastpath check and lower it only on the slowpath, returning
             // the cursor and updating it in the fastpath.
@@ -91,6 +103,70 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
                 return phiNode;
             }
         }
+    } else {
+        auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
+        // allocTypedFunc does not include the type tag in the allocation size!
+        newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
+        derefBytes = sizeof(void*);
     }
-    return target;
+    newI->setAttributes(newI->getCalledFunction()->getAttributes());
+    unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*));
+    newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));
+    if (derefBytes > 0)
+        newI->addDereferenceableRetAttr(derefBytes);
+    newI->takeName(target);
+    return newI;
+}
+
+
+void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
+    auto parent = target->getArgOperand(0);
+    IRBuilder<> builder(target);
+    builder.SetCurrentDebugLocation(target->getDebugLoc());
+
+    // FIXME: Currently we call the write barrier with the src object (parent).
+    // This works fine for the object barrier of generational plans (such as StickyImmix),
+    // which do not use the target object at all. For other MMTk plans, we need to be careful.
+    const bool INLINE_WRITE_BARRIER = true;
+    if (MMTK_NEEDS_WRITE_BARRIER == MMTK_OBJECT_BARRIER) {
+        if (INLINE_WRITE_BARRIER) {
+            auto i8_ty = Type::getInt8Ty(F.getContext());
+            auto intptr_ty = T_size;
+
+            // intptr_t addr = (intptr_t) (void*) src;
+            // uint8_t* meta_addr = (uint8_t*) (SIDE_METADATA_BASE_ADDRESS + (addr >> 6));
+            intptr_t metadata_base_address = reinterpret_cast<intptr_t>(MMTK_SIDE_LOG_BIT_BASE_ADDRESS);
+            auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address);
+            auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0));
+
+            auto parent_val = builder.CreatePtrToInt(parent, intptr_ty);
+            auto shr = builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 6));
+            auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr);
+
+            // intptr_t shift = (addr >> 3) & 0b111;
+            auto shift = builder.CreateAnd(builder.CreateLShr(parent_val, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7));
+            auto shift_i8 = builder.CreateTruncOrBitCast(shift, i8_ty);
+
+            // uint8_t byte_val = *meta_addr;
+            auto load_i8 = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align());
+
+            // if (((byte_val >> shift) & 1) == 1) {
+            auto shifted_load_i8 = builder.CreateLShr(load_i8, shift_i8);
+            auto masked = builder.CreateAnd(shifted_load_i8, ConstantInt::get(i8_ty, 1));
+            auto is_unlogged = builder.CreateICmpEQ(masked, ConstantInt::get(i8_ty, 1));
+
+            // object_reference_write_slow_call((void*) src, (void*) slot, (void*) target);
+            MDBuilder MDB(F.getContext());
+            SmallVector<uint32_t, 2> Weights{1, 9};
+
+            auto mayTriggerSlowpath = SplitBlockAndInsertIfThen(is_unlogged, target, false, MDB.createBranchWeights(Weights));
+            builder.SetInsertPoint(mayTriggerSlowpath);
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), { parent });
+        } else {
+            Function *wb_func = getOrDeclare(jl_intrinsics::queueGCRoot);
+            builder.CreateCall(wb_func, { parent });
+        }
+    } else {
+        // Using a plan that does not need write barriers
+    }
 }
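Note: the unlogged-bit computation above mirrors MMTk's side-metadata layout: one log bit per 8-byte granule, so the bit for an address lives in metadata byte `addr >> 6`, at bit index `(addr >> 3) & 7`. A minimal C++ sketch of the check the emitted IR performs (hypothetical helper; `side_metadata_base` stands in for `MMTK_SIDE_LOG_BIT_BASE_ADDRESS`):

```c++
#include <cstdint>

// Sketch of the inline fastpath check emitted above. A result of true means
// the object is unlogged and the slowpath (queueGCRoot) must run.
static bool object_is_unlogged(uintptr_t addr, const uint8_t *side_metadata_base)
{
    uint8_t byte_val = side_metadata_base[addr >> 6]; // one metadata byte covers 64 heap bytes
    unsigned shift = (addr >> 3) & 0b111;             // bit index of this 8-byte granule
    return ((byte_val >> shift) & 1) == 1;
}
```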
diff --git a/src/llvm-final-gc-lowering-stock.cpp b/src/llvm-final-gc-lowering-stock.cpp
new file mode 100644
index 0000000000000..44132c784f74b
--- /dev/null
+++ b/src/llvm-final-gc-lowering-stock.cpp
@@ -0,0 +1,80 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "llvm-gc-interface-passes.h"
+
+#define DEBUG_TYPE "stock_final_gc_lowering"
+STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
+
+Value* FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
+{
+    ++GCAllocBytesCount;
+    CallInst *newI;
+
+    IRBuilder<> builder(target);
+    auto ptls = target->getArgOperand(0);
+    auto type = target->getArgOperand(2);
+    uint64_t derefBytes = 0;
+    if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
+        size_t sz = (size_t)CI->getZExtValue();
+        // This is strongly architecture and OS dependent
+        int osize;
+        int offset = jl_gc_classify_pools(sz, &osize);
+        if (offset < 0) {
+            newI = builder.CreateCall(
+                bigAllocFunc,
+                { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type });
+            if (sz > 0)
+                derefBytes = sz;
+        }
+        else {
+            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
+            auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
+            newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type });
+            if (sz > 0)
+                derefBytes = sz;
+        }
+    } else {
+        auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
+        // allocTypedFunc does not include the type tag in the allocation size!
+        newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
+        derefBytes = sizeof(void*);
+    }
+    newI->setAttributes(newI->getCalledFunction()->getAttributes());
+    unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*));
+    newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));
+    if (derefBytes > 0)
+        newI->addDereferenceableRetAttr(derefBytes);
+    newI->takeName(target);
+    return newI;
+}
+
+void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
+    auto parent = target->getArgOperand(0);
+    IRBuilder<> builder(target);
+    builder.SetCurrentDebugLocation(target->getDebugLoc());
+    auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent, tbaa_tag), GC_OLD_MARKED, "parent_bits");
+    auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
+    auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, target, false);
+    builder.SetInsertPoint(mayTrigTerm);
+    mayTrigTerm->getParent()->setName("may_trigger_wb");
+    Value *anyChldNotMarked = NULL;
+    for (unsigned i = 1; i < target->arg_size(); i++) {
+        Value *child = target->getArgOperand(i);
+        Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child, tbaa_tag), GC_MARKED, "child_bit");
+        Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
+        anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
+    }
+    assert(anyChldNotMarked); // handled by all_of test above
+    MDBuilder MDB(parent->getContext());
+    SmallVector<uint32_t, 2> Weights{1, 9};
+    auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
+                                              MDB.createBranchWeights(Weights));
+    trigTerm->getParent()->setName("trigger_wb");
+    builder.SetInsertPoint(trigTerm);
+    if (target->getCalledOperand() == write_barrier_func) {
+        builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+    }
+    else {
+        assert(false);
+    }
+}
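For orientation, the constant-size dispatch in `lowerGCAllocBytes` amounts to the following runtime logic (a sketch assuming `bigAllocFunc` and `smallAllocFunc` bind to `jl_gc_big_alloc` and `jl_gc_small_alloc`, as the well-known function declarations suggest; types come from `julia.h`/`julia_internal.h`):

```c++
// Hedged sketch, not code from this patch: the runtime behavior the
// lowered calls implement for a compile-time-constant size `sz`.
jl_value_t *lowered_constant_alloc(jl_ptls_t ptls, size_t sz, jl_value_t *type)
{
    int osize;
    int offset = jl_gc_classify_pools(sz, &osize); // negative => too big for pools
    if (offset < 0)
        // Big objects reserve one extra word for the type tag.
        return jl_gc_big_alloc(ptls, sz + sizeof(void*), type);
    // Pool allocation: `offset` selects the pool, `osize` is its object size.
    return jl_gc_small_alloc(ptls, offset, osize, type);
}
```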
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index 7d3a233c0a720..b7e4e86a9c5bb 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -7,7 +7,6 @@ STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics");
 STATISTIC(PushGCFrameCount, "Number of lowered pushGCFrameFunc intrinsics");
 STATISTIC(PopGCFrameCount, "Number of lowered popGCFrameFunc intrinsics");
 STATISTIC(GetGCFrameSlotCount, "Number of lowered getGCFrameSlotFunc intrinsics");
-STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics");
 STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics");
 STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics");
 
@@ -117,51 +116,6 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
     target->eraseFromParent();
 }
 
-void FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
-{
-    ++GCAllocBytesCount;
-    assert(target->arg_size() == 3);
-    CallInst *newI;
-
-    IRBuilder<> builder(target);
-    auto ptls = target->getArgOperand(0);
-    auto type = target->getArgOperand(2);
-    uint64_t derefBytes = 0;
-    if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand(1))) {
-        size_t sz = (size_t)CI->getZExtValue();
-        // This is strongly architecture and OS dependent
-        int osize;
-        int offset = jl_gc_classify_pools(sz, &osize);
-        if (offset < 0) {
-            newI = builder.CreateCall(
-                bigAllocFunc,
-                { ptls, ConstantInt::get(T_size, sz + sizeof(void*)), type });
-            if (sz > 0)
-                derefBytes = sz;
-        }
-        else {
-            auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
-            auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
-            newI = builder.CreateCall(smallAllocFunc, { ptls, pool_offs, pool_osize, type });
-            if (sz > 0)
-                derefBytes = sz;
-        }
-    } else {
-        auto size = builder.CreateZExtOrTrunc(target->getArgOperand(1), T_size);
-        // allocTypedFunc does not include the type tag in the allocation size!
-        newI = builder.CreateCall(allocTypedFunc, { ptls, size, type });
-        derefBytes = sizeof(void*);
-    }
-    newI->setAttributes(newI->getCalledFunction()->getAttributes());
-    unsigned align = std::max((unsigned)target->getRetAlign().valueOrOne().value(), (unsigned)sizeof(void*));
-    newI->addRetAttr(Attribute::getWithAlignment(F.getContext(), Align(align)));
-    if (derefBytes > 0)
-        newI->addDereferenceableRetAttr(derefBytes);
-    newI->takeName(target);
-    target->replaceAllUsesWith(newI);
-    target->eraseFromParent();
-}
-
 static bool hasUse(const JuliaPassContext &ctx, const jl_intrinsics::IntrinsicDescription &v)
 {
     auto Intr = ctx.getOrNull(v);
@@ -178,6 +132,7 @@ bool FinalLowerGC::shouldRunFinalGC()
     should_run |= hasUse(*this, jl_intrinsics::GCAllocBytes);
     should_run |= hasUse(*this, jl_intrinsics::queueGCRoot);
     should_run |= hasUse(*this, jl_intrinsics::safepoint);
+    should_run |= (write_barrier_func && !write_barrier_func->use_empty());
    return should_run;
 }
 
@@ -185,6 +140,9 @@ bool FinalLowerGC::runOnFunction(Function &F)
 {
     initAll(*F.getParent());
     pgcstack = getPGCstack(F);
+
+    auto gc_alloc_bytes = getOrNull(jl_intrinsics::GCAllocBytes);
+
     if (!pgcstack || !shouldRunFinalGC())
         goto verify_skip;
 
@@ -195,6 +153,41 @@ bool FinalLowerGC::runOnFunction(Function &F)
     allocTypedFunc = getOrDeclare(jl_well_known::GCAllocTyped);
     T_size = F.getParent()->getDataLayout().getIntPtrType(F.getContext());
 
+    // The replacement for these may require creating new BasicBlocks,
+    // which messes up the loop below. Process them first.
+    if (gc_alloc_bytes) {
+        for (auto it = gc_alloc_bytes->user_begin(); it != gc_alloc_bytes->user_end(); ) {
+            if (auto *CI = dyn_cast<CallInst>(*it)) {
+
+                assert(CI->getCalledOperand() == gc_alloc_bytes);
+
+                auto newI = lowerGCAllocBytes(CI, F);
+                if (newI != CI) {
+                    ++it;
+                    CI->replaceAllUsesWith(newI);
+                    CI->eraseFromParent();
+                    continue;
+                }
+            }
+            ++it;
+        }
+    }
+
+    // Write barriers must be lowered before the generic intrinsic loop below,
+    // since lowering them inserts new julia.queue_gc_root intrinsics.
+    if (write_barrier_func) {
+        for (auto it = write_barrier_func->user_begin(); it != write_barrier_func->user_end(); ) {
+            if (auto *CI = dyn_cast<CallInst>(*it)) {
+                assert(CI->getCalledOperand() == write_barrier_func);
+                lowerWriteBarrier(CI, F);
+                ++it;
+                CI->eraseFromParent();
+                continue;
+            }
+            ++it;
+        }
+    }
+
     // Lower all calls to supported intrinsics.
     for (auto &BB : F) {
         for (auto &I : make_early_inc_range(BB)) {
@@ -217,13 +210,13 @@ bool FinalLowerGC::runOnFunction(Function &F)
             LOWER_INTRINSIC(getGCFrameSlot, lowerGetGCFrameSlot);
             LOWER_INTRINSIC(pushGCFrame, lowerPushGCFrame);
             LOWER_INTRINSIC(popGCFrame, lowerPopGCFrame);
-            LOWER_INTRINSIC(GCAllocBytes, lowerGCAllocBytes);
             LOWER_INTRINSIC(queueGCRoot, lowerQueueGCRoot);
             LOWER_INTRINSIC(safepoint, lowerSafepoint);
 
 #undef LOWER_INTRINSIC
         }
     }
+
     return true;
 
     // Verify that skipping was in fact correct
 verify_skip:
@@ -236,6 +229,12 @@ bool FinalLowerGC::runOnFunction(Function &F)
             Value *callee = CI->getCalledOperand();
             assert(callee);
 
+            if (write_barrier_func == callee) {
+                errs() << "Final-GC-lowering didn't eliminate all write barriers from '" << F.getName() << "', dumping entire module!\n\n";
+                errs() << *F.getParent() << "\n";
+                abort();
+            }
+
             auto IS_INTRINSIC = [&](auto intrinsic) {
                 auto intrinsic2 = getOrNull(intrinsic);
                 if (intrinsic2 == callee) {
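Both user-list walks in `runOnFunction` rely on the same iterator discipline: advance past a call before erasing it, since erasure unlinks the call from the intrinsic's use list. A self-contained sketch of the pattern (hypothetical helper, not part of the patch):

```c++
#include <llvm/ADT/STLExtras.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/Instructions.h>

// Visit every CallInst user of `intrinsic`; `lower` may erase the call.
static void forEachCallOf(llvm::Function *intrinsic,
                          llvm::function_ref<void(llvm::CallInst *)> lower)
{
    for (auto it = intrinsic->user_begin(); it != intrinsic->user_end(); ) {
        if (auto *CI = llvm::dyn_cast<llvm::CallInst>(*it)) {
            ++it;       // step past CI before it is unlinked
            lower(CI);  // may call CI->eraseFromParent()
            continue;
        }
        ++it;
    }
}
```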
diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h
index d1bb1fae01446..4d32e5020d18e 100644
--- a/src/llvm-gc-interface-passes.h
+++ b/src/llvm-gc-interface-passes.h
@@ -367,9 +367,6 @@ struct LateLowerGCFrame: private JuliaPassContext {
     SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
     void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
     void RefineLiveSet(LargeSparseBitVector &LS, State &S, ArrayRef<int> CalleeRoots);
-    Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V);
-    Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V);
-    Value* lowerGCAllocBytesLate(CallInst *target, Function &F);
 };
 
 // The final GC lowering pass. This pass lowers platform-agnostic GC
@@ -405,7 +402,7 @@ struct FinalLowerGC: private JuliaPassContext {
     void lowerGetGCFrameSlot(CallInst *target, Function &F);
 
     // Lowers a `julia.gc_alloc_bytes` intrinsic.
-    void lowerGCAllocBytes(CallInst *target, Function &F);
+    Value* lowerGCAllocBytes(CallInst *target, Function &F);
 
     // Lowers a `julia.queue_gc_root` intrinsic.
     void lowerQueueGCRoot(CallInst *target, Function &F);
@@ -413,8 +410,37 @@ struct FinalLowerGC: private JuliaPassContext {
     // Lowers a `julia.safepoint` intrinsic.
     void lowerSafepoint(CallInst *target, Function &F);
 
+    // Lowers a `julia.write_barrier` function.
+    void lowerWriteBarrier(CallInst *target, Function &F);
+
     // Check if the pass should be run
     bool shouldRunFinalGC();
 };
 
+// These helpers are now shared by LateLowerGCFrame and FinalLowerGC
+
+// Size of T is assumed to be `sizeof(void*)`
+inline Value *EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V)
+{
+    assert(T == T_size || isa<PointerType>(T));
+    return builder.CreateInBoundsGEP(T, V, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr");
+}
+
+inline Value *EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V, llvm::MDNode *tbaa_tag)
+{
+    auto addr = EmitTagPtr(builder, T_size, T_size, V);
+    auto &M = *builder.GetInsertBlock()->getModule();
+    LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag");
+    load->setOrdering(AtomicOrdering::Unordered);
+    load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
+    MDBuilder MDB(load->getContext());
+    auto *NullInt = ConstantInt::get(T_size, 0);
+    // We can be sure that the tag is at least 16 (1<<4)
+    // Hopefully this is enough to convince LLVM that the value is still not NULL
+    // after masking off the tag bits
+    auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 16));
+    load->setMetadata(LLVMContext::MD_range, MDB.createRange(NonNullInt, NullInt));
+    return load;
+}
+
 #endif // LLVM_GC_PASSES_H
diff --git a/src/llvm-late-gc-lowering-stock.cpp b/src/llvm-late-gc-lowering-stock.cpp
deleted file mode 100644
index 2a11487773396..0000000000000
--- a/src/llvm-late-gc-lowering-stock.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#include "llvm-gc-interface-passes.h"
-
-Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F)
-{
-    // Do nothing for the stock GC
-    return target;
-}
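The `EmitTagPtr`/`EmitLoadTag` helpers that moved into the header encode the object layout both passes rely on: the type tag is the machine word immediately before the object, and its low four bits are GC mark bits, so the tag value is always at least 16 (which is what the `!range` metadata on the emitted load asserts). In C terms (a layout sketch, not code from the patch):

```c++
#include <cstdint>

// Layout sketch: the tag word sits one pointer-width before the object.
static uintptr_t load_tag(const void *obj)
{
    const uintptr_t *tag_addr = (const uintptr_t *)obj - 1; // EmitTagPtr
    uintptr_t tag = *tag_addr;                              // EmitLoadTag
    // The type pointer is recovered by masking off the low GC bits:
    //   (tag & ~(uintptr_t)15)
    return tag;
}
```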
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index a41e947c0b6b3..99a861aded7d3 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1898,30 +1898,6 @@ std::pair<SmallVector<int, 0>, int> LateLowerGCFrame::ColorRoots(const State &S)
     return {Colors, PreAssignedColors};
 }
 
-// Size of T is assumed to be `sizeof(void*)`
-Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Type *T_size, Value *V)
-{
-    assert(T == T_size || isa<PointerType>(T));
-    return builder.CreateInBoundsGEP(T, V, ConstantInt::get(T_size, -1), V->getName() + ".tag_addr");
-}
-
-Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Type *T_size, Value *V)
-{
-    auto addr = EmitTagPtr(builder, T_size, T_size, V);
-    auto &M = *builder.GetInsertBlock()->getModule();
-    LoadInst *load = builder.CreateAlignedLoad(T_size, addr, M.getDataLayout().getPointerABIAlignment(0), V->getName() + ".tag");
-    load->setOrdering(AtomicOrdering::Unordered);
-    load->setMetadata(LLVMContext::MD_tbaa, tbaa_tag);
-    MDBuilder MDB(load->getContext());
-    auto *NullInt = ConstantInt::get(T_size, 0);
-    // We can be sure that the tag is at least 16 (1<<4)
-    // Hopefully this is enough to convince LLVM that the value is still not NULL
-    // after masking off the tag bits
-    auto *NonNullInt = ConstantExpr::getAdd(NullInt, ConstantInt::get(T_size, 16));
-    load->setMetadata(LLVMContext::MD_range, MDB.createRange(NonNullInt, NullInt));
-    return load;
-}
-
 // Enable this optimization only on LLVM 4.0+ since this cause LLVM to optimize
 // constant store loop to produce a `memset_pattern16` with a global variable
 // that's initialized by `addrspacecast`. Such a global variable is not supported by the backend.
@@ -1980,7 +1956,7 @@ void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVe
     IRBuilder<> builder(CI);
     builder.SetCurrentDebugLocation(CI->getDebugLoc());
-    auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent), GC_OLD_MARKED, "parent_bits");
+    auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent, tbaa_tag), GC_OLD_MARKED, "parent_bits");
     auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
     auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
     builder.SetInsertPoint(mayTrigTerm);
@@ -1988,7 +1964,7 @@ void LateLowerGCFrame::CleanupWriteBarriers(Function &F, State *S, const SmallVe
     Value *anyChldNotMarked = NULL;
     for (unsigned i = 1; i < CI->arg_size(); i++) {
         Value *child = CI->getArgOperand(i);
-        Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child), GC_MARKED, "child_bit");
+        Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, T_size, child, tbaa_tag), GC_MARKED, "child_bit");
         Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0), "child_not_marked");
         anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
     }
@@ -2179,7 +2155,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
             assert(CI->arg_size() == 1);
             IRBuilder<> builder(CI);
             builder.SetCurrentDebugLocation(CI->getDebugLoc());
-            auto tag = EmitLoadTag(builder, T_size, CI->getArgOperand(0));
+            auto tag = EmitLoadTag(builder, T_size, CI->getArgOperand(0), tbaa_tag);
             auto masked = builder.CreateAnd(tag, ConstantInt::get(T_size, ~(uintptr_t)15));
             auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, JuliaType::get_pjlvalue_ty(masked->getContext())),
                                                    T_prjlvalue);
@@ -2576,30 +2552,6 @@ bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
     PlaceRootsAndUpdateCalls(Colors.first, Colors.second, S, CallFrames);
     CleanupIR(F, &S, CFGModified);
-
-    // We lower the julia.gc_alloc_bytes intrinsic in this pass to insert slowpath/fastpath blocks for MMTk
-    // For now, we do nothing for the Stock GC
-    auto GCAllocBytes = getOrNull(jl_intrinsics::GCAllocBytes);
-
-    if (GCAllocBytes) {
-        for (auto it = GCAllocBytes->user_begin(); it != GCAllocBytes->user_end(); ) {
-            if (auto *CI = dyn_cast<CallInst>(*it)) {
-                *CFGModified = true;
-
-                assert(CI->getCalledOperand() == GCAllocBytes);
-
-                auto newI = lowerGCAllocBytesLate(CI, F);
-                if (newI != CI) {
-                    ++it;
-                    CI->replaceAllUsesWith(newI);
-                    CI->eraseFromParent();
-                    continue;
-                }
-            }
-            ++it;
-        }
-    }
-
     return true;
 }
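Taken together, the barrier that `CleanupWriteBarriers` (and now `FinalLowerGC::lowerWriteBarrier` for the stock GC) expands corresponds to this generational check (a sketch reusing `load_tag` from the earlier example and the GC mark-bit macros):

```c++
// Hedged sketch of the expanded stock write barrier: only an old, marked
// parent that receives a pointer to an unmarked (young) child is queued
// for rescanning (the "may_trigger_wb" / "trigger_wb" blocks above).
static void write_barrier(jl_value_t *parent, jl_value_t *child)
{
    if ((load_tag(parent) & GC_OLD_MARKED) == GC_OLD_MARKED) {  // may_trigger_wb
        if ((load_tag(child) & GC_MARKED) == 0)                 // trigger_wb
            jl_gc_queue_root(parent);
    }
}
```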