@@ -7,7 +7,6 @@ STATISTIC(NewGCFrameCount, "Number of lowered newGCFrameFunc intrinsics");
7
7
STATISTIC (PushGCFrameCount, " Number of lowered pushGCFrameFunc intrinsics" );
8
8
STATISTIC (PopGCFrameCount, " Number of lowered popGCFrameFunc intrinsics" );
9
9
STATISTIC (GetGCFrameSlotCount, " Number of lowered getGCFrameSlotFunc intrinsics" );
10
- STATISTIC (GCAllocBytesCount, " Number of lowered GCAllocBytesFunc intrinsics" );
11
10
STATISTIC (QueueGCRootCount, " Number of lowered queueGCRootFunc intrinsics" );
12
11
STATISTIC (SafepointCount, " Number of lowered safepoint intrinsics" );
13
12
@@ -117,51 +116,6 @@ void FinalLowerGC::lowerSafepoint(CallInst *target, Function &F)
117
116
target->eraseFromParent ();
118
117
}
119
118
120
- void FinalLowerGC::lowerGCAllocBytes (CallInst *target, Function &F) // Lowers one GCAllocBytes intrinsic call `target` in F into a direct call to bigAllocFunc, smallAllocFunc or allocTypedFunc, then erases `target`.
121
- {
122
- ++GCAllocBytesCount; // statistics counter for this lowering
123
- assert (target->arg_size () == 3 ); // operands read below: 0 = ptls, 1 = size, 2 = type
124
- CallInst *newI; // the replacement call; assigned on every branch below
125
-
126
- IRBuilder<> builder (target); // new instructions are inserted immediately before target
127
- auto ptls = target->getArgOperand (0 );
128
- auto type = target->getArgOperand (2 );
129
- uint64_t derefBytes = 0 ; // bytes to mark dereferenceable on the result; 0 means no attribute is added
130
- if (auto CI = dyn_cast<ConstantInt>(target->getArgOperand (1 ))) { // size operand is a compile-time constant
131
- size_t sz = (size_t )CI->getZExtValue ();
132
- // This is strongly architecture and OS dependent
133
- int osize;
134
- int offset = jl_gc_classify_pools (sz, &osize); // negative result selects bigAllocFunc; otherwise offset/osize are forwarded to smallAllocFunc
135
- if (offset < 0 ) {
136
- newI = builder.CreateCall (
137
- bigAllocFunc,
138
- { ptls, ConstantInt::get (T_size, sz + sizeof (void *)), type }); // requests sz plus one pointer-sized word (presumably room for the type tag -- confirm)
139
- if (sz > 0 )
140
- derefBytes = sz;
141
- }
142
- else {
143
- auto pool_offs = ConstantInt::get (Type::getInt32Ty (F.getContext ()), offset);
144
- auto pool_osize = ConstantInt::get (Type::getInt32Ty (F.getContext ()), osize);
145
- newI = builder.CreateCall (smallAllocFunc, { ptls, pool_offs, pool_osize, type });
146
- if (sz > 0 )
147
- derefBytes = sz;
148
- }
149
- } else { // size is only known at run time
150
- auto size = builder.CreateZExtOrTrunc (target->getArgOperand (1 ), T_size); // convert the size operand to T_size
151
- // allocTypedFunc does not include the type tag in the allocation size!
152
- newI = builder.CreateCall (allocTypedFunc, { ptls, size, type });
153
- derefBytes = sizeof (void *); // only one pointer-sized word is marked dereferenceable when the size is not constant
154
- }
155
- newI->setAttributes (newI->getCalledFunction ()->getAttributes ()); // copy the callee's attributes onto the call; NOTE(review): getCalledFunction() is dereferenced unchecked
156
- unsigned align = std::max ((unsigned )target->getRetAlign ().valueOrOne ().value (), (unsigned )sizeof (void *)); // at least pointer alignment, or the intrinsic call's return alignment if larger
157
- newI->addRetAttr (Attribute::getWithAlignment (F.getContext (), Align (align)));
158
- if (derefBytes > 0 )
159
- newI->addDereferenceableRetAttr (derefBytes);
160
- newI->takeName (target); // move target's name to the new call
161
- target->replaceAllUsesWith (newI); // redirect every user of the intrinsic result
162
- target->eraseFromParent ();
163
- }
164
-
165
119
static bool hasUse (const JuliaPassContext &ctx, const jl_intrinsics::IntrinsicDescription &v)
166
120
{
167
121
auto Intr = ctx.getOrNull (v);
@@ -178,13 +132,17 @@ bool FinalLowerGC::shouldRunFinalGC()
178
132
should_run |= hasUse (*this , jl_intrinsics::GCAllocBytes);
179
133
should_run |= hasUse (*this , jl_intrinsics::queueGCRoot);
180
134
should_run |= hasUse (*this , jl_intrinsics::safepoint);
135
+ should_run |= (write_barrier_func && !write_barrier_func->use_empty ());
181
136
return should_run;
182
137
}
183
138
184
139
bool FinalLowerGC::runOnFunction (Function &F)
185
140
{
186
141
initAll (*F.getParent ());
187
142
pgcstack = getPGCstack (F);
143
+
144
+ auto gc_alloc_bytes = getOrNull (jl_intrinsics::GCAllocBytes);
145
+
188
146
if (!pgcstack || !shouldRunFinalGC ())
189
147
goto verify_skip;
190
148
@@ -195,6 +153,41 @@ bool FinalLowerGC::runOnFunction(Function &F)
195
153
allocTypedFunc = getOrDeclare (jl_well_known::GCAllocTyped);
196
154
T_size = F.getParent ()->getDataLayout ().getIntPtrType (F.getContext ());
197
155
156
+ // The replacement for these may require creating new BasicBlocks
157
+ // which messes up the loop below. Process them first
158
+ if (gc_alloc_bytes) {
159
+ for (auto it = gc_alloc_bytes->user_begin (); it != gc_alloc_bytes->user_end (); ) {
160
+ if (auto *CI = dyn_cast<CallInst>(*it)) {
161
+
162
+ assert (CI->getCalledOperand () == gc_alloc_bytes);
163
+
164
+ auto newI = lowerGCAllocBytes (CI, F);
165
+ if (newI != CI) {
166
+ ++it;
167
+ CI->replaceAllUsesWith (newI);
168
+ CI->eraseFromParent ();
169
+ continue ;
170
+ }
171
+ }
172
+ ++it;
173
+ }
174
+ }
175
+
176
+ // Write barriers must be lowered before the main loop below since they may
177
+ // insert julia.queue_gc_root intrinsics
178
+ if (write_barrier_func) {
179
+ for (auto it = write_barrier_func->user_begin (); it != write_barrier_func->user_end (); ) {
180
+ if (auto *CI = dyn_cast<CallInst>(*it)) {
181
+ assert (CI->getCalledOperand () == write_barrier_func);
182
+ lowerWriteBarrier (CI, F);
183
+ ++it;
184
+ CI->eraseFromParent ();
185
+ continue ;
186
+ }
187
+ ++it;
188
+ }
189
+ }
190
+
198
191
// Lower all calls to supported intrinsics.
199
192
for (auto &BB : F) {
200
193
for (auto &I : make_early_inc_range (BB)) {
@@ -217,13 +210,13 @@ bool FinalLowerGC::runOnFunction(Function &F)
217
210
LOWER_INTRINSIC (getGCFrameSlot, lowerGetGCFrameSlot);
218
211
LOWER_INTRINSIC (pushGCFrame, lowerPushGCFrame);
219
212
LOWER_INTRINSIC (popGCFrame, lowerPopGCFrame);
220
- LOWER_INTRINSIC (GCAllocBytes, lowerGCAllocBytes);
221
213
LOWER_INTRINSIC (queueGCRoot, lowerQueueGCRoot);
222
214
LOWER_INTRINSIC (safepoint, lowerSafepoint);
223
215
224
216
#undef LOWER_INTRINSIC
225
217
}
226
218
}
219
+
227
220
return true ;
228
221
// Verify that skipping was in fact correct
229
222
verify_skip:
@@ -236,6 +229,12 @@ bool FinalLowerGC::runOnFunction(Function &F)
236
229
237
230
Value *callee = CI->getCalledOperand ();
238
231
assert (callee);
232
+ if (write_barrier_func == callee) {
233
+ errs () << " Final-GC-lowering didn't eliminate all write barriers from '" << F.getName () << " ', dumping entire module!\n\n " ;
234
+ errs () << *F.getParent () << " \n " ;
235
+ abort ();
236
+ }
237
+
239
238
auto IS_INTRINSIC = [&](auto intrinsic) {
240
239
auto intrinsic2 = getOrNull (intrinsic);
241
240
if (intrinsic2 == callee) {
0 commit comments