Skip to content

Commit eddf04a

Browse files
gbaraldi authored and KristofferC committed
Teach alloc-opt to handle atomics a bit better (#57208)
Fixes #57190. The fact that this pass thinks memcpy is a potential issue is quite annoying, so it deserves a decent refactor that flows the type information through from Julia instead of trying to regenerate it on site, especially given that opaque pointers mean we can't really introspect into pointers at all. (cherry picked from commit 2c7527b)
1 parent 18b53c8 commit eddf04a

File tree

4 files changed

+171
-26
lines changed

4 files changed

+171
-26
lines changed

src/llvm-alloc-helpers.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
214214
}
215215
if (auto call = dyn_cast<CallInst>(inst)) {
216216
// TODO handle `memcmp`
217+
// TODO handle `memcpy` which is used a lot more often since opaque pointers
217218
// None of the intrinsics should care if the memory is stack or heap allocated.
218219
auto callee = call->getCalledOperand();
219220
if (auto II = dyn_cast<IntrinsicInst>(call)) {

src/llvm-alloc-opt.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
742742
auto replace_inst = [&] (Instruction *user) {
743743
Instruction *orig_i = cur.orig_i;
744744
Instruction *new_i = cur.new_i;
745-
if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
745+
if (isa<LoadInst>(user) || isa<StoreInst>(user) ||
746+
isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
747+
// TODO: these atomics are likely removable if the user is the first argument
746748
user->replaceUsesOfWith(orig_i, new_i);
747749
}
748750
else if (auto call = dyn_cast<CallInst>(user)) {
@@ -1111,6 +1113,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
11111113
return;
11121114
}
11131115
else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
1116+
// TODO: Downgrade atomics here potentially
11141117
auto slot_idx = find_slot(offset);
11151118
auto &slot = slots[slot_idx];
11161119
assert(slot.offset <= offset && slot.offset + slot.size >= offset);

test/atomics.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1099,3 +1099,14 @@ test_once_undef(Any)
10991099
test_once_undef(Union{Nothing,Integer})
11001100
test_once_undef(UndefComplex{Any})
11011101
test_once_undef(UndefComplex{UndefComplex{Any}})
1102+
1103+
mutable struct Atomic57190
1104+
@atomic x::Int
1105+
end
1106+
1107+
1108+
function add_one57190!()
1109+
@atomic (Atomic57190(0).x) += 1
1110+
end
1111+
1112+
@test add_one57190!() == 1

test/llvmpasses/alloc-opt-pass.ll

Lines changed: 155 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ L3: ; preds = %L2, %L1, %0
7373
}
7474
; CHECK-LABEL: }{{$}}
7575

76+
declare void @external_function()
77+
78+
declare ptr addrspace(10) @external_function2()
79+
80+
7681
; CHECK-LABEL: @legal_int_types
7782
; CHECK: alloca [12 x i8]
7883
; CHECK-NOT: alloca i96
@@ -89,21 +94,6 @@ define void @legal_int_types() {
8994
}
9095
; CHECK-LABEL: }{{$}}
9196

92-
declare void @external_function()
93-
94-
declare ptr addrspace(10) @external_function2()
95-
96-
declare ptr @julia.ptls_states()
97-
98-
declare ptr @julia.get_pgcstack()
99-
100-
declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
101-
102-
declare ptr @julia.pointer_from_objref(ptr addrspace(11))
103-
104-
declare token @llvm.julia.gc_preserve_begin(...)
105-
106-
declare void @llvm.julia.gc_preserve_end(token)
10797

10898
; CHECK-LABEL: @memref_collision
10999
; OPAQUE: call ptr @julia.ptls_states()
@@ -171,13 +161,13 @@ define void @initializers() {
171161
%pgcstack = call ptr @julia.get_pgcstack()
172162
%ptls = call ptr @julia.ptls_states()
173163
%ptls_i8 = bitcast ptr %ptls to ptr
174-
%var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1
164+
%var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4
175165
%var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
176166
%var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
177-
%var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2
167+
%var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7
178168
%var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11)
179169
%var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5)
180-
%var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3
170+
%var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #1
181171
%var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11)
182172
%var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8)
183173
ret void
@@ -203,14 +193,154 @@ union_move9: ; No predecessors!
203193
}
204194
; CHECK-LABEL: }{{$}}
205195

196+
@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0
197+
@1 = private unnamed_addr constant i64 0, align 8
198+
199+
; CHECK-LABEL: @cmpxchg
200+
; CHECK: alloca
201+
; CHECK: alloca
202+
; CHECK: %20 = cmpxchg ptr %2,
203+
define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself %0) #0 {
204+
%2 = alloca i64, align 16
205+
%3 = call ptr @julia.get_pgcstack()
206+
%4 = getelementptr inbounds i8, ptr %3, i32 -152
207+
%5 = getelementptr inbounds i8, ptr %4, i32 168
208+
%6 = load ptr, ptr %5, align 8, !tbaa !4
209+
%7 = getelementptr inbounds i8, ptr %6, i32 16
210+
%8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
211+
fence syncscope("singlethread") seq_cst
212+
call void @julia.safepoint(ptr %8)
213+
fence syncscope("singlethread") seq_cst
214+
%9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
215+
%10 = ptrtoint ptr %9 to i64
216+
%11 = inttoptr i64 %10 to ptr
217+
%12 = getelementptr inbounds i8, ptr %3, i32 -152
218+
%13 = addrspacecast ptr %11 to ptr addrspace(10)
219+
call void @llvm.lifetime.start.p0(i64 8, ptr %2)
220+
%14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
221+
%15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
222+
call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
223+
%16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
224+
%17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
225+
br label %19
226+
227+
18: ; preds = %19
228+
ret i64 %21
229+
230+
19: ; preds = %19, %1
231+
%20 = phi i64 [ %17, %1 ], [ %23, %19 ]
232+
%21 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %20, i64 signext 1)
233+
%22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
234+
%23 = extractvalue { i64, i1 } %22, 0
235+
%24 = extractvalue { i64, i1 } %22, 1
236+
br i1 %24, label %18, label %19
237+
}
238+
239+
; CHECK-LABEL: }{{$}}
240+
; CHECK-LABEL: @atomicrmw
241+
; CHECK: alloca
242+
; CHECK: alloca
243+
; CHECK: atomicrmw xchg ptr %2,
244+
define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself %0) #0 {
245+
%2 = alloca i64, align 16
246+
%3 = call ptr @julia.get_pgcstack()
247+
%4 = getelementptr inbounds i8, ptr %3, i32 -152
248+
%5 = getelementptr inbounds i8, ptr %4, i32 168
249+
%6 = load ptr, ptr %5, align 8, !tbaa !4
250+
%7 = getelementptr inbounds i8, ptr %6, i32 16
251+
%8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
252+
fence syncscope("singlethread") seq_cst
253+
call void @julia.safepoint(ptr %8)
254+
fence syncscope("singlethread") seq_cst
255+
%9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
256+
%10 = ptrtoint ptr %9 to i64
257+
%11 = inttoptr i64 %10 to ptr
258+
%12 = getelementptr inbounds i8, ptr %3, i32 -152
259+
%13 = addrspacecast ptr %11 to ptr addrspace(10)
260+
call void @llvm.lifetime.start.p0(i64 8, ptr %2)
261+
%14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
262+
%15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
263+
call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
264+
%16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
265+
%17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
266+
%18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %17, i64 signext 1)
267+
%19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, !alias.scope !23, !noalias !24 ; preds = %19
268+
ret i64 %19
269+
}
270+
271+
declare ptr @julia.ptls_states()
272+
273+
declare ptr @julia.pointer_from_objref(ptr addrspace(11))
274+
275+
declare token @llvm.julia.gc_preserve_begin(...)
276+
277+
declare void @llvm.julia.gc_preserve_end(token)
278+
279+
declare ptr @julia.get_pgcstack()
280+
281+
; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite)
282+
declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2
283+
284+
; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
285+
declare void @julia.safepoint(ptr) #3
286+
287+
; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
288+
declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4
289+
206290
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
207-
declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
291+
declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5
292+
208293
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
209-
declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0
294+
declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5
295+
210296
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
211-
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
297+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5
298+
299+
declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0
300+
301+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
302+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6
303+
304+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
305+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6
306+
307+
attributes #0 = { "probe-stack"="inline-asm" }
308+
attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
309+
attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) }
310+
attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
311+
attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
312+
attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
313+
attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
314+
attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
315+
attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) }
316+
317+
!llvm.module.flags = !{!1, !2, !3}
318+
319+
!0 = !{}
320+
!1 = !{i32 2, !"Dwarf Version", i32 4}
321+
!2 = !{i32 2, !"Debug Info Version", i32 3}
322+
!3 = !{i32 2, !"julia.optlevel", i32 2}
323+
!4 = !{!5, !5, i64 0}
324+
!5 = !{!"jtbaa_gcframe", !6, i64 0}
325+
!6 = !{!"jtbaa", !7, i64 0}
326+
!7 = !{!"jtbaa"}
327+
!8 = !{!9, !9, i64 0, i64 1}
328+
!9 = !{!"jtbaa_const", !6, i64 0}
329+
!10 = !{!11}
330+
!11 = !{!"jnoalias_const", !12}
331+
!12 = !{!"jnoalias"}
332+
!13 = !{!14, !15, !16, !17}
333+
!14 = !{!"jnoalias_gcframe", !12}
334+
!15 = !{!"jnoalias_stack", !12}
335+
!16 = !{!"jnoalias_data", !12}
336+
!17 = !{!"jnoalias_typemd", !12}
337+
!18 = !{i64 56}
338+
!19 = !{i64 16}
339+
!20 = !{!21, !21, i64 0}
340+
!21 = !{!"jtbaa_value", !22, i64 0}
341+
!22 = !{!"jtbaa_data", !6, i64 0}
342+
!23 = !{!16}
343+
!24 = !{!14, !15, !17, !11}
344+
!25 = !{!26, !26, i64 0}
345+
!26 = !{!"jtbaa_mutab", !21, i64 0}
212346

213-
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
214-
attributes #1 = { allockind("alloc") }
215-
attributes #2 = { allockind("alloc,uninitialized") }
216-
attributes #3 = { allockind("alloc,zeroed") }

0 commit comments

Comments
 (0)