Skip to content

Commit 2c7527b

Browse files
authored
Teach alloc-opt to handle atomics a bit better (#57208)
Fixes #57190. The fact that this pass thinks memcpy is a potential issue is quite annoying, so it deserves a decent refactor that flows the type information through from Julia instead of trying to regenerate it on site, especially given that opaque pointers mean we can't really introspect into pointers at all.
1 parent e7ff95d commit 2c7527b

File tree

4 files changed

+171
-26
lines changed

4 files changed

+171
-26
lines changed

src/llvm-alloc-helpers.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
214214
}
215215
if (auto call = dyn_cast<CallInst>(inst)) {
216216
// TODO handle `memcmp`
217+
// TODO handle `memcpy` which is used a lot more often since opaque pointers
217218
// None of the intrinsics should care if the memory is stack or heap allocated.
218219
auto callee = call->getCalledOperand();
219220
if (auto II = dyn_cast<IntrinsicInst>(call)) {

src/llvm-alloc-opt.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -758,7 +758,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
758758
auto replace_inst = [&] (Instruction *user) {
759759
Instruction *orig_i = cur.orig_i;
760760
Instruction *new_i = cur.new_i;
761-
if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
761+
if (isa<LoadInst>(user) || isa<StoreInst>(user) ||
762+
isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
763+
// TODO: these atomics are likely removable if the user is the first argument
762764
user->replaceUsesOfWith(orig_i, new_i);
763765
}
764766
else if (auto call = dyn_cast<CallInst>(user)) {
@@ -1131,6 +1133,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
11311133
return;
11321134
}
11331135
else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
1136+
// TODO: Downgrade atomics here potentially
11341137
auto slot_idx = find_slot(offset);
11351138
auto &slot = slots[slot_idx];
11361139
assert(slot.offset <= offset && slot.offset + slot.size >= offset);

test/atomics.jl

+11
Original file line numberDiff line numberDiff line change
@@ -1099,3 +1099,14 @@ test_once_undef(Any)
10991099
test_once_undef(Union{Nothing,Integer})
11001100
test_once_undef(UndefComplex{Any})
11011101
test_once_undef(UndefComplex{UndefComplex{Any}})
1102+
1103+
mutable struct Atomic57190
1104+
@atomic x::Int
1105+
end
1106+
1107+
1108+
function add_one57190!()
1109+
@atomic (Atomic57190(0).x) += 1
1110+
end
1111+
1112+
@test add_one57190!() == 1

test/llvmpasses/alloc-opt-pass.ll

+155-25
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ L3: ; preds = %L2, %L1, %0
7373
}
7474
; CHECK-LABEL: }{{$}}
7575

76+
declare void @external_function()
77+
78+
declare ptr addrspace(10) @external_function2()
79+
80+
7681
; CHECK-LABEL: @legal_int_types
7782
; CHECK: alloca [12 x i8]
7883
; CHECK-NOT: alloca i96
@@ -89,21 +94,6 @@ define void @legal_int_types() {
8994
}
9095
; CHECK-LABEL: }{{$}}
9196

92-
declare void @external_function()
93-
94-
declare ptr addrspace(10) @external_function2()
95-
96-
declare ptr @julia.ptls_states()
97-
98-
declare ptr @julia.get_pgcstack()
99-
100-
declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))
101-
102-
declare ptr @julia.pointer_from_objref(ptr addrspace(11))
103-
104-
declare token @llvm.julia.gc_preserve_begin(...)
105-
106-
declare void @llvm.julia.gc_preserve_end(token)
10797

10898
; CHECK-LABEL: @memref_collision
10999
; OPAQUE: call ptr @julia.ptls_states()
@@ -171,13 +161,13 @@ define void @initializers() {
171161
%pgcstack = call ptr @julia.get_pgcstack()
172162
%ptls = call ptr @julia.ptls_states()
173163
%ptls_i8 = bitcast ptr %ptls to ptr
174-
%var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1
164+
%var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4
175165
%var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
176166
%var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
177-
%var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2
167+
%var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7
178168
%var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11)
179169
%var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5)
180-
%var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3
170+
%var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #1
181171
%var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11)
182172
%var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8)
183173
ret void
@@ -203,14 +193,154 @@ union_move9: ; No predecessors!
203193
}
204194
; CHECK-LABEL: }{{$}}
205195

196+
@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0
197+
@1 = private unnamed_addr constant i64 0, align 8
198+
199+
; CHECK-LABEL: @cmpxchg
200+
; CHECK: alloca
201+
; CHECK: alloca
202+
; CHECK: %20 = cmpxchg ptr %2,
203+
define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself %0) #0 {
204+
%2 = alloca i64, align 16
205+
%3 = call ptr @julia.get_pgcstack()
206+
%4 = getelementptr inbounds i8, ptr %3, i32 -152
207+
%5 = getelementptr inbounds i8, ptr %4, i32 168
208+
%6 = load ptr, ptr %5, align 8, !tbaa !4
209+
%7 = getelementptr inbounds i8, ptr %6, i32 16
210+
%8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
211+
fence syncscope("singlethread") seq_cst
212+
call void @julia.safepoint(ptr %8)
213+
fence syncscope("singlethread") seq_cst
214+
%9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
215+
%10 = ptrtoint ptr %9 to i64
216+
%11 = inttoptr i64 %10 to ptr
217+
%12 = getelementptr inbounds i8, ptr %3, i32 -152
218+
%13 = addrspacecast ptr %11 to ptr addrspace(10)
219+
call void @llvm.lifetime.start.p0(i64 8, ptr %2)
220+
%14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
221+
%15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
222+
call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
223+
%16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
224+
%17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
225+
br label %19
226+
227+
18: ; preds = %19
228+
ret i64 %21
229+
230+
19: ; preds = %19, %1
231+
%20 = phi i64 [ %17, %1 ], [ %23, %19 ]
232+
%21 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %20, i64 signext 1)
233+
%22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
234+
%23 = extractvalue { i64, i1 } %22, 0
235+
%24 = extractvalue { i64, i1 } %22, 1
236+
br i1 %24, label %18, label %19
237+
}
238+
239+
; CHECK-LABEL: }{{$}}
240+
; CHECK-LABEL: @atomicrmw
241+
; CHECK: alloca
242+
; CHECK: alloca
243+
; CHECK: atomicrmw xchg ptr %2,
244+
define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself %0) #0 {
245+
%2 = alloca i64, align 16
246+
%3 = call ptr @julia.get_pgcstack()
247+
%4 = getelementptr inbounds i8, ptr %3, i32 -152
248+
%5 = getelementptr inbounds i8, ptr %4, i32 168
249+
%6 = load ptr, ptr %5, align 8, !tbaa !4
250+
%7 = getelementptr inbounds i8, ptr %6, i32 16
251+
%8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
252+
fence syncscope("singlethread") seq_cst
253+
call void @julia.safepoint(ptr %8)
254+
fence syncscope("singlethread") seq_cst
255+
%9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
256+
%10 = ptrtoint ptr %9 to i64
257+
%11 = inttoptr i64 %10 to ptr
258+
%12 = getelementptr inbounds i8, ptr %3, i32 -152
259+
%13 = addrspacecast ptr %11 to ptr addrspace(10)
260+
call void @llvm.lifetime.start.p0(i64 8, ptr %2)
261+
%14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
262+
%15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
263+
call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
264+
%16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
265+
%17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
266+
%18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %17, i64 signext 1)
267+
%19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, !alias.scope !23, !noalias !24 ; preds = %19
268+
ret i64 %19
269+
}
270+
271+
declare ptr @julia.ptls_states()
272+
273+
declare ptr @julia.pointer_from_objref(ptr addrspace(11))
274+
275+
declare token @llvm.julia.gc_preserve_begin(...)
276+
277+
declare void @llvm.julia.gc_preserve_end(token)
278+
279+
declare ptr @julia.get_pgcstack()
280+
281+
; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite)
282+
declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2
283+
284+
; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
285+
declare void @julia.safepoint(ptr) #3
286+
287+
; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
288+
declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4
289+
206290
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
207-
declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
291+
declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5
292+
208293
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
209-
declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0
294+
declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5
295+
210296
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
211-
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
297+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5
298+
299+
declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0
300+
301+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
302+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6
303+
304+
; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
305+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6
306+
307+
attributes #0 = { "probe-stack"="inline-asm" }
308+
attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
309+
attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) }
310+
attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
311+
attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
312+
attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
313+
attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
314+
attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
315+
attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) }
316+
317+
!llvm.module.flags = !{!1, !2, !3}
318+
319+
!0 = !{}
320+
!1 = !{i32 2, !"Dwarf Version", i32 4}
321+
!2 = !{i32 2, !"Debug Info Version", i32 3}
322+
!3 = !{i32 2, !"julia.optlevel", i32 2}
323+
!4 = !{!5, !5, i64 0}
324+
!5 = !{!"jtbaa_gcframe", !6, i64 0}
325+
!6 = !{!"jtbaa", !7, i64 0}
326+
!7 = !{!"jtbaa"}
327+
!8 = !{!9, !9, i64 0, i64 1}
328+
!9 = !{!"jtbaa_const", !6, i64 0}
329+
!10 = !{!11}
330+
!11 = !{!"jnoalias_const", !12}
331+
!12 = !{!"jnoalias"}
332+
!13 = !{!14, !15, !16, !17}
333+
!14 = !{!"jnoalias_gcframe", !12}
334+
!15 = !{!"jnoalias_stack", !12}
335+
!16 = !{!"jnoalias_data", !12}
336+
!17 = !{!"jnoalias_typemd", !12}
337+
!18 = !{i64 56}
338+
!19 = !{i64 16}
339+
!20 = !{!21, !21, i64 0}
340+
!21 = !{!"jtbaa_value", !22, i64 0}
341+
!22 = !{!"jtbaa_data", !6, i64 0}
342+
!23 = !{!16}
343+
!24 = !{!14, !15, !17, !11}
344+
!25 = !{!26, !26, i64 0}
345+
!26 = !{!"jtbaa_mutab", !21, i64 0}
212346

213-
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
214-
attributes #1 = { allockind("alloc") }
215-
attributes #2 = { allockind("alloc,uninitialized") }
216-
attributes #3 = { allockind("alloc,zeroed") }

0 commit comments

Comments
 (0)