Skip to content

Commit 123993f

Browse files
[SLP] Make getSameOpcode support interchangeable instructions. (#133888)
We use the term "interchangeable instructions" to refer to different operators that have the same meaning (e.g., `add x, 0` is equivalent to `mul x, 1`). Non-constant values are not supported, as they may incur high costs with little benefit.

---------

Co-authored-by: Alexey Bataev <[email protected]>
1 parent 90c01f4 commit 123993f

25 files changed

+627
-201
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+410-51
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
314314
;
315315
; POW2-ONLY-LABEL: @store_try_reorder(
316316
; POW2-ONLY-NEXT: entry:
317-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
318-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
319-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
320-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
317+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
318+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
319+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
320+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
321321
; POW2-ONLY-NEXT: ret void
322322
;
323323
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll

+3-4
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
99
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
1011
; CHECK-NEXT: br label %[[BB:.*]]
1112
; CHECK: [[BB]]:
12-
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
13-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
14-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
1514
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
16-
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
15+
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
1716
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
1817
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
1918
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
324324
;
325325
; POW2-ONLY-LABEL: @store_try_reorder(
326326
; POW2-ONLY-NEXT: entry:
327-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
328-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
329-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
330-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
327+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887:%.*]], align 4
328+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
329+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[ARRAYIDX_I1887]], i64 2
330+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
331331
; POW2-ONLY-NEXT: ret void
332332
;
333333
entry:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=slp-vectorizer -S %s | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: @test(
6+
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 0, 0
8+
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 0, 1
9+
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 0
10+
; CHECK-NEXT: [[UMIN120:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP2]])
11+
; CHECK-NEXT: [[TMP3:%.*]] = sub i64 0, 0
12+
; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 0
13+
; CHECK-NEXT: [[UMIN122:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN120]], i64 [[TMP4]])
14+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 0, 1
15+
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 0
16+
; CHECK-NEXT: [[UMIN123:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN122]], i64 [[TMP6]])
17+
; CHECK-NEXT: [[UMIN124:%.*]] = call i64 @llvm.umin.i64(i64 [[UMIN123]], i64 0)
18+
; CHECK-NEXT: ret void
19+
;
20+
entry:
21+
%0 = mul i64 0, 0
22+
%1 = lshr i64 %0, 0
23+
%2 = sub i64 0, 1
24+
%3 = lshr i64 %2, 0
25+
%umin120 = call i64 @llvm.umin.i64(i64 %1, i64 %3)
26+
%4 = sub i64 0, 0
27+
%5 = lshr i64 %4, 0
28+
%umin122 = call i64 @llvm.umin.i64(i64 %umin120, i64 %5)
29+
%6 = add i64 0, 1
30+
%7 = lshr i64 %6, 0
31+
%umin123 = call i64 @llvm.umin.i64(i64 %umin122, i64 %7)
32+
%umin124 = call i64 @llvm.umin.i64(i64 %umin123, i64 0)
33+
ret void
34+
}
35+
36+
declare i64 @llvm.umin.i64(i64, i64)

llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
1010
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
1111
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1212
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
14-
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
15-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
13+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
1614
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
1715
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
1816
; CHECK-NEXT: ret i32 undef

llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll

+3-8
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,10 @@ define void @test(ptr %0, ptr %1, ptr %2) {
1010
; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
1111
; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
1212
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
13-
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
14-
; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
15-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
13+
; CHECK-NEXT: [[TMP16:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP13]]
1614
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
17-
; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
18-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
19-
; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
20-
; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
21-
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
15+
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP17]], zeroinitializer
16+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
2217
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
2318
; CHECK-NEXT: ret void
2419
;

llvm/test/Transforms/SLPVectorizer/X86/buildvector-postpone-for-dependency.ll

+3-5
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,13 @@ define void @test() {
88
; CHECK: [[BB1:.*]]:
99
; CHECK-NEXT: br label %[[BB2:.*]]
1010
; CHECK: [[BB2]]:
11-
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP5:%.*]], %[[BB6]] ]
11+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ poison, %[[BB1]] ], [ [[TMP4:%.*]], %[[BB6]] ]
1212
; CHECK-NEXT: ret void
1313
; CHECK: [[BB6]]:
1414
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP8:%.*]], %[[BB6]] ]
1515
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
16-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
17-
; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> zeroinitializer, [[TMP2]]
18-
; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> zeroinitializer, [[TMP2]]
19-
; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
16+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>, <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 4>
17+
; CHECK-NEXT: [[TMP4]] = mul <4 x i32> [[TMP3]], zeroinitializer
2018
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 poison>, <2 x i32> <i32 2, i32 1>
2119
; CHECK-NEXT: [[TMP8]] = mul <2 x i32> zeroinitializer, [[TMP7]]
2220
; CHECK-NEXT: br i1 false, label %[[BB2]], label %[[BB6]]

llvm/test/Transforms/SLPVectorizer/X86/bv-shuffle-mask.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,12 @@ define i16 @test(i16 %v1, i16 %v2) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V2]], i32 3
99
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[V1]], i32 3
10-
; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i16> [[TMP0]], [[TMP1]]
1110
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i16> [[TMP0]], [[TMP1]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
1311
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 poison, i32 3>
1412
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> [[TMP5]], i16 [[V1]], i32 0
1513
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i16> [[TMP6]], <2 x i16> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
1614
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i16> [[TMP7]], zeroinitializer
17-
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP4]], zeroinitializer
15+
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i16> [[TMP3]], zeroinitializer
1816
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i16> [[TMP9]], zeroinitializer
1917
; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer
2018
; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP11]], zeroinitializer

llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll

+12-16
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,13 @@
44
define i64 @foo(i32 %tmp7) {
55
; CHECK-LABEL: @foo(
66
; CHECK-NEXT: bb:
7-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP5:%.*]], i32 2
8-
; CHECK-NEXT: [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], zeroinitializer
9-
; CHECK-NEXT: [[TMP24:%.*]] = sub i32 undef, 0
10-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 undef, i32 0>, i32 [[TMP24]], i32 4
11-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i32> [[TMP0]], i32 0, i32 5
12-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, i32 [[TMP24]], i32 6
13-
; CHECK-NEXT: [[TMP12:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> [[TMP3]], i64 0)
14-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP12]], <8 x i32> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 poison, i32 2, i32 3, i32 poison, i32 14, i32 poison>
15-
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[TMP1]], [[TMP4]]
16-
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP4]]
17-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15>
7+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 poison, i32 0, i32 poison, i32 poison, i32 poison>, i32 [[TMP8:%.*]], i32 3
8+
; CHECK-NEXT: [[TMP4:%.*]] = sub <8 x i32> [[TMP0]], <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 poison, i32 0, i32 poison>
9+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 0>, <8 x i32> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 14, i32 poison, i32 poison, i32 7>
10+
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x i32> [[TMP2]], i32 0, i32 5
11+
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw <8 x i32> [[TMP13]], [[TMP4]]
12+
; CHECK-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[TMP13]], [[TMP4]]
13+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
1814
; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
1915
; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
2016
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
@@ -29,7 +25,7 @@ bb:
2925
%tmp4 = xor i32 %tmp3, 0
3026
%tmp6 = sub i32 0, 0
3127
%tmp8 = sub i32 %tmp7, 0
32-
%tmp9 = sub nsw i32 0, undef
28+
%tmp9 = sub nsw i32 0, poison
3329
%tmp10 = add nsw i32 0, %tmp6
3430
%tmp11 = sub nsw i32 0, %tmp8
3531
%tmp12 = add i32 0, %tmp10
@@ -44,10 +40,10 @@ bb:
4440
%tmp21 = add i32 %tmp20, %tmp17
4541
%tmp22 = sub i32 0, 0
4642
%tmp23 = add i32 0, 0
47-
%tmp24 = sub i32 undef, 0
48-
%tmp25 = add nsw i32 %tmp23, undef
43+
%tmp24 = sub i32 poison, 0
44+
%tmp25 = add nsw i32 %tmp23, poison
4945
%tmp26 = add nsw i32 %tmp24, %tmp22
50-
%tmp27 = sub nsw i32 undef, %tmp24
46+
%tmp27 = sub nsw i32 poison, %tmp24
5147
%tmp28 = add i32 0, %tmp25
5248
%tmp29 = xor i32 %tmp28, 0
5349
%tmp30 = add i32 0, %tmp26
@@ -58,7 +54,7 @@ bb:
5854
%tmp35 = add i32 %tmp34, %tmp29
5955
%tmp36 = add i32 %tmp35, 0
6056
%tmp37 = add i32 %tmp36, %tmp33
61-
%tmp38 = sub nsw i32 0, undef
57+
%tmp38 = sub nsw i32 0, poison
6258
%tmp39 = add i32 0, %tmp38
6359
%tmp40 = xor i32 %tmp39, 0
6460
%tmp41 = add i32 0, %tmp37

llvm/test/Transforms/SLPVectorizer/X86/extractcost.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@ define i32 @foo(ptr nocapture %A, i32 %n, i32 %m) {
99
; CHECK-NEXT: entry:
1010
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1111
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
12-
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
13-
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
14-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
12+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
1513
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
1614
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
1715
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0

llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll

+16-18
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,26 @@
66
define i64 @foo() {
77
; CHECK-LABEL: define i64 @foo() {
88
; CHECK-NEXT: bb:
9+
; CHECK-NEXT: [[ADD7:%.*]] = add i64 0, 0
910
; CHECK-NEXT: br label [[BB3:%.*]]
1011
; CHECK: bb1:
11-
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ADD:%.*]], [[BB3]] ]
12-
; CHECK-NEXT: [[PHI2:%.*]] = phi i64 [ [[TMP9:%.*]], [[BB3]] ]
12+
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
1313
; CHECK-NEXT: ret i64 0
1414
; CHECK: bb3:
15-
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
16-
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ]
17-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
18-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1
19-
; CHECK-NEXT: [[ADD]] = add i64 [[TMP3]], [[TMP2]]
20-
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 0
21-
; CHECK-NEXT: [[TMP9]] = or i64 [[PHI5]], 0
22-
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
23-
; CHECK-NEXT: [[TMP7]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[ADD]], i32 0
15+
; CHECK-NEXT: [[PHI4:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ]
16+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP3:%.*]], [[BB3]] ]
17+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP0]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
18+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI4]], i32 0
19+
; CHECK-NEXT: [[TMP3]] = add <2 x i64> [[TMP4]], [[TMP2]]
20+
; CHECK-NEXT: [[TMP5]] = add <2 x i64> [[TMP0]], [[TMP2]]
21+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
22+
; CHECK-NEXT: [[OR:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
23+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[OR]], 0
2424
; CHECK-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
2525
;
2626
; FORCED-LABEL: define i64 @foo() {
2727
; FORCED-NEXT: bb:
28-
; FORCED-NEXT: [[TMP8:%.*]] = add i64 0, 0
28+
; FORCED-NEXT: [[ADD7:%.*]] = add i64 0, 0
2929
; FORCED-NEXT: br label [[BB3:%.*]]
3030
; FORCED: bb1:
3131
; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ]
@@ -36,12 +36,10 @@ define i64 @foo() {
3636
; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3>
3737
; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0
3838
; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]]
39-
; FORCED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]]
40-
; FORCED-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]]
41-
; FORCED-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3>
42-
; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]]
43-
; FORCED-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
44-
; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0
39+
; FORCED-NEXT: [[TMP5]] = add <2 x i64> [[TMP1]], [[TMP2]]
40+
; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[ADD7]]
41+
; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
42+
; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP8]], 0
4543
; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]]
4644
;
4745
bb:

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-drop-wrapping-flags.ll

+1-3
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,7 @@ define i32 @test() {
99
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i8> poison, i8 [[A_PROMOTED]], i32 0
1010
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> poison, <4 x i32> zeroinitializer
1111
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], zeroinitializer
12-
; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i8> [[TMP1]], zeroinitializer
13-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
14-
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i16>
12+
; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16>
1513
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP5]], <i16 0, i16 -1, i16 0, i16 0>
1614
; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP6]])
1715
; CHECK-NEXT: [[TMP8:%.*]] = zext i16 [[TMP7]] to i32

llvm/test/Transforms/SLPVectorizer/X86/multi-extracts-bv-combined.ll

+2-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,8 @@ define i32 @foo() {
88
; CHECK-NEXT: [[D:%.*]] = load i32, ptr null, align 4
99
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 undef, i32 1, i32 0>, i32 [[D]], i32 1
1010
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 1, i32 2, i32 3, i32 1, i32 1, i32 1>
11-
; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i32> zeroinitializer, [[TMP1]]
12-
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
13-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 5, i32 6, i32 7>
14-
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
11+
; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i32> zeroinitializer, [[TMP1]]
12+
; CHECK-NEXT: store <8 x i32> [[TMP2]], ptr getelementptr inbounds ([64 x i32], ptr null, i64 0, i64 15), align 4
1513
; CHECK-NEXT: ret i32 0
1614
;
1715
entry:

0 commit comments

Comments
 (0)