Skip to content

Commit 1afe35a

Browse files
alexey-bataev authored and GeorgeARM committed
[SLP]Do not match nodes if schedulability of parent nodes is different
If one user node is non-schedulable and another one is schedulable, such nodes should not be considered matched. The actual insert point is selected differently in this case, yet the resulting insert points may still coincide, which may cause a compiler crash because of a broken def-use chain. Fixes llvm#137797
1 parent 23f5400 commit 1afe35a

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -15245,6 +15245,11 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry(
1524515245
continue;
1524615246
}
1524715247

15248+
if (!TEUseEI.UserTE->isGather() && !UserPHI &&
15249+
doesNotNeedToSchedule(TEUseEI.UserTE->Scalars) !=
15250+
doesNotNeedToSchedule(UseEI.UserTE->Scalars) &&
15251+
is_contained(UseEI.UserTE->Scalars, TEInsertPt))
15252+
continue;
1524815253
// Check if the user node of the TE comes after user node of TEPtr,
1524915254
// otherwise TEPtr depends on TE.
1525015255
if ((TEInsertBlock != InsertPt->getParent() ||
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s -slp-threshold=-99999 | FileCheck %s
3+
4+
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br i1 false, label %[[BB1:.*]], label %[[BB5:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], %[[BB1]] ], [ zeroinitializer, %[[BB]] ]
10+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
11+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> <i32 poison, i32 1>, i32 0, i32 0
12+
; CHECK-NEXT: [[TMP3]] = or <2 x i32> [[TMP1]], [[TMP2]]
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0
14+
; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP0]], [[TMP4]]
15+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
16+
; CHECK-NEXT: [[OR3:%.*]] = or i32 [[TMP6]], 0
17+
; CHECK-NEXT: br i1 false, label %[[BB1]], label %[[BB5]]
18+
; CHECK: [[BB5]]:
19+
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP5]], %[[BB1]] ]
20+
; CHECK-NEXT: ret void
21+
;
22+
bb:
23+
br i1 false, label %bb1, label %bb5
24+
25+
bb1:
26+
%phi = phi i32 [ %or4, %bb1 ], [ 0, %bb ]
27+
%phi2 = phi i32 [ %add, %bb1 ], [ 0, %bb ]
28+
%add = add i32 1, 0
29+
%or = or i32 0, %phi2
30+
%or3 = or i32 %or, 0
31+
%mul = mul i32 0, 0
32+
%or4 = or i32 %phi, %mul
33+
br i1 false, label %bb1, label %bb5
34+
35+
bb5:
36+
%phi6 = phi i32 [ 0, %bb ], [ %or4, %bb1 ]
37+
%phi7 = phi i32 [ 0, %bb ], [ %or, %bb1 ]
38+
ret void
39+
}

0 commit comments

Comments
 (0)