Skip to content

Commit 173839c

Browse files
esukhov authored and igcbot committed
IGC Vectorizer now implements cycle-proof deletion strategy
Bug fix: the IGC Vectorizer now uses a cycle-proof deletion strategy, so we can always clean up when a vectorizer chain is discarded.
1 parent 2f20cb4 commit 173839c

File tree

2 files changed

+279
-13
lines changed

2 files changed

+279
-13
lines changed

IGC/Compiler/CISACodeGen/IGCVectorizer.cpp

+16-13
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,7 @@ unsigned int getVectorSize(Value *I) {
179179
}
180180

181181

182-
// due to our emitter, currently we only process float fdiv's that we can
183-
// construct as INV (first operand is 1.0f);
182+
// due to our emitter, currently we only process float fdiv's
184183
bool isFDivSafe(Instruction *I) {
185184
if (!IGC_GET_FLAG_VALUE(VectorizerAllowFDIV)) return false;
186185
auto* Binary = llvm::dyn_cast<BinaryOperator>(I);
@@ -189,10 +188,6 @@ bool isFDivSafe(Instruction *I) {
189188
auto OpCode = Binary->getOpcode();
190189
if (!(OpCode == Instruction::FDiv && I->getType()->isFloatTy())) return false;
191190

192-
//auto* constFloat = llvm::dyn_cast<llvm::ConstantFP>(I->getOperand(0));
193-
//if (!constFloat) return false;
194-
//if (!constFloat->isExactlyValue(1.f)) return false;
195-
196191
return true;
197192
}
198193

@@ -209,13 +204,20 @@ bool isBinarySafe(Instruction *I) {
209204
return Result;
210205
}
211206

207+
bool isPHISafe(Instruction *I) {
208+
auto* PHI = llvm::dyn_cast<PHINode>(I);
209+
if (PHI && PHI->getNumIncomingValues() == 2)
210+
return true;
211+
return false;
212+
}
213+
212214
bool isSafeToVectorize(Instruction *I) {
213215
// this is a very limited approach for vectorizing but it's safe
214216
bool Result =
215-
llvm::isa<PHINode>(I) ||
217+
isPHISafe(I) ||
216218
llvm::isa<ExtractElementInst>(I) ||
217219
llvm::isa<InsertElementInst>(I) ||
218-
( llvm::isa<FPTruncInst>(I) && IGC_GET_FLAG_VALUE(VectorizerAllowFPTRUNC) ) ||
220+
(llvm::isa<FPTruncInst>(I) && IGC_GET_FLAG_VALUE(VectorizerAllowFPTRUNC)) ||
219221
isBinarySafe(I);
220222

221223
return Result;
@@ -818,18 +820,19 @@ bool IGCVectorizer::runOnFunction(llvm::Function &F) {
818820
CreatedVectorInstructions.clear();
819821
if (!processChain(InSt)) {
820822
writeLog();
823+
// this is important to not mix up instructions that were created for the chain
824+
// that was scrapped later
825+
ScalarToVector.clear();
821826
std::reverse(CreatedVectorInstructions.begin(), CreatedVectorInstructions.end());
822827
PRINT_DS("To Clean: ", CreatedVectorInstructions);
828+
// cycle-proof deletion: replace every use of the instruction with undef before erasing it
823829
for (auto& el : CreatedVectorInstructions) {
824830
PRINT_LOG("Cleaned: "); PRINT_INST_NL(el); writeLog();
831+
el->replaceAllUsesWith(UndefValue::get(el->getType()));
825832
el->eraseFromParent();
826833
}
827-
ScalarToVector.clear();
828834
}
829-
else {
830-
for (auto& el : CreatedVectorInstructions) { PRINT_LOG("Created: "); PRINT_INST_NL(el); writeLog(); }
831-
}
832-
writeLog();
835+
else { PRINT_DS("Created: ", CreatedVectorInstructions); writeLog(); }
833836
}
834837

835838
PRINT_LOG("\n\n");
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
; NOTE: this test just checks that we can process such IR
2+
; previously we had issues with processing discarded instructions for
3+
; vectorization chain
4+
; RUN: igc_opt -S %s --igc-vectorizer -dce
5+
6+
; ModuleID = 'reduced.ll'
7+
source_filename = "reduced.ll"
8+
9+
define spir_kernel void @widget() {
10+
bb:
11+
%tmp = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
12+
br i1 false, label %bb1, label %bb12
13+
14+
bb1: ; preds = %bb
15+
br label %bb12
16+
17+
bb2: ; No predecessors!
18+
%tmp3 = fmul float %tmp, %tmp
19+
%tmp4 = fmul float %tmp3, %tmp
20+
%tmp5 = fmul float %tmp3, %tmp4
21+
%tmp6 = fmul float %tmp5, 0.000000e+00
22+
%tmp7 = fadd float 0.000000e+00, %tmp6
23+
%tmp8 = fadd float %tmp7, 0.000000e+00
24+
%tmp9 = fdiv float %tmp8, 0.000000e+00
25+
br label %bb12
26+
27+
bb10: ; No predecessors!
28+
br label %bb12
29+
30+
bb11: ; No predecessors!
31+
br label %bb12
32+
33+
bb12: ; preds = %bb11, %bb10, %bb2, %bb1, %bb
34+
%tmp13 = phi float [ %tmp9, %bb2 ], [ 0.000000e+00, %bb10 ], [ 0.000000e+00, %bb11 ], [ 0.000000e+00, %bb ], [ 0.000000e+00, %bb1 ]
35+
%tmp14 = fmul reassoc nsz arcp contract float %tmp13, 0.000000e+00
36+
%tmp15 = fadd reassoc nsz arcp contract float %tmp14, 0.000000e+00
37+
%tmp16 = fmul reassoc nsz arcp contract float %tmp15, 0.000000e+00
38+
%tmp17 = insertelement <8 x float> zeroinitializer, float %tmp16, i64 0
39+
%tmp18 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
40+
br i1 false, label %bb19, label %bb30
41+
42+
bb19: ; preds = %bb12
43+
br label %bb30
44+
45+
bb20: ; No predecessors!
46+
br label %bb30
47+
48+
bb21: ; No predecessors!
49+
br label %bb30
50+
51+
bb22: ; No predecessors!
52+
%tmp23 = fmul float %tmp18, %tmp18
53+
%tmp24 = fmul float %tmp18, %tmp18
54+
%tmp25 = fmul float %tmp23, %tmp24
55+
%tmp26 = fmul float %tmp25, 0.000000e+00
56+
%tmp27 = fadd float 0.000000e+00, %tmp26
57+
%tmp28 = fadd float %tmp27, 0.000000e+00
58+
%tmp29 = fdiv float %tmp28, 0.000000e+00
59+
br label %bb30
60+
61+
bb30: ; preds = %bb22, %bb21, %bb20, %bb19, %bb12
62+
%tmp31 = phi float [ %tmp29, %bb22 ], [ 0.000000e+00, %bb21 ], [ 0.000000e+00, %bb20 ], [ 0.000000e+00, %bb12 ], [ 0.000000e+00, %bb19 ]
63+
%tmp32 = fmul reassoc nsz arcp contract float %tmp31, 0.000000e+00
64+
%tmp33 = fadd reassoc nsz arcp contract float %tmp32, 0.000000e+00
65+
%tmp34 = fmul reassoc nsz arcp contract float %tmp33, 0.000000e+00
66+
%tmp35 = insertelement <8 x float> %tmp17, float %tmp34, i64 0
67+
%tmp36 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
68+
br i1 false, label %bb37, label %bb48
69+
70+
bb37: ; preds = %bb30
71+
br label %bb48
72+
73+
bb38: ; No predecessors!
74+
br label %bb48
75+
76+
bb39: ; No predecessors!
77+
br label %bb48
78+
79+
bb40: ; No predecessors!
80+
%tmp41 = fmul float %tmp36, %tmp36
81+
%tmp42 = fmul float %tmp36, %tmp36
82+
%tmp43 = fmul float %tmp41, %tmp42
83+
%tmp44 = fmul float %tmp43, 0.000000e+00
84+
%tmp45 = fadd float 0.000000e+00, %tmp44
85+
%tmp46 = fadd float %tmp45, 0.000000e+00
86+
%tmp47 = fdiv float %tmp46, 0.000000e+00
87+
br label %bb48
88+
89+
bb48: ; preds = %bb40, %bb39, %bb38, %bb37, %bb30
90+
%tmp49 = phi float [ %tmp47, %bb40 ], [ 0.000000e+00, %bb39 ], [ 0.000000e+00, %bb38 ], [ 0.000000e+00, %bb30 ], [ 0.000000e+00, %bb37 ]
91+
%tmp50 = fmul reassoc nsz arcp contract float %tmp49, 0.000000e+00
92+
%tmp51 = fadd reassoc nsz arcp contract float %tmp50, 0.000000e+00
93+
%tmp52 = fmul reassoc nsz arcp contract float %tmp51, 0.000000e+00
94+
%tmp53 = insertelement <8 x float> %tmp35, float %tmp52, i64 0
95+
%tmp54 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
96+
br i1 false, label %bb55, label %bb66
97+
98+
bb55: ; preds = %bb48
99+
br label %bb66
100+
101+
bb56: ; No predecessors!
102+
br label %bb66
103+
104+
bb57: ; No predecessors!
105+
br label %bb66
106+
107+
bb58: ; No predecessors!
108+
%tmp59 = fmul float %tmp54, %tmp54
109+
%tmp60 = fmul float %tmp54, %tmp54
110+
%tmp61 = fmul float %tmp59, %tmp60
111+
%tmp62 = fmul float %tmp61, 0.000000e+00
112+
%tmp63 = fadd float 0.000000e+00, %tmp62
113+
%tmp64 = fadd float %tmp63, 0.000000e+00
114+
%tmp65 = fdiv float %tmp64, 0.000000e+00
115+
br label %bb66
116+
117+
bb66: ; preds = %bb58, %bb57, %bb56, %bb55, %bb48
118+
%tmp67 = phi float [ %tmp65, %bb58 ], [ 0.000000e+00, %bb57 ], [ 0.000000e+00, %bb56 ], [ 0.000000e+00, %bb48 ], [ 0.000000e+00, %bb55 ]
119+
%tmp68 = fmul reassoc nsz arcp contract float %tmp67, 0.000000e+00
120+
%tmp69 = fadd reassoc nsz arcp contract float %tmp68, 0.000000e+00
121+
%tmp70 = fmul reassoc nsz arcp contract float %tmp69, 0.000000e+00
122+
%tmp71 = insertelement <8 x float> %tmp53, float %tmp70, i64 0
123+
%tmp72 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
124+
br i1 false, label %bb73, label %bb84
125+
126+
bb73: ; preds = %bb66
127+
br label %bb84
128+
129+
bb74: ; No predecessors!
130+
br label %bb84
131+
132+
bb75: ; No predecessors!
133+
br label %bb84
134+
135+
bb76: ; No predecessors!
136+
%tmp77 = fmul float %tmp72, %tmp72
137+
%tmp78 = fmul float %tmp72, %tmp72
138+
%tmp79 = fmul float %tmp77, %tmp78
139+
%tmp80 = fmul float %tmp79, 0.000000e+00
140+
%tmp81 = fadd float 0.000000e+00, %tmp80
141+
%tmp82 = fadd float %tmp81, 0.000000e+00
142+
%tmp83 = fdiv float %tmp82, 0.000000e+00
143+
br label %bb84
144+
145+
bb84: ; preds = %bb76, %bb75, %bb74, %bb73, %bb66
146+
%tmp85 = phi float [ %tmp83, %bb76 ], [ 0.000000e+00, %bb75 ], [ 0.000000e+00, %bb74 ], [ 0.000000e+00, %bb66 ], [ 0.000000e+00, %bb73 ]
147+
%tmp86 = fmul reassoc nsz arcp contract float %tmp85, 0.000000e+00
148+
%tmp87 = fadd reassoc nsz arcp contract float %tmp86, 0.000000e+00
149+
%tmp88 = fmul reassoc nsz arcp contract float %tmp87, 0.000000e+00
150+
%tmp89 = insertelement <8 x float> %tmp71, float %tmp88, i64 0
151+
%tmp90 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
152+
br i1 false, label %bb91, label %bb102
153+
154+
bb91: ; preds = %bb84
155+
br label %bb102
156+
157+
bb92: ; No predecessors!
158+
br label %bb102
159+
160+
bb93: ; No predecessors!
161+
br label %bb102
162+
163+
bb94: ; No predecessors!
164+
%tmp95 = fmul float %tmp90, %tmp90
165+
%tmp96 = fmul float %tmp90, %tmp90
166+
%tmp97 = fmul float %tmp95, %tmp96
167+
%tmp98 = fmul float %tmp97, 0.000000e+00
168+
%tmp99 = fadd float 0.000000e+00, %tmp98
169+
%tmp100 = fadd float %tmp99, 0.000000e+00
170+
%tmp101 = fdiv float %tmp100, 0.000000e+00
171+
br label %bb102
172+
173+
bb102: ; preds = %bb94, %bb93, %bb92, %bb91, %bb84
174+
%tmp103 = phi float [ %tmp101, %bb94 ], [ 0.000000e+00, %bb93 ], [ 0.000000e+00, %bb92 ], [ 0.000000e+00, %bb84 ], [ 0.000000e+00, %bb91 ]
175+
%tmp104 = fmul reassoc nsz arcp contract float %tmp103, 0.000000e+00
176+
%tmp105 = fadd reassoc nsz arcp contract float %tmp104, 0.000000e+00
177+
%tmp106 = fmul reassoc nsz arcp contract float %tmp105, 0.000000e+00
178+
%tmp107 = insertelement <8 x float> %tmp89, float %tmp106, i64 0
179+
%tmp108 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
180+
br i1 false, label %bb109, label %bb120
181+
182+
bb109: ; preds = %bb102
183+
br label %bb120
184+
185+
bb110: ; No predecessors!
186+
br label %bb120
187+
188+
bb111: ; No predecessors!
189+
br label %bb120
190+
191+
bb112: ; No predecessors!
192+
%tmp113 = fmul float %tmp108, %tmp108
193+
%tmp114 = fmul float %tmp108, %tmp108
194+
%tmp115 = fmul float %tmp113, %tmp114
195+
%tmp116 = fmul float %tmp115, 0.000000e+00
196+
%tmp117 = fadd float 0.000000e+00, %tmp116
197+
%tmp118 = fadd float %tmp117, 0.000000e+00
198+
%tmp119 = fdiv float %tmp118, 0.000000e+00
199+
br label %bb120
200+
201+
bb120: ; preds = %bb112, %bb111, %bb110, %bb109, %bb102
202+
%tmp121 = phi float [ %tmp119, %bb112 ], [ 0.000000e+00, %bb111 ], [ 0.000000e+00, %bb110 ], [ 0.000000e+00, %bb102 ], [ 0.000000e+00, %bb109 ]
203+
%tmp122 = fmul reassoc nsz arcp contract float %tmp121, 0.000000e+00
204+
%tmp123 = fadd reassoc nsz arcp contract float %tmp122, 0.000000e+00
205+
%tmp124 = fmul reassoc nsz arcp contract float %tmp123, 0.000000e+00
206+
%tmp125 = insertelement <8 x float> %tmp107, float %tmp124, i64 0
207+
%tmp126 = fmul reassoc nsz arcp contract float 0.000000e+00, 0.000000e+00
208+
br i1 false, label %bb127, label %bb138
209+
210+
bb127: ; preds = %bb120
211+
br label %bb138
212+
213+
bb128: ; No predecessors!
214+
br label %bb138
215+
216+
bb129: ; No predecessors!
217+
br label %bb138
218+
219+
bb130: ; No predecessors!
220+
%tmp131 = fmul float %tmp126, %tmp126
221+
%tmp132 = fmul float %tmp126, %tmp126
222+
%tmp133 = fmul float %tmp131, %tmp132
223+
%tmp134 = fmul float %tmp133, 0.000000e+00
224+
%tmp135 = fadd float 0.000000e+00, %tmp134
225+
%tmp136 = fadd float %tmp135, 0.000000e+00
226+
%tmp137 = fdiv float %tmp136, 0.000000e+00
227+
br label %bb138
228+
229+
bb138: ; preds = %bb130, %bb129, %bb128, %bb127, %bb120
230+
%tmp139 = phi float [ %tmp137, %bb130 ], [ 0.000000e+00, %bb129 ], [ 0.000000e+00, %bb128 ], [ 0.000000e+00, %bb120 ], [ 0.000000e+00, %bb127 ]
231+
%tmp140 = fmul reassoc nsz arcp contract float %tmp139, 0.000000e+00
232+
%tmp141 = fadd reassoc nsz arcp contract float %tmp140, 0.000000e+00
233+
%tmp142 = fmul reassoc nsz arcp contract float %tmp141, 0.000000e+00
234+
%tmp143 = insertelement <8 x float> %tmp125, float %tmp142, i64 0
235+
%tmp144 = bitcast <8 x float> %tmp143 to <8 x i32>
236+
call void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i1 false, i1 false, i32 0, <8 x i32> %tmp144)
237+
ret void
238+
}
239+
240+
; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn
241+
declare void @llvm.assume(i1 noundef) #0
242+
243+
declare <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float>, <8 x i16>, <8 x i32>, i32, i32, i32, i32, i1)
244+
245+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
246+
declare float @llvm.fma.f32(float, float, float) #1
247+
248+
declare <8 x i16> @llvm.genx.GenISA.LSC2DBlockRead.v8i16(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
249+
250+
declare <8 x i32> @llvm.genx.GenISA.LSC2DBlockRead.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32)
251+
252+
declare void @llvm.genx.GenISA.LSC2DBlockWrite.v8i32(i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i1, i1, i32, <8 x i32>)
253+
254+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
255+
declare float @llvm.floor.f32(float) #1
256+
257+
; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
258+
declare float @llvm.exp2.f32(float) #1
259+
260+
attributes #0 = { inaccessiblememonly nofree nosync nounwind willreturn }
261+
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
262+
263+
!igc.functions = !{}

0 commit comments

Comments
 (0)