Skip to content

Commit 70f4ecd

Browse files
committed
[VPlan] Add ReductionStartVector VPInstruction.
1 parent f2bbca0 commit 70f4ecd

20 files changed

+168
-115
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7235,8 +7235,14 @@ static void fixReductionScalarResumeWhenVectorizingEpilog(
72357235
cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
72367236
const RecurrenceDescriptor &RdxDesc =
72377237
EpiRedHeaderPhi->getRecurrenceDescriptor();
7238-
Value *MainResumeValue =
7239-
EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
7238+
Value *MainResumeValue;
7239+
if (auto *VPI = dyn_cast<VPInstruction>(EpiRedHeaderPhi->getStartValue())) {
7240+
assert((VPI->getOpcode() == VPInstruction::Broadcast ||
7241+
VPI->getOpcode() == VPInstruction::ReductionStartVector) &&
7242+
"unexpected start recipe");
7243+
MainResumeValue = VPI->getOperand(0)->getUnderlyingValue();
7244+
} else
7245+
MainResumeValue = EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
72407246
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
72417247
RdxDesc.getRecurrenceKind())) {
72427248
Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue();
@@ -9182,7 +9188,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91829188
continue;
91839189

91849190
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9185-
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
9191+
Type *PhiTy = PhiR->getUnderlyingValue()->getType();
91869192
// If tail is folded by masking, introduce selects between the phi
91879193
// and the users outside the vector region of each reduction, at the
91889194
// beginning of the dedicated latch block.
@@ -9325,6 +9331,27 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
93259331
// start value.
93269332
PhiR->setOperand(0, Plan->getOrAddLiveIn(RdxDesc.getSentinelValue()));
93279333
}
9334+
RecurKind RK = RdxDesc.getRecurrenceKind();
9335+
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
9336+
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK) &&
9337+
!RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
9338+
VPBuilder PHBuilder(Plan->getVectorPreheader());
9339+
VPValue *Iden = Plan->getOrAddLiveIn(
9340+
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
9341+
// If the PHI is used by a partial reduction, set the scale factor.
9342+
unsigned ScaleFactor =
9343+
RecipeBuilder.getScalingForReduction(RdxDesc.getLoopExitInstr())
9344+
.value_or(1);
9345+
Type *I32Ty = IntegerType::getInt32Ty(PhiTy->getContext());
9346+
auto *ScaleFactorVPV =
9347+
Plan->getOrAddLiveIn(ConstantInt::get(I32Ty, ScaleFactor));
9348+
VPValue *StartV = PHBuilder.createNaryOp(
9349+
VPInstruction::ReductionStartVector,
9350+
{PhiR->getStartValue(), Iden, ScaleFactorVPV},
9351+
PhiTy->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
9352+
: FastMathFlags());
9353+
PhiR->setOperand(0, StartV);
9354+
}
93289355
}
93299356
for (VPRecipeBase *R : ToDelete)
93309357
R->eraseFromParent();

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -907,6 +907,10 @@ class VPInstruction : public VPRecipeWithIRFlags,
907907
BranchOnCount,
908908
BranchOnCond,
909909
Broadcast,
910+
/// Start vector for reductions with 3 operands: the original start value,
911+
/// the identity value for the reduction and an integer indicating the
912+
/// scaling factor.
913+
ReductionStartVector,
910914
ComputeAnyOfResult,
911915
ComputeFindLastIVResult,
912916
ComputeReductionResult,
@@ -2227,11 +2231,8 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
22272231
/// Returns true, if the phi is part of an in-loop reduction.
22282232
bool isInLoop() const { return IsInLoop; }
22292233

2230-
/// Returns true if the recipe only uses the first lane of operand \p Op.
22312234
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2232-
assert(is_contained(operands(), Op) &&
2233-
"Op must be an operand of the recipe");
2234-
return Op == getStartValue();
2235+
return isOrdered() || isInLoop();
22352236
}
22362237
};
22372238

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
8989
inferScalarType(R->getOperand(1)) &&
9090
"different types inferred for different operands");
9191
return IntegerType::get(Ctx, 1);
92+
case VPInstruction::ReductionStartVector:
93+
return inferScalarType(R->getOperand(0));
9294
case VPInstruction::ComputeAnyOfResult:
9395
case VPInstruction::ComputeFindLastIVResult:
9496
case VPInstruction::ComputeReductionResult: {
@@ -395,6 +397,10 @@ static unsigned getVFScaleFactor(VPRecipeBase *R) {
395397
return RR->getVFScaleFactor();
396398
if (auto *RR = dyn_cast<VPPartialReductionRecipe>(R))
397399
return RR->getVFScaleFactor();
400+
if (auto *VPI = dyn_cast<VPInstruction>(R))
401+
assert(
402+
VPI->getOpcode() != VPInstruction::ReductionStartVector &&
403+
"getting scaling factor of reduction-start-vector not implemented yet");
398404
return 1;
399405
}
400406

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 28 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) {
604604
return Builder.CreateVectorSplat(
605605
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");
606606
}
607+
case VPInstruction::ReductionStartVector: {
608+
if (State.VF.isScalar())
609+
return State.get(getOperand(0), true);
610+
IRBuilderBase::FastMathFlagGuard FMFG(Builder);
611+
Builder.setFastMathFlags(getFastMathFlags());
612+
// If this start vector is scaled then it should produce a vector with fewer
613+
// elements than the VF.
614+
ElementCount VF = State.VF.divideCoefficientBy(
615+
cast<ConstantInt>(getOperand(2)->getLiveInIRValue())->getZExtValue());
616+
auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));
617+
Constant *Zero = Builder.getInt32(0);
618+
return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),
619+
Zero);
620+
}
607621
case VPInstruction::ComputeAnyOfResult: {
608622
// FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
609623
// and will be removed by breaking up the recipe further.
@@ -882,6 +896,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
882896
case VPInstruction::PtrAdd:
883897
case VPInstruction::WideIVStep:
884898
case VPInstruction::StepVector:
899+
case VPInstruction::ReductionStartVector:
885900
return false;
886901
default:
887902
return true;
@@ -912,6 +927,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
912927
case VPInstruction::CanonicalIVIncrementForPart:
913928
case VPInstruction::BranchOnCount:
914929
case VPInstruction::BranchOnCond:
930+
case VPInstruction::ReductionStartVector:
915931
return true;
916932
case VPInstruction::PtrAdd:
917933
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
@@ -1017,6 +1033,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
10171033
case VPInstruction::FirstActiveLane:
10181034
O << "first-active-lane";
10191035
break;
1036+
case VPInstruction::ReductionStartVector:
1037+
O << "reduction-start-vector";
1038+
break;
10201039
default:
10211040
O << Instruction::getOpcodeName(getOpcode());
10221041
}
@@ -1601,6 +1620,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
16011620
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
16021621
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
16031622
Opcode == VPInstruction::WideIVStep ||
1623+
Opcode == VPInstruction::ReductionStartVector ||
16041624
Opcode == VPInstruction::ComputeReductionResult;
16051625
case OperationType::NonNegOp:
16061626
return Opcode == Instruction::ZExt;
@@ -3831,17 +3851,19 @@ void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
38313851
#endif
38323852

38333853
void VPReductionPHIRecipe::execute(VPTransformState &State) {
3834-
// If this phi is fed by a scaled reduction then it should output a
3835-
// vector with fewer elements than the VF.
3836-
ElementCount VF = State.VF.divideCoefficientBy(VFScaleFactor);
3854+
// Reductions do not have to start at zero. They can start with
3855+
// any loop invariant values.
3856+
VPValue *StartVPV = getStartValue();
38373857

38383858
// In order to support recurrences we need to be able to vectorize Phi nodes.
38393859
// Phi nodes have cycles, so we need to vectorize them in two stages. This is
38403860
// stage #1: We create a new vector PHI node with no incoming edges. We'll use
38413861
// this value when we vectorize all of the instructions that use the PHI.
3842-
auto *ScalarTy = State.TypeAnalysis.inferScalarType(this);
3862+
BasicBlock *VectorPH =
3863+
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
38433864
bool ScalarPHI = State.VF.isScalar() || IsInLoop;
3844-
Type *VecTy = ScalarPHI ? ScalarTy : VectorType::get(ScalarTy, VF);
3865+
Value *StartV = State.get(StartVPV, ScalarPHI);
3866+
Type *VecTy = StartV->getType();
38453867

38463868
BasicBlock *HeaderBB = State.CFG.PrevBB;
38473869
assert(State.CurrentParentLoop->getHeader() == HeaderBB &&
@@ -3850,49 +3872,7 @@ void VPReductionPHIRecipe::execute(VPTransformState &State) {
38503872
Phi->insertBefore(HeaderBB->getFirstInsertionPt());
38513873
State.set(this, Phi, IsInLoop);
38523874

3853-
BasicBlock *VectorPH =
3854-
State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
3855-
// Create start and identity vector values for the reduction in the preheader.
3856-
// TODO: Introduce recipes in VPlan preheader to create initial values.
3857-
IRBuilderBase::InsertPointGuard IPBuilder(State.Builder);
3858-
State.Builder.SetInsertPoint(VectorPH->getTerminator());
3859-
3860-
// Reductions do not have to start at zero. They can start with
3861-
// any loop invariant values.
3862-
VPValue *StartVPV = getStartValue();
3863-
RecurKind RK = RdxDesc.getRecurrenceKind();
3864-
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
3865-
RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) ||
3866-
RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) {
3867-
// [I|F]FindLastIV will use a sentinel value to initialize the reduction
3868-
// phi or the resume value from the main vector loop when vectorizing the
3869-
// epilogue loop. In the exit block, ComputeReductionResult will generate
3870-
// checks to verify if the reduction result is the sentinel value. If the
3871-
// result is the sentinel value, it will be corrected back to the start
3872-
// value.
3873-
// TODO: The sentinel value is not always necessary. When the start value is
3874-
// a constant, and smaller than the start value of the induction variable,
3875-
// the start value can be directly used to initialize the reduction phi.
3876-
Phi->addIncoming(State.get(StartVPV, ScalarPHI), VectorPH);
3877-
return;
3878-
}
3879-
3880-
Value *Iden = getRecurrenceIdentity(RK, VecTy->getScalarType(),
3881-
RdxDesc.getFastMathFlags());
3882-
unsigned CurrentPart = getUnrollPart(*this);
3883-
Value *StartV = StartVPV->getLiveInIRValue();
3884-
if (!ScalarPHI) {
3885-
if (CurrentPart == 0) {
3886-
Iden = State.Builder.CreateVectorSplat(VF, Iden);
3887-
Constant *Zero = State.Builder.getInt32(0);
3888-
StartV = State.Builder.CreateInsertElement(Iden, StartV, Zero);
3889-
} else {
3890-
Iden = State.Builder.CreateVectorSplat(VF, Iden);
3891-
}
3892-
}
3893-
3894-
Value *StartVal = (CurrentPart == 0) ? StartV : Iden;
3895-
Phi->addIncoming(StartVal, VectorPH);
3875+
Phi->addIncoming(StartV, VectorPH);
38963876
}
38973877

38983878
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1153,6 +1153,16 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
11531153
return;
11541154
}
11551155
}
1156+
// Simplify redundant ReductionStartVector recipes after unrolling.
1157+
VPValue *StartV;
1158+
if (match(Def, m_VPInstruction<VPInstruction::ReductionStartVector>(
1159+
m_VPValue(StartV), m_VPValue(), m_VPValue()))) {
1160+
Def->replaceUsesWithIf(StartV, [](const VPUser &U, unsigned Idx) {
1161+
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&U);
1162+
return PhiR && PhiR->isInLoop();
1163+
});
1164+
return;
1165+
}
11561166
}
11571167

11581168
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -223,6 +223,22 @@ void UnrollState::unrollHeaderPHIByUF(VPHeaderPHIRecipe *R,
223223
Copy->addOperand(R);
224224
Copy->addOperand(getConstantVPV(Part));
225225
} else if (RdxPhi) {
226+
// If the start value is a ReductionStartVector, use the identity value
227+
// (second operand) for unrolled parts. If the scaling factor is > 1,
228+
// create a new ReductionStartVector with the scale factor and both
229+
// operands set to the identity value.
230+
231+
if (auto *VPI = dyn_cast<VPInstruction>(RdxPhi->getStartValue())) {
232+
if (cast<ConstantInt>(VPI->getOperand(2)->getLiveInIRValue())
233+
->getZExtValue() == 1) {
234+
Copy->setOperand(0, VPI->getOperand(1));
235+
} else if (Part == 1) {
236+
auto *C = VPI->clone();
237+
C->setOperand(0, C->getOperand(1));
238+
C->insertAfter(VPI);
239+
addUniformForAllParts(C);
240+
}
241+
}
226242
Copy->addOperand(getConstantVPV(Part));
227243
} else {
228244
assert(isa<VPActiveLaneMaskPHIRecipe>(R) &&

llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -60,16 +60,16 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
6060
; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i32 [[TMP2]], 8
6161
; CHECK-NEXT: [[N_VEC5:%.*]] = sub i32 [[TMP2]], [[N_MOD_VF4]]
6262
; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[N_VEC5]] to i8
63-
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
63+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i8> poison, i8 [[TMP15]], i64 0
6464
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT]], <8 x i8> poison, <8 x i32> zeroinitializer
65-
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
66-
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[TMP15]], i64 0
65+
; CHECK-NEXT: [[DOTSPLATINSERT10:%.*]] = insertelement <8 x i8> poison, i8 [[BC_RESUME_VAL]], i64 0
6766
; CHECK-NEXT: [[DOTSPLAT11:%.*]] = shufflevector <8 x i8> [[DOTSPLATINSERT10]], <8 x i8> poison, <8 x i32> zeroinitializer
67+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i8> [[DOTSPLAT11]], <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
6868
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
6969
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
7070
; CHECK-NEXT: [[INDEX6:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
7171
; CHECK-NEXT: [[VEC_IND7:%.*]] = phi <8 x i8> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
72-
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT11]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
72+
; CHECK-NEXT: [[VEC_PHI9:%.*]] = phi <8 x i8> [ [[DOTSPLAT]], %[[VEC_EPILOG_PH]] ], [ [[TMP20:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
7373
; CHECK-NEXT: [[IV:%.*]] = trunc i32 [[INDEX6]] to i8
7474
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV]]
7575
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[GEP]], i32 0
@@ -87,12 +87,12 @@ define i8 @select_icmp_var_start(ptr %a, i8 %n, i8 %start) {
8787
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC5]]
8888
; CHECK-NEXT: br i1 [[CMP_N16]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
8989
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
90-
; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
91-
; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
90+
; CHECK-NEXT: [[BC_RESUME_VAL15:%.*]] = phi i8 [ [[TMP16]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
91+
; CHECK-NEXT: [[BC_MERGE_RDX16:%.*]] = phi i8 [ [[RDX_SELECT15]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[RDX_SELECT]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
9292
; CHECK-NEXT: br label %[[LOOP:.*]]
9393
; CHECK: [[LOOP]]:
94-
; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
95-
; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX18]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
94+
; CHECK-NEXT: [[IV1:%.*]] = phi i8 [ [[BC_RESUME_VAL15]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
95+
; CHECK-NEXT: [[RDX:%.*]] = phi i8 [ [[BC_MERGE_RDX16]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
9696
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[A]], i8 [[IV1]]
9797
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP1]], align 8
9898
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 3

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-epilogue.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,8 +161,8 @@ define void @dotp_small_epilogue_vf(i64 %idx.neg, i8 %a) #1 {
161161
; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
162162
; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT6]], <4 x i8> poison, <4 x i32> zeroinitializer
163163
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IDX_NEG]], [[N_VEC5]]
164-
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
165164
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
165+
; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i8> [[BROADCAST_SPLAT7]] to <4 x i32>
166166
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
167167
; CHECK: vec.epilog.vector.body:
168168
; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[IV]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,12 +17,13 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
1717
; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
1818
; CHECK-EMPTY:
1919
; CHECK-NEXT: vector.ph:
20+
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
2021
; CHECK-NEXT: Successor(s): vector loop
2122
; CHECK-EMPTY:
2223
; CHECK-NEXT: <x1> vector loop: {
2324
; CHECK-NEXT: vector.body:
2425
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
25-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi ir<0>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4)
26+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi vp<[[RDX_START]]>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4)
2627
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
2728
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]>
2829
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
@@ -83,11 +84,12 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
8384
; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
8485
; CHECK-EMPTY:
8586
; CHECK-NEXT: ir-bb<vector.ph>:
87+
; CHECK-NEXT: EMIT vp<[[RDX_START:%.+]]> = reduction-start-vector ir<0>, ir<0>, ir<4>
8688
; CHECK-NEXT: Successor(s): vector.body
8789
; CHECK-EMPTY:
8890
; CHECK-NEXT: vector.body:
8991
; CHECK-NEXT: EMIT-SCALAR vp<[[EP_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
90-
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add> (VF scaled by 1/4)
92+
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<[[RDX_START]]>, ir<%add> (VF scaled by 1/4)
9193
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[EP_IV]]>
9294
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
9395
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>

llvm/test/Transforms/LoopVectorize/PowerPC/exit-branch-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -153,10 +153,10 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
153153
; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP2]], 2
154154
; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF24]]
155155
; CHECK-NEXT: [[TMP56:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]]
156+
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
156157
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0
157158
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
158159
; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
159-
; CHECK-NEXT: [[TMP57:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
160160
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
161161
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
162162
; CHECK-NEXT: [[INDEX38:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT32:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]

0 commit comments

Comments
 (0)