diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 35ea468bb1a28..05fd87ed5807e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7209,15 +7209,25 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) { } } -// If \p R is a ComputeReductionResult when vectorizing the epilog loop, -// fix the reduction's scalar PHI node by adding the incoming value from the -// main vector loop. +static Value *getStartValueFromReductionResult(VPInstruction *RdxResult) { + using namespace VPlanPatternMatch; + assert(RdxResult->getOpcode() == VPInstruction::ComputeFindLastIVResult && + "RdxResult must be ComputeFindLastIVResult"); + VPValue *StartVPV = RdxResult->getOperand(1); + match(StartVPV, m_Freeze(m_VPValue(StartVPV))); + return StartVPV->getLiveInIRValue(); +} + +// If \p R is a Compute{Reduction,AnyOf,FindLastIV}Result when vectorizing the +// epilog loop, fix the reduction's scalar PHI node by adding the incoming value +// from the main vector loop. static void fixReductionScalarResumeWhenVectorizingEpilog( VPRecipeBase *R, VPTransformState &State, BasicBlock *LoopMiddleBlock, BasicBlock *BypassBlock) { auto *EpiRedResult = dyn_cast(R); if (!EpiRedResult || - (EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult && + (EpiRedResult->getOpcode() != VPInstruction::ComputeAnyOfResult && + EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult && EpiRedResult->getOpcode() != VPInstruction::ComputeFindLastIVResult)) return; @@ -7229,15 +7239,18 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( EpiRedHeaderPhi->getStartValue()->getUnderlyingValue(); if (RecurrenceDescriptor::isAnyOfRecurrenceKind( RdxDesc.getRecurrenceKind())) { + Value *StartV = EpiRedResult->getOperand(1)->getLiveInIRValue(); + (void)StartV; auto *Cmp = cast(MainResumeValue); assert(Cmp->getPredicate() == CmpInst::ICMP_NE && "AnyOf expected to start with ICMP_NE"); - assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() && + assert(Cmp->getOperand(1) == StartV && "AnyOf expected to start by comparing main resume value to original " "start value"); MainResumeValue = Cmp->getOperand(0); } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind( RdxDesc.getRecurrenceKind())) { + Value *StartV = getStartValueFromReductionResult(EpiRedResult); using namespace llvm::PatternMatch; Value *Cmp, *OrigResumeV, *CmpOp; bool IsExpectedPattern = @@ -7246,10 +7259,7 @@ static void fixReductionScalarResumeWhenVectorizingEpilog( m_Value(OrigResumeV))) && (match(Cmp, m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(OrigResumeV), m_Value(CmpOp))) && - (match(CmpOp, - m_Freeze(m_Specific(RdxDesc.getRecurrenceStartValue()))) || - (CmpOp == RdxDesc.getRecurrenceStartValue() && - isGuaranteedNotToBeUndefOrPoison(CmpOp)))); + ((CmpOp == StartV && isGuaranteedNotToBeUndefOrPoison(CmpOp)))); assert(IsExpectedPattern && "Unexpected reduction resume pattern"); (void)IsExpectedPattern; MainResumeValue = OrigResumeV; @@ -9184,6 +9194,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( OrigExitingVPV->replaceUsesWithIf(NewExitingVPV, [](VPUser &U, unsigned) { return isa(&U) && (cast(&U)->getOpcode() == + VPInstruction::ComputeAnyOfResult || + cast(&U)->getOpcode() == VPInstruction::ComputeReductionResult || cast(&U)->getOpcode() == VPInstruction::ComputeFindLastIVResult); @@ -9236,6 +9248,12 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( FinalReductionResult = Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult, {PhiR, Start, NewExitingVPV}, ExitDL); + } else if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + VPValue *Start = PhiR->getStartValue(); + FinalReductionResult = + Builder.createNaryOp(VPInstruction::ComputeAnyOfResult, + {PhiR, Start, NewExitingVPV}, ExitDL); } else { VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind( RdxDesc.getRecurrenceKind()) @@ -9764,23 +9782,37 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, Value *ResumeV = nullptr; // TODO: Move setting of resume values to prepareToExecute. if (auto *ReductionPhi = dyn_cast(&R)) { + auto *RdxResult = + cast(*find_if(ReductionPhi->users(), [](VPUser *U) { + auto *VPI = dyn_cast(U); + return VPI && + (VPI->getOpcode() == VPInstruction::ComputeAnyOfResult || + VPI->getOpcode() == VPInstruction::ComputeReductionResult || + VPI->getOpcode() == VPInstruction::ComputeFindLastIVResult); + })); ResumeV = cast(ReductionPhi->getUnderlyingInstr()) ->getIncomingValueForBlock(L->getLoopPreheader()); const RecurrenceDescriptor &RdxDesc = ReductionPhi->getRecurrenceDescriptor(); RecurKind RK = RdxDesc.getRecurrenceKind(); if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) { + Value *StartV = RdxResult->getOperand(1)->getLiveInIRValue(); + assert(RdxDesc.getRecurrenceStartValue() == StartV && + "start value from ComputeAnyOfResult must match"); + // VPReductionPHIRecipes for AnyOf reductions expect a boolean as // start value; compare the final value from the main vector loop // to the start value. BasicBlock *PBB = cast(ResumeV)->getParent(); IRBuilder<> Builder(PBB, PBB->getFirstNonPHIIt()); - ResumeV = - Builder.CreateICmpNE(ResumeV, RdxDesc.getRecurrenceStartValue()); + ResumeV = Builder.CreateICmpNE(ResumeV, StartV); } else if (RecurrenceDescriptor::isFindLastIVRecurrenceKind(RK)) { - ToFrozen[RdxDesc.getRecurrenceStartValue()] = - cast(ResumeV)->getIncomingValueForBlock( - EPI.MainLoopIterationCountCheck); + Value *StartV = getStartValueFromReductionResult(RdxResult); + assert(RdxDesc.getRecurrenceStartValue() == StartV && + "start value from ComputeFindLastIVResult must match"); + + ToFrozen[StartV] = cast(ResumeV)->getIncomingValueForBlock( + EPI.MainLoopIterationCountCheck); // VPReductionPHIRecipe for FindLastIV reductions requires an adjustment // to the resume value. The resume value is adjusted to the sentinel @@ -9790,8 +9822,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, // variable. BasicBlock *ResumeBB = cast(ResumeV)->getParent(); IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt()); - Value *Cmp = Builder.CreateICmpEQ( - ResumeV, ToFrozen[RdxDesc.getRecurrenceStartValue()]); + Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]); ResumeV = Builder.CreateSelect(Cmp, RdxDesc.getSentinelValue(), ResumeV); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 44f0b6d964a6e..273df55188c16 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -907,6 +907,7 @@ class VPInstruction : public VPRecipeWithIRFlags, BranchOnCount, BranchOnCond, Broadcast, + ComputeAnyOfResult, ComputeFindLastIVResult, ComputeReductionResult, // Extracts the last lane from its operand if it is a vector, or the last diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index e028497249f2e..81fc93bbf51fd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -89,6 +89,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { inferScalarType(R->getOperand(1)) && "different types inferred for different operands"); return IntegerType::get(Ctx, 1); + case VPInstruction::ComputeAnyOfResult: case VPInstruction::ComputeFindLastIVResult: case VPInstruction::ComputeReductionResult: { auto *PhiR = cast(R->getOperand(0)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index f2a7f16e19a79..dfd9fc3d4d719 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -318,6 +318,12 @@ m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { {Op0, Op1, Op2}); } +template +inline UnaryVPInstruction_match +m_Freeze(const Op0_t &Op0) { + return m_VPInstruction(Op0); +} + template inline UnaryVPInstruction_match m_Not(const Op0_t &Op0) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index a4831ea7c11f7..2aa5dd1b48c00 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -604,6 +604,20 @@ Value *VPInstruction::generate(VPTransformState &State) { return Builder.CreateVectorSplat( State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast"); } + case VPInstruction::ComputeAnyOfResult: { + // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary + // and will be removed by breaking up the recipe further. + auto *PhiR = cast(getOperand(0)); + auto *OrigPhi = cast(PhiR->getUnderlyingValue()); + Value *ReducedPartRdx = State.get(getOperand(2)); + for (unsigned Idx = 3; Idx < getNumOperands(); ++Idx) + ReducedPartRdx = Builder.CreateBinOp( + (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode( + RecurKind::AnyOf), + State.get(getOperand(Idx)), ReducedPartRdx, "bin.rdx"); + return createAnyOfReduction(Builder, ReducedPartRdx, + State.get(getOperand(1), VPLane(0)), OrigPhi); + } case VPInstruction::ComputeFindLastIVResult: { // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary // and will be removed by breaking up the recipe further. @@ -681,18 +695,11 @@ Value *VPInstruction::generate(VPTransformState &State) { // Create the reduction after the loop. Note that inloop reductions create // the target reduction in the loop using a Reduction recipe. - if ((State.VF.isVector() || - RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) && - !PhiR->isInLoop()) { + if (State.VF.isVector() && !PhiR->isInLoop()) { // TODO: Support in-order reductions based on the recurrence descriptor. // All ops in the reduction inherit fast-math-flags from the recurrence // descriptor. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) - ReducedPartRdx = - createAnyOfReduction(Builder, ReducedPartRdx, - RdxDesc.getRecurrenceStartValue(), OrigPhi); - else - ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK); + ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. @@ -830,6 +837,7 @@ bool VPInstruction::isVectorToScalar() const { getOpcode() == VPInstruction::ExtractPenultimateElement || getOpcode() == Instruction::ExtractElement || getOpcode() == VPInstruction::FirstActiveLane || + getOpcode() == VPInstruction::ComputeAnyOfResult || getOpcode() == VPInstruction::ComputeFindLastIVResult || getOpcode() == VPInstruction::ComputeReductionResult || getOpcode() == VPInstruction::AnyOf; @@ -925,6 +933,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { return true; case VPInstruction::PtrAdd: return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); + case VPInstruction::ComputeAnyOfResult: case VPInstruction::ComputeFindLastIVResult: return Op == getOperand(1); }; @@ -1005,6 +1014,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ExtractPenultimateElement: O << "extract-penultimate-element"; break; + case VPInstruction::ComputeAnyOfResult: + O << "compute-anyof-result"; + break; case VPInstruction::ComputeFindLastIVResult: O << "compute-find-last-iv-result"; break; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index e1fb3d476c58d..335301a927ceb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -327,7 +327,9 @@ void UnrollState::unrollBlock(VPBlockBase *VPB) { // Add all VPValues for all parts to ComputeReductionResult which combines // the parts to compute the final reduction value. VPValue *Op1; - if (match(&R, m_VPInstruction( + if (match(&R, m_VPInstruction( + m_VPValue(), m_VPValue(), m_VPValue(Op1))) || + match(&R, m_VPInstruction( m_VPValue(), m_VPValue(Op1))) || match(&R, m_VPInstruction( m_VPValue(), m_VPValue(), m_VPValue(Op1)))) {