[llvm] [VPlan] Update final exit value via VPlan. (PR #112147)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 29 12:16:49 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Model updating IV users directly in VPlan, replacing fixupIVUsers.
This completes the transition towards modeling all live-outs directly in VPlan.
Depends on https://github.com/llvm/llvm-project/pull/110004,
https://github.com/llvm/llvm-project/pull/109975 and
https://github.com/llvm/llvm-project/pull/112145.
---
Patch is 21.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112147.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+98-149)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.cpp (+14-10)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+5)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll (-2)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1975df3cacbcae..e95797591bb297 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -542,11 +542,6 @@ class InnerLoopVectorizer {
protected:
friend class LoopVectorizationPlanner;
- /// Set up the values of the IVs correctly when exiting the vector loop.
- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *VectorTripCount, BasicBlock *MiddleBlock,
- VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -775,10 +770,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
void printDebugTracesAtStart() override;
void printDebugTracesAtEnd() override;
-
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *VectorTripCount, BasicBlock *MiddleBlock,
- VPTransformState &State) override {};
};
// A specialized derived class of inner loop vectorizer that performs
@@ -2751,97 +2742,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
return LoopVectorPreHeader;
}
-// Fix up external users of the induction variable. At this point, we are
-// in LCSSA form, with all external PHIs that use the IV having one input value,
-// coming from the remainder loop. We need those PHIs to also have a correct
-// value for the IV when arriving directly from the middle block.
-void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
- const InductionDescriptor &II,
- Value *VectorTripCount,
- BasicBlock *MiddleBlock,
- VPTransformState &State) {
- // There are two kinds of external IV usages - those that use the value
- // computed in the last iteration (the PHI) and those that use the penultimate
- // value (the value that feeds into the phi from the loop latch).
- // We allow both, but they, obviously, have different values.
-
- DenseMap<Value *, Value *> MissingVals;
-
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
- OrigLoop->getLoopPreheader()))
- ->getIncomingValueForBlock(MiddleBlock);
-
- // An external user of the last iteration's value should see the value that
- // the remainder loop uses to initialize its own IV.
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
- for (User *U : PostInc->users()) {
- Instruction *UI = cast<Instruction>(U);
- if (!OrigLoop->contains(UI)) {
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
- MissingVals[UI] = EndValue;
- }
- }
-
- // An external user of the penultimate value need to see EndValue - Step.
- // The simplest way to get this is to recompute it from the constituent SCEVs,
- // that is Start + (Step * (CRD - 1)).
- for (User *U : OrigPhi->users()) {
- auto *UI = cast<Instruction>(U);
- if (!OrigLoop->contains(UI)) {
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
- IRBuilder<> B(MiddleBlock->getTerminator());
-
- // Fast-math-flags propagate from the original induction instruction.
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
-
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
- assert(StepVPV && "step must have been expanded during VPlan execution");
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
- : State.get(StepVPV, VPLane(0));
- Value *Escape = nullptr;
- if (EndValue->getType()->isIntegerTy())
- Escape = B.CreateSub(EndValue, Step);
- else if (EndValue->getType()->isPointerTy())
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
- else {
- assert(EndValue->getType()->isFloatingPointTy() &&
- "Unexpected induction type");
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
- Instruction::FAdd
- ? Instruction::FSub
- : Instruction::FAdd,
- EndValue, Step);
- }
- Escape->setName("ind.escape");
- MissingVals[UI] = Escape;
- }
- }
-
- assert((MissingVals.empty() ||
- all_of(MissingVals,
- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
- return all_of(
- predecessors(cast<Instruction>(P.first)->getParent()),
- [MiddleBlock, this](BasicBlock *Pred) {
- return Pred == MiddleBlock ||
- Pred == OrigLoop->getLoopLatch();
- });
- })) &&
- "Expected escaping values from latch/middle.block only");
-
- for (auto &I : MissingVals) {
- PHINode *PHI = cast<PHINode>(I.first);
- // One corner case we have to handle is two IVs "chasing" each-other,
- // that is %IV2 = phi [...], [ %IV1, %latch ]
- // In this case, if IV1 has an external use, we need to avoid adding both
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
- // don't already have an incoming value for the middle block.
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
- PHI->addIncoming(I.second, MiddleBlock);
- }
-}
-
namespace {
struct CSEDenseMapInfo {
@@ -2986,24 +2886,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
for (PHINode &PN : Exit->phis())
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
- // No edge from the middle block to the unique exit block has been inserted
- // and there is nothing to fix from vector loop; phis should have incoming
- // from scalar loop only.
- } else {
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
- // the cost model.
-
- // If we inserted an edge from the middle block to the unique exit block,
- // update uses outside the loop (phis) to account for the newly inserted
- // edge.
-
- // Fix-up external users of the induction variables.
- for (const auto &Entry : Legal->getInductionVars())
- fixupIVUsers(Entry.first, Entry.second,
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
- }
-
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
@@ -8857,11 +8739,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
/// the end value of the induction.
-static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
- VPBuilder &VectorPHBuilder,
- VPBuilder &ScalarPHBuilder,
- VPTypeAnalysis &TypeInfo,
- VPValue *VectorTC) {
+static VPValue *addResumePhiRecipeForInduction(
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
// Truncated wide inductions resume from the last lane of their vector value
// in the last vector iteration which is handled elsewhere.
@@ -8886,6 +8767,7 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
ScalarTypeOfWideIV);
}
+ EndValues[WideIV] = EndValue;
auto *ResumePhiRecipe =
ScalarPHBuilder.createNaryOp(VPInstruction::ResumePhi, {EndValue, Start},
WideIV->getDebugLoc(), "bc.resume.val");
@@ -8895,7 +8777,9 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
/// Create resume phis in the scalar preheader for first-order recurrences,
/// reductions and inductions, and update the VPIRInstructions wrapping the
/// original phis in the scalar header.
-static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
+static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
+ Loop *OrigLoop,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -8915,7 +8799,7 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
- &Plan.getVectorTripCount())) {
+ &Plan.getVectorTripCount(), EndValues)) {
ScalarPhiIRI->addOperand(ResumePhi);
continue;
}
@@ -8949,9 +8833,9 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
// modeled explicitly yet and won't be included. Those are un-truncated
// VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe and induction
// increments.
-static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
- Loop *OrigLoop, VPRecipeBuilder &Builder, VPlan &Plan,
- const MapVector<PHINode *, InductionDescriptor> &Inductions) {
+static SetVector<VPIRInstruction *>
+collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
+ VPlan &Plan) {
auto *MiddleVPBB = Plan.getMiddleBlock();
SetVector<VPIRInstruction *> ExitUsersToFix;
for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) {
@@ -8976,18 +8860,6 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
// Exit values for inductions are computed and updated outside of VPlan
// and independent of induction recipes.
// TODO: Compute induction exit values in VPlan.
- if ((isa<VPWidenIntOrFpInductionRecipe>(V) &&
- !cast<VPWidenIntOrFpInductionRecipe>(V)->getTruncInst()) ||
- isa<VPWidenPointerInductionRecipe>(V) ||
- (isa<Instruction>(IncomingValue) &&
- OrigLoop->contains(cast<Instruction>(IncomingValue)) &&
- any_of(IncomingValue->users(), [&Inductions](User *U) {
- auto *P = dyn_cast<PHINode>(U);
- return P && Inductions.contains(P);
- }))) {
- if (ExitVPBB->getSinglePredecessor() == MiddleVPBB)
- continue;
- }
ExitUsersToFix.insert(ExitIRI);
ExitIRI->addOperand(V);
}
@@ -8996,17 +8868,86 @@ static SetVector<VPIRInstruction *> collectUsersInExitBlocks(
return ExitUsersToFix;
}
+/// If \p Incoming is a user of a non-truncated induction, create recipes to
+/// compute the final value and update the user \p ExitIRI.
+static bool addInductionEndValue(
+ VPlan &Plan, VPIRInstruction *ExitIRI, VPValue *Incoming,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ DenseMap<VPValue *, VPValue *> &EndValues, VPTypeAnalysis &TypeInfo) {
+ if ((isa<VPWidenIntOrFpInductionRecipe>(Incoming) &&
+ !cast<VPWidenIntOrFpInductionRecipe>(Incoming)->getTruncInst()) ||
+ isa<VPWidenPointerInductionRecipe>(Incoming) ||
+ (isa<Instruction>(Incoming->getUnderlyingValue()) &&
+ any_of(cast<Instruction>(Incoming->getUnderlyingValue())->users(),
+ [&Inductions](User *U) {
+ auto *P = dyn_cast<PHINode>(U);
+ return P && Inductions.contains(P);
+ }))) {
+ VPValue *IV;
+ if (auto *WideIV =
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()))
+ IV = WideIV;
+ else if (auto *WideIV =
+ dyn_cast<VPWidenInductionRecipe>(Incoming->getDefiningRecipe()
+ ->getOperand(0)
+ ->getDefiningRecipe()))
+ IV = WideIV;
+ else
+ IV = Incoming->getDefiningRecipe()->getOperand(1);
+ // Skip phi nodes already updated. This can be the case if 2 induction
+ // phis chase each other.
+ VPValue *EndValue = EndValues[IV];
+ if (any_of(cast<VPRecipeBase>(Incoming->getDefiningRecipe())->operands(),
+ IsaPred<VPWidenIntOrFpInductionRecipe,
+ VPWidenPointerInductionRecipe>)) {
+ ExitIRI->setOperand(0, EndValue);
+ return true;
+ }
+
+ VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+ VPValue *Escape = nullptr;
+ auto *WideIV = cast<VPWidenInductionRecipe>(IV->getDefiningRecipe());
+ VPValue *Step = WideIV->getStepValue();
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
+ if (ScalarTy->isIntegerTy())
+ Escape =
+ B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
+ else if (ScalarTy->isPointerTy())
+ Escape = B.createPtrAdd(
+ EndValue,
+ B.createNaryOp(Instruction::Sub,
+ {Plan.getOrAddLiveIn(ConstantInt::get(
+ Step->getLiveInIRValue()->getType(), 0)),
+ Step}),
+ {}, "ind.escape");
+ else if (ScalarTy->isFloatingPointTy()) {
+ const auto &ID = WideIV->getInductionDescriptor();
+ Escape = B.createNaryOp(
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
+ ? Instruction::FSub
+ : Instruction::FAdd,
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
+ } else {
+ llvm_unreachable("all possible induction types must be handled");
+ }
+ ExitIRI->setOperand(0, Escape);
+ return true;
+ }
+ return false;
+}
// Add exit values to \p Plan. Extracts are added for each entry in \p
// ExitUsersToFix if needed and their operands are updated. Returns true if all
// exit users can be handled, otherwise return false.
-static bool
-addUsersInExitBlocks(VPlan &Plan,
- const SetVector<VPIRInstruction *> &ExitUsersToFix) {
+static bool addUsersInExitBlocks(
+ VPlan &Plan, const SetVector<VPIRInstruction *> &ExitUsersToFix,
+ const MapVector<PHINode *, InductionDescriptor> &Inductions,
+ DenseMap<VPValue *, VPValue *> &EndValues) {
if (ExitUsersToFix.empty())
return true;
auto *MiddleVPBB = Plan.getMiddleBlock();
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
// Introduce extract for exiting values and update the VPIRInstructions
// modeling the corresponding LCSSA phis.
@@ -9022,11 +8963,16 @@ addUsersInExitBlocks(VPlan &Plan,
if (ExitIRI->getParent()->getSinglePredecessor() != MiddleVPBB)
return false;
+ VPValue *Incoming = ExitIRI->getOperand(0);
+ if (addInductionEndValue(Plan, ExitIRI, Incoming, Inductions, EndValues,
+ TypeInfo))
+ continue;
+
LLVMContext &Ctx = ExitIRI->getInstruction().getContext();
VPValue *Ext = B.createNaryOp(VPInstruction::ExtractFromEnd,
{Op, Plan.getOrAddLiveIn(ConstantInt::get(
IntegerType::get(Ctx, 32), 1))});
- ExitIRI->setOperand(Idx, Ext);
+ ExitIRI->setOperand(0, Ext);
}
}
return true;
@@ -9307,11 +9253,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanTransforms::handleUncountableEarlyExit(
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
}
- addScalarResumePhis(RecipeBuilder, *Plan);
- SetVector<VPIRInstruction *> ExitUsersToFix = collectUsersInExitBlocks(
- OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars());
+ DenseMap<VPValue *, VPValue *> EndValues;
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
+ SetVector<VPIRInstruction *> ExitUsersToFix =
+ collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
- if (!addUsersInExitBlocks(*Plan, ExitUsersToFix)) {
+ if (!addUsersInExitBlocks(*Plan, ExitUsersToFix, Legal->getInductionVars(),
+ EndValues)) {
reportVectorizationFailure(
"Some exit values in loop with uncountable exit not supported yet",
"UncountableEarlyExitLoopsUnsupportedExitValue", ORE, OrigLoop);
@@ -9438,7 +9386,8 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
- addScalarResumePhis(RecipeBuilder, *Plan);
+ DenseMap<VPValue *, VPValue *> EndValues;
+ addScalarResumePhis(RecipeBuilder, *Plan, OrigLoop, EndValues);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9a082921d4f7f2..d763a9a605294b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -315,16 +315,20 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
LastLane = 0;
}
- auto *LastInst = cast<Instruction>(get(Def, LastLane));
- // Set the insert point after the last scalarized instruction or after the
- // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
- // will directly follow the scalar definitions.
+ auto *LastDef = get(Def, LastLane);
auto OldIP = Builder.saveIP();
- auto NewIP =
- isa<PHINode>(LastInst)
- ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
- : std::next(BasicBlock::iterator(LastInst));
- Builder.SetInsertPoint(&*NewIP);
+ if (auto *LastInst = dyn_cast<Instruction>(LastDef)) {
+ // TODO: Remove once VPDerivedIVRecipe can be simplified, which requires
+ // vector trip count being modeled in VPlan.
+ // Set the insert point after the last scalarized instruction or after the
+ // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
+ // will directly follow the scalar definitions.
+ auto NewIP =
+ isa<PHINode>(LastInst)
+ ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI())
+ : std::next(BasicBlock::iterator(LastInst));
+ Builder.SetInsertPoint(&*NewIP);
+ }
// However, if we are vectorizing, we need to construct the vector values.
// If the value is known to be uniform after vectorization, we can just
@@ -339,7 +343,7 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
} else {
// Initialize packing with insertelements to start from undef.
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- Value *Undef = PoisonValue::get(VectorType::get(LastInst->getType(), VF));
+ Value *Undef = PoisonValue::get(VectorType::get(LastDef->getType(), VF));
set(Def, Undef);
for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
packScalarIntoVectorValue(Def, Lane);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 404202b7f31304..dd0d2dfdd2e01c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1422,6 +1422,11 @@ class VPIRInstruction : public VPRecipeBase {
"Op must be an operand of the recipe");
return true;
}
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/112147
More information about the llvm-commits
mailing list