[llvm] c2d15ac - [VPlan] Update final IV exit value via VPlan. (#112147)
Author: Florian Hahn
Date: 2025-01-18T13:22:34Z
New Revision: c2d15ac4d4432788557e77c15ce572ac655a8fec
URL: https://github.com/llvm/llvm-project/commit/c2d15ac4d4432788557e77c15ce572ac655a8fec
DIFF: https://github.com/llvm/llvm-project/commit/c2d15ac4d4432788557e77c15ce572ac655a8fec.diff
LOG: [VPlan] Update final IV exit value via VPlan. (#112147)
Model updating IV users directly in VPlan, replacing fixupIVUsers.
Now simple extracts are created for all phis in the exit block during
initial VPlan construction. A later VPlan transform
(optimizeInductionExitUsers) replaces extracts of inductions with
their pre-computed values if possible.
This completes the transition towards modeling all live-outs directly in
VPlan.
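As a rough illustration (a simplified, hypothetical IR sketch with made-up
value names, not taken from the tests below), the exit phi of a widened
induction initially reads a plain extract of the last lane in the middle
block; optimizeInductionExitUsers then replaces that extract with the
pre-computed end value, or with the end value taken one step back
("ind.escape") when the pre-increment value is the one that escapes:

  ; before the transform
  middle.block:
    %extract = extractelement <4 x i64> %vec.ind, i32 3
  exit:
    %iv.lcssa = phi i64 [ %iv, %loop ], [ %extract, %middle.block ]

  ; after the transform (pre-increment user of an integer IV)
  middle.block:
    %ind.escape = sub i64 %end.value, %step
  exit:
    %iv.lcssa = phi i64 [ %iv, %loop ], [ %ind.escape, %middle.block ]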
There are a few follow-ups:
* emit extracts initially also for resume phis, and optimize them
together with IV exit users
* support for VPlans with multiple exits in optimizeInductionExitUsers.
Depends on https://github.com/llvm/llvm-project/pull/110004,
https://github.com/llvm/llvm-project/pull/109975 and
https://github.com/llvm/llvm-project/pull/112145.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlan.h
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/lib/Transforms/Vectorize/VPlanUtils.h
llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d79d9e8445b3df..34c5bc3312aec1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -543,11 +543,6 @@ class InnerLoopVectorizer {
protected:
friend class LoopVectorizationPlanner;
- /// Set up the values of the IVs correctly when exiting the vector loop.
- virtual void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *VectorTripCount, BasicBlock *MiddleBlock,
- VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -785,10 +780,6 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
BasicBlock *emitIterationCountCheck(BasicBlock *Bypass, bool ForEpilogue);
void printDebugTracesAtStart() override;
void printDebugTracesAtEnd() override;
-
- void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
- Value *VectorTripCount, BasicBlock *MiddleBlock,
- VPTransformState &State) override {};
};
// A specialized derived class of inner loop vectorizer that performs
@@ -2782,97 +2773,6 @@ BasicBlock *InnerLoopVectorizer::createVectorizedLoopSkeleton(
return LoopVectorPreHeader;
}
-// Fix up external users of the induction variable. At this point, we are
-// in LCSSA form, with all external PHIs that use the IV having one input value,
-// coming from the remainder loop. We need those PHIs to also have a correct
-// value for the IV when arriving directly from the middle block.
-void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
- const InductionDescriptor &II,
- Value *VectorTripCount,
- BasicBlock *MiddleBlock,
- VPTransformState &State) {
- // There are two kinds of external IV usages - those that use the value
- // computed in the last iteration (the PHI) and those that use the penultimate
- // value (the value that feeds into the phi from the loop latch).
- // We allow both, but they, obviously, have different values.
-
- DenseMap<Value *, Value *> MissingVals;
-
- Value *EndValue = cast<PHINode>(OrigPhi->getIncomingValueForBlock(
- OrigLoop->getLoopPreheader()))
- ->getIncomingValueForBlock(MiddleBlock);
-
- // An external user of the last iteration's value should see the value that
- // the remainder loop uses to initialize its own IV.
- Value *PostInc = OrigPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch());
- for (User *U : PostInc->users()) {
- Instruction *UI = cast<Instruction>(U);
- if (!OrigLoop->contains(UI)) {
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
- MissingVals[UI] = EndValue;
- }
- }
-
- // An external user of the penultimate value need to see EndValue - Step.
- // The simplest way to get this is to recompute it from the constituent SCEVs,
- // that is Start + (Step * (CRD - 1)).
- for (User *U : OrigPhi->users()) {
- auto *UI = cast<Instruction>(U);
- if (!OrigLoop->contains(UI)) {
- assert(isa<PHINode>(UI) && "Expected LCSSA form");
- IRBuilder<> B(MiddleBlock->getTerminator());
-
- // Fast-math-flags propagate from the original induction instruction.
- if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
- B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
-
- VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
- assert(StepVPV && "step must have been expanded during VPlan execution");
- Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
- : State.get(StepVPV, VPLane(0));
- Value *Escape = nullptr;
- if (EndValue->getType()->isIntegerTy())
- Escape = B.CreateSub(EndValue, Step);
- else if (EndValue->getType()->isPointerTy())
- Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
- else {
- assert(EndValue->getType()->isFloatingPointTy() &&
- "Unexpected induction type");
- Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
- Instruction::FAdd
- ? Instruction::FSub
- : Instruction::FAdd,
- EndValue, Step);
- }
- Escape->setName("ind.escape");
- MissingVals[UI] = Escape;
- }
- }
-
- assert((MissingVals.empty() ||
- all_of(MissingVals,
- [MiddleBlock, this](const std::pair<Value *, Value *> &P) {
- return all_of(
- predecessors(cast<Instruction>(P.first)->getParent()),
- [MiddleBlock, this](BasicBlock *Pred) {
- return Pred == MiddleBlock ||
- Pred == OrigLoop->getLoopLatch();
- });
- })) &&
- "Expected escaping values from latch/middle.block only");
-
- for (auto &I : MissingVals) {
- PHINode *PHI = cast<PHINode>(I.first);
- // One corner case we have to handle is two IVs "chasing" each-other,
- // that is %IV2 = phi [...], [ %IV1, %latch ]
- // In this case, if IV1 has an external use, we need to avoid adding both
- // "last value of IV1" and "penultimate value of IV2". So, verify that we
- // don't already have an incoming value for the middle block.
- if (PHI->getBasicBlockIndex(MiddleBlock) == -1)
- PHI->addIncoming(I.second, MiddleBlock);
- }
-}
-
namespace {
struct CSEDenseMapInfo {
@@ -2999,24 +2899,6 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
for (PHINode &PN : Exit->phis())
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);
- if (Cost->requiresScalarEpilogue(VF.isVector())) {
- // No edge from the middle block to the unique exit block has been inserted
- // and there is nothing to fix from vector loop; phis should have incoming
- // from scalar loop only.
- } else {
- // TODO: Check in VPlan to see if IV users need fixing instead of checking
- // the cost model.
-
- // If we inserted an edge from the middle block to the unique exit block,
- // update uses outside the loop (phis) to account for the newly inserted
- // edge.
-
- // Fix-up external users of the induction variables.
- for (const auto &Entry : Legal->getInductionVars())
- fixupIVUsers(Entry.first, Entry.second,
- getOrCreateVectorTripCount(nullptr), LoopMiddleBlock, State);
- }
-
// Don't apply optimizations below when no vector region remains, as they all
// require a vector loop at the moment.
if (!State.Plan->getVectorLoopRegion())
@@ -9049,11 +8931,9 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, bool HasNUW,
/// Create and return a ResumePhi for \p WideIV, unless it is truncated. If the
/// induction recipe is not canonical, creates a VPDerivedIVRecipe to compute
/// the end value of the induction.
-static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
- VPBuilder &VectorPHBuilder,
- VPBuilder &ScalarPHBuilder,
- VPTypeAnalysis &TypeInfo,
- VPValue *VectorTC) {
+static VPInstruction *addResumePhiRecipeForInduction(
+ VPWidenInductionRecipe *WideIV, VPBuilder &VectorPHBuilder,
+ VPBuilder &ScalarPHBuilder, VPTypeAnalysis &TypeInfo, VPValue *VectorTC) {
auto *WideIntOrFp = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
// Truncated wide inductions resume from the last lane of their vector value
// in the last vector iteration which is handled elsewhere.
@@ -9087,8 +8967,10 @@ static VPValue *addResumePhiRecipeForInduction(VPWidenInductionRecipe *WideIV,
/// Create resume phis in the scalar preheader for first-order recurrences,
/// reductions and inductions, and update the VPIRInstructions wrapping the
-/// original phis in the scalar header.
-static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
+/// original phis in the scalar header. End values for inductions are added to
+/// \p IVEndValues.
+static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan,
+ DenseMap<VPValue *, VPValue *> &IVEndValues) {
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
auto *ScalarPH = Plan.getScalarPreheader();
auto *MiddleVPBB = cast<VPBasicBlock>(ScalarPH->getSinglePredecessor());
@@ -9105,11 +8987,16 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
if (!ScalarPhiI)
break;
+ // TODO: Extract final value from induction recipe initially, optimize to
+ // pre-computed end value together in optimizeInductionExitUsers.
auto *VectorPhiR = cast<VPHeaderPHIRecipe>(Builder.getRecipe(ScalarPhiI));
if (auto *WideIVR = dyn_cast<VPWidenInductionRecipe>(VectorPhiR)) {
- if (VPValue *ResumePhi = addResumePhiRecipeForInduction(
+ if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction(
WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo,
&Plan.getVectorTripCount())) {
+ assert(ResumePhi->getOpcode() == VPInstruction::ResumePhi &&
+ "Expected a ResumePhi");
+ IVEndValues[WideIVR] = ResumePhi->getOperand(0);
ScalarPhiIRI->addOperand(ResumePhi);
continue;
}
@@ -9140,65 +9027,6 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan) {
}
}
-/// Return true if \p VPV is an optimizable IV or IV use. That is, if \p VPV is
-/// either an untruncated wide induction, or if it increments a wide induction
-/// by its step.
-static bool isOptimizableIVOrUse(VPValue *VPV) {
- VPRecipeBase *Def = VPV->getDefiningRecipe();
- if (!Def)
- return false;
- auto *WideIV = dyn_cast<VPWidenInductionRecipe>(Def);
- if (WideIV) {
- // VPV itself is a wide induction, separately compute the end value for exit
- // users if it is not a truncated IV.
- return isa<VPWidenPointerInductionRecipe>(WideIV) ||
- !cast<VPWidenIntOrFpInductionRecipe>(WideIV)->getTruncInst();
- }
-
- // Check if VPV is an optimizable induction increment.
- if (Def->getNumOperands() != 2)
- return false;
- WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
- if (!WideIV)
- WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
- if (!WideIV)
- return false;
-
- using namespace VPlanPatternMatch;
- auto &ID = WideIV->getInductionDescriptor();
-
- // Check if VPV increments the induction by the induction step.
- VPValue *IVStep = WideIV->getStepValue();
- switch (ID.getInductionOpcode()) {
- case Instruction::Add:
- return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
- m_Specific(IVStep)));
- case Instruction::FAdd:
- return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
- m_Specific(IVStep)));
- case Instruction::FSub:
- return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
- m_Specific(IVStep)));
- case Instruction::Sub: {
- // IVStep will be the negated step of the subtraction. Check if Step == -1 *
- // IVStep.
- VPValue *Step;
- if (!match(VPV, m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
- !Step->isLiveIn() || !IVStep->isLiveIn())
- return false;
- auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
- auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
- return StepCI && IVStepCI &&
- StepCI->getValue() == (-1 * IVStepCI->getValue());
- }
- default:
- return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
- match(VPV, m_GetElementPtr(m_Specific(WideIV),
- m_Specific(WideIV->getStepValue())));
- }
- llvm_unreachable("should have been covered by switch above");
-}
-
// Collect VPIRInstructions for phis in the exit blocks that are modeled
// in VPlan and add the exiting VPValue as operand. Some exiting values are not
// modeled explicitly yet and won't be included. Those are un-truncated
@@ -9228,12 +9056,6 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder,
}
Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB);
VPValue *V = Builder.getVPValueOrAddLiveIn(IncomingValue);
- // Exit values for inductions are computed and updated outside of VPlan
- // and independent of induction recipes.
- // TODO: Compute induction exit values in VPlan.
- if (isOptimizableIVOrUse(V) &&
- ExitVPBB->getSinglePredecessor() == MiddleVPBB)
- continue;
ExitUsersToFix.insert(ExitIRI);
ExitIRI->addOperand(V);
}
@@ -9253,6 +9075,7 @@ addUsersInExitBlocks(VPlan &Plan,
auto *MiddleVPBB = Plan.getMiddleBlock();
VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
// Introduce extract for exiting values and update the VPIRInstructions
// modeling the corresponding LCSSA phis.
@@ -9574,7 +9397,8 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanTransforms::handleUncountableEarlyExit(
*Plan, *PSE.getSE(), OrigLoop, UncountableExitingBlock, RecipeBuilder);
}
- addScalarResumePhis(RecipeBuilder, *Plan);
+ DenseMap<VPValue *, VPValue *> IVEndValues;
+ addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
SetVector<VPIRInstruction *> ExitUsersToFix =
collectUsersInExitBlocks(OrigLoop, RecipeBuilder, *Plan);
addExitUsersForFirstOrderRecurrences(*Plan, ExitUsersToFix);
@@ -9657,6 +9481,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
VPlanTransforms::addActiveLaneMask(*Plan, ForControlFlow,
WithoutRuntimeCheck);
}
+ VPlanTransforms::optimizeInductionExitUsers(*Plan, IVEndValues);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
@@ -9708,7 +9533,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
- addScalarResumePhis(RecipeBuilder, *Plan);
+ DenseMap<VPValue *, VPValue *> IVEndValues;
+ // TODO: IVEndValues are not used yet in the native path, to optimize exit
+ // values.
+ addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
return Plan;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 784cee6ed4b06c..db45ad8aadbbe3 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1422,6 +1422,12 @@ class VPIRInstruction : public VPRecipeBase {
"Op must be an operand of the recipe");
return true;
}
+
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
};
/// VPWidenRecipe is a recipe for producing a widened instruction using the
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a04ad1b3705373..9febd612c644e1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -667,6 +667,131 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
}
}
+/// Check if \p VPV is an untruncated wide induction, either before or after the
+/// increment. If so return the header IV (before the increment), otherwise
+/// return null.
+static VPWidenInductionRecipe *getOptimizableIVOf(VPValue *VPV) {
+ auto *WideIV = dyn_cast<VPWidenInductionRecipe>(VPV);
+ if (WideIV) {
+ // VPV itself is a wide induction, separately compute the end value for exit
+ // users if it is not a truncated IV.
+ auto *IntOrFpIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(WideIV);
+ return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
+ }
+
+ // Check if VPV is an optimizable induction increment.
+ VPRecipeBase *Def = VPV->getDefiningRecipe();
+ if (!Def || Def->getNumOperands() != 2)
+ return nullptr;
+ WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(0));
+ if (!WideIV)
+ WideIV = dyn_cast<VPWidenInductionRecipe>(Def->getOperand(1));
+ if (!WideIV)
+ return nullptr;
+
+ auto IsWideIVInc = [&]() {
+ using namespace VPlanPatternMatch;
+ auto &ID = WideIV->getInductionDescriptor();
+
+ // Check if VPV increments the induction by the induction step.
+ VPValue *IVStep = WideIV->getStepValue();
+ switch (ID.getInductionOpcode()) {
+ case Instruction::Add:
+ return match(VPV, m_c_Binary<Instruction::Add>(m_Specific(WideIV),
+ m_Specific(IVStep)));
+ case Instruction::FAdd:
+ return match(VPV, m_c_Binary<Instruction::FAdd>(m_Specific(WideIV),
+ m_Specific(IVStep)));
+ case Instruction::FSub:
+ return match(VPV, m_Binary<Instruction::FSub>(m_Specific(WideIV),
+ m_Specific(IVStep)));
+ case Instruction::Sub: {
+ // IVStep will be the negated step of the subtraction. Check if Step == -1
+ // * IVStep.
+ VPValue *Step;
+ if (!match(VPV,
+ m_Binary<Instruction::Sub>(m_VPValue(), m_VPValue(Step))) ||
+ !Step->isLiveIn() || !IVStep->isLiveIn())
+ return false;
+ auto *StepCI = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
+ auto *IVStepCI = dyn_cast<ConstantInt>(IVStep->getLiveInIRValue());
+ return StepCI && IVStepCI &&
+ StepCI->getValue() == (-1 * IVStepCI->getValue());
+ }
+ default:
+ return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
+ match(VPV, m_GetElementPtr(m_Specific(WideIV),
+ m_Specific(WideIV->getStepValue())));
+ }
+ llvm_unreachable("should have been covered by switch above");
+ };
+ return IsWideIVInc() ? WideIV : nullptr;
+}
+
+void VPlanTransforms::optimizeInductionExitUsers(
+ VPlan &Plan, DenseMap<VPValue *, VPValue *> &EndValues) {
+ using namespace VPlanPatternMatch;
+ SmallVector<VPIRBasicBlock *> ExitVPBBs(Plan.getExitBlocks());
+ if (ExitVPBBs.size() != 1)
+ return;
+
+ VPIRBasicBlock *ExitVPBB = ExitVPBBs[0];
+ VPBlockBase *PredVPBB = ExitVPBB->getSinglePredecessor();
+ if (!PredVPBB)
+ return;
+ assert(PredVPBB == Plan.getMiddleBlock() &&
+ "predecessor must be the middle block");
+
+ VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+ VPBuilder B(Plan.getMiddleBlock()->getTerminator());
+ for (VPRecipeBase &R : *ExitVPBB) {
+ auto *ExitIRI = cast<VPIRInstruction>(&R);
+ if (!isa<PHINode>(ExitIRI->getInstruction()))
+ break;
+
+ VPValue *Incoming;
+ if (!match(ExitIRI->getOperand(0),
+ m_VPInstruction<VPInstruction::ExtractFromEnd>(
+ m_VPValue(Incoming), m_SpecificInt(1))))
+ continue;
+
+ auto *WideIV = getOptimizableIVOf(Incoming);
+ if (!WideIV)
+ continue;
+ VPValue *EndValue = EndValues.lookup(WideIV);
+ assert(EndValue && "end value must have been pre-computed");
+
+ if (Incoming != WideIV) {
+ ExitIRI->setOperand(0, EndValue);
+ continue;
+ }
+
+ VPValue *Escape = nullptr;
+ VPValue *Step = WideIV->getStepValue();
+ Type *ScalarTy = TypeInfo.inferScalarType(WideIV);
+ if (ScalarTy->isIntegerTy()) {
+ Escape =
+ B.createNaryOp(Instruction::Sub, {EndValue, Step}, {}, "ind.escape");
+ } else if (ScalarTy->isPointerTy()) {
+ auto *Zero = Plan.getOrAddLiveIn(
+ ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
+ Escape = B.createPtrAdd(EndValue,
+ B.createNaryOp(Instruction::Sub, {Zero, Step}),
+ {}, "ind.escape");
+ } else if (ScalarTy->isFloatingPointTy()) {
+ const auto &ID = WideIV->getInductionDescriptor();
+ Escape = B.createNaryOp(
+ ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
+ ? Instruction::FSub
+ : Instruction::FAdd,
+ {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
+ } else {
+ llvm_unreachable("all possible induction types must be handled");
+ }
+ ExitIRI->setOperand(0, Escape);
+ }
+}
+
/// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
/// them with already existing recipes expanding the same SCEV expression.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan) {
@@ -1318,6 +1443,7 @@ void VPlanTransforms::optimize(VPlan &Plan) {
removeRedundantInductionCasts(Plan);
simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType());
+ removeDeadRecipes(Plan);
legalizeAndOptimizeInductions(Plan);
removeRedundantExpandSCEVRecipes(Plan);
simplifyRecipes(Plan, Plan.getCanonicalIV()->getScalarType());
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index fddde868911665..a751b8b5e8dc59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -137,6 +137,13 @@ struct VPlanTransforms {
/// Lower abstract recipes to concrete ones, that can be codegen'd.
static void convertToConcreteRecipes(VPlan &Plan);
+
+ /// If there's a single exit block, optimize its phi recipes that use exiting
+ /// IV values by feeding them precomputed end values instead, possibly taken
+ /// one step backwards.
+ static void
+ optimizeInductionExitUsers(VPlan &Plan,
+ DenseMap<VPValue *, VPValue *> &EndValues);
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
index 777944264f458e..1395202c10d158 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h
@@ -45,8 +45,8 @@ inline bool isUniformAfterVectorization(const VPValue *VPV) {
assert(Def && "Must have definition for value defined inside vector region");
if (auto *Rep = dyn_cast<VPReplicateRecipe>(Def))
return Rep->isUniform();
- if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
- return all_of(GEP->operands(), isUniformAfterVectorization);
+ if (isa<VPWidenGEPRecipe, VPDerivedIVRecipe>(Def))
+ return all_of(Def->operands(), isUniformAfterVectorization);
if (auto *VPI = dyn_cast<VPInstruction>(Def))
return VPI->isSingleScalar() || VPI->isVectorToScalar();
// VPExpandSCEVRecipes must be placed in the entry and are alway uniform.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index 951d833fa941e8..f630f4f21e065f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -230,7 +230,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK: LV: Loop does not require scalar epilogue
;
entry:
%cmp7 = icmp sgt i32 %n, 0
@@ -480,7 +479,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK: IR %indvars.iv.next = add nsw i64 %indvars.iv, -1
; CHECK-NEXT: No successors
; CHECK-NEXT: }
-; CHECK: LV: Loop does not require scalar epilogue
;
entry:
%cmp7 = icmp sgt i32 %n, 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
index 7b29d0ef7cbb50..6c97ab362fc869 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/multi-exit-cost.ll
@@ -29,21 +29,7 @@ define i64 @test_value_in_exit_compare_chain_used_outside(ptr %src, i64 %x, i64
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i8> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 5
-; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 6
-; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 7
; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP11]], 1
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP12]], 1
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP13]], 1
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP14]], 1
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP15]], 1
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP16]], 1
-; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP17]], 1
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP18]]
; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[TMP27]], i32 -7
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 3e61546da2cebc..eb1dc9debc6b58 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -860,6 +860,126 @@ exit:
ret float %add
}
+define float @fp_postinc_use_fadd_ops_swapped(float %init, ptr noalias nocapture %A, i64 %N, float %fpinc) {
+; VEC-LABEL: define float @fp_postinc_use_fadd_ops_swapped(
+; VEC-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) {
+; VEC-NEXT: [[ENTRY:.*]]:
+; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VEC: [[VECTOR_PH]]:
+; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
+; VEC-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
+; VEC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
+; VEC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
+; VEC-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT1]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> <float 0.000000e+00, float 1.000000e+00>, [[DOTSPLAT2]]
+; VEC-NEXT: [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP2]]
+; VEC-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
+; VEC-NEXT: [[DOTSPLATINSERT3:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i64 0
+; VEC-NEXT: [[DOTSPLAT4:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT3]], <2 x float> poison, <2 x i32> zeroinitializer
+; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
+; VEC: [[VECTOR_BODY]]:
+; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VEC-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; VEC-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP4]]
+; VEC-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0
+; VEC-NEXT: store <2 x float> [[VEC_IND]], ptr [[TMP6]], align 4
+; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], [[DOTSPLAT4]]
+; VEC-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; VEC-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; VEC: [[MIDDLE_BLOCK]]:
+; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VEC: [[SCALAR_PH]]:
+; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VEC-NEXT: [[BC_RESUME_VAL5:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
+; VEC-NEXT: br label %[[LOOP:.*]]
+; VEC: [[LOOP]]:
+; VEC-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VEC-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; VEC-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; VEC-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
+; VEC-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]]
+; VEC-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; VEC-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
+; VEC: [[EXIT]]:
+; VEC-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
+; VEC-NEXT: ret float [[ADD_LCSSA]]
+;
+; INTERLEAVE-LABEL: define float @fp_postinc_use_fadd_ops_swapped(
+; INTERLEAVE-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) {
+; INTERLEAVE-NEXT: [[ENTRY:.*]]:
+; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
+; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; INTERLEAVE: [[VECTOR_PH]]:
+; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
+; INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; INTERLEAVE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]]
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = fadd fast float [[INIT]], [[TMP0]]
+; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
+; INTERLEAVE: [[VECTOR_BODY]]:
+; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[DOTCAST1:%.*]] = sitofp i64 [[INDEX]] to float
+; INTERLEAVE-NEXT: [[TMP4:%.*]] = fmul fast float [[FPINC]], [[DOTCAST1]]
+; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = fadd fast float [[INIT]], [[TMP4]]
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = fmul fast float 0.000000e+00, [[FPINC]]
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP5]]
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = fmul fast float 1.000000e+00, [[FPINC]]
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = fadd fast float [[OFFSET_IDX]], [[TMP7]]
+; INTERLEAVE-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP2]]
+; INTERLEAVE-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP3]]
+; INTERLEAVE-NEXT: store float [[TMP6]], ptr [[TMP9]], align 4
+; INTERLEAVE-NEXT: store float [[TMP8]], ptr [[TMP10]], align 4
+; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; INTERLEAVE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; INTERLEAVE-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; INTERLEAVE: [[MIDDLE_BLOCK]]:
+; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; INTERLEAVE: [[SCALAR_PH]]:
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; INTERLEAVE-NEXT: [[BC_RESUME_VAL2:%.*]] = phi float [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[INIT]], %[[ENTRY]] ]
+; INTERLEAVE-NEXT: br label %[[LOOP:.*]]
+; INTERLEAVE: [[LOOP]]:
+; INTERLEAVE-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; INTERLEAVE-NEXT: [[FP_IV:%.*]] = phi float [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP]] ]
+; INTERLEAVE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
+; INTERLEAVE-NEXT: store float [[FP_IV]], ptr [[GEP_A]], align 4
+; INTERLEAVE-NEXT: [[ADD]] = fadd fast float [[FPINC]], [[FP_IV]]
+; INTERLEAVE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; INTERLEAVE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}}
+; INTERLEAVE: [[EXIT]]:
+; INTERLEAVE-NEXT: [[ADD_LCSSA:%.*]] = phi float [ [[ADD]], %[[LOOP]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: ret float [[ADD_LCSSA]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %fp.iv = phi float [ %init, %entry ], [ %add, %loop ]
+ %gep.A = getelementptr inbounds float, ptr %A, i64 %iv
+ store float %fp.iv, ptr %gep.A, align 4
+ %add = fadd fast float %fpinc, %fp.iv
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %N
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret float %add
+}
+
define float @fp_postinc_use_fsub(float %init, ptr noalias nocapture %A, i64 %N, float %fpinc) {
; VEC-LABEL: define float @fp_postinc_use_fsub(
; VEC-SAME: float [[INIT:%.*]], ptr noalias nocapture [[A:%.*]], i64 [[N:%.*]], float [[FPINC:%.*]]) {
diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
index 085438aa80f246..6e542bd873b8c3 100644
--- a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
+++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll
@@ -532,6 +532,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 4, i64 5, i64 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[TMP0]]
@@ -545,11 +546,13 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[TMP6:%.*]] = xor <4 x i1> [[TMP5]], splat (i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.split:
; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
@@ -570,7 +573,7 @@ define i64 @diff_exit_block_pre_inc_use2() {
; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP1]] ], [ 67, [[MIDDLE_SPLIT]] ]
; CHECK-NEXT: ret i64 [[RETVAL1]]
; CHECK: loop.end:
-; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ 66, [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[RETVAL2]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
index d7d7d5d9c5da0e..50c1f74d2aacc4 100644
--- a/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
+++ b/llvm/test/Transforms/LoopVectorize/unused-blend-mask-for-first-operand.ll
@@ -84,8 +84,6 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 %
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -94,8 +92,8 @@ define void @test_not_first_lane_only_wide_compare(ptr %A, ptr noalias %B, i16 %
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[A]], i16 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP2]], align 2
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ult <4 x i16> [[WIDE_LOAD]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP5]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i16 [[TMP3]], [[X]]
; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP4]], ptr poison, ptr [[B]]
; CHECK-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP12]], align 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0