[llvm] b8709a9 - [LV] Support fixed order recurrences.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 18 11:16:13 PDT 2022
Author: Florian Hahn
Date: 2022-08-18T19:15:52+01:00
New Revision: b8709a9d03f8ba28146f612aa47cc25a44d0723e
URL: https://github.com/llvm/llvm-project/commit/b8709a9d03f8ba28146f612aa47cc25a44d0723e
DIFF: https://github.com/llvm/llvm-project/commit/b8709a9d03f8ba28146f612aa47cc25a44d0723e.diff
LOG: [LV] Support fixed order recurrences.
If the incoming previous value of a fixed-order recurrence is a phi in
the header, go through incoming values from the latch until we find a
non-phi value. Use this as the new Previous, all uses in the header
will be dominated by the original phi, but need to be moved after
the non-phi previous value.
At the moment, fixed-order recurrences are modeled as a chain of
first-order recurrences.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D119661
Added:
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
Modified:
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index a23b64ef20cce..696d5e290c16d 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -180,15 +180,18 @@ class RecurrenceDescriptor {
DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
- /// Returns true if Phi is a first-order recurrence. A first-order recurrence
+ /// Returns true if Phi is a fixed-order recurrence. A fixed-order recurrence
/// is a non-reduction recurrence relation in which the value of the
- /// recurrence in the current loop iteration equals a value defined in the
- /// previous iteration. \p SinkAfter includes pairs of instructions where the
- /// first will be rescheduled to appear after the second if/when the loop is
- /// vectorized. It may be augmented with additional pairs if needed in order
- /// to handle Phi as a first-order recurrence.
+ /// recurrence in the current loop iteration equals a value defined in a
+ /// previous iteration (e.g. if the value is defined in the previous
+ /// iteration, we refer to it as first-order recurrence, if it is defined in
+ /// the iteration before the previous, we refer to it as second-order
+ /// recurrence and so on). \p SinkAfter includes pairs of instructions where
+ /// the first will be rescheduled to appear after the second if/when the loop
+ /// is vectorized. It may be augmented with additional pairs if needed in
+ /// order to handle Phi as a first-order recurrence.
static bool
- isFirstOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
MapVector<Instruction *, Instruction *> &SinkAfter,
DominatorTree *DT);
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 91f5676e0d1f4..728d54d8c1ff5 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -291,10 +291,10 @@ class LoopVectorizationLegality {
/// Returns the induction variables found in the loop.
const InductionList &getInductionVars() const { return Inductions; }
- /// Return the first-order recurrences found in the loop.
- RecurrenceSet &getFirstOrderRecurrences() { return FirstOrderRecurrences; }
+ /// Return the fixed-order recurrences found in the loop.
+ RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
- /// Return the set of instructions to sink to handle first-order recurrences.
+ /// Return the set of instructions to sink to handle fixed-order recurrences.
MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
/// Returns the widest induction type.
@@ -332,8 +332,8 @@ class LoopVectorizationLegality {
/// Returns True if PN is a reduction variable in this loop.
bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); }
- /// Returns True if Phi is a first-order recurrence in this loop.
- bool isFirstOrderRecurrence(const PHINode *Phi) const;
+ /// Returns True if Phi is a fixed-order recurrence in this loop.
+ bool isFixedOrderRecurrence(const PHINode *Phi) const;
/// Return true if the block BB needs to be predicated in order for the loop
/// to be vectorized.
@@ -515,11 +515,11 @@ class LoopVectorizationLegality {
/// loop body.
SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
- /// Holds the phi nodes that are first-order recurrences.
- RecurrenceSet FirstOrderRecurrences;
+ /// Holds the phi nodes that are fixed-order recurrences.
+ RecurrenceSet FixedOrderRecurrences;
/// Holds instructions that need to sink past other instructions to handle
- /// first-order recurrences.
+ /// fixed-order recurrences.
MapVector<Instruction *, Instruction *> SinkAfter;
/// Holds the widest induction type encountered.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 6d20043759c8e..d3f6c9e25ae6c 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -921,7 +921,7 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
-bool RecurrenceDescriptor::isFirstOrderRecurrence(
+bool RecurrenceDescriptor::isFixedOrderRecurrence(
PHINode *Phi, Loop *TheLoop,
MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
@@ -945,6 +945,20 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
// Get the previous value. The previous value comes from the latch edge while
// the initial value comes form the preheader edge.
auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
+
+ // If Previous is a phi in the header, go through incoming values from the
+ // latch until we find a non-phi value. Use this as the new Previous, all uses
+ // in the header will be dominated by the original phi, but need to be moved
+ // after the non-phi previous value.
+ SmallPtrSet<PHINode *, 4> SeenPhis;
+ while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
+ if (PrevPhi->getParent() != Phi->getParent())
+ return false;
+ if (!SeenPhis.insert(PrevPhi).second)
+ return false;
+ Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
+ }
+
if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
return false;
@@ -986,7 +1000,7 @@ bool RecurrenceDescriptor::isFirstOrderRecurrence(
return false;
// Avoid sinking an instruction multiple times (if multiple operands are
- // first order recurrences) by sinking once - after the latest 'previous'
+ // fixed order recurrences) by sinking once - after the latest 'previous'
// instruction.
auto It = SinkAfter.find(SinkCandidate);
if (It != SinkAfter.end()) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 0b80750fa5c52..06cc1bfb628ff 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -102,7 +102,7 @@ cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
"\nsimple Use tail-folding for simple loops (not reductions or "
"recurrences)"
"\nreductions Use tail-folding for loops containing reductions"
- "\nrecurrences Use tail-folding for loops containing first order "
+ "\nrecurrences Use tail-folding for loops containing fixed order "
"recurrences"),
cl::location(TailFoldingKindLoc));
@@ -3044,7 +3044,7 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(
TailFoldingKind Required; // Defaults to 0.
if (LVL->getReductionVars().size())
Required.add(TailFoldingKind::TFReductions);
- if (LVL->getFirstOrderRecurrences().size())
+ if (LVL->getFixedOrderRecurrences().size())
Required.add(TailFoldingKind::TFRecurrences);
if (!Required)
Required.add(TailFoldingKind::TFSimple);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f39fab1dbafda..e271c1af87ce4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -677,7 +677,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Non-header phi nodes that have outside uses can be vectorized. Add
// them to the list of allowed exits.
// Unsafe cyclic dependencies with header phis are identified during
- // legalization for reduction, induction and first order
+ // legalization for reduction, induction and fixed order
// recurrences.
AllowedExit.insert(&I);
continue;
@@ -710,7 +710,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// 3. Non-Phis with outside uses when SCEV predicates cannot be used
// outside the loop - see call to hasOutsideLoopUser in the non-phi
// handling below
- // 4. FirstOrderRecurrence phis that can possibly be handled by
+ // 4. FixedOrderRecurrence phis that can possibly be handled by
// extraction.
// By recording these, we can then reason about ways to vectorize each
// of these NotAllowedExit.
@@ -721,10 +721,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
SinkAfter, DT)) {
AllowedExit.insert(Phi);
- FirstOrderRecurrences.insert(Phi);
+ FixedOrderRecurrences.insert(Phi);
continue;
}
@@ -894,12 +894,12 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
- // For first order recurrences, we use the previous value (incoming value from
+ // For fixed order recurrences, we use the previous value (incoming value from
// the latch) to check if it dominates all users of the recurrence. Bail out
// if we have to sink such an instruction for another recurrence, as the
// dominance requirement may not hold after sinking.
BasicBlock *LoopLatch = TheLoop->getLoopLatch();
- if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
+ if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
Instruction *V =
cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
return SinkAfter.find(V) != SinkAfter.end();
@@ -1080,9 +1080,9 @@ bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
return isInductionPhi(V) || isCastedInductionVariable(V);
}
-bool LoopVectorizationLegality::isFirstOrderRecurrence(
+bool LoopVectorizationLegality::isFixedOrderRecurrence(
const PHINode *Phi) const {
- return FirstOrderRecurrences.count(Phi);
+ return FixedOrderRecurrences.count(Phi);
}
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4c0867971aa04..e09e66b8ea8ab 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -552,7 +552,7 @@ class InnerLoopVectorizer {
/// Create the exit value of first order recurrences in the middle block and
/// update their users.
- void fixFirstOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
+ void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
VPTransformState &State);
/// Create code for the loop exit value of the reduction.
@@ -3705,11 +3705,11 @@ void InnerLoopVectorizer::fixCrossIterationPHIs(VPTransformState &State) {
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
fixReduction(ReductionPhi, State);
else if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
- fixFirstOrderRecurrence(FOR, State);
+ fixFixedOrderRecurrence(FOR, State);
}
}
-void InnerLoopVectorizer::fixFirstOrderRecurrence(
+void InnerLoopVectorizer::fixFixedOrderRecurrence(
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
// This is the second phase of vectorizing first-order recurrences. An
// overview of the transformation is described below. Suppose we have the
@@ -4002,7 +4002,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// We know that the loop is in LCSSA form. We need to update the PHI nodes
// in the exit blocks. See comment on analogous loop in
- // fixFirstOrderRecurrence for a more complete explaination of the logic.
+ // fixFixedOrderRecurrence for a more complete explaination of the logic.
if (!Cost->requiresScalarEpilogue(VF))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) {
@@ -4738,7 +4738,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// First order recurrence Phi's should typically be considered
// non-uniform.
auto *OP = dyn_cast<PHINode>(OV);
- if (OP && Legal->isFirstOrderRecurrence(OP))
+ if (OP && Legal->isFixedOrderRecurrence(OP))
continue;
// If all the users of the operand are uniform, then add the
// operand into the uniform worklist.
@@ -5427,7 +5427,7 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
if (any_of(L.getHeader()->phis(),
- [&](PHINode &Phi) { return Legal->isFirstOrderRecurrence(&Phi); }))
+ [&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
return false;
// Phis with uses outside of the loop require special handling and are
@@ -7032,7 +7032,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
// First-order recurrences are replaced by vector shuffles inside the loop.
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
- if (VF.isVector() && Legal->isFirstOrderRecurrence(Phi))
+ if (VF.isVector() && Legal->isFixedOrderRecurrence(Phi))
return TTI.getShuffleCost(
TargetTransformInfo::SK_ExtractSubvector, cast<VectorType>(VectorTy),
None, VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
@@ -8509,13 +8509,18 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto Phi = dyn_cast<PHINode>(Instr)) {
if (Phi->getParent() != OrigLoop->getHeader())
return tryToBlend(Phi, Operands, Plan);
+
+ // Always record recipes for header phis. Later first-order recurrence phis
+ // can have earlier phis as incoming values.
+ recordRecipeOf(Phi);
+
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
return toVPRecipeResult(Recipe);
VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
- Legal->isFirstOrderRecurrence(Phi)) &&
- "can only widen reductions and first-order recurrences here");
+ Legal->isFixedOrderRecurrence(Phi)) &&
+ "can only widen reductions and fixed-order recurrences here");
VPValue *StartV = Operands[0];
if (Legal->isReductionVariable(Phi)) {
const RecurrenceDescriptor &RdxDesc =
@@ -8526,13 +8531,21 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
CM.isInLoopReduction(Phi),
CM.useOrderedReductions(RdxDesc));
} else {
+ // TODO: Currently fixed-order recurrences are modeled as chains of
+ // first-order recurrences. If there are no users of the intermediate
+ // recurrences in the chain, the fixed order recurrence should be modeled
+ // directly, enabling more efficient codegen.
PhiRecipe = new VPFirstOrderRecurrencePHIRecipe(Phi, *StartV);
}
// Record the incoming value from the backedge, so we can add the incoming
// value from the backedge after all recipes have been created.
- recordRecipeOf(cast<Instruction>(
- Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())));
+ auto *Inc = cast<Instruction>(
+ Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch()));
+ auto RecipeIter = Ingredient2Recipe.find(Inc);
+ if (RecipeIter == Ingredient2Recipe.end())
+ recordRecipeOf(Inc);
+
PhisToFix.push_back(PhiRecipe);
return toVPRecipeResult(PhiRecipe);
}
@@ -8597,7 +8610,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
assert(
SinkTarget != FirstInst &&
"Must find a live instruction (at least the one feeding the "
- "first-order recurrence PHI) before reaching beginning of the block");
+ "fixed-order recurrence PHI) before reaching beginning of the block");
SinkTarget = SinkTarget->getPrevNode();
assert(SinkTarget != P.first &&
"sink source equals target, no sinking required");
@@ -8982,7 +8995,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
RecipeBuilder, Range.Start);
// Introduce a recipe to combine the incoming and previous values of a
- // first-order recurrence.
+ // fixed-order recurrence.
for (VPRecipeBase &R :
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R);
@@ -8990,6 +9003,11 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
continue;
VPRecipeBase *PrevRecipe = RecurPhi->getBackedgeRecipe();
+ // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
+ // to terminate.
+ while (auto *PrevPhi =
+ dyn_cast<VPFirstOrderRecurrencePHIRecipe>(PrevRecipe))
+ PrevRecipe = PrevPhi->getBackedgeRecipe();
VPBasicBlock *InsertBlock = PrevRecipe->getParent();
auto *Region = GetReplicateRegion(PrevRecipe);
if (Region)
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
new file mode 100644
index 0000000000000..e68c3cdfd838f
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll
@@ -0,0 +1,108 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -S %s 2>&1 | FileCheck %s
+
+define void @test_chained_first_order_recurrences_1(i16* %ptr) {
+; CHECK-LABEL: 'test_chained_first_order_recurrences_1'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION
+; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next>
+; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<%8>
+; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%2>, ir<0>, ir<1>
+; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr ir<%ptr>, vp<%5>
+; CHECK-NEXT: WIDEN ir<%for.1.next> = load ir<%gep.ptr>
+; CHECK-NEXT: EMIT vp<%8> = first-order splice ir<%for.1> ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<%9> = first-order splice ir<%for.2> vp<%8>
+; CHECK-NEXT: WIDEN ir<%add> = add vp<%8>, vp<%9>
+; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add>
+; CHECK-NEXT: EMIT vp<%11> = VF * UF +(nuw) vp<%2>
+; CHECK-NEXT: EMIT branch-on-count vp<%11> vp<%1>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+;
+entry:
+ br label %loop
+
+loop:
+ %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ]
+ %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add nuw nsw i64 %iv, 1
+ %gep.ptr = getelementptr inbounds i16, i16* %ptr, i64 %iv
+ %for.1.next = load i16, i16* %gep.ptr, align 2
+ %add = add i16 %for.1, %for.2
+ store i16 %add, i16* %gep.ptr
+ %exitcond.not = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @test_chained_first_order_recurrences_3(i16* %ptr) {
+; CHECK-LABEL: 'test_chained_first_order_recurrences_3'
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%1> = vector-trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION
+; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.1> = phi ir<22>, ir<%for.1.next>
+; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.2> = phi ir<33>, vp<%9>
+; CHECK-NEXT: FIRST-ORDER-RECURRENCE-PHI ir<%for.3> = phi ir<33>, vp<%10>
+; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%2>, ir<0>, ir<1>
+; CHECK-NEXT: CLONE ir<%gep.ptr> = getelementptr ir<%ptr>, vp<%6>
+; CHECK-NEXT: WIDEN ir<%for.1.next> = load ir<%gep.ptr>
+; CHECK-NEXT: EMIT vp<%9> = first-order splice ir<%for.1> ir<%for.1.next>
+; CHECK-NEXT: EMIT vp<%10> = first-order splice ir<%for.2> vp<%9>
+; CHECK-NEXT: EMIT vp<%11> = first-order splice ir<%for.3> vp<%10>
+; CHECK-NEXT: WIDEN ir<%add.1> = add vp<%9>, vp<%10>
+; CHECK-NEXT: WIDEN ir<%add.2> = add ir<%add.1>, vp<%11>
+; CHECK-NEXT: WIDEN store ir<%gep.ptr>, ir<%add.2>
+; CHECK-NEXT: EMIT vp<%14> = VF * UF +(nuw) vp<%2>
+; CHECK-NEXT: EMIT branch-on-count vp<%14> vp<%1>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+
+; CHECK-NOT: vector.body:
+;
+entry:
+ br label %loop
+
+loop:
+ %for.1 = phi i16 [ 22, %entry ], [ %for.1.next, %loop ]
+ %for.2 = phi i16 [ 33, %entry ], [ %for.1, %loop ]
+ %for.3 = phi i16 [ 33, %entry ], [ %for.2, %loop ]
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %iv.next = add nuw nsw i64 %iv, 1
+ %gep.ptr = getelementptr inbounds i16, i16* %ptr, i64 %iv
+ %for.1.next = load i16, i16* %gep.ptr, align 2
+ %add.1 = add i16 %for.1, %for.2
+ %add.2 = add i16 %add.1, %for.3
+ store i16 %add.2, i16* %gep.ptr
+ %exitcond.not = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index fb11fbf32a8ca..6e054809fbb95 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -2,7 +2,27 @@
define void @test_chained_first_order_recurrences_1(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_1
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
;
entry:
br label %loop
@@ -25,7 +45,27 @@ exit:
define void @test_chained_first_order_recurrences_2(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_2
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: store <4 x i16> [[TMP6]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body, !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
;
entry:
br label %loop
@@ -48,7 +88,32 @@ exit:
define void @test_chained_first_order_recurrences_3(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_3
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP5:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
;
entry:
br label %loop
@@ -134,7 +199,32 @@ exit:
define void @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_3_reordered_1
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP5:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
;
entry:
br label %loop
@@ -159,7 +249,32 @@ exit:
define void @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_3_reordered_2
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP5:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
;
entry:
br label %loop
@@ -184,7 +299,32 @@ exit:
define void @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_3_for2_no_other_uses
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP5:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i16> [[TMP4]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
;
entry:
br label %loop
@@ -209,7 +349,31 @@ exit:
define void @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrences_3_for1_for2_no_other_uses
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 22>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 33>, %vector.ph ], [ [[TMP5:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5]] = shufflevector <4 x i16> [[VECTOR_RECUR1]], <4 x i16> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR2]], <4 x i16> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP6]], <i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2
;
entry:
br label %loop
@@ -233,7 +397,29 @@ exit:
define void @test_chained_first_order_recurrence_sink_users_1(double* %ptr) {
; CHECK-LABEL: @test_chained_first_order_recurrence_sink_users_1
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 1.000000e+01>, %vector.ph ], [ [[WIDE_LOAD:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 2.000000e+01>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = fadd <4 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x double> [[TMP6]], [[TMP4]]
+; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[TMP2]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 996
+; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body, !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 999, 996
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
;
entry:
br label %loop
@@ -282,8 +468,27 @@ exit:
define void @test_first_order_recurrences_and_induction(ptr %ptr) {
; CHECK-LABEL: @test_first_order_recurrences_and_induction(
-; CHECK-NOT: vector.body:
-;
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 22>, %vector.ph ], [ [[VEC_IND:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_IND]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10>
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]]
+
entry:
br label %loop
@@ -306,7 +511,26 @@ exit:
; flipped.
define void @test_first_order_recurrences_and_induction2(ptr %ptr) {
; CHECK-LABEL: @test_first_order_recurrences_and_induction2(
-; CHECK-NOT: vector.body:
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 22>, %vector.ph ], [ [[VEC_IND]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[PTR:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP3]], align 2
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[TMP1]], <i64 10, i64 10, i64 10, i64 10>
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP3]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[VEC_IND]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]]
;
entry:
br label %loop
@@ -328,7 +552,28 @@ exit:
define void @test_first_order_recurrences_and_pointer_induction1(ptr %ptr) {
; CHECK-LABEL: @test_first_order_recurrences_and_pointer_induction1(
-; CHECK-NOT: vector.body
+; CHECK: vector.ph:
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 4000
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR]], %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ <ptr poison, ptr poison, ptr poison, ptr null>, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[VECTOR_RECUR]], <4 x ptr> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
+; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]],
;
entry:
br label %loop
@@ -352,7 +597,28 @@ exit:
; of phis flipped.
define void @test_first_order_recurrences_and_pointer_induction2(ptr %ptr) {
; CHECK-LABEL: @test_first_order_recurrences_and_pointer_induction2(
-; CHECK-NOT: vector.body
+; CHECK: vector.ph:
+; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 4000
+; CHECK-NEXT: br label %vector.body
+; CHECK: vector.body:
+; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR]], %vector.ph ], [ [[PTR_IND:%.*]], %vector.body ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x ptr> [ <ptr poison, ptr poison, ptr poison, ptr null>, %vector.ph ], [ [[TMP0:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 4, i64 8, i64 12>
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x ptr> [[VECTOR_RECUR]], <4 x ptr> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[PTR]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
+; CHECK-NEXT: store <4 x ptr> [[TMP0]], ptr [[TMP4]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %middle.block, label %vector.body
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]],
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 7c8eb4c17c3cf..4eb08a105b739 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -933,18 +933,48 @@ define i32 @PR27246() {
; UNROLL-NO-IC: for.cond1.preheader:
; UNROLL-NO-IC-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NO-IC-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
+; UNROLL-NO-IC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 8
+; UNROLL-NO-IC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-IC: vector.ph:
+; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 8
+; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
+; UNROLL-NO-IC-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
+; UNROLL-NO-IC-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i32 0
+; UNROLL-NO-IC-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; UNROLL-NO-IC-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
+; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]]
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
+; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; UNROLL-NO-IC: middle.block:
+; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
+; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2
+; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
+; UNROLL-NO-IC: scalar.ph:
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-IC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-IC-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL-NO-IC: for.cond.cleanup:
; UNROLL-NO-IC-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-IC-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; UNROLL-NO-IC: for.cond1:
-; UNROLL-NO-IC-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
-; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
+; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-IC-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NO-IC-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
-; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
+; UNROLL-NO-IC-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]]
; UNROLL-NO-IC: for.cond.cleanup3:
-; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
+; UNROLL-NO-IC-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-IC-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NO-IC-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NO-IC-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
@@ -955,18 +985,40 @@ define i32 @PR27246() {
; UNROLL-NO-VF: for.cond1.preheader:
; UNROLL-NO-VF-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; UNROLL-NO-VF-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
+; UNROLL-NO-VF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 2
+; UNROLL-NO-VF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; UNROLL-NO-VF: vector.ph:
+; UNROLL-NO-VF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 2
+; UNROLL-NO-VF-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
+; UNROLL-NO-VF-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
+; UNROLL-NO-VF: vector.body:
+; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ [[E_015]], [[VECTOR_PH]] ], [ [[INDUCTION1:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[I_016]], [[INDEX]]
+; UNROLL-NO-VF-NEXT: [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
+; UNROLL-NO-VF-NEXT: [[INDUCTION1]] = add i32 [[OFFSET_IDX]], -1
+; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; UNROLL-NO-VF: middle.block:
+; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
+; UNROLL-NO-VF: scalar.ph:
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[INDUCTION1]], [[MIDDLE_BLOCK]] ]
+; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; UNROLL-NO-VF-NEXT: br label [[FOR_COND1:%.*]]
; UNROLL-NO-VF: for.cond.cleanup:
; UNROLL-NO-VF-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; UNROLL-NO-VF-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; UNROLL-NO-VF: for.cond1:
-; UNROLL-NO-VF-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
-; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
+; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; UNROLL-NO-VF-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-VF-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; UNROLL-NO-VF-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
-; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
+; UNROLL-NO-VF-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP8:![0-9]+]]
; UNROLL-NO-VF: for.cond.cleanup3:
-; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
+; UNROLL-NO-VF-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[INDUCTION]], [[MIDDLE_BLOCK]] ]
; UNROLL-NO-VF-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; UNROLL-NO-VF-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; UNROLL-NO-VF-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
@@ -977,18 +1029,46 @@ define i32 @PR27246() {
; SINK-AFTER: for.cond1.preheader:
; SINK-AFTER-NEXT: [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
; SINK-AFTER-NEXT: [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
+; SINK-AFTER-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[I_016]], 4
+; SINK-AFTER-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; SINK-AFTER: vector.ph:
+; SINK-AFTER-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[I_016]], 4
+; SINK-AFTER-NEXT: [[N_VEC:%.*]] = sub i32 [[I_016]], [[N_MOD_VF]]
+; SINK-AFTER-NEXT: [[IND_END:%.*]] = sub i32 [[I_016]], [[N_VEC]]
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[E_015]], i32 3
+; SINK-AFTER-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I_016]], i32 0
+; SINK-AFTER-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; SINK-AFTER-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 -1, i32 -2, i32 -3>
+; SINK-AFTER-NEXT: br label [[VECTOR_BODY:%.*]]
+; SINK-AFTER: vector.body:
+; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[VEC_IND]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; SINK-AFTER-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; SINK-AFTER-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
+; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; SINK-AFTER: middle.block:
+; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]]
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3
+; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2
+; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
+; SINK-AFTER: scalar.ph:
+; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; SINK-AFTER-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
; SINK-AFTER-NEXT: br label [[FOR_COND1:%.*]]
; SINK-AFTER: for.cond.cleanup:
; SINK-AFTER-NEXT: [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
; SINK-AFTER-NEXT: ret i32 [[E_1_LCSSA_LCSSA]]
; SINK-AFTER: for.cond1:
-; SINK-AFTER-NEXT: [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
-; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
+; SINK-AFTER-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
+; SINK-AFTER-NEXT: [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; SINK-AFTER-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
; SINK-AFTER-NEXT: [[DEC]] = add nsw i32 [[K_0]], -1
-; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
+; SINK-AFTER-NEXT: br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]], !llvm.loop [[LOOP9:![0-9]+]]
; SINK-AFTER: for.cond.cleanup3:
-; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
+; SINK-AFTER-NEXT: [[E_1_LCSSA]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_COND1]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
; SINK-AFTER-NEXT: [[INC]] = add nuw nsw i32 [[I_016]], 1
; SINK-AFTER-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
; SINK-AFTER-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
More information about the llvm-commits
mailing list