[llvm] 6b8d19d - Recommit "[VPlan] Switch to checking sinking legality for recurrences in VPlan."
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 20 01:31:36 PDT 2023
Author: Florian Hahn
Date: 2023-04-20T09:31:16+01:00
New Revision: 6b8d19d2b57ecd45eb2ff1964798f3cd8c907ac4
URL: https://github.com/llvm/llvm-project/commit/6b8d19d2b57ecd45eb2ff1964798f3cd8c907ac4
DIFF: https://github.com/llvm/llvm-project/commit/6b8d19d2b57ecd45eb2ff1964798f3cd8c907ac4.diff
LOG: Recommit "[VPlan] Switch to checking sinking legality for recurrences in VPlan."
This reverts the revert commit 3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5.
The new version of the patch adds a set to avoid duplicating work in
isFixedOrderRecurrence, which was previously done through the removed
SinkAfter map.
Original commit message:
Building on D142885 and D142589, retire the SinkAfter map from the
recurrence handling code. It is replaced by checking whether it is
possible to sink all users of a recurrence directly in VPlan. This
results in simpler code overall and allows to handle additional cases
(see the improvements in @test_crash).
Depends on D142885.
Depends on D142589.
Reviewed By: Ayal
Differential Revision: https://reviews.llvm.org/D142886
Added:
Modified:
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 696d5e290c16d..ad936d213134c 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -186,14 +186,11 @@ class RecurrenceDescriptor {
/// previous iteration (e.g. if the value is defined in the previous
/// iteration, we refer to it as first-order recurrence, if it is defined in
/// the iteration before the previous, we refer to it as second-order
- /// recurrence and so on). \p SinkAfter includes pairs of instructions where
- /// the first will be rescheduled to appear after the second if/when the loop
- /// is vectorized. It may be augmented with additional pairs if needed in
- /// order to handle Phi as a first-order recurrence.
- static bool
- isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- MapVector<Instruction *, Instruction *> &SinkAfter,
- DominatorTree *DT);
+ /// recurrence and so on). Note that this function optimistically assumes that
+ /// uses of the recurrence can be re-ordered if necessary and users need to
+ /// check and perform the re-ordering.
+ static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT);
RecurKind getRecurrenceKind() const { return Kind; }
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 4514e6000e597..1863e2e65553f 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -516,10 +516,6 @@ class LoopVectorizationLegality {
/// Holds the phi nodes that are fixed-order recurrences.
RecurrenceSet FixedOrderRecurrences;
- /// Holds instructions that need to sink past other instructions to handle
- /// fixed-order recurrences.
- MapVector<Instruction *, Instruction *> SinkAfter;
-
/// Holds the widest induction type encountered.
Type *WidestIndTy = nullptr;
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 22b9f64ef88d8..30cb3adc8b809 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -927,9 +927,8 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
-bool RecurrenceDescriptor::isFixedOrderRecurrence(
- PHINode *Phi, Loop *TheLoop,
- MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -965,8 +964,7 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
}
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
- SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
return false;
// Ensure every user of the phi node (recursively) is dominated by the
@@ -975,27 +973,16 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
// loop.
// TODO: Consider extending this sinking to handle memory instructions.
- // We optimistically assume we can sink all users after Previous. Keep a set
- // of instructions to sink after Previous ordered by dominance in the common
- // basic block. It will be applied to SinkAfter if all users can be sunk.
- auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
- return A->comesBefore(B);
- };
- std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
- CompareByComesBefore);
-
+ SmallPtrSet<Value *, 8> Seen;
BasicBlock *PhiBB = Phi->getParent();
SmallVector<Instruction *, 8> WorkList;
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
- // Already sunk SinkCandidate.
- if (SinkCandidate->getParent() == PhiBB &&
- InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
- return true;
-
// Cyclic dependence.
if (Previous == SinkCandidate)
return false;
+ if (!Seen.insert(SinkCandidate).second)
+ return true;
if (DT->dominates(Previous,
SinkCandidate)) // We already are good w/o sinking.
return true;
@@ -1005,55 +992,12 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
return false;
- // Avoid sinking an instruction multiple times (if multiple operands are
- // fixed order recurrences) by sinking once - after the latest 'previous'
- // instruction.
- auto It = SinkAfter.find(SinkCandidate);
- if (It != SinkAfter.end()) {
- auto *OtherPrev = It->second;
- // Find the earliest entry in the 'sink-after' chain. The last entry in
- // the chain is the original 'Previous' for a recurrence handled earlier.
- auto EarlierIt = SinkAfter.find(OtherPrev);
- while (EarlierIt != SinkAfter.end()) {
- Instruction *EarlierInst = EarlierIt->second;
- EarlierIt = SinkAfter.find(EarlierInst);
- // Bail out if order has not been preserved.
- if (EarlierIt != SinkAfter.end() &&
- !DT->dominates(EarlierInst, OtherPrev))
- return false;
- OtherPrev = EarlierInst;
- }
- // Bail out if order has not been preserved.
- if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
- return false;
-
- // SinkCandidate is already being sunk after an instruction after
- // Previous. Nothing left to do.
- if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
- return true;
-
- // If there are other instructions to be sunk after SinkCandidate, remove
- // and re-insert SinkCandidate can break those instructions. Bail out for
- // simplicity.
- if (any_of(SinkAfter,
- [SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
- return P.second == SinkCandidate;
- }))
- return false;
-
- // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
- // re-sunk to Previous, instead of sinking to OtherPrev. Remove
- // SinkCandidate from SinkAfter to ensure it's insert position is updated.
- SinkAfter.erase(SinkCandidate);
- }
-
// If we reach a PHI node that is not dominated by Previous, we reached a
// header PHI. No need for sinking.
if (isa<PHINode>(SinkCandidate))
return true;
// Sink User tentatively and check its users
- InstrsToSink.insert(SinkCandidate);
WorkList.push_back(SinkCandidate);
return true;
};
@@ -1068,11 +1012,6 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
}
}
- // We can sink all users of Phi. Update the mapping.
- for (Instruction *I : InstrsToSink) {
- SinkAfter[I] = Previous;
- Previous = I;
- }
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index f45d800b28bab..3a868e8625a85 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -743,8 +743,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
- SinkAfter, DT)) {
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
AllowedExit.insert(Phi);
FixedOrderRecurrences.insert(Phi);
continue;
@@ -917,18 +916,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
- // For fixed order recurrences, we use the previous value (incoming value from
- // the latch) to check if it dominates all users of the recurrence. Bail out
- // if we have to sink such an instruction for another recurrence, as the
- // dominance requirement may not hold after sinking.
- BasicBlock *LoopLatch = TheLoop->getLoopLatch();
- if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
- Instruction *V =
- cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
- return SinkAfter.contains(V);
- }))
- return false;
-
// Now we know the widest induction type, check if our found induction
// is the same size. If it's not, unset it here and InnerLoopVectorizer
// will create another.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index daebffb16b5a9..254af15523cc4 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9055,7 +9055,8 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Sink users of fixed-order recurrence past the recipe defining the previous
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
- VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder);
+ if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
+ return std::nullopt;
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index c657b7615cc68..ec1a1a8307e96 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -657,9 +657,10 @@ static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
return VPDT.properlyDominates(ParentA, ParentB);
}
-// Sink users of \p FOR after the recipe defining the previous value \p Previous
-// of the recurrence.
-static void
+/// Sink users of \p FOR after the recipe defining the previous value \p
+/// Previous of the recurrence. \returns true if all users of \p FOR could be
+/// re-arranged as needed or false if it is not possible.
+static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Previous,
VPDominatorTree &VPDT) {
@@ -668,15 +669,18 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
SmallPtrSet<VPRecipeBase *, 8> Seen;
Seen.insert(Previous);
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
- assert(
- SinkCandidate != Previous &&
- "The previous value cannot depend on the users of the recurrence phi.");
+ // The previous value must not depend on the users of the recurrence phi. In
+ // that case, FOR is not a fixed order recurrence.
+ if (SinkCandidate == Previous)
+ return false;
+
if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
!Seen.insert(SinkCandidate).second ||
properlyDominates(Previous, SinkCandidate, VPDT))
- return;
+ return true;
WorkList.push_back(SinkCandidate);
+ return true;
};
// Recursively sink users of FOR after Previous.
@@ -687,7 +691,8 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
"only recipes with a single defined value expected");
for (VPUser *User : Current->getVPSingleValue()->users()) {
if (auto *R = dyn_cast<VPRecipeBase>(User))
- TryToPushSinkCandidate(R);
+ if (!TryToPushSinkCandidate(R))
+ return false;
}
}
@@ -704,9 +709,10 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
}
+ return true;
}
-void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
+bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
VPBuilder &Builder) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -729,7 +735,8 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
}
- sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT);
+ if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
+ return false;
// Introduce a recipe to combine the incoming and previous values of a
// fixed-order recurrence.
@@ -748,4 +755,5 @@ void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
// all users.
RecurSplice->setOperand(0, FOR);
}
+ return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index e5bd1a42f8778..430628cb068d8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -77,7 +77,10 @@ struct VPlanTransforms {
/// to combine the value from the recurrence phis and previous values. The
/// current implementation assumes all users can be sunk after the previous
/// value, which is enforced by earlier legality checks.
- static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+ /// \returns true if all users of fixed-order recurrences could be re-arranged
+ /// as needed or false if it is not possible. In the latter case, \p Plan is
+ /// not valid.
+ static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 5e235077ac743..371dc7f5922f5 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -647,12 +647,40 @@ exit:
ret ptr %for.1
}
-; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in
-; loop vectorizer.
-define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) {
-; CHECK-LABEL: @test_crash
-; CHECK-NOT: vector.body:
-; CHECK: ret
+; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and
+; it needs to be sunk past the previous value for both recurrences.
+define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
+; CHECK-LABEL: @test_resinking_required(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ]
+; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
+; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
+; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
+; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph
+;
Entry:
br label %Loop
@@ -661,7 +689,7 @@ Loop:
%for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ]
%for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ]
%iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ]
- %USE_2_INDVARS = fdiv double %for.3, %for.1
+ %USE_2_FORS = fdiv double %for.3, %for.1
%div = fdiv double 0.000000e+00, %for.1
%l1 = load double, ptr %a, align 8
%iv.next= add nuw nsw i64 %iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index 3d07e2cb2bc4c..aea49ac225de7 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -221,22 +221,55 @@ exit:
define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) {
; CHECK-LABEL: @test_pr54233_for_depend_on_each_other(
; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], <i32 10, i32 10, i32 10, i32 10>
+; CHECK-NEXT: [[TMP4]] = xor <4 x i32> <i32 12, i32 12, i32 12, i32 12>, [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]]
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255
; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]]
-; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]]
-; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
+; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]]
+; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list