[llvm] 3d8ed8b - Revert "[VPlan] Switch to checking sinking legality for recurrences in VPlan."
Manoj Gupta via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 17 12:19:48 PDT 2023
Author: Manoj Gupta
Date: 2023-04-17T12:19:36-07:00
New Revision: 3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5
URL: https://github.com/llvm/llvm-project/commit/3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5
DIFF: https://github.com/llvm/llvm-project/commit/3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5.diff
LOG: Revert "[VPlan] Switch to checking sinking legality for recurrences in VPlan."
This reverts commit 7fc0b3049df532fce726d1ff6869a9f6e3183780.
Causes a clang hang when building xz utils, github issue #62187.
Added:
Modified:
llvm/include/llvm/Analysis/IVDescriptors.h
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
llvm/lib/Analysis/IVDescriptors.cpp
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/lib/Transforms/Vectorize/VPlanTransforms.h
llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index ad936d213134c..696d5e290c16d 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -186,11 +186,14 @@ class RecurrenceDescriptor {
/// previous iteration (e.g. if the value is defined in the previous
/// iteration, we refer to it as first-order recurrence, if it is defined in
/// the iteration before the previous, we refer to it as second-order
- /// recurrence and so on). Note that this function optimistically assumes that
- /// uses of the recurrence can be re-ordered if necessary and users need to
- /// check and perform the re-ordering.
- static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DominatorTree *DT);
+ /// recurrence and so on). \p SinkAfter includes pairs of instructions where
+ /// the first will be rescheduled to appear after the second if/when the loop
+ /// is vectorized. It may be augmented with additional pairs if needed in
+ /// order to handle Phi as a first-order recurrence.
+ static bool
+ isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ MapVector<Instruction *, Instruction *> &SinkAfter,
+ DominatorTree *DT);
RecurKind getRecurrenceKind() const { return Kind; }
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 1863e2e65553f..4514e6000e597 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -516,6 +516,10 @@ class LoopVectorizationLegality {
/// Holds the phi nodes that are fixed-order recurrences.
RecurrenceSet FixedOrderRecurrences;
+ /// Holds instructions that need to sink past other instructions to handle
+ /// fixed-order recurrences.
+ MapVector<Instruction *, Instruction *> SinkAfter;
+
/// Holds the widest induction type encountered.
Type *WidestIndTy = nullptr;
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 6fcf584bd4cc4..22b9f64ef88d8 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -927,8 +927,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
return false;
}
-bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- DominatorTree *DT) {
+bool RecurrenceDescriptor::isFixedOrderRecurrence(
+ PHINode *Phi, Loop *TheLoop,
+ MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -964,7 +965,8 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
}
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+ SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
return false;
// Ensure every user of the phi node (recursively) is dominated by the
@@ -973,9 +975,23 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
// loop.
// TODO: Consider extending this sinking to handle memory instructions.
+ // We optimistically assume we can sink all users after Previous. Keep a set
+ // of instructions to sink after Previous ordered by dominance in the common
+ // basic block. It will be applied to SinkAfter if all users can be sunk.
+ auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
+ return A->comesBefore(B);
+ };
+ std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
+ CompareByComesBefore);
+
BasicBlock *PhiBB = Phi->getParent();
SmallVector<Instruction *, 8> WorkList;
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
+ // Already sunk SinkCandidate.
+ if (SinkCandidate->getParent() == PhiBB &&
+ InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
+ return true;
+
// Cyclic dependence.
if (Previous == SinkCandidate)
return false;
@@ -989,12 +1005,55 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
return false;
+ // Avoid sinking an instruction multiple times (if multiple operands are
+ // fixed order recurrences) by sinking once - after the latest 'previous'
+ // instruction.
+ auto It = SinkAfter.find(SinkCandidate);
+ if (It != SinkAfter.end()) {
+ auto *OtherPrev = It->second;
+ // Find the earliest entry in the 'sink-after' chain. The last entry in
+ // the chain is the original 'Previous' for a recurrence handled earlier.
+ auto EarlierIt = SinkAfter.find(OtherPrev);
+ while (EarlierIt != SinkAfter.end()) {
+ Instruction *EarlierInst = EarlierIt->second;
+ EarlierIt = SinkAfter.find(EarlierInst);
+ // Bail out if order has not been preserved.
+ if (EarlierIt != SinkAfter.end() &&
+ !DT->dominates(EarlierInst, OtherPrev))
+ return false;
+ OtherPrev = EarlierInst;
+ }
+ // Bail out if order has not been preserved.
+ if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
+ return false;
+
+ // SinkCandidate is already being sunk after an instruction after
+ // Previous. Nothing left to do.
+ if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
+ return true;
+
+ // If there are other instructions to be sunk after SinkCandidate, remove
+ // and re-insert SinkCandidate can break those instructions. Bail out for
+ // simplicity.
+ if (any_of(SinkAfter,
+ [SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
+ return P.second == SinkCandidate;
+ }))
+ return false;
+
+ // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
+ // re-sunk to Previous, instead of sinking to OtherPrev. Remove
+ // SinkCandidate from SinkAfter to ensure it's insert position is updated.
+ SinkAfter.erase(SinkCandidate);
+ }
+
// If we reach a PHI node that is not dominated by Previous, we reached a
// header PHI. No need for sinking.
if (isa<PHINode>(SinkCandidate))
return true;
// Sink User tentatively and check its users
+ InstrsToSink.insert(SinkCandidate);
WorkList.push_back(SinkCandidate);
return true;
};
@@ -1009,6 +1068,11 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
}
}
+ // We can sink all users of Phi. Update the mapping.
+ for (Instruction *I : InstrsToSink) {
+ SinkAfter[I] = Previous;
+ Previous = I;
+ }
return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3a868e8625a85..f45d800b28bab 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -743,7 +743,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
- if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
+ SinkAfter, DT)) {
AllowedExit.insert(Phi);
FixedOrderRecurrences.insert(Phi);
continue;
@@ -916,6 +917,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
+ // For fixed order recurrences, we use the previous value (incoming value from
+ // the latch) to check if it dominates all users of the recurrence. Bail out
+ // if we have to sink such an instruction for another recurrence, as the
+ // dominance requirement may not hold after sinking.
+ BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+ if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
+ Instruction *V =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
+ return SinkAfter.contains(V);
+ }))
+ return false;
+
// Now we know the widest induction type, check if our found induction
// is the same size. If it's not, unset it here and InnerLoopVectorizer
// will create another.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 254af15523cc4..daebffb16b5a9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9055,8 +9055,7 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Sink users of fixed-order recurrence past the recipe defining the previous
// value and introduce FirstOrderRecurrenceSplice VPInstructions.
- if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
- return std::nullopt;
+ VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder);
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ec1a1a8307e96..c657b7615cc68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -657,10 +657,9 @@ static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
return VPDT.properlyDominates(ParentA, ParentB);
}
-/// Sink users of \p FOR after the recipe defining the previous value \p
-/// Previous of the recurrence. \returns true if all users of \p FOR could be
-/// re-arranged as needed or false if it is not possible.
-static bool
+// Sink users of \p FOR after the recipe defining the previous value \p Previous
+// of the recurrence.
+static void
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
VPRecipeBase *Previous,
VPDominatorTree &VPDT) {
@@ -669,18 +668,15 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
SmallPtrSet<VPRecipeBase *, 8> Seen;
Seen.insert(Previous);
auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
- // The previous value must not depend on the users of the recurrence phi. In
- // that case, FOR is not a fixed order recurrence.
- if (SinkCandidate == Previous)
- return false;
-
+ assert(
+ SinkCandidate != Previous &&
+ "The previous value cannot depend on the users of the recurrence phi.");
if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
!Seen.insert(SinkCandidate).second ||
properlyDominates(Previous, SinkCandidate, VPDT))
- return true;
+ return;
WorkList.push_back(SinkCandidate);
- return true;
};
// Recursively sink users of FOR after Previous.
@@ -691,8 +687,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
"only recipes with a single defined value expected");
for (VPUser *User : Current->getVPSingleValue()->users()) {
if (auto *R = dyn_cast<VPRecipeBase>(User))
- if (!TryToPushSinkCandidate(R))
- return false;
+ TryToPushSinkCandidate(R);
}
}
@@ -709,10 +704,9 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
SinkCandidate->moveAfter(Previous);
Previous = SinkCandidate;
}
- return true;
}
-bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
+void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
VPBuilder &Builder) {
VPDominatorTree VPDT;
VPDT.recalculate(Plan);
@@ -735,8 +729,7 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
}
- if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
- return false;
+ sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT);
// Introduce a recipe to combine the incoming and previous values of a
// fixed-order recurrence.
@@ -755,5 +748,4 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
// all users.
RecurSplice->setOperand(0, FOR);
}
- return true;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 430628cb068d8..e5bd1a42f8778 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -77,10 +77,7 @@ struct VPlanTransforms {
/// to combine the value from the recurrence phis and previous values. The
/// current implementation assumes all users can be sunk after the previous
/// value, which is enforced by earlier legality checks.
- /// \returns true if all users of fixed-order recurrences could be re-arranged
- /// as needed or false if it is not possible. In the latter case, \p Plan is
- /// not valid.
- static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+ static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 371dc7f5922f5..5e235077ac743 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -647,40 +647,12 @@ exit:
ret ptr %for.1
}
-; In this test case, %USE_2_FORS uses 2 different fixed-order recurrences and
-; it needs to be sunk past the previous value for both recurrences.
-define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
-; CHECK-LABEL: @test_resinking_required(
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
-; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ]
-; CHECK-NEXT: [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
-; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr %a, align 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr %b, align 8
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
-; CHECK-NEXT: store double [[TMP6]], ptr [[P:%.*]], align 8
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
-; CHECK-NEXT: br i1 [[CMP_N]], label %End, label %scalar.ph
-;
+; Make sure LLVM doesn't generate wrong data in SinkAfter and cause a crash in
+; the loop vectorizer.
+define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) {
+; CHECK-LABEL: @test_crash
+; CHECK-NOT: vector.body:
+; CHECK: ret
Entry:
br label %Loop
@@ -689,7 +661,7 @@ Loop:
%for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ]
%for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ]
%iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ]
- %USE_2_FORS = fdiv double %for.3, %for.1
+ %USE_2_INDVARS = fdiv double %for.3, %for.1
%div = fdiv double 0.000000e+00, %for.1
%l1 = load double, ptr %a, align 8
%iv.next= add nuw nsw i64 %iv, 1
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index aea49ac225de7..3d07e2cb2bc4c 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -221,55 +221,22 @@ exit:
define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) {
; CHECK-LABEL: @test_pr54233_for_depend_on_each_other(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], <i32 10, i32 10, i32 10, i32 10>
-; CHECK-NEXT: [[TMP4]] = xor <4 x i32> <i32 12, i32 12, i32 12, i32 12>, [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT: [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 255, i32 255, i32 255, i32 255>
-; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
-; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1001, 1000
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
-; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
-; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[FOR_2]], 10
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]]
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SHL]], 255
; CHECK-NEXT: [[AND:%.*]] = and i32 [[XOR]], [[OR]]
-; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]]
-; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4
-; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]]
+; CHECK-NEXT: [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT: store i32 [[AND]], ptr [[A_GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
More information about the llvm-commits
mailing list