[llvm] 3d8ed8b - Revert "[VPlan] Switch to checking sinking legality for recurrences in VPlan."

Manoj Gupta via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 17 12:19:48 PDT 2023


Author: Manoj Gupta
Date: 2023-04-17T12:19:36-07:00
New Revision: 3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5

URL: https://github.com/llvm/llvm-project/commit/3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5
DIFF: https://github.com/llvm/llvm-project/commit/3d8ed8b5192a59104bfbd5bf7ac84d035ee0a4a5.diff

LOG: Revert "[VPlan] Switch to checking sinking legality for recurrences in VPlan."

This reverts commit 7fc0b3049df532fce726d1ff6869a9f6e3183780.

Causes a clang hang when building xz utils, github issue #62187.

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/IVDescriptors.h
    llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
    llvm/lib/Analysis/IVDescriptors.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
    llvm/lib/Transforms/Vectorize/VPlanTransforms.h
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index ad936d213134c..696d5e290c16d 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -186,11 +186,14 @@ class RecurrenceDescriptor {
   /// previous iteration (e.g. if the value is defined in the previous
   /// iteration, we refer to it as first-order recurrence, if it is defined in
   /// the iteration before the previous, we refer to it as second-order
-  /// recurrence and so on). Note that this function optimistically assumes that
-  /// uses of the recurrence can be re-ordered if necessary and users need to
-  /// check and perform the re-ordering.
-  static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
-                                     DominatorTree *DT);
+  /// recurrence and so on). \p SinkAfter includes pairs of instructions where
+  /// the first will be rescheduled to appear after the second if/when the loop
+  /// is vectorized. It may be augmented with additional pairs if needed in
+  /// order to handle Phi as a first-order recurrence.
+  static bool
+  isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+                         MapVector<Instruction *, Instruction *> &SinkAfter,
+                         DominatorTree *DT);
 
   RecurKind getRecurrenceKind() const { return Kind; }
 

diff  --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 1863e2e65553f..4514e6000e597 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -516,6 +516,10 @@ class LoopVectorizationLegality {
   /// Holds the phi nodes that are fixed-order recurrences.
   RecurrenceSet FixedOrderRecurrences;
 
+  /// Holds instructions that need to sink past other instructions to handle
+  /// fixed-order recurrences.
+  MapVector<Instruction *, Instruction *> SinkAfter;
+
   /// Holds the widest induction type encountered.
   Type *WidestIndTy = nullptr;
 

diff  --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 6fcf584bd4cc4..22b9f64ef88d8 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -927,8 +927,9 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
   return false;
 }
 
-bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
-                                                  DominatorTree *DT) {
+bool RecurrenceDescriptor::isFixedOrderRecurrence(
+    PHINode *Phi, Loop *TheLoop,
+    MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
 
   // Ensure the phi node is in the loop header and has two incoming values.
   if (Phi->getParent() != TheLoop->getHeader() ||
@@ -964,7 +965,8 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
     Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
   }
 
-  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
+  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
+      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
     return false;
 
   // Ensure every user of the phi node (recursively) is dominated by the
@@ -973,9 +975,23 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
   // loop.
   // TODO: Consider extending this sinking to handle memory instructions.
 
+  // We optimistically assume we can sink all users after Previous. Keep a set
+  // of instructions to sink after Previous ordered by dominance in the common
+  // basic block. It will be applied to SinkAfter if all users can be sunk.
+  auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
+    return A->comesBefore(B);
+  };
+  std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
+      CompareByComesBefore);
+
   BasicBlock *PhiBB = Phi->getParent();
   SmallVector<Instruction *, 8> WorkList;
   auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
+    // Already sunk SinkCandidate.
+    if (SinkCandidate->getParent() == PhiBB &&
+        InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
+      return true;
+
     // Cyclic dependence.
     if (Previous == SinkCandidate)
       return false;
@@ -989,12 +1005,55 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
         SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
       return false;
 
+    // Avoid sinking an instruction multiple times (if multiple operands are
+    // fixed order recurrences) by sinking once - after the latest 'previous'
+    // instruction.
+    auto It = SinkAfter.find(SinkCandidate);
+    if (It != SinkAfter.end()) {
+      auto *OtherPrev = It->second;
+      // Find the earliest entry in the 'sink-after' chain. The last entry in
+      // the chain is the original 'Previous' for a recurrence handled earlier.
+      auto EarlierIt = SinkAfter.find(OtherPrev);
+      while (EarlierIt != SinkAfter.end()) {
+        Instruction *EarlierInst = EarlierIt->second;
+        EarlierIt = SinkAfter.find(EarlierInst);
+        // Bail out if order has not been preserved.
+        if (EarlierIt != SinkAfter.end() &&
+            !DT->dominates(EarlierInst, OtherPrev))
+          return false;
+        OtherPrev = EarlierInst;
+      }
+      // Bail out if order has not been preserved.
+      if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
+        return false;
+
+      // SinkCandidate is already being sunk after an instruction after
+      // Previous. Nothing left to do.
+      if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
+        return true;
+
+      // If there are other instructions to be sunk after SinkCandidate, remove
+      // and re-insert SinkCandidate can break those instructions. Bail out for
+      // simplicity.
+      if (any_of(SinkAfter,
+          [SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
+            return P.second == SinkCandidate;
+          }))
+        return false;
+
+      // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
+      // re-sunk to Previous, instead of sinking to OtherPrev. Remove
+      // SinkCandidate from SinkAfter to ensure it's insert position is updated.
+      SinkAfter.erase(SinkCandidate);
+    }
+
     // If we reach a PHI node that is not dominated by Previous, we reached a
     // header PHI. No need for sinking.
     if (isa<PHINode>(SinkCandidate))
       return true;
 
     // Sink User tentatively and check its users
+    InstrsToSink.insert(SinkCandidate);
     WorkList.push_back(SinkCandidate);
     return true;
   };
@@ -1009,6 +1068,11 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
     }
   }
 
+  // We can sink all users of Phi. Update the mapping.
+  for (Instruction *I : InstrsToSink) {
+    SinkAfter[I] = Previous;
+    Previous = I;
+  }
   return true;
 }
 

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 3a868e8625a85..f45d800b28bab 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -743,7 +743,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
           continue;
         }
 
-        if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
+        if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
+                                                         SinkAfter, DT)) {
           AllowedExit.insert(Phi);
           FixedOrderRecurrences.insert(Phi);
           continue;
@@ -916,6 +917,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
     }
   }
 
+  // For fixed order recurrences, we use the previous value (incoming value from
+  // the latch) to check if it dominates all users of the recurrence. Bail out
+  // if we have to sink such an instruction for another recurrence, as the
+  // dominance requirement may not hold after sinking.
+  BasicBlock *LoopLatch = TheLoop->getLoopLatch();
+  if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
+        Instruction *V =
+            cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
+        return SinkAfter.contains(V);
+      }))
+    return false;
+
   // Now we know the widest induction type, check if our found induction
   // is the same size. If it's not, unset it here and InnerLoopVectorizer
   // will create another.

diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 254af15523cc4..daebffb16b5a9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9055,8 +9055,7 @@ std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
 
   // Sink users of fixed-order recurrence past the recipe defining the previous
   // value and introduce FirstOrderRecurrenceSplice VPInstructions.
-  if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
-    return std::nullopt;
+  VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder);
 
   // Interleave memory: for each Interleave Group we marked earlier as relevant
   // for this VPlan, replace the Recipes widening its memory instructions with a

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index ec1a1a8307e96..c657b7615cc68 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -657,10 +657,9 @@ static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
   return VPDT.properlyDominates(ParentA, ParentB);
 }
 
-/// Sink users of \p FOR after the recipe defining the previous value \p
-/// Previous of the recurrence. \returns true if all users of \p FOR could be
-/// re-arranged as needed or false if it is not possible.
-static bool
+// Sink users of \p FOR after the recipe defining the previous value \p Previous
+// of the recurrence.
+static void
 sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
                                  VPRecipeBase *Previous,
                                  VPDominatorTree &VPDT) {
@@ -669,18 +668,15 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
   SmallPtrSet<VPRecipeBase *, 8> Seen;
   Seen.insert(Previous);
   auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
-    // The previous value must not depend on the users of the recurrence phi. In
-    // that case, FOR is not a fixed order recurrence.
-    if (SinkCandidate == Previous)
-      return false;
-
+    assert(
+        SinkCandidate != Previous &&
+        "The previous value cannot depend on the users of the recurrence phi.");
     if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
         !Seen.insert(SinkCandidate).second ||
         properlyDominates(Previous, SinkCandidate, VPDT))
-      return true;
+      return;
 
     WorkList.push_back(SinkCandidate);
-    return true;
   };
 
   // Recursively sink users of FOR after Previous.
@@ -691,8 +687,7 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
            "only recipes with a single defined value expected");
     for (VPUser *User : Current->getVPSingleValue()->users()) {
       if (auto *R = dyn_cast<VPRecipeBase>(User))
-        if (!TryToPushSinkCandidate(R))
-          return false;
+        TryToPushSinkCandidate(R);
     }
   }
 
@@ -709,10 +704,9 @@ sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
     SinkCandidate->moveAfter(Previous);
     Previous = SinkCandidate;
   }
-  return true;
 }
 
-bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
+void VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                   VPBuilder &Builder) {
   VPDominatorTree VPDT;
   VPDT.recalculate(Plan);
@@ -735,8 +729,7 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
       Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
     }
 
-    if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
-      return false;
+    sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT);
 
     // Introduce a recipe to combine the incoming and previous values of a
     // fixed-order recurrence.
@@ -755,5 +748,4 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
     // all users.
     RecurSplice->setOperand(0, FOR);
   }
-  return true;
 }

diff  --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 430628cb068d8..e5bd1a42f8778 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -77,10 +77,7 @@ struct VPlanTransforms {
   /// to combine the value from the recurrence phis and previous values. The
   /// current implementation assumes all users can be sunk after the previous
   /// value, which is enforced by earlier legality checks.
-  /// \returns true if all users of fixed-order recurrences could be re-arranged
-  /// as needed or false if it is not possible. In the latter case, \p Plan is
-  /// not valid.
-  static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+  static void adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
 
   /// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
   /// resulting plan to \p BestVF and \p BestUF.

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
index 371dc7f5922f5..5e235077ac743 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll
@@ -647,40 +647,12 @@ exit:
   ret ptr %for.1
 }
 
-; In this test case, %USE_2_FORS uses 2 
diff erent fixed-order recurrences and
-; it needs to be sunk past the previous value for both recurrences.
-define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) {
-; CHECK-LABEL: @test_resinking_required(
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[BROADCAST_SPLAT4:%.*]], %vector.body ]
-; CHECK-NEXT:    [[VECTOR_RECUR2:%.*]] = phi <4 x double> [ <double poison, double poison, double poison, double 0.000000e+00>, %vector.ph ], [ [[TMP4:%.*]], %vector.body ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr %a, align 8
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[TMP0]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP2:%.*]] = fdiv <4 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load double, ptr %b, align 8
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x double> poison, double [[TMP3]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT4]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT3]], <4 x double> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4]] = shufflevector <4 x double> [[VECTOR_RECUR1]], <4 x double> [[BROADCAST_SPLAT4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR2]], <4 x double> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP2]], i32 3
-; CHECK-NEXT:    store double [[TMP6]], ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 0, 0
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2
-; CHECK-NEXT:    br i1 [[CMP_N]], label %End, label %scalar.ph
-;
+; Make sure LLVM doesn't generate wrong data in SinkAfter, and causes crash in
+; loop vectorizer.
+define double @test_crash(ptr %p, ptr noalias %a, ptr noalias %b) {
+; CHECK-LABEL: @test_crash
+; CHECK-NOT:   vector.body:
+; CHECK:       ret
 Entry:
   br label %Loop
 
@@ -689,7 +661,7 @@ Loop:
   %for.2 = phi double [ %l2, %Loop ], [ 0.000000e+00, %Entry ]
   %for.3 = phi double [ %for.2, %Loop ], [ 0.000000e+00, %Entry ]
   %iv = phi i64 [ %iv.next, %Loop ], [ 0, %Entry ]
-  %USE_2_FORS = fdiv double %for.3, %for.1
+  %USE_2_INDVARS = fdiv double %for.3, %for.1
   %div = fdiv double 0.000000e+00, %for.1
   %l1 = load double, ptr %a, align 8
   %iv.next= add nuw nsw i64 %iv, 1

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
index aea49ac225de7..3d07e2cb2bc4c 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-multiply-recurrences.ll
@@ -221,55 +221,22 @@ exit:
 define void @test_pr54233_for_depend_on_each_other(ptr noalias %a, ptr noalias %b) {
 ; CHECK-LABEL: @test_pr54233_for_depend_on_each_other(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[BROADCAST_SPLAT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B:%.*]], align 4
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[BROADCAST_SPLAT]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP2]], <i32 10, i32 10, i32 10, i32 10>
-; CHECK-NEXT:    [[TMP4]] = xor <4 x i32> <i32 12, i32 12, i32 12, i32 12>, [[TMP2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-; CHECK-NEXT:    [[TMP6:%.*]] = shl <4 x i32> [[TMP2]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 255, i32 255, i32 255, i32 255>
-; CHECK-NEXT:    [[TMP8:%.*]] = and <4 x i32> [[TMP7]], [[TMP3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0
-; CHECK-NEXT:    store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1001, 1000
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
-; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[BROADCAST_SPLAT]], i32 3
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SCALAR_RECUR4]], 10
-; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[SCALAR_RECUR4]], [[SCALAR_RECUR]]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[FOR_1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_1_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[FOR_2:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[FOR_2_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[FOR_2]], 10
+; CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[FOR_2]], [[FOR_1]]
 ; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[SHL]], 255
 ; CHECK-NEXT:    [[AND:%.*]] = and i32 [[XOR]], [[OR]]
-; CHECK-NEXT:    [[FOR_1_NEXT]] = xor i32 12, [[SCALAR_RECUR4]]
-; CHECK-NEXT:    [[FOR_2_NEXT]] = load i32, ptr [[B]], align 4
-; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[FOR_1_NEXT]] = xor i32 12, [[FOR_2]]
+; CHECK-NEXT:    [[FOR_2_NEXT]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT:    [[A_GEP:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
 ; CHECK-NEXT:    store i32 [[AND]], ptr [[A_GEP]], align 4
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 1000
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
 ;


        


More information about the llvm-commits mailing list