[llvm] [LV] Reuse VPReplicateRecipe to handle scalar stores in exit block. (PR #106342)

Mel Chen via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 23 07:00:52 PDT 2024


https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/106342

>From e6bbf555057583bd86667c93eef1b9892b891b42 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 27 Aug 2024 00:11:05 -0700
Subject: [PATCH 1/7] Init implement

---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 78 +++++++------------
 .../lib/Transforms/Vectorize/VPlanRecipes.cpp |  8 --
 .../reduction-with-invariant-store.ll         | 32 ++++----
 .../LoopVectorize/vplan-printing.ll           |  1 +
 4 files changed, 47 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9685e7d124b7d1..9df17d51beca29 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8926,6 +8926,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
         bool NeedsBlends = BB != HeaderBB && !BB->phis().empty();
         return Legal->blockNeedsPredication(BB) || NeedsBlends;
       });
+  auto *MiddleVPBB =
+      cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
+  VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
   for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
     // Relevant instructions from basic block BB will be grouped into VPRecipe
     // ingredients and fill a new VPBasicBlock.
@@ -8956,8 +8959,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
       // with the final reduction value will be added to the exit block
       StoreInst *SI;
       if ((SI = dyn_cast<StoreInst>(&I)) &&
-          Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
+          Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
+        // Only create recipe for the last intermediate store of the reduction.
+        if (!Legal->isInvariantStoreOfReduction(SI))
+          continue;
+        auto *Recipe = new VPReplicateRecipe(
+            SI, RecipeBuilder.mapToVPValues(Instr->operands()),
+            true /* IsUniform */);
+        RecipeBuilder.setRecipe(SI, Recipe);
+        Recipe->insertBefore(*MiddleVPBB, MBIP);
         continue;
+      }
 
       VPRecipeBase *Recipe =
           RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range, VPBB);
@@ -9126,51 +9138,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
   using namespace VPlanPatternMatch;
   VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
   VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
-  // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
-  // sank outside of the loop would keep the same order as they had in the
-  // original loop.
-  SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
-  for (VPRecipeBase &R : Header->phis()) {
-    if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
-      ReductionPHIList.emplace_back(ReductionPhi);
-  }
-  bool HasIntermediateStore = false;
-  stable_sort(ReductionPHIList,
-              [this, &HasIntermediateStore](const VPReductionPHIRecipe *R1,
-                                            const VPReductionPHIRecipe *R2) {
-                auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
-                auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
-                HasIntermediateStore |= IS1 || IS2;
-
-                // If neither of the recipes has an intermediate store, keep the
-                // order the same.
-                if (!IS1 && !IS2)
-                  return false;
-
-                // If only one of the recipes has an intermediate store, then
-                // move it towards the beginning of the list.
-                if (IS1 && !IS2)
-                  return true;
-
-                if (!IS1 && IS2)
-                  return false;
-
-                // If both recipes have an intermediate store, then the recipe
-                // with the later store should be processed earlier. So it
-                // should go to the beginning of the list.
-                return DT->dominates(IS2, IS1);
-              });
-
-  if (HasIntermediateStore && ReductionPHIList.size() > 1)
-    for (VPRecipeBase *R : ReductionPHIList)
-      R->moveBefore(*Header, Header->getFirstNonPhi());
-
   for (VPRecipeBase &R : Header->phis()) {
     auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
     if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
       continue;
 
     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+    StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
     RecurKind Kind = RdxDesc.getRecurrenceKind();
     assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
            "AnyOf reductions are not allowed for in-loop reductions");
@@ -9183,9 +9157,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       for (VPUser *U : Cur->users()) {
         auto *UserRecipe = cast<VPSingleDefRecipe>(U);
         if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
-          assert(match(U, m_Binary<VPInstruction::ExtractFromEnd>(
-                              m_VPValue(), m_VPValue())) &&
-                 "U must be an ExtractFromEnd VPInstruction");
+          assert((match(U, m_Binary<VPInstruction::ExtractFromEnd>(
+                               m_VPValue(), m_VPValue())) ||
+                  (isa<VPReplicateRecipe>(U) &&
+                   cast<VPReplicateRecipe>(U)->getUnderlyingValue() ==
+                       IntermediateStore)) &&
+                 "U must be either an ExtractFromEnd VPInstruction or a "
+                 "uniform store sourced from the intermediate store.");
           continue;
         }
         Worklist.insert(UserRecipe);
@@ -9300,6 +9278,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       continue;
 
     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+    StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
     // Adjust AnyOf reductions; replace the reduction phi for the selected value
     // with a boolean reduction phi node to check if the condition is true in
     // any iteration. The final value is selected by the final
@@ -9402,11 +9381,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     auto *FinalReductionResult = new VPInstruction(
         VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
     FinalReductionResult->insertBefore(*MiddleVPBB, IP);
-    OrigExitingVPV->replaceUsesWithIf(FinalReductionResult, [](VPUser &User,
-                                                               unsigned) {
-      return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue(),
-                                                                  m_VPValue()));
-    });
+    OrigExitingVPV->replaceUsesWithIf(
+        FinalReductionResult, [IntermediateStore](VPUser &User, unsigned) {
+          return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(
+                                  m_VPValue(), m_VPValue())) ||
+                 (isa<VPReplicateRecipe>(&User) &&
+                  cast<VPReplicateRecipe>(&User)->getUnderlyingValue() ==
+                      IntermediateStore);
+        });
   }
 
   VPlanTransforms::clearReductionWrapFlags(*Plan);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 8f4b2951839118..3ac41a567f3c20 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -614,14 +614,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
                              : Builder.CreateZExt(ReducedPartRdx, PhiTy);
     }
 
-    // If there were stores of the reduction value to a uniform memory address
-    // inside the loop, create the final store here.
-    if (StoreInst *SI = RdxDesc.IntermediateStore) {
-      auto *NewSI = Builder.CreateAlignedStore(
-          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
-      propagateMetadata(NewSI, SI);
-    }
-
     return ReducedPartRdx;
   }
   case VPInstruction::ExtractFromEnd: {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 8cf4e77a0d4990..0d9918b74a2ff2 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -596,10 +596,10 @@ exit:                                             ; preds = %for.body
 define void @reduc_add_mul_store_same_ptr(ptr %dst, ptr readonly %src) {
 ; CHECK-LABEL: define void @reduc_add_mul_store_same_ptr
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT:    store i32 [[TMP4]], ptr %dst, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT:    store i32 [[TMP2]], ptr %dst, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT:    store i32 [[TMP6]], ptr %dst, align 4
+; CHECK-NEXT:    store i32 [[TMP7]], ptr %dst, align 4
 ;
 entry:
   br label %for.body
@@ -625,10 +625,10 @@ exit:
 define void @reduc_mul_add_store_same_ptr(ptr %dst, ptr readonly %src) {
 ; CHECK-LABEL: define void @reduc_mul_add_store_same_ptr
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT:    store i32 [[TMP4]], ptr %dst, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT:    store i32 [[TMP2]], ptr %dst, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT:    store i32 [[TMP7]], ptr %dst, align 4
+; CHECK-NEXT:    store i32 [[TMP6]], ptr %dst, align 4
 ;
 entry:
   br label %for.body
@@ -655,10 +655,10 @@ exit:
 define void @reduc_add_mul_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
 ; CHECK-LABEL: define void @reduc_add_mul_store_different_ptr
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT:    store i32 [[TMP4]], ptr %dst2, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT:    store i32 [[TMP2]], ptr %dst1, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT:    store i32 [[TMP6]], ptr %dst1, align 4
+; CHECK-NEXT:    store i32 [[TMP7]], ptr %dst2, align 4
 ;
 entry:
   br label %for.body
@@ -684,10 +684,10 @@ exit:
 define void @reduc_mul_add_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
 ; CHECK-LABEL: define void @reduc_mul_add_store_different_ptr
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT:    store i32 [[TMP4]], ptr %dst2, align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT:    store i32 [[TMP2]], ptr %dst1, align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT:    store i32 [[TMP7]], ptr %dst1, align 4
+; CHECK-NEXT:    store i32 [[TMP6]], ptr %dst2, align 4
 ;
 entry:
   br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 26974c2307065a..2a55f6d7568265 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -212,6 +212,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
 ; CHECK-EMPTY:
 ; CHECK-NEXT: middle.block:
 ; CHECK-NEXT:   EMIT vp<[[RED_RES:.+]]> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT:   CLONE store vp<[[RED_RES]]>, ir<%dst>
 ; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
 ; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
 ; CHECK-NEXT: Successor(s): ir-bb<for.end>, scalar.ph

>From 807579f8927d8b51f948dd40bdf5d9d8774e9835 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 3 Sep 2024 23:03:25 -0700
Subject: [PATCH 2/7] Move intermediate store into assertion section.

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9df17d51beca29..908151992233e7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9144,7 +9144,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       continue;
 
     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
-    StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
     RecurKind Kind = RdxDesc.getRecurrenceKind();
     assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
            "AnyOf reductions are not allowed for in-loop reductions");
@@ -9161,7 +9160,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
                                m_VPValue(), m_VPValue())) ||
                   (isa<VPReplicateRecipe>(U) &&
                    cast<VPReplicateRecipe>(U)->getUnderlyingValue() ==
-                       IntermediateStore)) &&
+                       RdxDesc.IntermediateStore)) &&
                  "U must be either an ExtractFromEnd VPInstruction or a "
                  "uniform store sourced from the intermediate store.");
           continue;

>From 3ecb19ada73e17c67b1531115299a3d75203a330 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Wed, 4 Sep 2024 02:10:55 -0700
Subject: [PATCH 3/7] Add assertion for scalarizeInstruction

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 908151992233e7..c6441533210f4e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2380,6 +2380,14 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
   // End if-block.
   VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
   bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
+  assert((Parent ||
+          (RepRecipe->getOpcode() == Instruction::Store &&
+           isa<VPInstruction>(RepRecipe->getOperand(0)) &&
+           cast<VPInstruction>(RepRecipe->getOperand(0))->isVectorToScalar() &&
+           RepRecipe->getOperand(1)->isDefinedOutsideLoopRegions())) &&
+         "Expected a recipe either in a region, or a uniform store with a "
+         "vector-to-scalar stored value and an address defined outside "
+         "vectorized region.");
   if (IfPredicateInstr)
     PredicatedInstructions.push_back(Cloned);
 }

>From fec4ac2313595affa02afacbfe14d86943415464 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Wed, 4 Sep 2024 02:40:02 -0700
Subject: [PATCH 4/7] Check the address is defined outside vector region.

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c6441533210f4e..4f6333b40eac28 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9394,7 +9394,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
                                   m_VPValue(), m_VPValue())) ||
                  (isa<VPReplicateRecipe>(&User) &&
                   cast<VPReplicateRecipe>(&User)->getUnderlyingValue() ==
-                      IntermediateStore);
+                      IntermediateStore &&
+                  cast<VPReplicateRecipe>(&User)
+                      ->getOperand(1)
+                      ->isDefinedOutsideVectorRegions());
         });
   }
 

>From 08211074828b2600a60adbaf16c8cc98f25e9e97 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 23 Sep 2024 20:57:39 +0800
Subject: [PATCH 5/7] Update llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Refine the update lambda of ComputeReductionResult.

Co-authored-by: Florian Hahn <flo at fhahn.com>
---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 4f6333b40eac28..1720330fed0889 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9388,17 +9388,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     auto *FinalReductionResult = new VPInstruction(
         VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
     FinalReductionResult->insertBefore(*MiddleVPBB, IP);
+     auto *FinalReductionResult = new VPInstruction( 
+        VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+    // Update all users outside the vector region.
     OrigExitingVPV->replaceUsesWithIf(
-        FinalReductionResult, [IntermediateStore](VPUser &User, unsigned) {
-          return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(
-                                  m_VPValue(), m_VPValue())) ||
-                 (isa<VPReplicateRecipe>(&User) &&
-                  cast<VPReplicateRecipe>(&User)->getUnderlyingValue() ==
-                      IntermediateStore &&
-                  cast<VPReplicateRecipe>(&User)
-                      ->getOperand(1)
-                      ->isDefinedOutsideVectorRegions());
+        FinalReductionResult, [](VPUser &User, unsigned) {
+        auto *Parent = cast<VPRecipeBase>(&User)->getParent();
+          return Parent && !Parent->getParent();
         });
+    FinalReductionResult->insertBefore(*MiddleVPBB, IP);
   }
 
   VPlanTransforms::clearReductionWrapFlags(*Plan);

>From 155bf09baf9d6d915d0dfac44b5613f07682bad0 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 23 Sep 2024 06:26:11 -0700
Subject: [PATCH 6/7] Fix the suggestion code

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1720330fed0889..1de055a8d298a2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9285,7 +9285,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
       continue;
 
     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
-    StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
     // Adjust AnyOf reductions; replace the reduction phi for the selected value
     // with a boolean reduction phi node to check if the condition is true in
     // any iteration. The final value is selected by the final
@@ -9387,13 +9386,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
     // also modeled in VPlan.
     auto *FinalReductionResult = new VPInstruction(
         VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
-    FinalReductionResult->insertBefore(*MiddleVPBB, IP);
-     auto *FinalReductionResult = new VPInstruction( 
-        VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
     // Update all users outside the vector region.
     OrigExitingVPV->replaceUsesWithIf(
         FinalReductionResult, [](VPUser &User, unsigned) {
-        auto *Parent = cast<VPRecipeBase>(&User)->getParent();
+          auto *Parent = cast<VPRecipeBase>(&User)->getParent();
           return Parent && !Parent->getParent();
         });
     FinalReductionResult->insertBefore(*MiddleVPBB, IP);

>From c3b41e21e1fbff16d2f644091e37c3edc609dbcc Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 23 Sep 2024 06:54:08 -0700
Subject: [PATCH 7/7] Rebase and update

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1de055a8d298a2..5674cb62d871f5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2380,14 +2380,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
   // End if-block.
   VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
   bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
-  assert((Parent ||
-          (RepRecipe->getOpcode() == Instruction::Store &&
-           isa<VPInstruction>(RepRecipe->getOperand(0)) &&
-           cast<VPInstruction>(RepRecipe->getOperand(0))->isVectorToScalar() &&
-           RepRecipe->getOperand(1)->isDefinedOutsideLoopRegions())) &&
-         "Expected a recipe either in a region, or a uniform store with a "
-         "vector-to-scalar stored value and an address defined outside "
-         "vectorized region.");
+  assert((Parent || all_of(RepRecipe->operands(),
+                           [](VPValue *Op) {
+                             return Op->isDefinedOutsideLoopRegions();
+                           })) &&
+         "Expected a recipe is either within a region or all of its operands "
+         "are defined outside the vectorized region.");
   if (IfPredicateInstr)
     PredicatedInstructions.push_back(Cloned);
 }



More information about the llvm-commits mailing list