[llvm] [LV] Reuse VPReplicateRecipe to handle scalar stores in exit block. (PR #106342)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 26 01:08:29 PDT 2024
https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/106342
>From 95e91d69ddf1f5c41abb48a00ff3cf365495f209 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 27 Aug 2024 00:11:05 -0700
Subject: [PATCH 1/9] Initial implementation
---
.../Transforms/Vectorize/LoopVectorize.cpp | 78 +++++++------------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 8 --
.../reduction-with-invariant-store.ll | 32 ++++----
.../LoopVectorize/vplan-printing.ll | 1 +
4 files changed, 47 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bd493fb2c1ba19..43b645d6fd0312 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8922,6 +8922,9 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
bool NeedsBlends = BB != HeaderBB && !BB->phis().empty();
return Legal->blockNeedsPredication(BB) || NeedsBlends;
});
+ auto *MiddleVPBB =
+ cast<VPBasicBlock>(Plan->getVectorLoopRegion()->getSingleSuccessor());
+ VPBasicBlock::iterator MBIP = MiddleVPBB->getFirstNonPhi();
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
@@ -8952,8 +8955,17 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
// with the final reduction value will be added to the exit block
StoreInst *SI;
if ((SI = dyn_cast<StoreInst>(&I)) &&
- Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
+ Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
+ // Only create recipe for the last intermediate store of the reduction.
+ if (!Legal->isInvariantStoreOfReduction(SI))
+ continue;
+ auto *Recipe = new VPReplicateRecipe(
+ SI, RecipeBuilder.mapToVPValues(Instr->operands()),
+ true /* IsUniform */);
+ RecipeBuilder.setRecipe(SI, Recipe);
+ Recipe->insertBefore(*MiddleVPBB, MBIP);
continue;
+ }
VPRecipeBase *Recipe =
RecipeBuilder.tryToCreateWidenRecipe(Instr, Operands, Range, VPBB);
@@ -9122,51 +9134,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
using namespace VPlanPatternMatch;
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
- // Gather all VPReductionPHIRecipe and sort them so that Intermediate stores
- // sank outside of the loop would keep the same order as they had in the
- // original loop.
- SmallVector<VPReductionPHIRecipe *> ReductionPHIList;
- for (VPRecipeBase &R : Header->phis()) {
- if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R))
- ReductionPHIList.emplace_back(ReductionPhi);
- }
- bool HasIntermediateStore = false;
- stable_sort(ReductionPHIList,
- [this, &HasIntermediateStore](const VPReductionPHIRecipe *R1,
- const VPReductionPHIRecipe *R2) {
- auto *IS1 = R1->getRecurrenceDescriptor().IntermediateStore;
- auto *IS2 = R2->getRecurrenceDescriptor().IntermediateStore;
- HasIntermediateStore |= IS1 || IS2;
-
- // If neither of the recipes has an intermediate store, keep the
- // order the same.
- if (!IS1 && !IS2)
- return false;
-
- // If only one of the recipes has an intermediate store, then
- // move it towards the beginning of the list.
- if (IS1 && !IS2)
- return true;
-
- if (!IS1 && IS2)
- return false;
-
- // If both recipes have an intermediate store, then the recipe
- // with the later store should be processed earlier. So it
- // should go to the beginning of the list.
- return DT->dominates(IS2, IS1);
- });
-
- if (HasIntermediateStore && ReductionPHIList.size() > 1)
- for (VPRecipeBase *R : ReductionPHIList)
- R->moveBefore(*Header, Header->getFirstNonPhi());
-
for (VPRecipeBase &R : Header->phis()) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
RecurKind Kind = RdxDesc.getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"AnyOf reductions are not allowed for in-loop reductions");
@@ -9179,9 +9153,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
for (VPUser *U : Cur->users()) {
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
- assert(match(U, m_Binary<VPInstruction::ExtractFromEnd>(
- m_VPValue(), m_VPValue())) &&
- "U must be an ExtractFromEnd VPInstruction");
+ assert((match(U, m_Binary<VPInstruction::ExtractFromEnd>(
+ m_VPValue(), m_VPValue())) ||
+ (isa<VPReplicateRecipe>(U) &&
+ cast<VPReplicateRecipe>(U)->getUnderlyingValue() ==
+ IntermediateStore)) &&
+ "U must be either an ExtractFromEnd VPInstruction or a "
+ "uniform store sourced from the intermediate store.");
continue;
}
Worklist.insert(UserRecipe);
@@ -9296,6 +9274,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
@@ -9363,11 +9342,14 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
auto *FinalReductionResult = new VPInstruction(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
- OrigExitingVPV->replaceUsesWithIf(FinalReductionResult, [](VPUser &User,
- unsigned) {
- return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(m_VPValue(),
- m_VPValue()));
- });
+ OrigExitingVPV->replaceUsesWithIf(
+ FinalReductionResult, [IntermediateStore](VPUser &User, unsigned) {
+ return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(
+ m_VPValue(), m_VPValue())) ||
+ (isa<VPReplicateRecipe>(&User) &&
+ cast<VPReplicateRecipe>(&User)->getUnderlyingValue() ==
+ IntermediateStore);
+ });
// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a0aefb898e582..a7d0df52dc9409 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -606,14 +606,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
: Builder.CreateZExt(ReducedPartRdx, PhiTy);
}
- // If there were stores of the reduction value to a uniform memory address
- // inside the loop, create the final store here.
- if (StoreInst *SI = RdxDesc.IntermediateStore) {
- auto *NewSI = Builder.CreateAlignedStore(
- ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
- propagateMetadata(NewSI, SI);
- }
-
return ReducedPartRdx;
}
case VPInstruction::ExtractFromEnd: {
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
index 8cf4e77a0d4990..0d9918b74a2ff2 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -596,10 +596,10 @@ exit: ; preds = %for.body
define void @reduc_add_mul_store_same_ptr(ptr %dst, ptr readonly %src) {
; CHECK-LABEL: define void @reduc_add_mul_store_same_ptr
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT: store i32 [[TMP6]], ptr %dst, align 4
+; CHECK-NEXT: store i32 [[TMP7]], ptr %dst, align 4
;
entry:
br label %for.body
@@ -625,10 +625,10 @@ exit:
define void @reduc_mul_add_store_same_ptr(ptr %dst, ptr readonly %src) {
; CHECK-LABEL: define void @reduc_mul_add_store_same_ptr
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT: store i32 [[TMP4]], ptr %dst, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT: store i32 [[TMP2]], ptr %dst, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT: store i32 [[TMP7]], ptr %dst, align 4
+; CHECK-NEXT: store i32 [[TMP6]], ptr %dst, align 4
;
entry:
br label %for.body
@@ -655,10 +655,10 @@ exit:
define void @reduc_add_mul_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
; CHECK-LABEL: define void @reduc_add_mul_store_different_ptr
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT: store i32 [[TMP4]], ptr %dst2, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT: store i32 [[TMP2]], ptr %dst1, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT: store i32 [[TMP6]], ptr %dst1, align 4
+; CHECK-NEXT: store i32 [[TMP7]], ptr %dst2, align 4
;
entry:
br label %for.body
@@ -684,10 +684,10 @@ exit:
define void @reduc_mul_add_store_different_ptr(ptr %dst1, ptr %dst2, ptr readonly %src) {
; CHECK-LABEL: define void @reduc_mul_add_store_different_ptr
; CHECK: middle.block:
-; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP3:%.*]])
-; CHECK-NEXT: store i32 [[TMP4]], ptr %dst2, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP1:%.*]])
-; CHECK-NEXT: store i32 [[TMP2]], ptr %dst1, align 4
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> [[TMP3:%.*]])
+; CHECK-NEXT: store i32 [[TMP7]], ptr %dst1, align 4
+; CHECK-NEXT: store i32 [[TMP6]], ptr %dst2, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 26974c2307065a..2a55f6d7568265 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -212,6 +212,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
; CHECK-NEXT: EMIT vp<[[RED_RES:.+]]> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: CLONE store vp<[[RED_RES]]>, ir<%dst>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
; CHECK-NEXT: Successor(s): ir-bb<for.end>, scalar.ph
>From 90cd7e3dc84bb669861674d67952bd3794f5e99a Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Tue, 3 Sep 2024 23:03:25 -0700
Subject: [PATCH 2/9] Move intermediate store into assertion section.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 43b645d6fd0312..d415a7d7bbbd7b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9140,7 +9140,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
RecurKind Kind = RdxDesc.getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"AnyOf reductions are not allowed for in-loop reductions");
@@ -9157,7 +9156,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
m_VPValue(), m_VPValue())) ||
(isa<VPReplicateRecipe>(U) &&
cast<VPReplicateRecipe>(U)->getUnderlyingValue() ==
- IntermediateStore)) &&
+ RdxDesc.IntermediateStore)) &&
"U must be either an ExtractFromEnd VPInstruction or a "
"uniform store sourced from the intermediate store.");
continue;
>From 2cdcab5106a3397cac8d4f97858a23f086f40824 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Wed, 4 Sep 2024 02:10:55 -0700
Subject: [PATCH 3/9] Add assertion for scalarizeInstruction
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d415a7d7bbbd7b..2690b447c9dc4b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2373,6 +2373,14 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// End if-block.
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
+ assert((Parent ||
+ (RepRecipe->getOpcode() == Instruction::Store &&
+ isa<VPInstruction>(RepRecipe->getOperand(0)) &&
+ cast<VPInstruction>(RepRecipe->getOperand(0))->isVectorToScalar() &&
+ RepRecipe->getOperand(1)->isDefinedOutsideLoopRegions())) &&
+ "Expected a recipe either in a region, or a uniform store with a "
+ "vector-to-scalar stored value and an address defined outside "
+ "vectorized region.");
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
>From 0906238ac7778910a65d3041373378d106754eaa Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Wed, 4 Sep 2024 02:40:02 -0700
Subject: [PATCH 4/9] Check the address is defined outside vector region.
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2690b447c9dc4b..1d61bc30d0e1d8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9355,7 +9355,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
m_VPValue(), m_VPValue())) ||
(isa<VPReplicateRecipe>(&User) &&
cast<VPReplicateRecipe>(&User)->getUnderlyingValue() ==
- IntermediateStore);
+ IntermediateStore &&
+ cast<VPReplicateRecipe>(&User)
+ ->getOperand(1)
+ ->isDefinedOutsideVectorRegions());
});
// Adjust AnyOf reductions; replace the reduction phi for the selected value
>From ce3c2cab6a96746443289737f03311eee006b77d Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 23 Sep 2024 20:57:39 +0800
Subject: [PATCH 5/9] Update llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Refine the update lambda of ComputeReductionResult.
Co-authored-by: Florian Hahn <flo at fhahn.com>
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 1d61bc30d0e1d8..d61228c8d1864e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9349,17 +9349,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
auto *FinalReductionResult = new VPInstruction(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
+ auto *FinalReductionResult = new VPInstruction(
+ VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ // Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
- FinalReductionResult, [IntermediateStore](VPUser &User, unsigned) {
- return match(&User, m_Binary<VPInstruction::ExtractFromEnd>(
- m_VPValue(), m_VPValue())) ||
- (isa<VPReplicateRecipe>(&User) &&
- cast<VPReplicateRecipe>(&User)->getUnderlyingValue() ==
- IntermediateStore &&
- cast<VPReplicateRecipe>(&User)
- ->getOperand(1)
- ->isDefinedOutsideVectorRegions());
+ FinalReductionResult, [](VPUser &User, unsigned) {
+ auto *Parent = cast<VPRecipeBase>(&User)->getParent();
+ return Parent && !Parent->getParent();
});
+ FinalReductionResult->insertBefore(*MiddleVPBB, IP);
// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
>From f4bbf952b6605c1484bde1b69f86c12dba199c08 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 23 Sep 2024 06:26:11 -0700
Subject: [PATCH 6/9] Fix the suggestion code
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d61228c8d1864e..5a8d971bd1d162 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9281,7 +9281,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
continue;
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- StoreInst *IntermediateStore = RdxDesc.IntermediateStore;
// If tail is folded by masking, introduce selects between the phi
// and the live-out instruction of each reduction, at the beginning of the
// dedicated latch block.
@@ -9348,13 +9347,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// also modeled in VPlan.
auto *FinalReductionResult = new VPInstruction(
VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
- FinalReductionResult->insertBefore(*MiddleVPBB, IP);
- auto *FinalReductionResult = new VPInstruction(
- VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
FinalReductionResult, [](VPUser &User, unsigned) {
- auto *Parent = cast<VPRecipeBase>(&User)->getParent();
+ auto *Parent = cast<VPRecipeBase>(&User)->getParent();
return Parent && !Parent->getParent();
});
FinalReductionResult->insertBefore(*MiddleVPBB, IP);
>From 1fa2c6a5169e458dac65e3074ff3fd9ef82e56a5 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Mon, 23 Sep 2024 06:54:08 -0700
Subject: [PATCH 7/9] Rebase and update
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5a8d971bd1d162..09e029ea205c52 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2373,14 +2373,12 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// End if-block.
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
- assert((Parent ||
- (RepRecipe->getOpcode() == Instruction::Store &&
- isa<VPInstruction>(RepRecipe->getOperand(0)) &&
- cast<VPInstruction>(RepRecipe->getOperand(0))->isVectorToScalar() &&
- RepRecipe->getOperand(1)->isDefinedOutsideLoopRegions())) &&
- "Expected a recipe either in a region, or a uniform store with a "
- "vector-to-scalar stored value and an address defined outside "
- "vectorized region.");
+ assert((Parent || all_of(RepRecipe->operands(),
+ [](VPValue *Op) {
+ return Op->isDefinedOutsideLoopRegions();
+ })) &&
+ "Expected a recipe is either within a region or all of its operands "
+ "are defined outside the vectorized region.");
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
>From 76d4dde1b0a9a7a6c0184180ad02f77db23c7447 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 26 Sep 2024 00:09:55 -0700
Subject: [PATCH 8/9] Updated comments
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 09e029ea205c52..dc2135a1bcfe70 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8957,12 +8957,13 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
Operands = {OpRange.begin(), OpRange.end()};
}
- // Invariant stores inside loop will be deleted and a single store
- // with the final reduction value will be added to the exit block
+ // The stores with invariant address inside the loop will be deleted, and
+ // in the exit block, a uniform store recipe will be created for the final
+ // invariant store of the reduction.
StoreInst *SI;
if ((SI = dyn_cast<StoreInst>(&I)) &&
Legal->isInvariantAddressOfReduction(SI->getPointerOperand())) {
- // Only create recipe for the last intermediate store of the reduction.
+ // Only create recipe for the final invariant store of the reduction.
if (!Legal->isInvariantStoreOfReduction(SI))
continue;
auto *Recipe = new VPReplicateRecipe(
>From a3a6614c608013bea8c235c710139583fa9e8e8e Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 26 Sep 2024 01:07:30 -0700
Subject: [PATCH 9/9] Update assertion
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index dc2135a1bcfe70..08e21a9fd6729f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9141,6 +9141,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
using namespace VPlanPatternMatch;
VPRegionBlock *VectorLoopRegion = Plan->getVectorLoopRegion();
VPBasicBlock *Header = VectorLoopRegion->getEntryBasicBlock();
+ VPBasicBlock *MiddleVPBB =
+ cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
for (VPRecipeBase &R : Header->phis()) {
auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
@@ -9159,13 +9161,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
for (VPUser *U : Cur->users()) {
auto *UserRecipe = cast<VPSingleDefRecipe>(U);
if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
- assert((match(U, m_Binary<VPInstruction::ExtractFromEnd>(
- m_VPValue(), m_VPValue())) ||
- (isa<VPReplicateRecipe>(U) &&
- cast<VPReplicateRecipe>(U)->getUnderlyingValue() ==
- RdxDesc.IntermediateStore)) &&
- "U must be either an ExtractFromEnd VPInstruction or a "
- "uniform store sourced from the intermediate store.");
+ assert(UserRecipe->getParent() == MiddleVPBB &&
+ "U must be either in the loop region or the middle block.");
continue;
}
Worklist.insert(UserRecipe);
@@ -9270,8 +9267,6 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
}
VPBasicBlock *LatchVPBB = VectorLoopRegion->getExitingBasicBlock();
Builder.setInsertPoint(&*LatchVPBB->begin());
- VPBasicBlock *MiddleVPBB =
- cast<VPBasicBlock>(VectorLoopRegion->getSingleSuccessor());
VPBasicBlock::iterator IP = MiddleVPBB->getFirstNonPhi();
for (VPRecipeBase &R :
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
More information about the llvm-commits mailing list