[llvm] [VPlan] Only store RecurKind + FastMathFlags in VPReductionRecipe. NFCI (PR #131300)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 10:02:57 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/131300
>From 4ac2a490665ecad373b23076af4230d715e3943e Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 15 Dec 2024 23:04:37 -0800
Subject: [PATCH 01/12] [VPlan] Passing non-null instruction when creating
VPReductionRecipe in unittest.
---
.../Transforms/Vectorize/VPlanTest.cpp | 20 +++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index f9a85869e3142..da6c490d7af6a 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1165,22 +1165,27 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
}
{
+ auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
+ PoisonValue::get(Int32));
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
+ VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
EXPECT_FALSE(Recipe.mayHaveSideEffects());
EXPECT_FALSE(Recipe.mayReadFromMemory());
EXPECT_FALSE(Recipe.mayWriteToMemory());
EXPECT_FALSE(Recipe.mayReadOrWriteMemory());
+ delete Add;
}
{
+ auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
+ PoisonValue::get(Int32));
VPValue *ChainOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
+ VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPValue *EVL = Plan.getOrAddLiveIn(ConstantInt::get(Int32, 4));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
@@ -1188,6 +1193,7 @@ TEST_F(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) {
EXPECT_FALSE(EVLRecipe.mayReadFromMemory());
EXPECT_FALSE(EVLRecipe.mayWriteToMemory());
EXPECT_FALSE(EVLRecipe.mayReadOrWriteMemory());
+ delete Add;
}
{
@@ -1529,28 +1535,34 @@ TEST_F(VPRecipeTest, dumpRecipeUnnamedVPValuesNotInPlanOrBlock) {
TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
+ auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
+ PoisonValue::get(Int32));
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
+ VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
+ delete Add;
}
TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
IntegerType *Int32 = IntegerType::get(C, 32);
+ auto *Add = BinaryOperator::CreateAdd(PoisonValue::get(Int32),
+ PoisonValue::get(Int32));
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurrenceDescriptor(), nullptr, ChainOp, CondOp,
+ VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
VecOp, false);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
VPRecipeBase *BaseR = &EVLRecipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
+ delete Add;
}
} // namespace
>From e3df7442d4cbd7000cc7bab95ff60cfe68bf95e5 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 11 Mar 2025 19:27:51 -0700
Subject: [PATCH 02/12] [VPlan] Change parent of VPReductionRecipe to
VPRecipeWithIRFlags. NFC
This patch changes the parent of VPReductionRecipe from
VPSingleDefRecipe to VPRecipeWithIRFlags, and prints/gets/controls the
flags through VPRecipeWithIRFlags. This removes the dependency on the
underlying instruction.
This patch also adds a new function `setFastMathFlags()` to
VPRecipeWithIRFlags because the entire reduction chain may contain
multiple instructions, and the underlying instruction may not contain
the corresponding flags for this reduction.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 ++---
llvm/lib/Transforms/Vectorize/VPlan.h | 27 +++++++++++++------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 24 ++++++++++-------
3 files changed, 36 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 0e38ccc565ea2..507f7d55534a0 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9797,9 +9797,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);
- auto *RedRecipe = new VPReductionRecipe(
- RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
- CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
+ auto *RedRecipe =
+ new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
+ CondOp, CM.useOrderedReductions(RdxDesc));
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index f2ca33c581433..424e90e4ba505 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -711,6 +711,8 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
+ R->getVPDefID() == VPRecipeBase::VPReductionSC ||
+ R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
@@ -786,6 +788,12 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
}
}
+ /// Set fast-math flags for this recipe.
+ void setFastMathFlags(FastMathFlags FMFs) {
+ OpType = OperationType::FPMathOp;
+ this->FMFs = FMFs;
+ }
+
CmpInst::Predicate getPredicate() const {
assert(OpType == OperationType::Cmp &&
"recipe doesn't have a compare predicate");
@@ -2236,7 +2244,7 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// A recipe to represent inloop reduction operations, performing a reduction on
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
-class VPReductionRecipe : public VPSingleDefRecipe {
+class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The recurrence decriptor for the reduction in question.
const RecurrenceDescriptor &RdxDesc;
bool IsOrdered;
@@ -2246,29 +2254,32 @@ class VPReductionRecipe : public VPSingleDefRecipe {
protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
- VPValue *CondOp, bool IsOrdered, DebugLoc DL)
- : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
+ VPValue *CondOp, bool IsOrdered)
+ : VPRecipeWithIRFlags(SC, Operands, *I), RdxDesc(R),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
}
+ // The inloop reduction may across multiple scalar instruction and the
+ // underlying instruction may not contains the corresponding flags. Set the
+ // flags explicit from the redurrence descriptor.
+ setFastMathFlags(R.getFastMathFlags());
}
public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL = {})
+ bool IsOrdered)
: VPReductionRecipe(VPDef::VPReductionSC, R, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered, DL) {}
+ IsOrdered) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
- getVecOp(), getCondOp(), IsOrdered,
- getDebugLoc());
+ getVecOp(), getCondOp(), IsOrdered);
}
static inline bool classof(const VPRecipeBase *R) {
@@ -2323,7 +2334,7 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
- R.isOrdered(), R.getDebugLoc()) {}
+ R.isOrdered()) {}
~VPReductionEVLRecipe() override = default;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1b6894376f73b..fe2ef7395e4c1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2290,7 +2290,8 @@ void VPReductionRecipe::execute(VPTransformState &State) {
"In-loop AnyOf reductions aren't currently supported");
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
- State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+ if (hasFastMathFlags())
+ State.Builder.setFastMathFlags(getFastMathFlags());
State.setDebugLocFrom(getDebugLoc());
Value *NewVecOp = State.get(getVecOp());
if (VPValue *Cond = getCondOp()) {
@@ -2337,7 +2338,8 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
- Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
+ if (hasFastMathFlags())
+ Builder.setFastMathFlags(getFastMathFlags());
RecurKind Kind = RdxDesc.getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
@@ -2374,6 +2376,8 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
Type *ElementTy = Ctx.Types.inferScalarType(this);
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
unsigned Opcode = RdxDesc.getOpcode();
+ FastMathFlags FMFs =
+ hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
// TODO: Support any-of and in-loop reductions.
assert(
@@ -2393,12 +2397,12 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
- return Cost + Ctx.TTI.getMinMaxReductionCost(
- Id, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
+ return Cost +
+ Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
}
- return Cost + Ctx.TTI.getArithmeticReductionCost(
- Opcode, VectorTy, RdxDesc.getFastMathFlags(), Ctx.CostKind);
+ return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs,
+ Ctx.CostKind);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2409,8 +2413,8 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- if (isa<FPMathOperator>(getUnderlyingInstr()))
- O << getUnderlyingInstr()->getFastMathFlags();
+ if (isa_and_nonnull<FPMathOperator>(getUnderlyingValue()))
+ printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
@@ -2431,8 +2435,8 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- if (isa<FPMathOperator>(getUnderlyingInstr()))
- O << getUnderlyingInstr()->getFastMathFlags();
+ if (isa_and_nonnull<FPMathOperator>(getUnderlyingValue()))
+ printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
>From b547b9816fef5b7c7b49a67e9a9a7513208586da Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 11 Mar 2025 20:00:34 -0700
Subject: [PATCH 03/12] !fixup, formatting.
---
llvm/unittests/Transforms/Vectorize/VPlanTest.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
index da6c490d7af6a..ca1e48290f25b 100644
--- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
+++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp
@@ -1540,8 +1540,8 @@ TEST_F(VPRecipeTest, CastVPReductionRecipeToVPUser) {
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
- VecOp, false);
+ VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
+ false);
EXPECT_TRUE(isa<VPUser>(&Recipe));
VPRecipeBase *BaseR = &Recipe;
EXPECT_TRUE(isa<VPUser>(BaseR));
@@ -1555,8 +1555,8 @@ TEST_F(VPRecipeTest, CastVPReductionEVLRecipeToVPUser) {
VPValue *ChainOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 1));
VPValue *VecOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 2));
VPValue *CondOp = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3));
- VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp,
- VecOp, false);
+ VPReductionRecipe Recipe(RecurrenceDescriptor(), Add, ChainOp, CondOp, VecOp,
+ false);
VPValue *EVL = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 0));
VPReductionEVLRecipe EVLRecipe(Recipe, *EVL, CondOp);
EXPECT_TRUE(isa<VPUser>(&EVLRecipe));
>From a7120a1c478de67e01f7f33d79a63854e1a12eab Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 14 Mar 2025 05:54:29 -0700
Subject: [PATCH 04/12] !fixup, independent from #130880.
---
llvm/test/Transforms/LoopVectorize/vplan-printing.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 207cb8b4a0d30..4493257e8aef0 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -167,7 +167,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
@@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
>From 4efc26bf5828b0ce0be73268c8f004834fefa058 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 14 Mar 2025 06:25:35 -0700
Subject: [PATCH 05/12] !fixup, Address comments.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 5 +++--
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++----
.../LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll | 2 +-
3 files changed, 6 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 424e90e4ba505..ebbc1b7acd36d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -790,7 +790,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
/// Set fast-math flags for this recipe.
void setFastMathFlags(FastMathFlags FMFs) {
- OpType = OperationType::FPMathOp;
+ assert(OpType == OperationType::FPMathOp);
this->FMFs = FMFs;
}
@@ -2264,7 +2264,8 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
// The inloop reduction may across multiple scalar instruction and the
// underlying instruction may not contains the corresponding flags. Set the
// flags explicit from the redurrence descriptor.
- setFastMathFlags(R.getFastMathFlags());
+ if (isa<FPMathOperator>(I))
+ setFastMathFlags(R.getFastMathFlags());
}
public:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index fe2ef7395e4c1..ee30f024da0c0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2413,8 +2413,7 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- if (isa_and_nonnull<FPMathOperator>(getUnderlyingValue()))
- printFlags(O);
+ printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
@@ -2435,8 +2434,7 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- if (isa_and_nonnull<FPMathOperator>(getUnderlyingValue()))
- printFlags(O);
+ printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 3594b3f047363..f1ca934cc133a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
+; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + nsw vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
>From 9fcb50545fe685ea39dd0366f0ca53eddcc5f308 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 14 Mar 2025 07:11:00 -0700
Subject: [PATCH 06/12] !fixup, Update comments.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ebbc1b7acd36d..36574a28607da 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2261,9 +2261,10 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
IsConditional = true;
addOperand(CondOp);
}
- // The inloop reduction may across multiple scalar instruction and the
- // underlying instruction may not contains the corresponding flags. Set the
- // flags explicit from the redurrence descriptor.
+
+ // In-loop reductions may comprise of multiple scalar instructions, and the
+ // underlying instruction may not contain the same flags as the
+ // recurrence descriptor, so set the flags explicitly.
if (isa<FPMathOperator>(I))
setFastMathFlags(R.getFastMathFlags());
}
>From 281514aee414126ed161847fe64a45a31dd9d180 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 14 Mar 2025 08:41:57 -0700
Subject: [PATCH 07/12] !fixup, Add description of the assertion.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 36574a28607da..9ac6e3e003f2e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -790,7 +790,8 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
/// Set fast-math flags for this recipe.
void setFastMathFlags(FastMathFlags FMFs) {
- assert(OpType == OperationType::FPMathOp);
+ assert(OpType == OperationType::FPMathOp &&
+ "We should only set the FastMathFlags when the recipes is FPFathOP");
this->FMFs = FMFs;
}
>From 44e1e6384da6c41710914f162a179007cbda6a05 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Fri, 14 Mar 2025 17:23:57 -0700
Subject: [PATCH 08/12] !fixup, Set FMF for VPReductionRecipe in ctor directly.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 6 ++--
llvm/lib/Transforms/Vectorize/VPlan.h | 36 ++++++++-----------
.../RISCV/vplan-vp-intrinsics-reduction.ll | 4 +--
3 files changed, 19 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 507f7d55534a0..0e38ccc565ea2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9797,9 +9797,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (CM.blockNeedsPredicationForAnyReason(BB))
CondOp = RecipeBuilder.getBlockInMask(BB);
- auto *RedRecipe =
- new VPReductionRecipe(RdxDesc, CurrentLinkI, PreviousLink, VecOp,
- CondOp, CM.useOrderedReductions(RdxDesc));
+ auto *RedRecipe = new VPReductionRecipe(
+ RdxDesc, CurrentLinkI, PreviousLink, VecOp, CondOp,
+ CM.useOrderedReductions(RdxDesc), CurrentLinkI->getDebugLoc());
// Append the recipe to the end of the VPBasicBlock because we need to
// ensure that it comes after all of it's inputs, including CondOp.
// Delete CurrentLink as it will be invalid if its operand is replaced
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 9ac6e3e003f2e..bd27646032ac8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -788,13 +788,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
}
}
- /// Set fast-math flags for this recipe.
- void setFastMathFlags(FastMathFlags FMFs) {
- assert(OpType == OperationType::FPMathOp &&
- "We should only set the FastMathFlags when the recipes is FPFathOP");
- this->FMFs = FMFs;
- }
-
CmpInst::Predicate getPredicate() const {
assert(OpType == OperationType::Cmp &&
"recipe doesn't have a compare predicate");
@@ -2254,35 +2247,32 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
- Instruction *I, ArrayRef<VPValue *> Operands,
- VPValue *CondOp, bool IsOrdered)
- : VPRecipeWithIRFlags(SC, Operands, *I), RdxDesc(R),
+ FastMathFlags FMF, Instruction *I,
+ ArrayRef<VPValue *> Operands, VPValue *CondOp,
+ bool IsOrdered, DebugLoc DL)
+ : VPRecipeWithIRFlags(SC, Operands, FMF, DL), RdxDesc(R),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
}
-
- // In-loop reductions may comprise of multiple scalar instructions, and the
- // underlying instruction may not contain the same flags as the
- // recurrence descriptor, so set the flags explicitly.
- if (isa<FPMathOperator>(I))
- setFastMathFlags(R.getFastMathFlags());
+ setUnderlyingValue(I);
}
public:
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
- bool IsOrdered)
- : VPReductionRecipe(VPDef::VPReductionSC, R, I,
+ bool IsOrdered, DebugLoc DL = {})
+ : VPReductionRecipe(VPDef::VPReductionSC, R, R.getFastMathFlags(), I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
- IsOrdered) {}
+ IsOrdered, DL) {}
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
- getVecOp(), getCondOp(), IsOrdered);
+ getVecOp(), getCondOp(), IsOrdered,
+ getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
@@ -2332,12 +2322,14 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
class VPReductionEVLRecipe : public VPReductionRecipe {
public:
- VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
+ VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
+ DebugLoc DL = {})
: VPReductionRecipe(
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
+ R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
- R.isOrdered()) {}
+ R.isOrdered(), DL) {}
~VPReductionEVLRecipe() override = default;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index f1ca934cc133a..2be823a8ef9c5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + nsw vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
+; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -196,7 +196,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; NO-VP-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
-; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reduce.add (ir<[[LD1]]>)
+; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn reduce.add (ir<[[LD1]]>)
; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
; NO-VP-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; NO-VP-INLOOP-NEXT: No successors
>From 90e3b5c1dc1d051e8ae8a296e332afd066539180 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 16 Mar 2025 17:05:11 -0700
Subject: [PATCH 09/12] !fixup, Remove unneeded checks and check whether
underlyingInstr is FPMO or not.
---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 15 +++++++--------
.../RISCV/vplan-vp-intrinsics-reduction.ll | 4 ++--
2 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index ee30f024da0c0..47ef6de3be856 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2290,8 +2290,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
"In-loop AnyOf reductions aren't currently supported");
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
- if (hasFastMathFlags())
- State.Builder.setFastMathFlags(getFastMathFlags());
+ State.Builder.setFastMathFlags(getFastMathFlags());
State.setDebugLocFrom(getDebugLoc());
Value *NewVecOp = State.get(getVecOp());
if (VPValue *Cond = getCondOp()) {
@@ -2338,8 +2337,7 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
- if (hasFastMathFlags())
- Builder.setFastMathFlags(getFastMathFlags());
+ Builder.setFastMathFlags(getFastMathFlags());
RecurKind Kind = RdxDesc.getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
@@ -2376,8 +2374,7 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
Type *ElementTy = Ctx.Types.inferScalarType(this);
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
unsigned Opcode = RdxDesc.getOpcode();
- FastMathFlags FMFs =
- hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
+ FastMathFlags FMFs = getFastMathFlags();
// TODO: Support any-of and in-loop reductions.
assert(
@@ -2413,7 +2410,8 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- printFlags(O);
+ if (isa<FPMathOperator>(getUnderlyingInstr()))
+ printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
@@ -2434,7 +2432,8 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- printFlags(O);
+ if (isa<FPMathOperator>(getUnderlyingInstr()))
+ printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
index 2be823a8ef9c5..3594b3f047363 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics-reduction.ll
@@ -101,7 +101,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
+; IF-EVL-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + vp.reduce.add (ir<[[LD1]]>, vp<[[EVL]]>)
; IF-EVL-INLOOP-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -196,7 +196,7 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; NO-VP-INLOOP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; NO-VP-INLOOP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>
-; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reassoc nsz arcp contract afn reduce.add (ir<[[LD1]]>)
+; NO-VP-INLOOP-NEXT: REDUCE ir<[[ADD:%.+]]> = ir<[[RDX_PHI]]> + reduce.add (ir<[[LD1]]>)
; NO-VP-INLOOP-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]>
; NO-VP-INLOOP-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
; NO-VP-INLOOP-NEXT: No successors
>From dbaec21f618e4d773e7e7c9a81af3706cdd870b1 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Sun, 16 Mar 2025 21:34:18 -0700
Subject: [PATCH 10/12] !fixup, Remove FMF from recipes after add checks of
underlying instruction.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index bd27646032ac8..b28c1aad9abb2 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2247,10 +2247,9 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
protected:
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
- FastMathFlags FMF, Instruction *I,
- ArrayRef<VPValue *> Operands, VPValue *CondOp,
- bool IsOrdered, DebugLoc DL)
- : VPRecipeWithIRFlags(SC, Operands, FMF, DL), RdxDesc(R),
+ Instruction *I, ArrayRef<VPValue *> Operands,
+ VPValue *CondOp, bool IsOrdered, DebugLoc DL)
+ : VPRecipeWithIRFlags(SC, Operands, R.getFastMathFlags(), DL), RdxDesc(R),
IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
@@ -2263,7 +2262,7 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
- : VPReductionRecipe(VPDef::VPReductionSC, R, R.getFastMathFlags(), I,
+ : VPReductionRecipe(VPDef::VPReductionSC, R, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
@@ -2326,7 +2325,6 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
DebugLoc DL = {})
: VPReductionRecipe(
VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
- R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
>From 8bfa38a1584d09c1efb78891963ee01c14ecea1f Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Mon, 17 Mar 2025 05:14:28 -0700
Subject: [PATCH 11/12] !fixup, Only set FMF for FMOperators and update after
merge main.
---
llvm/lib/Transforms/Vectorize/VPlan.h | 8 ++++++--
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 6 ++----
llvm/test/Transforms/LoopVectorize/vplan-printing.ll | 4 ++--
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index b28c1aad9abb2..ba24143e0b5b6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2249,8 +2249,12 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
Instruction *I, ArrayRef<VPValue *> Operands,
VPValue *CondOp, bool IsOrdered, DebugLoc DL)
- : VPRecipeWithIRFlags(SC, Operands, R.getFastMathFlags(), DL), RdxDesc(R),
- IsOrdered(IsOrdered) {
+ : VPRecipeWithIRFlags(SC, Operands,
+ isa_and_nonnull<FPMathOperator>(I)
+ ? R.getFastMathFlags()
+ : FastMathFlags(),
+ DL),
+ RdxDesc(R), IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 47ef6de3be856..d315dbe9b4170 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2410,8 +2410,7 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- if (isa<FPMathOperator>(getUnderlyingInstr()))
- printFlags(O);
+ printFlags(O);
O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
@@ -2432,8 +2431,7 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
- if (isa<FPMathOperator>(getUnderlyingInstr()))
- printFlags(O);
+ printFlags(O);
O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 4493257e8aef0..207cb8b4a0d30 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -167,7 +167,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>)
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
@@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + reassoc nnan ninf nsz arcp contract afn reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
>From c14193addb5e34b898da59ab2460b2a5122ce328 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 14 Mar 2025 18:08:29 +0800
Subject: [PATCH 12/12] [VPlan] Only store RecurKind + FastMathFlags in
VPReductionRecipe. NFCI
VPReductionRecipes take a RecurrenceDescriptor, but only use the RecurKind and FastMathFlags in it when executing. This patch makes the recipe more lightweight by stripping it to only take the latter, which allows it to inherit from VPRecipeWithIRFlags.
This also allows us to remove createReduction in LoopUtils since it now only has one user in VPInstruction::ComputeReductionResult.
The motivation for this is to simplify an upcoming patch to support in-loop AnyOf reductions. For an in-loop AnyOf reduction we want to create an Or reduction, and by using RecurKind we can create an arbitrary reduction without needing a full RecurrenceDescriptor.
---
.../include/llvm/Transforms/Utils/LoopUtils.h | 12 ++--
llvm/lib/Transforms/Utils/LoopUtils.cpp | 17 ++----
llvm/lib/Transforms/Vectorize/VPlan.h | 48 +++++++++-------
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 55 +++++++------------
.../LoopVectorize/vplan-printing.ll | 2 +-
5 files changed, 60 insertions(+), 74 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 1818ee03d2ec8..3ad7b8f17856c 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -411,8 +411,8 @@ Value *createSimpleReduction(IRBuilderBase &B, Value *Src,
RecurKind RdxKind);
/// Overloaded function to generate vector-predication intrinsics for
/// reduction.
-Value *createSimpleReduction(VectorBuilder &VB, Value *Src,
- const RecurrenceDescriptor &Desc);
+Value *createSimpleReduction(VectorBuilder &VB, Value *Src, RecurKind RdxKind,
+ FastMathFlags FMFs);
/// Create a reduction of the given vector \p Src for a reduction of the
/// kind RecurKind::IAnyOf or RecurKind::FAnyOf. The reduction operation is
@@ -428,14 +428,12 @@ Value *createFindLastIVReduction(IRBuilderBase &B, Value *Src,
const RecurrenceDescriptor &Desc);
/// Create an ordered reduction intrinsic using the given recurrence
-/// descriptor \p Desc.
-Value *createOrderedReduction(IRBuilderBase &B,
- const RecurrenceDescriptor &Desc, Value *Src,
+/// kind \p Kind.
+Value *createOrderedReduction(IRBuilderBase &B, RecurKind Kind, Value *Src,
Value *Start);
/// Overloaded function to generate vector-predication intrinsics for ordered
/// reduction.
-Value *createOrderedReduction(VectorBuilder &VB,
- const RecurrenceDescriptor &Desc, Value *Src,
+Value *createOrderedReduction(VectorBuilder &VB, RecurKind Kind, Value *Src,
Value *Start);
/// Get the intersection (logical and) of all of the potential IR flags
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 185af8631454a..41f43a24e19e6 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1333,24 +1333,21 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src,
}
Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
- const RecurrenceDescriptor &Desc) {
- RecurKind Kind = Desc.getRecurrenceKind();
+ RecurKind Kind, FastMathFlags FMFs) {
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindLastIVRecurrenceKind(Kind) &&
"AnyOf or FindLastIV reductions are not supported.");
Intrinsic::ID Id = getReductionIntrinsicID(Kind);
auto *SrcTy = cast<VectorType>(Src->getType());
Type *SrcEltTy = SrcTy->getElementType();
- Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
+ Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, FMFs);
Value *Ops[] = {Iden, Src};
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
}
-Value *llvm::createOrderedReduction(IRBuilderBase &B,
- const RecurrenceDescriptor &Desc,
+Value *llvm::createOrderedReduction(IRBuilderBase &B, RecurKind Kind,
Value *Src, Value *Start) {
- assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
- Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
+ assert((Kind == RecurKind::FAdd || Kind == RecurKind::FMulAdd) &&
"Unexpected reduction kind");
assert(Src->getType()->isVectorTy() && "Expected a vector type");
assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
@@ -1358,11 +1355,9 @@ Value *llvm::createOrderedReduction(IRBuilderBase &B,
return B.CreateFAddReduce(Start, Src);
}
-Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
- const RecurrenceDescriptor &Desc,
+Value *llvm::createOrderedReduction(VectorBuilder &VBuilder, RecurKind Kind,
Value *Src, Value *Start) {
- assert((Desc.getRecurrenceKind() == RecurKind::FAdd ||
- Desc.getRecurrenceKind() == RecurKind::FMulAdd) &&
+ assert((Kind == RecurKind::FAdd || Kind == RecurKind::FMulAdd) &&
"Unexpected reduction kind");
assert(Src->getType()->isVectorTy() && "Expected a vector type");
assert(!Start->getType()->isVectorTy() && "Expected a scalar type");
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index ba24143e0b5b6..d0745119f3403 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2239,22 +2239,21 @@ class VPInterleaveRecipe : public VPRecipeBase {
/// a vector operand into a scalar value, and adding the result to a chain.
/// The Operands are {ChainOp, VecOp, [Condition]}.
class VPReductionRecipe : public VPRecipeWithIRFlags {
- /// The recurrence decriptor for the reduction in question.
- const RecurrenceDescriptor &RdxDesc;
+ /// The recurrence kind for the reduction in question.
+ RecurKind RdxKind;
bool IsOrdered;
/// Whether the reduction is conditional.
bool IsConditional = false;
protected:
- VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
- Instruction *I, ArrayRef<VPValue *> Operands,
- VPValue *CondOp, bool IsOrdered, DebugLoc DL)
- : VPRecipeWithIRFlags(SC, Operands,
- isa_and_nonnull<FPMathOperator>(I)
- ? R.getFastMathFlags()
- : FastMathFlags(),
- DL),
- RdxDesc(R), IsOrdered(IsOrdered) {
+ VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
+ FastMathFlags FMFs, Instruction *I,
+ ArrayRef<VPValue *> Operands, VPValue *CondOp,
+ bool IsOrdered, DebugLoc DL)
+ : VPRecipeWithIRFlags(
+ SC, Operands,
+ isa_and_nonnull<FPMathOperator>(I) ? FMFs : FastMathFlags(), DL),
+ RdxKind(RdxKind), IsOrdered(IsOrdered) {
if (CondOp) {
IsConditional = true;
addOperand(CondOp);
@@ -2263,19 +2262,25 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
}
public:
- VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
+ VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I,
VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
bool IsOrdered, DebugLoc DL = {})
- : VPReductionRecipe(VPDef::VPReductionSC, R, I,
+ : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
IsOrdered, DL) {}
+ VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I,
+ VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
+ bool IsOrdered, DebugLoc DL = {})
+ : VPReductionRecipe(R.getRecurrenceKind(), R.getFastMathFlags(), I,
+ ChainOp, VecOp, CondOp, IsOrdered, DL) {}
+
~VPReductionRecipe() override = default;
VPReductionRecipe *clone() override {
- return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
- getVecOp(), getCondOp(), IsOrdered,
- getDebugLoc());
+ return new VPReductionRecipe(RdxKind, getFastMathFlags(),
+ getUnderlyingInstr(), getChainOp(), getVecOp(),
+ getCondOp(), IsOrdered, getDebugLoc());
}
static inline bool classof(const VPRecipeBase *R) {
@@ -2301,9 +2306,11 @@ class VPReductionRecipe : public VPRecipeWithIRFlags {
VPSlotTracker &SlotTracker) const override;
#endif
- /// Return the recurrence decriptor for the in-loop reduction.
- const RecurrenceDescriptor &getRecurrenceDescriptor() const {
- return RdxDesc;
+ /// Return the recurrence kind for the in-loop reduction.
+ RecurKind getRecurrenceKind() const { return RdxKind; }
+ /// Return the opcode for the recurrence for the in-loop reduction.
+ unsigned getOpcode() const {
+ return RecurrenceDescriptor::getOpcode(RdxKind);
}
/// Return true if the in-loop reduction is ordered.
bool isOrdered() const { return IsOrdered; };
@@ -2328,7 +2335,8 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp,
DebugLoc DL = {})
: VPReductionRecipe(
- VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
+ VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
+ R.getFastMathFlags(),
cast_or_null<Instruction>(R.getUnderlyingValue()),
ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
R.isOrdered(), DL) {}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index d315dbe9b4170..4a45a14f75c8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2285,7 +2285,7 @@ void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
- RecurKind Kind = RdxDesc.getRecurrenceKind();
+ RecurKind Kind = getRecurrenceKind();
assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
"In-loop AnyOf reductions aren't currently supported");
// Propagate the fast-math flags carried by the underlying instruction.
@@ -2298,8 +2298,7 @@ void VPReductionRecipe::execute(VPTransformState &State) {
VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();
- Value *Start =
- getRecurrenceIdentity(Kind, ElementTy, RdxDesc.getFastMathFlags());
+ Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
if (State.VF.isVector())
Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);
@@ -2311,21 +2310,20 @@ void VPReductionRecipe::execute(VPTransformState &State) {
if (IsOrdered) {
if (State.VF.isVector())
NewRed =
- createOrderedReduction(State.Builder, RdxDesc, NewVecOp, PrevInChain);
+ createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
else
- NewRed = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)RdxDesc.getOpcode(), PrevInChain, NewVecOp);
+ NewRed = State.Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(),
+ PrevInChain, NewVecOp);
PrevInChain = NewRed;
NextInChain = NewRed;
} else {
PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
- NextInChain = createMinMaxOp(State.Builder, RdxDesc.getRecurrenceKind(),
- NewRed, PrevInChain);
+ NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
else
NextInChain = State.Builder.CreateBinOp(
- (Instruction::BinaryOps)RdxDesc.getOpcode(), NewRed, PrevInChain);
+ (Instruction::BinaryOps)getOpcode(), NewRed, PrevInChain);
}
State.set(this, NextInChain, /*IsScalar*/ true);
}
@@ -2336,10 +2334,9 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
auto &Builder = State.Builder;
// Propagate the fast-math flags carried by the underlying instruction.
IRBuilderBase::FastMathFlagGuard FMFGuard(Builder);
- const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
Builder.setFastMathFlags(getFastMathFlags());
- RecurKind Kind = RdxDesc.getRecurrenceKind();
+ RecurKind Kind = getRecurrenceKind();
Value *Prev = State.get(getChainOp(), /*IsScalar*/ true);
Value *VecOp = State.get(getVecOp());
Value *EVL = State.get(getEVL(), VPLane(0));
@@ -2356,25 +2353,23 @@ void VPReductionEVLRecipe::execute(VPTransformState &State) {
Value *NewRed;
if (isOrdered()) {
- NewRed = createOrderedReduction(VBuilder, RdxDesc, VecOp, Prev);
+ NewRed = createOrderedReduction(VBuilder, Kind, VecOp, Prev);
} else {
- NewRed = createSimpleReduction(VBuilder, VecOp, RdxDesc);
+ NewRed = createSimpleReduction(VBuilder, VecOp, Kind, getFastMathFlags());
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
NewRed = createMinMaxOp(Builder, Kind, NewRed, Prev);
else
- NewRed = Builder.CreateBinOp((Instruction::BinaryOps)RdxDesc.getOpcode(),
- NewRed, Prev);
+ NewRed = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), NewRed,
+ Prev);
}
State.set(this, NewRed, /*IsScalar*/ true);
}
InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
- RecurKind RdxKind = RdxDesc.getRecurrenceKind();
+ RecurKind RdxKind = getRecurrenceKind();
Type *ElementTy = Ctx.Types.inferScalarType(this);
auto *VectorTy = cast<VectorType>(toVectorTy(ElementTy, VF));
- unsigned Opcode = RdxDesc.getOpcode();
- FastMathFlags FMFs = getFastMathFlags();
// TODO: Support any-of and in-loop reductions.
assert(
@@ -2386,20 +2381,17 @@ InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
ForceTargetInstructionCost.getNumOccurrences() > 0) &&
"In-loop reduction not implemented in VPlan-based cost model currently.");
- assert(ElementTy->getTypeID() == RdxDesc.getRecurrenceType()->getTypeID() &&
- "Inferred type and recurrence type mismatch.");
-
// Cost = Reduction cost + BinOp cost
InstructionCost Cost =
- Ctx.TTI.getArithmeticInstrCost(Opcode, ElementTy, Ctx.CostKind);
+ Ctx.TTI.getArithmeticInstrCost(getOpcode(), ElementTy, Ctx.CostKind);
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RdxKind)) {
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
- return Cost +
- Ctx.TTI.getMinMaxReductionCost(Id, VectorTy, FMFs, Ctx.CostKind);
+ return Cost + Ctx.TTI.getMinMaxReductionCost(
+ Id, VectorTy, getFastMathFlags(), Ctx.CostKind);
}
- return Cost + Ctx.TTI.getArithmeticReductionCost(Opcode, VectorTy, FMFs,
- Ctx.CostKind);
+ return Cost + Ctx.TTI.getArithmeticReductionCost(
+ getOpcode(), VectorTy, getFastMathFlags(), Ctx.CostKind);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -2411,28 +2403,24 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
printFlags(O);
- O << " reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
+ O << " reduce." << Instruction::getOpcodeName(getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
if (isConditional()) {
O << ", ";
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
- if (RdxDesc.IntermediateStore)
- O << " (with final reduction value stored in invariant address sank "
- "outside of loop)";
}
void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
- const RecurrenceDescriptor &RdxDesc = getRecurrenceDescriptor();
O << Indent << "REDUCE ";
printAsOperand(O, SlotTracker);
O << " = ";
getChainOp()->printAsOperand(O, SlotTracker);
O << " +";
printFlags(O);
- O << " vp.reduce." << Instruction::getOpcodeName(RdxDesc.getOpcode()) << " (";
+ O << " vp.reduce." << Instruction::getOpcodeName(getOpcode()) << " (";
getVecOp()->printAsOperand(O, SlotTracker);
O << ", ";
getEVL()->printAsOperand(O, SlotTracker);
@@ -2441,9 +2429,6 @@ void VPReductionEVLRecipe::print(raw_ostream &O, const Twine &Indent,
getCondOp()->printAsOperand(O, SlotTracker);
}
O << ")";
- if (RdxDesc.IntermediateStore)
- O << " (with final reduction value stored in invariant address sank "
- "outside of loop)";
}
#endif
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
index 207cb8b4a0d30..9274915ff46a2 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -234,7 +234,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%y>, vp<[[IV]]>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%arrayidx>
; CHECK-NEXT: WIDEN ir<%lv> = load vp<[[VEC_PTR]]>
-; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>) (with final reduction value stored in invariant address sank outside of loop)
+; CHECK-NEXT: REDUCE ir<%red.next> = ir<%red> + fast reduce.fadd (ir<%lv>)
; CHECK-NEXT: EMIT vp<[[CAN_IV_NEXT]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
; CHECK-NEXT: No successors
More information about the llvm-commits
mailing list