[llvm-branch-commits] [llvm] [LV] Bundle sub reductions into VPExpressionRecipe (PR #147255)
Sam Tebbs via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jul 22 08:45:38 PDT 2025
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/147255
>From a439b53c4e068e1ecf6ee3988d344a60700b2bd2 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Mon, 30 Jun 2025 14:29:54 +0100
Subject: [PATCH 1/2] [LV] Bundle sub reductions into VPExpressionRecipe
This PR bundles sub reductions into the VPExpressionRecipe class and
adjusts the cost functions to take the negation into account.
---
.../llvm/Analysis/TargetTransformInfo.h | 4 +-
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 +-
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 3 +
llvm/lib/Analysis/TargetTransformInfo.cpp | 5 +-
.../AArch64/AArch64TargetTransformInfo.cpp | 7 +-
.../AArch64/AArch64TargetTransformInfo.h | 2 +-
.../lib/Target/ARM/ARMTargetTransformInfo.cpp | 7 +-
llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 1 +
.../Transforms/Vectorize/LoopVectorize.cpp | 6 +-
llvm/lib/Transforms/Vectorize/VPlan.h | 11 ++
.../lib/Transforms/Vectorize/VPlanRecipes.cpp | 35 ++++-
.../Transforms/Vectorize/VPlanTransforms.cpp | 33 ++--
.../Transforms/Vectorize/VectorCombine.cpp | 4 +-
.../vplan-printing-reductions.ll | 143 ++++++++++++++++++
14 files changed, 236 insertions(+), 27 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 98b793aace7a3..383e1d0ea2108 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1645,8 +1645,10 @@ class TargetTransformInfo {
/// extensions. This is the cost of as:
/// ResTy vecreduce.add(mul (A, B)).
/// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B)).
+ /// The multiply can optionally be negated, which signifies that it is a sub
+ /// reduction.
LLVM_ABI InstructionCost getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ bool IsUnsigned, Type *ResTy, VectorType *Ty, bool Negated,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// Calculate the cost of an extended reduction pattern, similar to
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ddc8a5eaffa94..3a7d75fd13dec 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -960,7 +960,7 @@ class TargetTransformInfoImplBase {
virtual InstructionCost
getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
- TTI::TargetCostKind CostKind) const {
+ bool Negated, TTI::TargetCostKind CostKind) const {
return 1;
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 1d7c41452f7d5..efe2e9e3c6d15 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -3188,7 +3188,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost
getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ bool Negated,
TTI::TargetCostKind CostKind) const override {
+ if (Negated)
+ return InstructionCost::getInvalid(CostKind);
// Without any native support, this is equivalent to the cost of
// vecreduce.add(mul(ext(Ty A), ext(Ty B))) or
// vecreduce.add(mul(A, B)).
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 8a470ebf85a16..2c4d424615ace 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1274,9 +1274,10 @@ InstructionCost TargetTransformInfo::getExtendedReductionCost(
}
InstructionCost TargetTransformInfo::getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ bool IsUnsigned, Type *ResTy, VectorType *Ty, bool Negated,
TTI::TargetCostKind CostKind) const {
- return TTIImpl->getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
+ return TTIImpl->getMulAccReductionCost(IsUnsigned, ResTy, Ty, Negated,
+ CostKind);
}
InstructionCost
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 56ff81ab178e8..7f9c02eebf971 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5324,8 +5324,10 @@ InstructionCost AArch64TTIImpl::getExtendedReductionCost(
InstructionCost
AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
- VectorType *VecTy,
+ VectorType *VecTy, bool Negated,
TTI::TargetCostKind CostKind) const {
+ if (Negated)
+ return InstructionCost::getInvalid(CostKind);
EVT VecVT = TLI->getValueType(DL, VecTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
@@ -5340,7 +5342,8 @@ AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
return LT.first + 2;
}
- return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, VecTy, CostKind);
+ return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, VecTy, Negated,
+ CostKind);
}
InstructionCost
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index b27eb2ef7a39d..9d52a00bd487a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -446,7 +446,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
TTI::TargetCostKind CostKind) const override;
InstructionCost getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ bool IsUnsigned, Type *ResTy, VectorType *Ty, bool Negated,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;
InstructionCost
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 6f37eca2b00a5..180377f68a375 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1883,8 +1883,10 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost(
InstructionCost
ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
- VectorType *ValTy,
+ VectorType *ValTy, bool Negated,
TTI::TargetCostKind CostKind) const {
+ if (Negated)
+ return InstructionCost::getInvalid(CostKind);
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
@@ -1905,7 +1907,8 @@ ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
}
- return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, ValTy, CostKind);
+ return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, ValTy, Negated,
+ CostKind);
}
InstructionCost
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 522c235a90a8f..b9593621c50f4 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -299,6 +299,7 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
TTI::TargetCostKind CostKind) const override;
InstructionCost
getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy,
+ bool Negated,
TTI::TargetCostKind CostKind) const override;
InstructionCost
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f7641c0219668..85b807b645ba2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5451,7 +5451,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
TTI::CastContextHint::None, CostKind, RedOp);
InstructionCost RedCost = TTI.getMulAccReductionCost(
- IsUnsigned, RdxDesc.getRecurrenceType(), ExtType, CostKind);
+ IsUnsigned, RdxDesc.getRecurrenceType(), ExtType, false, CostKind);
if (RedCost.isValid() &&
RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
@@ -5496,7 +5496,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
InstructionCost RedCost = TTI.getMulAccReductionCost(
- IsUnsigned, RdxDesc.getRecurrenceType(), ExtType, CostKind);
+ IsUnsigned, RdxDesc.getRecurrenceType(), ExtType, false, CostKind);
InstructionCost ExtraExtCost = 0;
if (Op0Ty != LargestOpTy || Op1Ty != LargestOpTy) {
Instruction *ExtraExtOp = (Op0Ty != LargestOpTy) ? Op0 : Op1;
@@ -5515,7 +5515,7 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
InstructionCost RedCost = TTI.getMulAccReductionCost(
- true, RdxDesc.getRecurrenceType(), VectorTy, CostKind);
+ true, RdxDesc.getRecurrenceType(), VectorTy, false, CostKind);
if (RedCost.isValid() && RedCost < MulCost + BaseCost)
return I == RetI ? RedCost : 0;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index db40ce2d20b81..26d66a247c55f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2797,6 +2797,12 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
/// vector operands, performing a reduction.add on the result, and adding
/// the scalar result to a chain.
MulAccReduction,
+ /// Represent an inloop multiply-accumulate reduction, multiplying the
+ /// extended vector operands, negating the multiplication, performing a
+ /// reduction.add
+ /// on the result, and adding
+ /// the scalar result to a chain.
+ ExtNegatedMulAccReduction,
};
/// Type of the expression.
@@ -2820,6 +2826,11 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
VPWidenRecipe *Mul, VPReductionRecipe *Red)
: VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
{Ext0, Ext1, Mul, Red}) {}
+ VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1,
+ VPWidenRecipe *Mul, VPWidenRecipe *Sub,
+ VPReductionRecipe *Red)
+ : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
+ {Ext0, Ext1, Mul, Sub, Red}) {}
~VPExpressionRecipe() override {
for (auto *R : reverse(ExpressionRecipes))
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 1fbc3f3e725ee..637f823cbf743 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2680,13 +2680,17 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
}
case ExpressionTypes::MulAccReduction:
- return Ctx.TTI.getMulAccReductionCost(false, RedTy, SrcVecTy, Ctx.CostKind);
+ return Ctx.TTI.getMulAccReductionCost(false, RedTy, SrcVecTy, false,
+ Ctx.CostKind);
- case ExpressionTypes::ExtMulAccReduction:
+ case ExpressionTypes::ExtNegatedMulAccReduction:
+ case ExpressionTypes::ExtMulAccReduction: {
+ bool Negated = ExpressionType == ExpressionTypes::ExtNegatedMulAccReduction;
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
- RedTy, SrcVecTy, Ctx.CostKind);
+ RedTy, SrcVecTy, Negated, Ctx.CostKind);
+ }
}
llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
}
@@ -2733,6 +2737,31 @@ void VPExpressionRecipe::print(raw_ostream &O, const Twine &Indent,
O << ")";
break;
}
+ case ExpressionTypes::ExtNegatedMulAccReduction: {
+ getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
+ O << " + ";
+ O << "reduce."
+ << Instruction::getOpcodeName(
+ RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
+ << " (sub (0, mul";
+ auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
+ Mul->printFlags(O);
+ O << "(";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
+ O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
+ << *Ext0->getResultType() << "), (";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
+ O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
+ << *Ext1->getResultType() << ")";
+ if (Red->isConditional()) {
+ O << ", ";
+ Red->getCondOp()->printAsOperand(O, SlotTracker);
+ }
+ O << "))";
+ break;
+ }
case ExpressionTypes::MulAccReduction:
case ExpressionTypes::ExtMulAccReduction: {
getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 1dff6cb5239a9..136e483f3803e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2917,16 +2917,17 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
// Clamp the range if using multiply-accumulate-reduction is profitable.
auto IsMulAccValidAndClampRange =
- [&](bool isZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
- VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt) -> bool {
+ [&](bool IsZExt, VPWidenRecipe *Mul, VPWidenCastRecipe *Ext0,
+ VPWidenCastRecipe *Ext1, VPWidenCastRecipe *OuterExt,
+ bool Negated = false) -> bool {
return LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *SrcTy =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
- InstructionCost MulAccCost =
- Ctx.TTI.getMulAccReductionCost(isZExt, RedTy, SrcVecTy, CostKind);
+ InstructionCost MulAccCost = Ctx.TTI.getMulAccReductionCost(
+ IsZExt, RedTy, SrcVecTy, Negated, CostKind);
InstructionCost MulCost = Mul->computeCost(VF, Ctx);
InstructionCost RedCost = Red->computeCost(VF, Ctx);
InstructionCost ExtCost = 0;
@@ -2944,14 +2945,22 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
};
VPValue *VecOp = Red->getVecOp();
+ VPValue *Mul = nullptr;
+ VPValue *Sub = nullptr;
VPValue *A, *B;
+ // Sub reductions will have a sub between the add reduction and vec op.
+ if (match(VecOp,
+ m_Binary<Instruction::Sub>(m_SpecificInt(0), m_VPValue(Mul))))
+ Sub = VecOp;
+ else
+ Mul = VecOp;
// Try to match reduce.add(mul(...)).
- if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
+ if (match(Mul, m_Mul(m_VPValue(A), m_VPValue(B)))) {
auto *RecipeA =
dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
auto *RecipeB =
dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
- auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
+ auto *MulR = cast<VPWidenRecipe>(Mul->getDefiningRecipe());
// Match reduce.add(mul(ext, ext)).
if (RecipeA && RecipeB &&
@@ -2960,12 +2969,16 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
match(RecipeB, m_ZExtOrSExt(m_VPValue())) &&
IsMulAccValidAndClampRange(RecipeA->getOpcode() ==
Instruction::CastOps::ZExt,
- Mul, RecipeA, RecipeB, nullptr)) {
- return new VPExpressionRecipe(RecipeA, RecipeB, Mul, Red);
+ MulR, RecipeA, RecipeB, nullptr, Sub)) {
+ if (Sub)
+ return new VPExpressionRecipe(
+ RecipeA, RecipeB, MulR,
+ cast<VPWidenRecipe>(Sub->getDefiningRecipe()), Red);
+ return new VPExpressionRecipe(RecipeA, RecipeB, MulR, Red);
}
// Match reduce.add(mul).
- if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
- return new VPExpressionRecipe(Mul, Red);
+ if (IsMulAccValidAndClampRange(true, MulR, nullptr, nullptr, nullptr, Sub))
+ return new VPExpressionRecipe(MulR, Red);
}
// Match reduce.add(ext(mul(ext(A), ext(B)))).
// All extend recipes must have same opcode or A == B
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 82adc34fdbd84..5b2bddc233831 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1453,8 +1453,8 @@ static void analyzeCostOfVecReduction(const IntrinsicInst &II,
TTI::CastContextHint::None, CostKind, RedOp);
CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
- CostAfterReduction =
- TTI.getMulAccReductionCost(IsUnsigned, II.getType(), ExtType, CostKind);
+ CostAfterReduction = TTI.getMulAccReductionCost(IsUnsigned, II.getType(),
+ ExtType, false, CostKind);
return;
}
CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 4af3fa9202c77..8059ac12ecd2e 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -416,3 +416,146 @@ exit:
%r.0.lcssa = phi i64 [ %rdx.next, %loop ]
ret i64 %r.0.lcssa
}
+
+define i32 @print_mulacc_sub(ptr %a, ptr %b) {
+; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
+; CHECK-NEXT: Live-in vp<%0> = VF
+; CHECK-NEXT: Live-in vp<%1> = VF * UF
+; CHECK-NEXT: Live-in vp<%2> = vector-trip-count
+; CHECK-NEXT: Live-in ir<1024> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: Successor(s): scalar.ph, vector.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.ph:
+; CHECK-NEXT: EMIT vp<%3> = reduction-start-vector ir<0>, ir<0>, ir<1>
+; CHECK-NEXT: Successor(s): vector loop
+; CHECK-EMPTY:
+; CHECK-NEXT: <x1> vector loop: {
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi vp<%3>, vp<%8>
+; CHECK-NEXT: vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0>
+; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%5>
+; CHECK-NEXT: vp<%6> = vector-pointer ir<%gep.a>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%6>
+; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%5>
+; CHECK-NEXT: vp<%7> = vector-pointer ir<%gep.b>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%7>
+; CHECK-NEXT: EXPRESSION vp<%8> = ir<%accum> + reduce.add (sub (0, mul (ir<%load.b> zext to i32), (ir<%load.a> zext to i32)))
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%4>, vp<%1>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, vp<%2>
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK-NEXT: Successor(s): middle.block
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%10> = compute-reduction-result ir<%accum>, vp<%8>
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, vp<%2>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<for.exit>, scalar.ph
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.exit>:
+; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<%10> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: scalar.ph:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ vp<%2>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%10>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] (extra operand: vp<%bc.resume.val> from scalar.ph)
+; CHECK-NEXT: IR %accum = phi i32 [ 0, %entry ], [ %add, %for.body ] (extra operand: vp<%bc.merge.rdx> from scalar.ph)
+; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
+; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
+; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i32
+; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
+; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
+; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i32
+; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
+; CHECK-NEXT: IR %add = sub i32 %accum, %mul
+; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
+; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+; CHECK: VPlan 'Final VPlan for VF={4},UF={1}' {
+; CHECK-NEXT: Live-in ir<4> = VF * UF
+; CHECK-NEXT: Live-in ir<1024> = vector-trip-count
+; CHECK-NEXT: Live-in ir<1024> = original trip-count
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<entry>:
+; CHECK-NEXT: Successor(s): ir-bb<scalar.ph>, ir-bb<vector.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<vector.ph>:
+; CHECK-NEXT: Successor(s): vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: vector.body:
+; CHECK-NEXT: EMIT-SCALAR vp<%index> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<%index.next>, vector.body ]
+; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<%accum> = phi ir<0>, ir<%add>.1
+; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<%index>
+; CHECK-NEXT: vp<%1> = vector-pointer ir<%gep.a>
+; CHECK-NEXT: WIDEN ir<%load.a> = load vp<%1>
+; CHECK-NEXT: CLONE ir<%gep.b> = getelementptr ir<%b>, vp<%index>
+; CHECK-NEXT: vp<%2> = vector-pointer ir<%gep.b>
+; CHECK-NEXT: WIDEN ir<%load.b> = load vp<%2>
+; CHECK-NEXT: WIDEN-CAST ir<%ext.b> = zext ir<%load.b> to i32
+; CHECK-NEXT: WIDEN-CAST ir<%ext.a> = zext ir<%load.a> to i32
+; CHECK-NEXT: WIDEN ir<%mul> = mul ir<%ext.b>, ir<%ext.a>
+; CHECK-NEXT: WIDEN ir<%add> = sub ir<0>, ir<%mul>
+; CHECK-NEXT: REDUCE ir<%add>.1 = ir<%accum> + reduce.add (ir<%add>)
+; CHECK-NEXT: EMIT vp<%index.next> = add nuw vp<%index>, ir<4>
+; CHECK-NEXT: EMIT branch-on-count vp<%index.next>, ir<1024>
+; CHECK-NEXT: Successor(s): middle.block, vector.body
+; CHECK-EMPTY:
+; CHECK-NEXT: middle.block:
+; CHECK-NEXT: EMIT vp<%4> = compute-reduction-result ir<%accum>, ir<%add>.1
+; CHECK-NEXT: EMIT vp<%cmp.n> = icmp eq ir<1024>, ir<1024>
+; CHECK-NEXT: EMIT branch-on-cond vp<%cmp.n>
+; CHECK-NEXT: Successor(s): ir-bb<for.exit>, ir-bb<scalar.ph>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.exit>:
+; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<%4> from middle.block)
+; CHECK-NEXT: No successors
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<scalar.ph>:
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.resume.val> = phi [ ir<1024>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: EMIT-SCALAR vp<%bc.merge.rdx> = phi [ vp<%4>, middle.block ], [ ir<0>, ir-bb<entry> ]
+; CHECK-NEXT: Successor(s): ir-bb<for.body>
+; CHECK-EMPTY:
+; CHECK-NEXT: ir-bb<for.body>:
+; CHECK-NEXT: IR %iv = phi i64 [ 0, %scalar.ph ], [ %iv.next, %for.body ] (extra operand: vp<%bc.resume.val> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %accum = phi i32 [ 0, %scalar.ph ], [ %add, %for.body ] (extra operand: vp<%bc.merge.rdx> from ir-bb<scalar.ph>)
+; CHECK-NEXT: IR %gep.a = getelementptr i8, ptr %a, i64 %iv
+; CHECK-NEXT: IR %load.a = load i8, ptr %gep.a, align 1
+; CHECK-NEXT: IR %ext.a = zext i8 %load.a to i32
+; CHECK-NEXT: IR %gep.b = getelementptr i8, ptr %b, i64 %iv
+; CHECK-NEXT: IR %load.b = load i8, ptr %gep.b, align 1
+; CHECK-NEXT: IR %ext.b = zext i8 %load.b to i32
+; CHECK-NEXT: IR %mul = mul i32 %ext.b, %ext.a
+; CHECK-NEXT: IR %add = sub i32 %accum, %mul
+; CHECK-NEXT: IR %iv.next = add i64 %iv, 1
+; CHECK-NEXT: IR %exitcond.not = icmp eq i64 %iv.next, 1024
+; CHECK-NEXT: No successors
+; CHECK-NEXT: }
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %gep.a = getelementptr i8, ptr %a, i64 %iv
+ %load.a = load i8, ptr %gep.a, align 1
+ %ext.a = zext i8 %load.a to i32
+ %gep.b = getelementptr i8, ptr %b, i64 %iv
+ %load.b = load i8, ptr %gep.b, align 1
+ %ext.b = zext i8 %load.b to i32
+ %mul = mul i32 %ext.b, %ext.a
+ %add = sub i32 %accum, %mul
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %for.exit, label %for.body
+
+for.exit: ; preds = %for.body
+ ret i32 %add
+}
>From fa30b51f46159255def6b4bb3857d855d39ee287 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Tue, 8 Jul 2025 15:25:46 +0100
Subject: [PATCH 2/2] Move IsNegated parameter and cost sub
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 2 +-
.../llvm/Analysis/TargetTransformInfoImpl.h | 4 ++--
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 ++++++++------
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++--
.../Target/AArch64/AArch64TargetTransformInfo.cpp | 8 ++++----
.../Target/AArch64/AArch64TargetTransformInfo.h | 2 +-
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 8 ++++----
llvm/lib/Target/ARM/ARMTargetTransformInfo.h | 4 ++--
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 9 ++++++---
llvm/lib/Transforms/Vectorize/VPlan.h | 4 +---
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 ++--
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 2 +-
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 4 ++--
13 files changed, 36 insertions(+), 33 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 383e1d0ea2108..c81c8a238291c 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1648,7 +1648,7 @@ class TargetTransformInfo {
/// The multiply can optionally be negated, which signifies that it is a sub
/// reduction.
LLVM_ABI InstructionCost getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty, bool Negated,
+ bool IsUnsigned, bool IsNegated, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// Calculate the cost of an extended reduction pattern, similar to
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 3a7d75fd13dec..b4817e8aa222a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -959,8 +959,8 @@ class TargetTransformInfoImplBase {
}
virtual InstructionCost
- getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
- bool Negated, TTI::TargetCostKind CostKind) const {
+ getMulAccReductionCost(bool IsUnsigned, bool IsNegated, Type *ResTy,
+ VectorType *Ty, TTI::TargetCostKind CostKind) const {
return 1;
}
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index efe2e9e3c6d15..e347fd1f96e33 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -3187,14 +3187,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
InstructionCost
- getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
- bool Negated,
+ getMulAccReductionCost(bool IsUnsigned, bool IsNegated, Type *ResTy,
+ VectorType *Ty,
TTI::TargetCostKind CostKind) const override {
- if (Negated)
- return InstructionCost::getInvalid(CostKind);
// Without any native support, this is equivalent to the cost of
// vecreduce.add(mul(ext(Ty A), ext(Ty B))) or
- // vecreduce.add(mul(A, B)).
+ // vecreduce.add(mul(A, B)) with an optional negation of the mul.
VectorType *ExtTy = VectorType::get(ResTy, Ty);
InstructionCost RedCost = thisT()->getArithmeticReductionCost(
Instruction::Add, ExtTy, std::nullopt, CostKind);
@@ -3204,8 +3202,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
InstructionCost MulCost =
thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
+ InstructionCost SubCost =
+ IsNegated
+ ? thisT()->getArithmeticInstrCost(Instruction::Sub, ExtTy, CostKind)
+ : 0;
- return RedCost + MulCost + 2 * ExtCost;
+ return RedCost + SubCost + MulCost + 2 * ExtCost;
}
InstructionCost getVectorSplitCost() const { return 1; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 2c4d424615ace..7e49505e93f42 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1274,9 +1274,9 @@ InstructionCost TargetTransformInfo::getExtendedReductionCost(
}
InstructionCost TargetTransformInfo::getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty, bool Negated,
+ bool IsUnsigned, bool IsNegated, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind) const {
- return TTIImpl->getMulAccReductionCost(IsUnsigned, ResTy, Ty, Negated,
+ return TTIImpl->getMulAccReductionCost(IsUnsigned, IsNegated, ResTy, Ty,
CostKind);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 7f9c02eebf971..677d2deb19500 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -5323,10 +5323,10 @@ InstructionCost AArch64TTIImpl::getExtendedReductionCost(
}
InstructionCost
-AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
- VectorType *VecTy, bool Negated,
+AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, bool IsNegated,
+ Type *ResTy, VectorType *VecTy,
TTI::TargetCostKind CostKind) const {
- if (Negated)
+ if (IsNegated)
return InstructionCost::getInvalid(CostKind);
EVT VecVT = TLI->getValueType(DL, VecTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
@@ -5342,7 +5342,7 @@ AArch64TTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
return LT.first + 2;
}
- return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, VecTy, Negated,
+ return BaseT::getMulAccReductionCost(IsUnsigned, IsNegated, ResTy, VecTy,
CostKind);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 9d52a00bd487a..76ad4b3f282a6 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -446,7 +446,7 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
TTI::TargetCostKind CostKind) const override;
InstructionCost getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty, bool Negated,
+ bool IsUnsigned, bool IsNegated, Type *ResTy, VectorType *Ty,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const override;
InstructionCost
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 180377f68a375..945b89ce434e7 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1882,10 +1882,10 @@ InstructionCost ARMTTIImpl::getExtendedReductionCost(
}
InstructionCost
-ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
- VectorType *ValTy, bool Negated,
+ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, bool IsNegated, Type *ResTy,
+ VectorType *ValTy,
TTI::TargetCostKind CostKind) const {
- if (Negated)
+ if (IsNegated)
return InstructionCost::getInvalid(CostKind);
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
@@ -1907,7 +1907,7 @@ ARMTTIImpl::getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
return ST->getMVEVectorCostFactor(CostKind) * LT.first;
}
- return BaseT::getMulAccReductionCost(IsUnsigned, ResTy, ValTy, Negated,
+ return BaseT::getMulAccReductionCost(IsUnsigned, IsNegated, ResTy, ValTy,
CostKind);
}
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index b9593621c50f4..25c8196470a26 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -298,8 +298,8 @@ class ARMTTIImpl final : public BasicTTIImplBase<ARMTTIImpl> {
VectorType *ValTy, std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) const override;
InstructionCost
- getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *ValTy,
- bool Negated,
+ getMulAccReductionCost(bool IsUnsigned, bool IsNegated, Type *ResTy,
+ VectorType *ValTy,
TTI::TargetCostKind CostKind) const override;
InstructionCost
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 85b807b645ba2..91c4e2c3adaae 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5451,7 +5451,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
TTI::CastContextHint::None, CostKind, RedOp);
InstructionCost RedCost = TTI.getMulAccReductionCost(
- IsUnsigned, RdxDesc.getRecurrenceType(), ExtType, false, CostKind);
+ IsUnsigned, /*IsNegated=*/false, RdxDesc.getRecurrenceType(), ExtType,
+ CostKind);
if (RedCost.isValid() &&
RedCost < ExtCost * 2 + MulCost + Ext2Cost + BaseCost)
@@ -5496,7 +5497,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
InstructionCost RedCost = TTI.getMulAccReductionCost(
- IsUnsigned, RdxDesc.getRecurrenceType(), ExtType, false, CostKind);
+ IsUnsigned, /*IsNegated=*/false, RdxDesc.getRecurrenceType(), ExtType,
+ CostKind);
InstructionCost ExtraExtCost = 0;
if (Op0Ty != LargestOpTy || Op1Ty != LargestOpTy) {
Instruction *ExtraExtOp = (Op0Ty != LargestOpTy) ? Op0 : Op1;
@@ -5515,7 +5517,8 @@ LoopVectorizationCostModel::getReductionPatternCost(Instruction *I,
TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
InstructionCost RedCost = TTI.getMulAccReductionCost(
- true, RdxDesc.getRecurrenceType(), VectorTy, false, CostKind);
+ true, /*IsNegated=*/false, RdxDesc.getRecurrenceType(), VectorTy,
+ CostKind);
if (RedCost.isValid() && RedCost < MulCost + BaseCost)
return I == RetI ? RedCost : 0;
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 26d66a247c55f..4c2b511de4103 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2799,9 +2799,7 @@ class VPExpressionRecipe : public VPSingleDefRecipe {
MulAccReduction,
/// Represent an inloop multiply-accumulate reduction, multiplying the
/// extended vector operands, negating the multiplication, performing a
- /// reduction.add
- /// on the result, and adding
- /// the scalar result to a chain.
+ /// reduction.add on the result, and adding the scalar result to a chain.
ExtNegatedMulAccReduction,
};
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 637f823cbf743..6f1c2a9eb6be0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2680,7 +2680,7 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
RedTy, SrcVecTy, std::nullopt, Ctx.CostKind);
}
case ExpressionTypes::MulAccReduction:
- return Ctx.TTI.getMulAccReductionCost(false, RedTy, SrcVecTy, false,
+ return Ctx.TTI.getMulAccReductionCost(false, false, RedTy, SrcVecTy,
Ctx.CostKind);
case ExpressionTypes::ExtNegatedMulAccReduction:
@@ -2689,7 +2689,7 @@ InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
return Ctx.TTI.getMulAccReductionCost(
cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
Instruction::ZExt,
- RedTy, SrcVecTy, Negated, Ctx.CostKind);
+ Negated, RedTy, SrcVecTy, Ctx.CostKind);
}
}
llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 136e483f3803e..682321a972ce8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2927,7 +2927,7 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
auto *SrcVecTy = cast<VectorType>(toVectorTy(SrcTy, VF));
InstructionCost MulAccCost = Ctx.TTI.getMulAccReductionCost(
- IsZExt, RedTy, SrcVecTy, Negated, CostKind);
+ IsZExt, Negated, RedTy, SrcVecTy, CostKind);
InstructionCost MulCost = Mul->computeCost(VF, Ctx);
InstructionCost RedCost = Red->computeCost(VF, Ctx);
InstructionCost ExtCost = 0;
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 5b2bddc233831..13ab6a11c9395 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1453,8 +1453,8 @@ static void analyzeCostOfVecReduction(const IntrinsicInst &II,
TTI::CastContextHint::None, CostKind, RedOp);
CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
- CostAfterReduction = TTI.getMulAccReductionCost(IsUnsigned, II.getType(),
- ExtType, false, CostKind);
+ CostAfterReduction = TTI.getMulAccReductionCost(
+ IsUnsigned, /*IsNegated=*/false, II.getType(), ExtType, CostKind);
return;
}
CostAfterReduction = TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
More information about the llvm-branch-commits
mailing list