[llvm] 12025ce - [CostModel] Use min/max intrinsics for vecreduce.min/max costs
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 4 07:02:36 PDT 2023
Author: David Green
Date: 2023-07-04T15:02:30+01:00
New Revision: 12025cef3ec84f9528613c6248df92341ab2c765
URL: https://github.com/llvm/llvm-project/commit/12025cef3ec84f9528613c6248df92341ab2c765
DIFF: https://github.com/llvm/llvm-project/commit/12025cef3ec84f9528613c6248df92341ab2c765.diff
LOG: [CostModel] Use min/max intrinsics for vecreduce.min/max costs
This changes the cost modelling of the vecreduce.min/max nodes to use the costs
of the relevant min/max intrinsics instead of expanding them to compares and
selects. The getMinMaxReductionCost functions have changed to take an
Intrinsic::ID for the relevant intrinsic, dropping the IsUnsigned and CondTy
parameters as they are no longer needed.
A follow-up patch will add some basic fminimum/fmaximum cost modelling.
Differential Revision: https://reviews.llvm.org/D153547
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index ca4b83727c02c4..1ae595d2110457 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1409,8 +1409,7 @@ class TargetTransformInfo {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
InstructionCost getMinMaxReductionCost(
- VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
- FastMathFlags FMF = FastMathFlags(),
+ Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// Calculate the cost of an extended reduction pattern, similar to
@@ -1959,8 +1958,8 @@ class TargetTransformInfo::Concept {
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
- getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
- FastMathFlags FMF, TTI::TargetCostKind CostKind) = 0;
+ getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
+ TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
FastMathFlags FMF,
@@ -2584,10 +2583,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
}
InstructionCost
- getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
- FastMathFlags FMF,
+ getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
TTI::TargetCostKind CostKind) override {
- return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
+ return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
}
InstructionCost
getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index de113323cd9d8a..b34eaa3d6f7dd7 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -729,7 +729,7 @@ class TargetTransformInfoImplBase {
return 1;
}
- InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
FastMathFlags,
TTI::TargetCostKind) const {
return 1;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 2dc81596573ef0..d8ca03ae2c33c0 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1890,17 +1890,23 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
FMF, CostKind);
case Intrinsic::vector_reduce_smax:
+ return thisT()->getMinMaxReductionCost(Intrinsic::smax, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::vector_reduce_smin:
- case Intrinsic::vector_reduce_fmax:
- case Intrinsic::vector_reduce_fmin:
- return thisT()->getMinMaxReductionCost(
- VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
- /*IsUnsigned=*/false, ICA.getFlags(), CostKind);
+ return thisT()->getMinMaxReductionCost(Intrinsic::smin, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::vector_reduce_umax:
+ return thisT()->getMinMaxReductionCost(Intrinsic::umax, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::vector_reduce_umin:
- return thisT()->getMinMaxReductionCost(
- VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
- /*IsUnsigned=*/true, ICA.getFlags(), CostKind);
+ return thisT()->getMinMaxReductionCost(Intrinsic::umin, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fmax:
+ return thisT()->getMinMaxReductionCost(Intrinsic::maxnum, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fmin:
+ return thisT()->getMinMaxReductionCost(Intrinsic::minnum, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::abs: {
// abs(X) = select(icmp(X,0),X,sub(0,X))
Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -2348,8 +2354,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
/// Try to calculate op costs for min/max reduction operations.
/// \param CondTy Conditional type for the Select instruction.
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
// Targets must implement a default value for the scalable case, since
// we don't know how many lanes the vector has.
@@ -2357,17 +2363,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return InstructionCost::getInvalid();
Type *ScalarTy = Ty->getElementType();
- Type *ScalarCondTy = CondTy->getElementType();
unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
- unsigned CmpOpcode;
- if (Ty->isFPOrFPVectorTy()) {
- CmpOpcode = Instruction::FCmp;
- } else {
- assert(Ty->isIntOrIntVectorTy() &&
- "expecting floating point or integer type for min/max reduction");
- CmpOpcode = Instruction::ICmp;
- }
InstructionCost MinMaxCost = 0;
InstructionCost ShuffleCost = 0;
std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
@@ -2377,16 +2374,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
- CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
ShuffleCost +=
thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, NumVecElts, SubTy);
- MinMaxCost +=
- thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+ IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
+ MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
Ty = SubTy;
++LongVectorCount;
}
@@ -2400,12 +2394,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
ShuffleCost +=
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
std::nullopt, CostKind, 0, Ty);
- MinMaxCost +=
- NumReduxLevels *
- (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind));
+ IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
+ MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
// The last min/max should be in vector registers and we counted it above.
// So just need a single extractelement.
return ShuffleCost + MinMaxCost +
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index e5a8d7934a0017..c751d174a48ab1 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1058,10 +1058,10 @@ InstructionCost TargetTransformInfo::getArithmeticReductionCost(
}
InstructionCost TargetTransformInfo::getMinMaxReductionCost(
- VectorType *Ty, VectorType *CondTy, bool IsUnsigned, FastMathFlags FMF,
+ Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
- TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
+ TTIImpl->getMinMaxReductionCost(IID, Ty, FMF, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c22e2540b02ee7..34d7fea0738083 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3274,26 +3274,18 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
}
InstructionCost
-AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
-
- assert((isa<ScalableVectorType>(Ty) == isa<ScalableVectorType>(CondTy)) &&
- "Both vector needs to be equally scalable");
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
InstructionCost LegalizationCost = 0;
if (LT.first > 1) {
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
- Intrinsic::ID MinMaxOpcode =
- Ty->isFPOrFPVectorTy()
- ? Intrinsic::maxnum
- : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin);
- IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy},
- FMF);
+ IntrinsicCostAttributes Attrs(IID, LegalVTy, {LegalVTy, LegalVTy}, FMF);
LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1);
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 4f638019a3a7d0..e261892254da10 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -181,8 +181,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
TTI::TargetCostKind CostKind,
unsigned Index);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 1fa0a025c6226d..90197d698642e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -787,15 +787,15 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
}
InstructionCost
-GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
EVT OrigTy = TLI->getValueType(DL, Ty);
// Computes cost on targets that have packed math instructions(which support
// 16-bit types only).
if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
return LT.first * getHalfRateInstrCost(CostKind);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 7bbf414b2de16d..65e4f3e59c29c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -251,8 +251,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
};
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 88f4e87533d4dd..6e26241275ab05 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1212,15 +1212,15 @@ unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
}
InstructionCost
-RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
// Skip if scalar size of Ty is bigger than ELEN.
if (Ty->getScalarSizeInBits() > ST->getELEN())
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
if (Ty->getElementType()->isIntegerTy(1))
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index e96f93cd98fa0c..36f6c4524da072 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -148,8 +148,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 4c678a6ff846dc..8bfbd27a5b900a 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5256,25 +5256,16 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
CostKind, 0, nullptr, nullptr);
}
-InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
+InstructionCost X86TTIImpl::getMinMaxCost(Intrinsic::ID IID, Type *Ty,
TTI::TargetCostKind CostKind,
- bool IsUnsigned, FastMathFlags FMF) {
- Intrinsic::ID Id;
- if (Ty->isIntOrIntVectorTy()) {
- Id = IsUnsigned ? Intrinsic::umin : Intrinsic::smin;
- } else {
- assert(Ty->isFPOrFPVectorTy() &&
- "Expected float point or integer vector type.");
- Id = Intrinsic::minnum;
- }
-
- IntrinsicCostAttributes ICA(Id, Ty, {Ty, Ty}, FMF);
+ FastMathFlags FMF) {
+ IntrinsicCostAttributes ICA(IID, Ty, {Ty, Ty}, FMF);
return getIntrinsicInstrCost(ICA, CostKind);
}
InstructionCost
-X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+X86TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *ValTy,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
@@ -5282,11 +5273,14 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
int ISD;
if (ValTy->isIntOrIntVectorTy()) {
- ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
+ ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ? ISD::UMIN
+ : ISD::SMIN;
} else {
assert(ValTy->isFPOrFPVectorTy() &&
"Expected float point or integer vector type.");
- ISD = ISD::FMINNUM;
+ ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
+ ? ISD::FMINNUM
+ : ISD::FMINIMUM;
}
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
@@ -5362,9 +5356,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
// Type needs to be split. We need LT.first - 1 operations ops.
Ty = FixedVectorType::get(ValVTy->getElementType(),
MTy.getVectorNumElements());
- auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(),
- MTy.getVectorNumElements());
- MinMaxCost = getMinMaxCost(Ty, SubCondTy, CostKind, IsUnsigned, FMF);
+ MinMaxCost = getMinMaxCost(IID, Ty, CostKind, FMF);
MinMaxCost *= LT.first - 1;
NumVecElts = MTy.getVectorNumElements();
}
@@ -5391,8 +5383,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
// by type legalization.
if (!isPowerOf2_32(ValVTy->getNumElements()) ||
ScalarSize != MTy.getScalarSizeInBits())
- return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsUnsigned, FMF,
- CostKind);
+ return BaseT::getMinMaxReductionCost(IID, ValTy, FMF, CostKind);
// Now handle reduction with the legal type, taking into account size changes
// at each level.
@@ -5436,9 +5427,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
}
// Add the arithmetic op for this level.
- auto *SubCondTy =
- FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements());
- MinMaxCost += getMinMaxCost(Ty, SubCondTy, CostKind, IsUnsigned, FMF);
+ MinMaxCost += getMinMaxCost(IID, Ty, CostKind, FMF);
}
// Add the final extract element to the cost.
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 0e1a475b2af669..89c7916260a451 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -207,12 +207,12 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
- InstructionCost getMinMaxCost(Type *Ty, Type *CondTy,
- TTI::TargetCostKind CostKind, bool IsUnsigned,
+ InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty,
+ TTI::TargetCostKind CostKind,
FastMathFlags FMF);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned, FastMathFlags FMF,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getInterleavedMemoryOpCost(
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 84c30da74d3d3a..ebf9e7be260641 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -13804,15 +13804,9 @@ class HorizontalReduction {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
- if (!AllConsts) {
- auto *VecCondTy =
- cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
- bool IsUnsigned =
- RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
- VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- IsUnsigned, FMF, CostKind);
- }
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
+ if (!AllConsts)
+ VectorCost = TTI->getMinMaxReductionCost(Id, VectorTy, FMF, CostKind);
ScalarCost = EvaluateScalarCost([&]() {
IntrinsicCostAttributes ICA(Id, ScalarTy, {ScalarTy, ScalarTy}, FMF);
return TTI->getIntrinsicInstrCost(ICA, CostKind);
diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
index afa1d5bdd31612..95b1e99564291b 100644
--- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll
@@ -162,10 +162,10 @@ define void @reduce_smax() {
define void @reduce_fmin16() {
; CHECK-NOF16-LABEL: 'reduce_fmin16'
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef)
@@ -196,10 +196,10 @@ define void @reduce_fmin16() {
define void @reduce_fmax16() {
; CHECK-NOF16-LABEL: 'reduce_fmax16'
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
-; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef)
+; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef)
; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef)
diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
index 1279bb152647f9..5bc936e1fe38e6 100644
--- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
+++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll
@@ -294,19 +294,19 @@ define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
define void @reduce_fmax(<16 x float> %va) {
; THRU-LABEL: 'reduce_fmax'
-; THRU-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
+; THRU-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fmax'
-; LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
+; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmax'
-; SIZE-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
+; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fmax'
-; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
+; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
More information about the llvm-commits
mailing list