[llvm] 5263155 - [CostModel] Add CostKind argument to getShuffleCost
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 21 02:57:29 PDT 2022
Author: Simon Pilgrim
Date: 2022-08-21T10:54:51+01:00
New Revision: 5263155d5be64b435a97fd4fa12f7f0aa97f88a8
URL: https://github.com/llvm/llvm-project/commit/5263155d5be64b435a97fd4fa12f7f0aa97f88a8
DIFF: https://github.com/llvm/llvm-project/commit/5263155d5be64b435a97fd4fa12f7f0aa97f88a8.diff
LOG: [CostModel] Add CostKind argument to getShuffleCost
Defaults to TCK_RecipThroughput - as most explicit calls were assuming TCK_RecipThroughput (vectorizers) or were just doing a before-vs-after comparison (vectorcombiner). Calls via getInstructionCost were just dropping the CostKind, so again there should be no change at this time (as getShuffleCost and its expansions don't use CostKind yet) - but it will make it easier for us to better account for size/latency shuffle costs in inline/unroll passes in the future.
Differential Revision: https://reviews.llvm.org/D132287
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
llvm/lib/Target/ARM/ARMTargetTransformInfo.h
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1eb416496bf32..04db5a9484a76 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1069,10 +1069,11 @@ class TargetTransformInfo {
/// passed through \p Args, which helps improve the cost estimation in some
/// cases, like in broadcast loads.
/// NOTE: For subvector extractions Tp represents the source type.
- InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask = None, int Index = 0,
- VectorType *SubTp = nullptr,
- ArrayRef<const Value *> Args = None) const;
+ InstructionCost
+ getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef<int> Mask = None,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ int Index = 0, VectorType *SubTp = nullptr,
+ ArrayRef<const Value *> Args = None) const;
/// Represents a hint about the context in which a cast is used.
///
@@ -1715,8 +1716,9 @@ class TargetTransformInfo::Concept {
OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo,
ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
- VectorType *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
+ int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) = 0;
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src, CastContextHint CCH,
@@ -2265,10 +2267,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args) override {
- return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp, Args);
+ return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
}
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
CastContextHint CCH,
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 42282a77fa41e..116606973b398 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -510,7 +510,8 @@ class TargetTransformInfoImplBase {
}
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None) const {
return 1;
@@ -1185,13 +1186,13 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isExtractSubvectorMask(SubIndex))
return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
- Shuffle->getShuffleMask(), SubIndex,
- VecTy, Operands);
+ Shuffle->getShuffleMask(), CostKind,
+ SubIndex, VecTy, Operands);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
- SubIndex,
+ CostKind, SubIndex,
FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
Operands);
@@ -1216,37 +1217,38 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
if (Shuffle->isReverse())
return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr,
- Operands);
+ Shuffle->getShuffleMask(), CostKind, 0,
+ nullptr, Operands);
if (Shuffle->isSelect())
return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr,
- Operands);
+ Shuffle->getShuffleMask(), CostKind, 0,
+ nullptr, Operands);
if (Shuffle->isTranspose())
return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr,
- Operands);
+ Shuffle->getShuffleMask(), CostKind, 0,
+ nullptr, Operands);
if (Shuffle->isZeroEltSplat())
return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr,
- Operands);
+ Shuffle->getShuffleMask(), CostKind, 0,
+ nullptr, Operands);
if (Shuffle->isSingleSource())
return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr,
- Operands);
+ Shuffle->getShuffleMask(), CostKind, 0,
+ nullptr, Operands);
if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
return TargetTTI->getShuffleCost(
- TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex,
- FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands);
+ TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
+ SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
+ Operands);
return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
- Shuffle->getShuffleMask(), 0, nullptr,
- Operands);
+ Shuffle->getShuffleMask(), CostKind, 0,
+ nullptr, Operands);
}
case Instruction::ExtractElement: {
auto *EEI = dyn_cast<ExtractElementInst>(U);
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index f97c7f6a0aaad..3bd808ca8fd5f 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -932,7 +932,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None) {
@@ -1514,9 +1515,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
if (isa<ScalableVectorType>(RetTy))
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
- return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
- cast<VectorType>(Args[0]->getType()), None,
- Index, cast<VectorType>(RetTy));
+ return thisT()->getShuffleCost(
+ TTI::SK_ExtractSubvector, cast<VectorType>(Args[0]->getType()),
+ None, CostKind, Index, cast<VectorType>(RetTy));
}
case Intrinsic::vector_insert: {
// FIXME: Handle case where a scalable vector is inserted into a scalable
@@ -1526,18 +1527,18 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
return thisT()->getShuffleCost(
TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None,
- Index, cast<VectorType>(Args[1]->getType()));
+ CostKind, Index, cast<VectorType>(Args[1]->getType()));
}
case Intrinsic::experimental_vector_reverse: {
return thisT()->getShuffleCost(TTI::SK_Reverse,
cast<VectorType>(Args[0]->getType()), None,
- 0, cast<VectorType>(RetTy));
+ CostKind, 0, cast<VectorType>(RetTy));
}
case Intrinsic::experimental_vector_splice: {
unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
return thisT()->getShuffleCost(TTI::SK_Splice,
cast<VectorType>(Args[0]->getType()), None,
- Index, cast<VectorType>(RetTy));
+ CostKind, Index, cast<VectorType>(RetTy));
}
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
@@ -2215,7 +2216,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
NumVecElts /= 2;
VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
- NumVecElts, SubTy);
+ CostKind, NumVecElts, SubTy);
ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
Ty = SubTy;
++LongVectorCount;
@@ -2229,8 +2230,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// architecture-dependent length.
// By default reductions need one shuffle per reduction level.
- ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
- TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
+ ShuffleCost +=
+ NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
+ None, CostKind, 0, Ty);
ArithCost +=
NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
return ShuffleCost + ArithCost +
@@ -2311,8 +2313,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
- ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
- NumVecElts, SubTy);
+ ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ None, CostKind, NumVecElts, SubTy);
MinMaxCost +=
thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
CmpInst::BAD_ICMP_PREDICATE, CostKind) +
@@ -2328,8 +2330,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
// operations performed on the current platform. That's why several final
// reduction opertions are perfomed on the vectors with the same
// architecture-dependent length.
- ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
- TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
+ ShuffleCost +=
+ NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
+ None, CostKind, 0, Ty);
MinMaxCost +=
NumReduxLevels *
(thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index bba09f7dd8c22..f9855ecf3d6e1 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -781,10 +781,11 @@ InstructionCost TargetTransformInfo::getArithmeticInstrCost(
}
InstructionCost TargetTransformInfo::getShuffleCost(
- ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, int Index,
- VectorType *SubTp, ArrayRef<const Value *> Args) const {
+ ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index, VectorType *SubTp,
+ ArrayRef<const Value *> Args) const {
InstructionCost Cost =
- TTIImpl->getShuffleCost(Kind, Ty, Mask, Index, SubTp, Args);
+ TTIImpl->getShuffleCost(Kind, Ty, Mask, CostKind, Index, SubTp, Args);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index be12a2de8ef23..ba1cd2746519f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2818,8 +2818,9 @@ InstructionCost AArch64TTIImpl::getSpliceCost(VectorType *Tp, int Index) {
InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
- ArrayRef<int> Mask, int Index,
- VectorType *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
+ int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
// If we have a Mask, and the LT is being legalized somehow, split the Mask
@@ -2877,7 +2878,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (NumSources <= 2)
Cost += getShuffleCost(NumSources <= 1 ? TTI::SK_PermuteSingleSrc
: TTI::SK_PermuteTwoSrc,
- NTp, NMask, 0, nullptr, Args);
+ NTp, NMask, CostKind, 0, nullptr, Args);
else if (any_of(enumerate(NMask), [&](const auto &ME) {
return ME.value() % LTNumElts == ME.index();
}))
@@ -3027,7 +3028,7 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
}
}
- return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
bool AArch64TTIImpl::preferPredicateOverEpilogue(
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 94b09e2ee0708..ab84b127f003c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -369,7 +369,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 7ebad9ea6d5d0..7c1d0e92e7706 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1054,6 +1054,7 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *VT, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
@@ -1074,7 +1075,7 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
}
}
- return BaseT::getShuffleCost(Kind, VT, Mask, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, VT, Mask, CostKind, Index, SubTp);
}
bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 4a8b2eeebb6a2..3d133a0e0f666 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -199,7 +199,8 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
InstructionCost getVectorSplitCost() { return 0; }
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 9c7ed9c7ead9f..2ff00791b6601 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1201,6 +1201,7 @@ InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) {
InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
@@ -1301,7 +1302,8 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy()
? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput)
: 1;
- return BaseCost * BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+ return BaseCost *
+ BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
InstructionCost ARMTTIImpl::getArithmeticInstrCost(
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index e4344f8486750..4249e42991610 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -212,7 +212,8 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> {
int getNumMemOps(const IntrinsicInst *I) const;
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index fadaa935320ee..fb9315fce0ef9 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -223,8 +223,9 @@ HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
}
InstructionCost HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
- ArrayRef<int> Mask, int Index,
- Type *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
+ int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
return 1;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index b34c947417068..1da52f3946968 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -125,7 +125,9 @@ class HexagonTTIImpl : public BasicTTIImplBase<HexagonTTIImpl> {
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
- ArrayRef<int> Mask, int Index, Type *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
+ Type *SubTp,
ArrayRef<const Value *> Args = None);
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index d8660ae844ce8..d6a56628d47a4 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1003,8 +1003,9 @@ InstructionCost PPCTTIImpl::getArithmeticInstrCost(
}
InstructionCost PPCTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
- ArrayRef<int> Mask, int Index,
- Type *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
+ int Index, Type *SubTp,
ArrayRef<const Value *> Args) {
InstructionCost CostFactor =
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 36e6fec2a36d2..3caa66ddf2261 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -112,7 +112,9 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, Type *Tp,
- ArrayRef<int> Mask, int Index, Type *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
+ Type *SubTp,
ArrayRef<const Value *> Args = None);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
TTI::CastContextHint CCH,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 80c735dd17715..a3d2dfe24c3ba 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -180,6 +180,7 @@ InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
if (isa<ScalableVectorType>(Tp)) {
@@ -212,7 +213,7 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
}
}
- return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
InstructionCost
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index aa39f635e5085..0d424b2b0a0c7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -101,7 +101,8 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
InstructionCost getSpliceCost(VectorType *Tp, int Index);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 101dd3f80a334..e7d7e60570cf8 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -594,8 +594,9 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *Tp,
- ArrayRef<int> Mask, int Index,
- VectorType *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
+ int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
Kind = improveShuffleKindFromMask(Kind, Mask);
if (ST->hasVector()) {
@@ -630,7 +631,7 @@ InstructionCost SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
}
}
- return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
// Return the log2 difference of the element sizes of the two vector types.
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 80d777aaf2fff..8b1d9f99c81ce 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -92,7 +92,8 @@ class SystemZTTIImpl : public BasicTTIImplBase<SystemZTTIImpl> {
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index fae43dfb71531..63a6dce6caaaa 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -1085,8 +1085,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
VectorType *BaseTp,
- ArrayRef<int> Mask, int Index,
- VectorType *SubTp,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind,
+ int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are widened to type v4i32.
@@ -1134,8 +1135,9 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
auto *SubTy = FixedVectorType::get(BaseTp->getElementType(),
SubLT.second.getVectorNumElements());
int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
- InstructionCost ExtractCost = getShuffleCost(
- TTI::SK_ExtractSubvector, VecTy, None, ExtractIndex, SubTy);
+ InstructionCost ExtractCost =
+ getShuffleCost(TTI::SK_ExtractSubvector, VecTy, None, CostKind,
+ ExtractIndex, SubTy);
// If the original size is 32-bits or more, we can use pshufd. Otherwise
// if we have SSSE3 we can use pshufb.
@@ -1249,7 +1251,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
InstructionCost Cost = 0;
processShuffleMasks(
NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
- [this, SingleOpTy, &PrevSrcReg, &PrevRegMask,
+ [this, SingleOpTy, CostKind, &PrevSrcReg, &PrevRegMask,
&Cost](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
if (!ShuffleVectorInst::isIdentityMask(RegMask)) {
// Check if the previous register can be just copied to the next
@@ -1257,7 +1259,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (PrevRegMask.empty() || PrevSrcReg != SrcReg ||
PrevRegMask != RegMask)
Cost += getShuffleCost(TTI::SK_PermuteSingleSrc, SingleOpTy,
- RegMask, 0, nullptr);
+ RegMask, CostKind, 0, nullptr);
else
// Just a copy of previous destination register.
Cost += TTI::TCC_Basic;
@@ -1271,21 +1273,21 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
PrevSrcReg = SrcReg;
PrevRegMask = RegMask;
},
- [this, SingleOpTy, &Cost](ArrayRef<int> RegMask,
- unsigned /*Unused*/,
- unsigned /*Unused*/) {
+ [this, SingleOpTy, CostKind, &Cost](ArrayRef<int> RegMask,
+ unsigned /*Unused*/,
+ unsigned /*Unused*/) {
Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, RegMask,
- 0, nullptr);
+ CostKind, 0, nullptr);
});
return Cost;
}
InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy,
- None, 0, nullptr);
+ None, CostKind, 0, nullptr);
}
- return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
}
// For 2-input shuffles, we must account for splitting the 2 inputs into many.
@@ -1648,7 +1650,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second))
return LT.first * Entry->Cost;
- return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp);
+ return BaseT::getShuffleCost(Kind, BaseTp, Mask, CostKind, Index, SubTp);
}
InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -3678,6 +3680,7 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
assert(Val->isVectorTy() && "This must be a vector type");
Type *ScalarType = Val->getScalarType();
InstructionCost RegisterFileMoveCost = 0;
+ TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
// Non-immediate extraction/insertion can be handled as a sequence of
// aliased loads+stores via the stack.
@@ -3693,19 +3696,16 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// Extract - store vector to stack, load scalar.
if (Opcode == Instruction::ExtractElement) {
- return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
- TTI::TargetCostKind::TCK_RecipThroughput) +
+ return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0,
- TTI::TargetCostKind::TCK_RecipThroughput);
+ CostKind);
}
// Insert - store vector to stack, store scalar, load vector.
if (Opcode == Instruction::InsertElement) {
- return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0,
- TTI::TargetCostKind::TCK_RecipThroughput) +
+ return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, CostKind) +
getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0,
- TTI::TargetCostKind::TCK_RecipThroughput) +
- getMemoryOpCost(Instruction::Load, Val, VecAlign, 0,
- TTI::TargetCostKind::TCK_RecipThroughput);
+ CostKind) +
+ getMemoryOpCost(Instruction::Load, Val, VecAlign, 0, CostKind);
}
}
@@ -3783,8 +3783,8 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
EVT VT = TLI->getValueType(DL, Val);
if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128)
SubTy = FixedVectorType::get(ScalarType, SubNumElts);
- ShuffleCost =
- getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, None, 0, SubTy);
+ ShuffleCost = getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, None, CostKind,
+ 0, SubTy);
}
int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1;
return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
@@ -3809,7 +3809,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
MVT MScalarTy = LT.second.getScalarType();
unsigned SizeInBits = LT.second.getSizeInBits();
-
+ TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput;
InstructionCost Cost = 0;
// For insertions, a ISD::BUILD_VECTOR style vector initialization can be much
@@ -3916,7 +3916,7 @@ InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale);
for (unsigned I = 0; I != Num128Lanes; ++I)
if (DemandedUpper128Lanes[I])
- Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
+ Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind,
I * Scale, Ty128);
// Add all the demanded element extractions together, but adjust the
@@ -4042,8 +4042,8 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
InstructionCost SingleShuffleCost =
- getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy,
- /*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr);
+ getShuffleCost(TTI::SK_PermuteSingleSrc, SingleDstVecTy, /*Mask=*/None,
+ CostKind, /*Index=*/0, /*SubTp=*/nullptr);
return NumDstVectorsDemanded * SingleShuffleCost;
}
@@ -4172,7 +4172,7 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
if (!Is0thSubVec)
Cost += getShuffleCost(IsLoad ? TTI::ShuffleKind::SK_InsertSubvector
: TTI::ShuffleKind::SK_ExtractSubvector,
- VTy, None, NumEltDone(), CurrVecTy);
+ VTy, None, CostKind, NumEltDone(), CurrVecTy);
}
// While we can directly load/store ZMM, YMM, and 64-bit halves of XMM,
@@ -4251,14 +4251,17 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires extend/truncate for data and a shuffle for mask.
- Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, None, 0, nullptr) +
- getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, None, 0, nullptr);
+ Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, None, CostKind, 0,
+ nullptr) +
+ getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, None, CostKind, 0,
+ nullptr);
else if (LT.first * LT.second.getVectorNumElements() > NumElem) {
auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(),
LT.second.getVectorNumElements());
// Expanding requires fill mask with zeroes
- Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, None, 0, MaskTy);
+ Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, None, CostKind,
+ 0, MaskTy);
}
// Pre-AVX512 - each maskmov load costs 2 + store costs ~8.
@@ -4503,8 +4506,8 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
// If we're reducing from 256/512 bits, use an extract_subvector.
if (Size > 128) {
auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
- ReductionCost +=
- getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, NumVecElts, SubTy);
+ ReductionCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
+ CostKind, NumVecElts, SubTy);
Ty = SubTy;
} else if (Size == 128) {
// Reducing from 128 bits is a permute of v2f64/v2i64.
@@ -4515,8 +4518,8 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
else
ShufTy =
FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2);
- ReductionCost +=
- getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr);
+ ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy,
+ None, CostKind, 0, nullptr);
} else if (Size == 64) {
// Reducing from 64 bits is a shuffle of v4f32/v4i32.
FixedVectorType *ShufTy;
@@ -4526,8 +4529,8 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
else
ShufTy =
FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4);
- ReductionCost +=
- getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr);
+ ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy,
+ None, CostKind, 0, nullptr);
} else {
// Reducing from smaller size is a shift by immediate.
auto *ShiftTy = FixedVectorType::get(
@@ -4805,8 +4808,8 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
// If we're reducing from 256/512 bits, use an extract_subvector.
if (Size > 128) {
auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
- MinMaxCost +=
- getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, NumVecElts, SubTy);
+ MinMaxCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind,
+ NumVecElts, SubTy);
Ty = SubTy;
} else if (Size == 128) {
// Reducing from 128 bits is a permute of v2f64/v2i64.
@@ -4816,8 +4819,8 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
else
ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
- MinMaxCost +=
- getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr);
+ MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None,
+ CostKind, 0, nullptr);
} else if (Size == 64) {
// Reducing from 64 bits is a shuffle of v4f32/v4i32.
FixedVectorType *ShufTy;
@@ -4825,8 +4828,8 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
else
ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
- MinMaxCost +=
- getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr);
+ MinMaxCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None,
+ CostKind, 0, nullptr);
} else {
// Reducing from smaller size is a shift by immediate.
auto *ShiftTy = FixedVectorType::get(
@@ -5622,7 +5625,7 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
(NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc;
InstructionCost ShuffleCost =
- getShuffleCost(ShuffleKind, SingleMemOpTy, None, 0, nullptr);
+ getShuffleCost(ShuffleKind, SingleMemOpTy, None, CostKind, 0, nullptr);
unsigned NumOfLoadsInInterleaveGrp =
Indices.size() ? Indices.size() : Factor;
@@ -5678,8 +5681,8 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512(
// There is no strided stores meanwhile. And store can't be folded in
// shuffle.
unsigned NumOfSources = Factor; // The number of values to be merged.
- InstructionCost ShuffleCost =
- getShuffleCost(TTI::SK_PermuteTwoSrc, SingleMemOpTy, None, 0, nullptr);
+ InstructionCost ShuffleCost = getShuffleCost(
+ TTI::SK_PermuteTwoSrc, SingleMemOpTy, None, CostKind, 0, nullptr);
unsigned NumOfShufflesPerStore = NumOfSources - 1;
// The SK_MergeTwoSrc shuffle clobbers one of src operands.
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index f0aefed2713c7..9ae462d194c46 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -135,7 +135,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
const Instruction *CxtI = nullptr);
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
- ArrayRef<int> Mask, int Index,
+ ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp,
ArrayRef<const Value *> Args = None);
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 39cd9ecca8884..84ea2b14a3d62 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6405,8 +6405,8 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
bool Reverse = ConsecutiveStride < 0;
if (Reverse)
- Cost +=
- TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, None, 0);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, None,
+ CostKind, 0);
return Cost;
}
@@ -6463,6 +6463,7 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
unsigned AS = getLoadStoreAddressSpace(I);
+ enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
auto Group = getInterleavedAccessGroup(I);
assert(Group && "Fail to get an interleaved access group.");
@@ -6482,15 +6483,15 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
(isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
I->getOpcode(), WideVecTy, Group->getFactor(), Indices, Group->getAlign(),
- AS, TTI::TCK_RecipThroughput, Legal->isMaskRequired(I), UseMaskForGaps);
+ AS, CostKind, Legal->isMaskRequired(I), UseMaskForGaps);
if (Group->isReverse()) {
// TODO: Add support for reversed masked interleaved access.
assert(!Legal->isMaskRequired(I) &&
"Reverse masked interleaved access not supported.");
- Cost +=
- Group->getNumMembers() *
- TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, None, 0);
+ Cost += Group->getNumMembers() *
+ TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, None,
+ CostKind, 0);
}
return Cost;
}
@@ -7036,9 +7037,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
// First-order recurrences are replaced by vector shuffles inside the loop.
// NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
if (VF.isVector() && Legal->isFixedOrderRecurrence(Phi))
- return TTI.getShuffleCost(
- TargetTransformInfo::SK_ExtractSubvector, cast<VectorType>(VectorTy),
- None, VF.getKnownMinValue() - 1, FixedVectorType::get(RetTy, 1));
+ return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+ cast<VectorType>(VectorTy), None, CostKind,
+ VF.getKnownMinValue() - 1,
+ FixedVectorType::get(RetTy, 1));
// Phi nodes in non-header blocks (not inductions, reductions, etc.) are
// converted into select instructions. We require N - 1 selects per phi
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 065198ff16dad..0a235c8720fce 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5895,7 +5895,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (Idx + NumElts <= EENumElts) {
Cost +=
TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, None, Idx, VecTy);
+ EEVTy, None, CostKind, Idx, VecTy);
} else {
// Need to round up the subvector type vectorization factor to avoid a
// crash in cost model functions. Make SubVT so that Idx + VF of SubVT
@@ -5904,11 +5904,11 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
Cost +=
TTIRef.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, None, Idx, SubVT);
+ EEVTy, None, CostKind, Idx, SubVT);
}
} else {
Cost += TTIRef.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
- VecTy, None, 0, EEVTy);
+ VecTy, None, CostKind, 0, EEVTy);
}
}
};
@@ -5976,7 +5976,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
assert(VecTy == FinalVecTy &&
"No reused scalars expected for broadcast.");
return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
- /*Mask=*/None, /*Index=*/0,
+ /*Mask=*/None, CostKind, /*Index=*/0,
/*SubTp=*/nullptr, /*Args=*/VL[0]);
}
InstructionCost ReuseShuffleCost = 0;
@@ -6065,7 +6065,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// Add the cost for the subvectors insert.
for (int I = VF, E = VL.size(); I < E; I += VF)
GatherCost += TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy,
- None, I, LoadTy);
+ None, CostKind, I, LoadTy);
}
return ReuseShuffleCost + GatherCost - ScalarsCost;
}
@@ -6249,8 +6249,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (InsertVecSz != VecSz) {
auto *ActualVecTy =
FixedVectorType::get(SrcVecTy->getElementType(), VecSz);
- Cost += TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy,
- None, OffsetBeg - Offset, InsertVecTy);
+ Cost +=
+ TTI->getShuffleCost(TTI::SK_InsertSubvector, ActualVecTy, None,
+ CostKind, OffsetBeg - Offset, InsertVecTy);
} else {
for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
Mask[I] = I;
More information about the llvm-commits
mailing list