[llvm] 259eb61 - Revert "[CostModel] Unify Intrinsic Costs."
Sam Parker via llvm-commits
llvm-commits at lists.llvm.org
Thu May 21 04:51:54 PDT 2020
Author: Sam Parker
Date: 2020-05-21T12:50:24+01:00
New Revision: 259eb619ff6dcd5b6111d1686e18559b9ca004d4
URL: https://github.com/llvm/llvm-project/commit/259eb619ff6dcd5b6111d1686e18559b9ca004d4
DIFF: https://github.com/llvm/llvm-project/commit/259eb619ff6dcd5b6111d1686e18559b9ca004d4.diff
LOG: Revert "[CostModel] Unify Intrinsic Costs."
This reverts commit de71def3f59dc9f12f67141b5040d8e15c84d08a.
This is causing some very large changes, so I'm first going to break
this patch down and re-commit in parts.
Added:
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/include/llvm/CodeGen/BasicTTIImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c50c696741b1..c2ba9a488dca 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -38,7 +38,6 @@ class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
class BranchInst;
-class CallBase;
class Function;
class GlobalValue;
class IntrinsicInst;
@@ -121,12 +120,10 @@ class IntrinsicCostAttributes {
public:
IntrinsicCostAttributes(const IntrinsicInst &I);
- IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
-
- IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
+ IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor);
- IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
+ IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor, unsigned ScalarCost);
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
@@ -144,7 +141,7 @@ class IntrinsicCostAttributes {
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
ArrayRef<Type *> Tys);
- IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
ArrayRef<Value *> Args);
Intrinsic::ID getID() const { return IID; }
@@ -291,6 +288,18 @@ class TargetTransformInfo {
/// scientific. A target may has no bonus on vector instructions.
int getInlinerVectorBonusPercent() const;
+ /// Estimate the cost of an intrinsic when lowered.
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys,
+ const User *U = nullptr,
+ TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
+
+ /// Estimate the cost of an intrinsic when lowered.
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments,
+ const User *U = nullptr,
+ TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
+
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
int getMemcpyCost(const Instruction *I) const;
@@ -1222,6 +1231,13 @@ class TargetTransformInfo::Concept {
TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getInlinerVectorBonusPercent() = 0;
+ virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys, const User *U,
+ enum TargetCostKind CostKind) = 0;
+ virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments,
+ const User *U,
+ enum TargetCostKind CostKind) = 0;
virtual int getMemcpyCost(const Instruction *I) = 0;
virtual unsigned
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
@@ -1479,6 +1495,18 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
int getInlinerVectorBonusPercent() override {
return Impl.getInlinerVectorBonusPercent();
}
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys,
+ const User *U = nullptr,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+ return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
+ }
+ int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments,
+ const User *U = nullptr,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+ return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
+ }
int getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index ea56bfcff809..a0a0e53dfadc 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -462,39 +462,6 @@ class TargetTransformInfoImplBase {
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- switch (ICA.getID()) {
- default:
- break;
- case Intrinsic::annotation:
- case Intrinsic::assume:
- case Intrinsic::sideeffect:
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::dbg_label:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- case Intrinsic::is_constant:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- case Intrinsic::experimental_gc_result:
- case Intrinsic::experimental_gc_relocate:
- case Intrinsic::coro_alloc:
- case Intrinsic::coro_begin:
- case Intrinsic::coro_free:
- case Intrinsic::coro_end:
- case Intrinsic::coro_frame:
- case Intrinsic::coro_size:
- case Intrinsic::coro_suspend:
- case Intrinsic::coro_param:
- case Intrinsic::coro_subfn_addr:
- // These intrinsics don't actually represent code after lowering.
- return 0;
- }
return 1;
}
@@ -772,32 +739,78 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
return TTI::TCC_Basic;
}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) {
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys, const User *U,
+ TTI::TargetCostKind TCK_SizeAndLatency) {
+ switch (IID) {
+ default:
+ // Intrinsics rarely (if ever) have normal argument setup constraints.
+ // Model them as having a basic instruction cost.
+ return TTI::TCC_Basic;
+
+ // TODO: other libc intrinsics.
+ case Intrinsic::memcpy:
+ return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
+
+ case Intrinsic::annotation:
+ case Intrinsic::assume:
+ case Intrinsic::sideeffect:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::is_constant:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ case Intrinsic::experimental_gc_result:
+ case Intrinsic::experimental_gc_relocate:
+ case Intrinsic::coro_alloc:
+ case Intrinsic::coro_begin:
+ case Intrinsic::coro_free:
+ case Intrinsic::coro_end:
+ case Intrinsic::coro_frame:
+ case Intrinsic::coro_size:
+ case Intrinsic::coro_suspend:
+ case Intrinsic::coro_param:
+ case Intrinsic::coro_subfn_addr:
+ // These intrinsics don't actually represent code after lowering.
+ return TTI::TCC_Free;
+ }
+ }
+
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments, const User *U,
+ TTI::TargetCostKind CostKind) {
+ // Delegate to the generic intrinsic handling code. This mostly provides an
+ // opportunity for targets to (for example) special case the cost of
+ // certain intrinsics based on constants used as arguments.
+ SmallVector<Type *, 8> ParamTys;
+ ParamTys.reserve(Arguments.size());
+ for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
+ ParamTys.push_back(Arguments[Idx]->getType());
+ return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
+ CostKind);
+ }
+
+ unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) {
auto *TargetTTI = static_cast<T *>(this);
+ // FIXME: Unlikely to be true for anything but CodeSize.
if (const auto *CB = dyn_cast<CallBase>(U)) {
- // Special-case throughput here because it can make wild differences
- // whether the arguments are passed around or just the arg types. The
- // IntrinsicCostAttribute constructor used here will save all the
- // available information.
- // FIXME: More information isn't always useful and we shouldn't have to
- // make a special case like this.
- if (CostKind == TTI::TCK_RecipThroughput) {
- if (auto *II = dyn_cast<IntrinsicInst>(CB)) {
- IntrinsicCostAttributes Attrs(*II);
- return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
- }
- return -1; // We know nothing about this call.
- }
-
- // FIXME: Unlikely to be true for anything but CodeSize.
const Function *F = CB->getCalledFunction();
if (F) {
FunctionType *FTy = F->getFunctionType();
if (Intrinsic::ID IID = F->getIntrinsicID()) {
- IntrinsicCostAttributes Attrs(IID, *CB);
- return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
+ SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
+ return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
+ ParamTys, U, CostKind);
}
if (!TargetTTI->isLoweredToCall(F))
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index d65f91969b69..5a891779e185 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -296,6 +296,30 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return BaseT::getGEPCost(PointeeType, Ptr, Operands);
}
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments, const User *U,
+ TTI::TargetCostKind CostKind) {
+ return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
+ }
+
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys, const User *U,
+ TTI::TargetCostKind CostKind) {
+ if (IID == Intrinsic::cttz) {
+ if (getTLI()->isCheapToSpeculateCttz())
+ return TargetTransformInfo::TCC_Basic;
+ return TargetTransformInfo::TCC_Expensive;
+ }
+
+ if (IID == Intrinsic::ctlz) {
+ if (getTLI()->isCheapToSpeculateCtlz())
+ return TargetTransformInfo::TCC_Basic;
+ return TargetTransformInfo::TCC_Expensive;
+ }
+
+ return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
+ }
+
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JumpTableSize,
ProfileSummaryInfo *PSI,
@@ -1067,40 +1091,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- Intrinsic::ID IID = ICA.getID();
- auto *ConcreteTTI = static_cast<T *>(this);
-
- // Special case some scalar intrinsics.
- if (CostKind != TTI::TCK_RecipThroughput) {
- switch (IID) {
- default:
- break;
- case Intrinsic::cttz:
- if (getTLI()->isCheapToSpeculateCttz())
- return TargetTransformInfo::TCC_Basic;
- break;
- case Intrinsic::ctlz:
- if (getTLI()->isCheapToSpeculateCtlz())
- return TargetTransformInfo::TCC_Basic;
- break;
- case Intrinsic::memcpy:
- return ConcreteTTI->getMemcpyCost(ICA.getInst());
- }
- return BaseT::getIntrinsicInstrCost(ICA, CostKind);
- }
-
// TODO: Combine these two logic paths.
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+ Intrinsic::ID IID = ICA.getID();
const IntrinsicInst *I = ICA.getInst();
- const SmallVectorImpl<Value *> &Args = ICA.getArgs();
- FastMathFlags FMF = ICA.getFlags();
Type *RetTy = ICA.getReturnType();
+ const SmallVectorImpl<Value *> &Args = ICA.getArgs();
unsigned VF = ICA.getVectorFactor();
+ FastMathFlags FMF = ICA.getFlags();
+
unsigned RetVF =
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
+ auto *ConcreteTTI = static_cast<T *>(this);
switch (IID) {
default: {
@@ -1587,14 +1592,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
CostKind) +
ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
CostKind);
- if (IID == Intrinsic::experimental_constrained_fmuladd) {
- IntrinsicCostAttributes FMulAttrs(
- Intrinsic::experimental_constrained_fmul, RetTy, Tys);
- IntrinsicCostAttributes FAddAttrs(
- Intrinsic::experimental_constrained_fadd, RetTy, Tys);
- return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) +
- ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind);
- }
+ if (IID == Intrinsic::experimental_constrained_fmuladd)
+ return ConcreteTTI->getIntrinsicCost(
+ Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
+ CostKind) +
+ ConcreteTTI->getIntrinsicCost(
+ Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
+ CostKind);
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index c16f47a46846..7e05fcca1170 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -63,20 +63,7 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
FMF = FPMO->getFastMathFlags();
}
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
- const CallBase &CI) :
- II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) {
-
- if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
- FMF = FPMO->getFastMathFlags();
-
- FunctionType *FTy =
- CI.getCalledFunction()->getFunctionType();
- ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
-}
-
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
- const CallBase &CI,
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor) :
RetTy(CI.getType()), IID(Id), VF(Factor) {
@@ -89,8 +76,7 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
}
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
- const CallBase &CI,
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
unsigned Factor,
unsigned ScalarCost) :
RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
@@ -250,6 +236,15 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
}
+int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<const Value *> Arguments,
+ const User *U,
+ TTI::TargetCostKind CostKind) const {
+ int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
+ assert(Cost >= 0 && "TTI should not produce negative costs!");
+ return Cost;
+}
+
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) const {
@@ -1421,7 +1416,11 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
}
case Instruction::Call:
- return getUserCost(I, CostKind);
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ IntrinsicCostAttributes CostAttrs(*II);
+ return getIntrinsicInstrCost(CostAttrs, CostKind);
+ }
+ return -1;
default:
// We don't have any information on this instruction.
return -1;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 2405a24dd14f..ca75531be4a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -560,9 +560,6 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- if (ICA.getID() == Intrinsic::fabs)
- return 0;
-
if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 12620ed0aaa2..09f99af8c8e8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2699,9 +2699,6 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
- if (CostKind != TTI::TCK_RecipThroughput)
- return 1;
-
if (ICA.isTypeBasedOnly())
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
@@ -3935,9 +3932,6 @@ int X86TTIImpl::getGatherScatterOpCost(
unsigned Alignment, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr) {
- if (CostKind != TTI::TCK_RecipThroughput)
- return 1;
-
assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5b93aad11e14..8bd1aa8514ca 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1535,7 +1535,7 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
// %inc = add nsw %i.0, 1
// br i1 %tobool
- Value *Args[] =
+ const Value *Args[] =
{InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
: ConstantInt::getFalse(InitX->getContext())};
@@ -1544,11 +1544,9 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
uint32_t HeaderSize =
std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
- IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
- int Cost =
- TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
if (HeaderSize != IdiomCanonicalSize &&
- Cost > TargetTransformInfo::TCC_Basic)
+ TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
+ TargetTransformInfo::TCC_Basic)
return false;
transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,
More information about the llvm-commits
mailing list