[llvm] 871556a - [CostModel] Unify Intrinsic Costs.

Sam Parker via llvm-commits llvm-commits at lists.llvm.org
Tue May 26 01:50:11 PDT 2020


Author: Sam Parker
Date: 2020-05-26T09:48:26+01:00
New Revision: 871556a494552c0f503eec17055f075bcd859937

URL: https://github.com/llvm/llvm-project/commit/871556a494552c0f503eec17055f075bcd859937
DIFF: https://github.com/llvm/llvm-project/commit/871556a494552c0f503eec17055f075bcd859937.diff

LOG: [CostModel] Unify Intrinsic Costs.

Recommitting most of the remaining changes from
259eb619ff6dcd5b6111d1686e18559b9ca004d4, but excluding the call to
getUserCost from getInstructionThroughput. Though there are still no
test changes, I doubt that this is NFC (no functional change)...

With the two getIntrinsicInstrCosts folded into one, this change now
folds in the scalar/code-size orientated getIntrinsicCost. The remaining
scalar intrinsics were memcpy, cttz and ctlz, which now have special
handling in the BasicTTI implementation.

This required a change in the AMDGPU backend for fabs, as it
should always be 'free'. I've also changed the X86 backend to return
the BaseT implementation's cost when the CostKind isn't RecipThroughput.

Differential Revision: https://reviews.llvm.org/D80012

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/CodeGen/BasicTTIImpl.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c2ba9a488dca..c50c696741b1 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -38,6 +38,7 @@ class AssumptionCache;
 class BlockFrequencyInfo;
 class DominatorTree;
 class BranchInst;
+class CallBase;
 class Function;
 class GlobalValue;
 class IntrinsicInst;
@@ -120,10 +121,12 @@ class IntrinsicCostAttributes {
 public:
   IntrinsicCostAttributes(const IntrinsicInst &I);
 
-  IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                           unsigned Factor);
 
-  IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+  IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
                           unsigned Factor, unsigned ScalarCost);
 
   IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
@@ -141,7 +144,7 @@ class IntrinsicCostAttributes {
   IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                           ArrayRef<Type *> Tys);
 
-  IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
                           ArrayRef<Value *> Args);
 
   Intrinsic::ID getID() const { return IID; }
@@ -288,18 +291,6 @@ class TargetTransformInfo {
   /// scientific. A target may has no bonus on vector instructions.
   int getInlinerVectorBonusPercent() const;
 
-  /// Estimate the cost of an intrinsic when lowered.
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
-
-  /// Estimate the cost of an intrinsic when lowered.
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
-
   /// \return the expected cost of a memcpy, which could e.g. depend on the
   /// source/destination type and alignment and the number of bytes copied.
   int getMemcpyCost(const Instruction *I) const;
@@ -1231,13 +1222,6 @@ class TargetTransformInfo::Concept {
                          TTI::TargetCostKind CostKind) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
   virtual int getInlinerVectorBonusPercent() = 0;
-  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<Type *> ParamTys, const User *U,
-                               enum TargetCostKind CostKind) = 0;
-  virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef<const Value *> Arguments,
-                               const User *U,
-                               enum TargetCostKind CostKind) = 0;
   virtual int getMemcpyCost(const Instruction *I) = 0;
   virtual unsigned
   getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
@@ -1495,18 +1479,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   int getInlinerVectorBonusPercent() override {
     return Impl.getInlinerVectorBonusPercent();
   }
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<Type *> ParamTys,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
-    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
-  }
-  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                       ArrayRef<const Value *> Arguments,
-                       const User *U = nullptr,
-                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
-    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  }
   int getMemcpyCost(const Instruction *I) override {
     return Impl.getMemcpyCost(I);
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d353bf056df9..60de70dcb16a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -772,36 +772,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
     return TTI::TCC_Basic;
   }
 
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    switch (IID) {
-    default:
-      break;
-    // TODO: other libc intrinsics.
-    case Intrinsic::memcpy:
-      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
-    }
-    IntrinsicCostAttributes Attrs(IID, RetTy, ParamTys);
-    return getIntrinsicInstrCost(Attrs, CostKind);
-  }
-
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    // Delegate to the generic intrinsic handling code. This mostly provides an
-    // opportunity for targets to (for example) special case the cost of
-    // certain intrinsics based on constants used as arguments.
-    SmallVector<Type *, 8> ParamTys;
-    ParamTys.reserve(Arguments.size());
-    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
-      ParamTys.push_back(Arguments[Idx]->getType());
-    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
-                                                    CostKind);
-  }
-
-  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
-                       TTI::TargetCostKind CostKind) {
+  int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+                  TTI::TargetCostKind CostKind) {
     auto *TargetTTI = static_cast<T *>(this);
 
     // FIXME: Unlikely to be true for anything but CodeSize.
@@ -810,9 +782,8 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
       if (F) {
         FunctionType *FTy = F->getFunctionType();
         if (Intrinsic::ID IID = F->getIntrinsicID()) {
-          SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
-          return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
-                                             ParamTys, U, CostKind);
+          IntrinsicCostAttributes Attrs(IID, *CB);
+          return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind);
         }
 
         if (!TargetTTI->isLoweredToCall(F))

diff  --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index dbbcc795ea00..7866e71853cf 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -296,30 +296,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return BaseT::getGEPCost(PointeeType, Ptr, Operands);
   }
 
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<const Value *> Arguments, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  }
-
-  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Type *> ParamTys, const User *U,
-                            TTI::TargetCostKind CostKind) {
-    if (IID == Intrinsic::cttz) {
-      if (getTLI()->isCheapToSpeculateCttz())
-        return TargetTransformInfo::TCC_Basic;
-      return TargetTransformInfo::TCC_Expensive;
-    }
-
-    if (IID == Intrinsic::ctlz) {
-      if (getTLI()->isCheapToSpeculateCtlz())
-        return TargetTransformInfo::TCC_Basic;
-      return TargetTransformInfo::TCC_Expensive;
-    }
-
-    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
-  }
-
   unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                             unsigned &JumpTableSize,
                                             ProfileSummaryInfo *PSI,
@@ -1090,6 +1066,28 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   /// Get intrinsic cost based on arguments.
   unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                  TTI::TargetCostKind CostKind) {
+    Intrinsic::ID IID = ICA.getID();
+    auto *ConcreteTTI = static_cast<T *>(this);
+
+    // Special case some scalar intrinsics.
+    if (CostKind != TTI::TCK_RecipThroughput) {
+      switch (IID) {
+      default:
+        break;
+      case Intrinsic::cttz:
+        if (getTLI()->isCheapToSpeculateCttz())
+          return TargetTransformInfo::TCC_Basic;
+        break;
+      case Intrinsic::ctlz:
+        if (getTLI()->isCheapToSpeculateCtlz())
+          return TargetTransformInfo::TCC_Basic;
+        break;
+      case Intrinsic::memcpy:
+        return ConcreteTTI->getMemcpyCost(ICA.getInst());
+      // TODO: other libc intrinsics.
+      }
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+    }
 
     if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
       return 0;
@@ -1098,17 +1096,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     if (ICA.isTypeBasedOnly())
       return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
 
-    Intrinsic::ID IID = ICA.getID();
-    const IntrinsicInst *I = ICA.getInst();
     Type *RetTy = ICA.getReturnType();
-    const SmallVectorImpl<Value *> &Args = ICA.getArgs();
     unsigned VF = ICA.getVectorFactor();
-    FastMathFlags FMF = ICA.getFlags();
-
     unsigned RetVF =
         (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
     assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
-    auto *ConcreteTTI = static_cast<T *>(this);
+    const IntrinsicInst *I = ICA.getInst();
+    const SmallVectorImpl<Value *> &Args = ICA.getArgs();
+    FastMathFlags FMF = ICA.getFlags();
 
     switch (IID) {
     default: {
@@ -1595,13 +1590,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
                                                  CostKind) +
              ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
                                                  CostKind);
-    if (IID == Intrinsic::experimental_constrained_fmuladd)
-      return ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
-                 CostKind) +
-             ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
-                 CostKind);
+    if (IID == Intrinsic::experimental_constrained_fmuladd) {
+      IntrinsicCostAttributes FMulAttrs(
+        Intrinsic::experimental_constrained_fmul, RetTy, Tys);
+      IntrinsicCostAttributes FAddAttrs(
+        Intrinsic::experimental_constrained_fadd, RetTy, Tys);
+      return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) +
+             ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind);
+    }
 
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 7e05fcca1170..86952a5ad659 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -63,7 +63,20 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
    FMF = FPMO->getFastMathFlags();
 }
 
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
+                                                 const CallBase &CI) :
+  II(dyn_cast<IntrinsicInst>(&CI)),  RetTy(CI.getType()), IID(Id) {
+
+  if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
+    FMF = FPMO->getFastMathFlags();
+
+  FunctionType *FTy =
+    CI.getCalledFunction()->getFunctionType();
+  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
+                                                 const CallBase &CI,
                                                  unsigned Factor) :
     RetTy(CI.getType()), IID(Id), VF(Factor) {
 
@@ -76,7 +89,8 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
   ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
 }
 
-IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
+                                                 const CallBase &CI,
                                                  unsigned Factor,
                                                  unsigned ScalarCost) :
     RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
@@ -236,15 +250,6 @@ int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
   return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
 }
 
-int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                                          ArrayRef<const Value *> Arguments,
-                                          const User *U,
-                                          TTI::TargetCostKind CostKind) const {
-  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
 unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
     const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI,
     BlockFrequencyInfo *BFI) const {

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index ca75531be4a4..2405a24dd14f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -560,6 +560,9 @@ static bool intrinsicHasPackedVectorBenefit(Intrinsic::ID ID) {
 
 int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
+  if (ICA.getID() == Intrinsic::fabs)
+    return 0;
+
   if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
     return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 

diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 09f99af8c8e8..4170b102f2b3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2699,6 +2699,9 @@ int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
 
 int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                       TTI::TargetCostKind CostKind) {
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+
   if (ICA.isTypeBasedOnly())
     return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
 
@@ -3932,6 +3935,9 @@ int X86TTIImpl::getGatherScatterOpCost(
     unsigned Alignment, TTI::TargetCostKind CostKind,
     const Instruction *I = nullptr) {
 
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return 1;
+
   assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter");
   unsigned VF = cast<VectorType>(SrcVTy)->getNumElements();
   PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());

diff  --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 8bd1aa8514ca..5b93aad11e14 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -1535,7 +1535,7 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   //  %inc = add nsw %i.0, 1
   //  br i1 %tobool
 
-  const Value *Args[] =
+  Value *Args[] =
       {InitX, ZeroCheck ? ConstantInt::getTrue(InitX->getContext())
                         : ConstantInt::getFalse(InitX->getContext())};
 
@@ -1544,9 +1544,11 @@ bool LoopIdiomRecognize::recognizeAndInsertFFS() {
   uint32_t HeaderSize =
       std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end());
 
+  IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args);
+  int Cost =
+    TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency);
   if (HeaderSize != IdiomCanonicalSize &&
-      TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) >
-          TargetTransformInfo::TCC_Basic)
+      Cost > TargetTransformInfo::TCC_Basic)
     return false;
 
   transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,


        


More information about the llvm-commits mailing list