[llvm] [TTI][AMDGPU] Allow targets to adjust `LastCallToStaticBonus` via `getInliningLastCallToStaticBonus` (PR #111311)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 10 12:55:25 PDT 2024


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/111311

>From dfde4194ef2e5d9ff364901307042ac2948a56ec Mon Sep 17 00:00:00 2001
From: Shilei Tian <shilei.tian at amd.com>
Date: Sun, 6 Oct 2024 16:16:47 -0400
Subject: [PATCH] [AMDGPU] Increase inline threshold when the callee only has
 one live use

Currently we will not inline a large function even if it only has one live use.
This could significantly impact the performance because CSR spill is very
expensive. The goal of this PR is trying to force the inlining if there is only
one live use by adjusting the inlining threshold, which is a configurable
number. The default value is 15000, which borrows from
`InlineConstants::LastCallToStaticBonus`. I'm not sure if this is a good number,
and if this is the right way to do that. After making this change, the callee in
my local test case can finally be inlined, but the cost is still very close to
the threshold: `cost=14010, threshold=170775`.

Speaking of the test, how are we gonna test this? Do we want to include a giant
IR file?

Fixes SWDEV-471398.
---
 llvm/include/llvm/Analysis/InlineAdvisor.h    |  3 ++-
 llvm/include/llvm/Analysis/InlineCost.h       |  1 -
 .../llvm/Analysis/TargetTransformInfo.h       |  7 ++++++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  5 +++++
 llvm/lib/Analysis/InlineAdvisor.cpp           | 15 +++++++------
 llvm/lib/Analysis/InlineCost.cpp              |  2 +-
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  4 ++++
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp      |  5 +++++
 .../Target/AMDGPU/AMDGPUTargetTransformInfo.h |  1 +
 .../AMDGPU/amdgpu-inline-only-one-live-use.ll | 22 +++++++++++++++++++
 10 files changed, 55 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll

diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 871a6e97861e29..700c3b0f18b8d5 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -371,7 +371,8 @@ getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
 /// using that cost, so we won't do so from this function. Return std::nullopt
 /// if inlining should not be attempted.
 std::optional<InlineCost>
-shouldInline(CallBase &CB, function_ref<InlineCost(CallBase &CB)> GetInlineCost,
+shouldInline(CallBase &CB, TargetTransformInfo &CalleeTTI,
+             function_ref<InlineCost(CallBase &CB)> GetInlineCost,
              OptimizationRemarkEmitter &ORE, bool EnableDeferral = true);
 
 /// Emit ORE message.
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index c5978ce54fc18b..22e0b1bc901a43 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -47,7 +47,6 @@ const int OptAggressiveThreshold = 250;
 int getInstrCost();
 const int IndirectCallThreshold = 100;
 const int LoopPenalty = 25;
-const int LastCallToStaticBonus = 15000;
 const int ColdccPenalty = 2000;
 /// Do not inline functions which allocate this many bytes on the stack
 /// when the caller is recursive.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 5c5da5e06c1bff..64dc9aacd5c57b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -352,6 +352,9 @@ class TargetTransformInfo {
   unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
   unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
 
+  /// \returns The bonus of inlining the last call to a static function.
+  int getInliningLastCallToStaticBonus() const;
+
   /// \returns A value to be added to the inlining threshold.
   unsigned adjustInliningThreshold(const CallBase *CB) const;
 
@@ -1840,6 +1843,7 @@ class TargetTransformInfo::Concept {
   virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
   virtual unsigned
   getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
+  virtual int getInliningLastCallToStaticBonus() const = 0;
   virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
   virtual int getInlinerVectorBonusPercent() const = 0;
   virtual unsigned getCallerAllocaCost(const CallBase *CB,
@@ -2250,6 +2254,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
     return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
   }
+  int getInliningLastCallToStaticBonus() const override {
+    return Impl.getInliningLastCallToStaticBonus();
+  }
   int getInlinerVectorBonusPercent() const override {
     return Impl.getInlinerVectorBonusPercent();
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 3d0140ad7ad7a3..55219915bc3728 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -74,6 +74,11 @@ class TargetTransformInfoImplBase {
   unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
     return 8;
   }
+  int getInliningLastCallToStaticBonus() const {
+    // This is the value of InlineConstants::LastCallToStaticBonus before this
+    // function was introduced.
+    return 15000;
+  }
   unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
   unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
     return 0;
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index c6907cb128bb47..56cad70ab7d9b7 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -151,9 +151,9 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
     return FAM.getResult<TargetLibraryAnalysis>(F);
   };
 
+  Function &Callee = *CB.getCalledFunction();
+  auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
   auto GetInlineCost = [&](CallBase &CB) {
-    Function &Callee = *CB.getCalledFunction();
-    auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
     bool RemarksEnabled =
         Callee.getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
             DEBUG_TYPE);
@@ -161,7 +161,7 @@ std::optional<llvm::InlineCost> static getDefaultInlineAdvice(
                          GetBFI, PSI, RemarksEnabled ? &ORE : nullptr);
   };
   return llvm::shouldInline(
-      CB, GetInlineCost, ORE,
+      CB, CalleeTTI, GetInlineCost, ORE,
       Params.EnableDeferral.value_or(EnableInlineDeferral));
 }
 
@@ -247,7 +247,8 @@ bool InlineAdvisorAnalysis::Result::tryCreate(
 /// \p TotalSecondaryCost will be set to the estimated cost of inlining the
 /// caller if \p CB is suppressed for inlining.
 static bool
-shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
+shouldBeDeferred(Function *Caller, TargetTransformInfo &CalleeTTI,
+                 InlineCost IC, int &TotalSecondaryCost,
                  function_ref<InlineCost(CallBase &CB)> GetInlineCost) {
   // For now we only handle local or inline functions.
   if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
@@ -320,7 +321,7 @@ shouldBeDeferred(Function *Caller, InlineCost IC, int &TotalSecondaryCost,
   // be removed entirely.  We did not account for this above unless there
   // is only one caller of Caller.
   if (ApplyLastCallBonus)
-    TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
+    TotalSecondaryCost -= CalleeTTI.getInliningLastCallToStaticBonus();
 
   // If InlineDeferralScale is negative, then ignore the cost of primary
   // inlining -- IC.getCost() multiplied by the number of callers to Caller.
@@ -374,7 +375,7 @@ void llvm::setInlineRemark(CallBase &CB, StringRef Message) {
 /// using that cost, so we won't do so from this function. Return std::nullopt
 /// if inlining should not be attempted.
 std::optional<InlineCost>
-llvm::shouldInline(CallBase &CB,
+llvm::shouldInline(CallBase &CB, TargetTransformInfo &CalleeTTI,
                    function_ref<InlineCost(CallBase &CB)> GetInlineCost,
                    OptimizationRemarkEmitter &ORE, bool EnableDeferral) {
   using namespace ore;
@@ -414,7 +415,7 @@ llvm::shouldInline(CallBase &CB,
 
   int TotalSecondaryCost = 0;
   if (EnableDeferral &&
-      shouldBeDeferred(Caller, IC, TotalSecondaryCost, GetInlineCost)) {
+      shouldBeDeferred(Caller, CalleeTTI, IC, TotalSecondaryCost, GetInlineCost)) {
     LLVM_DEBUG(dbgs() << "    NOT Inlining: " << CB
                       << " Cost = " << IC.getCost()
                       << ", outer Cost = " << TotalSecondaryCost << '\n');
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d2c329ba748e58..dd9ac910456ade 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1943,7 +1943,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
   // and the callsite.
   int SingleBBBonusPercent = 50;
   int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
-  int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
+  int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus();
 
   // Lambda to set all the above bonus and bonus percentages to 0.
   auto DisallowAllBonuses = [&]() {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 3dc29fc7cd77b1..8ab8a53b753112 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -228,6 +228,10 @@ TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
   return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
 }
 
+int TargetTransformInfo::getInliningLastCallToStaticBonus() const {
+  return TTIImpl->getInliningLastCallToStaticBonus();
+}
+
 unsigned
 TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
   return TTIImpl->adjustInliningThreshold(CB);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 3f4f42377d56ee..c27118e5b0b3e2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1314,6 +1314,11 @@ static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
   return AllocaSize;
 }
 
+int GCNTTIImpl::getInliningLastCallToStaticBonus() const {
+  return BaseT::getInliningLastCallToStaticBonus() *
+         getInliningThresholdMultiplier();
+}
+
 unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   unsigned Threshold = adjustInliningThresholdUsingCallee(CB, TLI, this);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 30da002376251c..022af501289af8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -243,6 +243,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
 
+  int getInliningLastCallToStaticBonus() const;
   unsigned getInliningThresholdMultiplier() const { return 11; }
   unsigned adjustInliningThreshold(const CallBase *CB) const;
   unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
diff --git a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll
new file mode 100644
index 00000000000000..043b73e28b8937
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll
@@ -0,0 +1,22 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s -o - 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: Analyzing call of callee_not_only_one_live_use... (caller:caller)
+; CHECK: Cost: -30
+; CHECK: Analyzing call of callee_only_one_live_use... (caller:caller)
+; CHECK: Cost: -165030
+
+define internal void @callee_not_only_one_live_use() {
+  ret void
+}
+
+define internal void @callee_only_one_live_use() {
+  ret void
+}
+
+define void @caller() {
+  call void @callee_not_only_one_live_use()
+  call void @callee_not_only_one_live_use()
+  call void @callee_only_one_live_use()
+  ret void
+}



More information about the llvm-commits mailing list