[llvm] [TTI][AMDGPU] Allow targets to adjust `LastCallToStaticBonus` via `getInliningLastCallToStaticBonus` (PR #111311)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 7 12:51:32 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/111311
>From dba75a5bf12020f893de0b155b24c316162cb2af Mon Sep 17 00:00:00 2001
From: Shilei Tian <shilei.tian at amd.com>
Date: Sun, 6 Oct 2024 16:16:47 -0400
Subject: [PATCH] [AMDGPU] Increase inline threshold when the callee only has
one live use
Currently we do not inline a large function even if it has only one live use.
This can significantly hurt performance because CSR spilling is very
expensive. The goal of this PR is to force inlining when there is only
one live use by adjusting the inlining threshold, which is a configurable
number. The default value is 15000, which is borrowed from
`InlineConstants::LastCallToStaticBonus`. I'm not sure whether this is a good
number or whether this is the right way to do it. After making this change, the
callee in my local test case can finally be inlined, but the cost is still very
close to the threshold: `cost=14010, threshold=170775`.
As for testing, how should we test this? Do we want to include a giant
IR file?
Fixes SWDEV-471398.
---
.../llvm/Analysis/TargetTransformInfo.h | 7 +++++
.../llvm/Analysis/TargetTransformInfoImpl.h | 4 +++
llvm/lib/Analysis/InlineCost.cpp | 2 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 +++
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 +++++++
.../Target/AMDGPU/AMDGPUTargetTransformInfo.h | 1 +
.../AMDGPU/amdgpu-inline-only-one-live-use.ll | 26 +++++++++++++++++++
7 files changed, 52 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 89a85bc8a90864..30a20d388939cc 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -352,6 +352,9 @@ class TargetTransformInfo {
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const;
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const;
+ /// \returns The bonus of inlining the last call to a static function.
+ int getInliningLastCallToStaticBonus() const;
+
/// \returns A value to be added to the inlining threshold.
unsigned adjustInliningThreshold(const CallBase *CB) const;
@@ -1822,6 +1825,7 @@ class TargetTransformInfo::Concept {
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0;
virtual unsigned
getInliningCostBenefitAnalysisProfitableMultiplier() const = 0;
+ virtual int getInliningLastCallToStaticBonus() const = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
virtual int getInlinerVectorBonusPercent() const = 0;
virtual unsigned getCallerAllocaCost(const CallBase *CB,
@@ -2225,6 +2229,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
}
+ int getInliningLastCallToStaticBonus() const override {
+ return Impl.getInliningLastCallToStaticBonus();
+ }
int getInlinerVectorBonusPercent() const override {
return Impl.getInlinerVectorBonusPercent();
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 50040dc8f6165b..1acb5c95b729aa 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -74,6 +74,10 @@ class TargetTransformInfoImplBase {
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
return 8;
}
+ int getInliningLastCallToStaticBonus() const {
+ // This is the same as InlineConstants::LastCallToStaticBonus.
+ return 15000;
+ }
unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
return 0;
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d2c329ba748e58..dd9ac910456ade 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1943,7 +1943,7 @@ void InlineCostCallAnalyzer::updateThreshold(CallBase &Call, Function &Callee) {
// and the callsite.
int SingleBBBonusPercent = 50;
int VectorBonusPercent = TTI.getInlinerVectorBonusPercent();
- int LastCallToStaticBonus = InlineConstants::LastCallToStaticBonus;
+ int LastCallToStaticBonus = TTI.getInliningLastCallToStaticBonus();
// Lambda to set all the above bonus and bonus percentages to 0.
auto DisallowAllBonuses = [&]() {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b5195f764cbd1c..889aeae2fc1274 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -228,6 +228,10 @@ TargetTransformInfo::getInliningCostBenefitAnalysisProfitableMultiplier()
return TTIImpl->getInliningCostBenefitAnalysisProfitableMultiplier();
}
+int TargetTransformInfo::getInliningLastCallToStaticBonus() const {
+ return TTIImpl->getInliningLastCallToStaticBonus();
+}
+
unsigned
TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
return TTIImpl->adjustInliningThreshold(CB);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index d348166c2d9a04..47d3d09279f472 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -75,6 +75,10 @@ static cl::opt<size_t> InlineMaxBB(
cl::desc("Maximum number of BBs allowed in a function after inlining"
" (compile time constraint)"));
+static cl::opt<unsigned> InlineLastCallToStaticBonus(
+ "amdgpu-inline-last-call-to-static-bonus", cl::Hidden, cl::init(165000),
+ cl::desc("Threshold added when the callee only has one live use"));
+
static bool dependsOnLocalPhi(const Loop *L, const Value *Cond,
unsigned Depth = 0) {
const Instruction *I = dyn_cast<Instruction>(Cond);
@@ -1299,6 +1303,10 @@ static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
return AllocaSize;
}
+int GCNTTIImpl::getInliningLastCallToStaticBonus() const {
+ return InlineLastCallToStaticBonus;
+}
+
unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
unsigned Threshold = adjustInliningThresholdUsingCallee(CB, TLI, this);
@@ -1307,6 +1315,7 @@ unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
unsigned AllocaSize = getCallArgsTotalAllocaSize(CB, DL);
if (AllocaSize > 0)
Threshold += ArgAllocaCost;
+
return Threshold;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 76785ee456a417..9f077535f781ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -240,6 +240,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
+ int getInliningLastCallToStaticBonus() const;
unsigned getInliningThresholdMultiplier() const { return 11; }
unsigned adjustInliningThreshold(const CallBase *CB) const;
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
diff --git a/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll
new file mode 100644
index 00000000000000..cf38ada57aced7
--- /dev/null
+++ b/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-only-one-live-use.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-DEFAULT %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s -amdgpu-inline-last-call-to-static-bonus=1024 -o - 2>&1 | FileCheck --check-prefixes=CHECK,CHECK-USER %s
+; REQUIRES: asserts
+
+; CHECK: Analyzing call of callee_not_only_one_live_use... (caller:caller)
+; CHECK: Cost: -30
+; CHECK: Threshold: 0
+; CHECK: Analyzing call of callee_only_one_live_use... (caller:caller)
+; CHECK-DEFAULT: Cost: -165030
+; CHECK-USER: Cost: -1054
+; CHECK: Threshold: 0
+
+define internal void @callee_not_only_one_live_use() {
+ ret void
+}
+
+define internal void @callee_only_one_live_use() {
+ ret void
+}
+
+define void @caller() {
+ call void @callee_not_only_one_live_use()
+ call void @callee_not_only_one_live_use()
+ call void @callee_only_one_live_use()
+ ret void
+}
More information about the llvm-commits
mailing list