[llvm] [Inline] Accumulate the cost of the inlined function to the new call site (PR #111104)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 4 00:21:56 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: DianQK (DianQK)
<details>
<summary>Changes</summary>
Fixes #<!-- -->111102.
Consider the following IR, where `call void @<!-- -->bar` is a locally hot call site:
```llvm
define void @<!-- -->foo() {
...
loop:
call void @<!-- -->bar()
...
}
define void @<!-- -->bar() {
...
call void @<!-- -->baz()
...
}
define void @<!-- -->baz() {
...
}
```
If `foo` can inline `baz` after having inlined `bar`, I think it should also be able to inline a `bar` that has already inlined `baz`. With this in mind, I accumulate the cost of the previous inlining into the new call site, which acts as a linearly increasing limit.
I did some experimenting on [DianQK/perf/inline-extra-cost](https://llvm-compile-time-tracker.com/?config=Overview&stat=instructions%3Au&remote=DianQK) and the current thresholds look OK. I'm looking forward to getting some feedback before turning this threshold into an option.
---
Full diff: https://github.com/llvm/llvm-project/pull/111104.diff
8 Files Affected:
- (modified) llvm/include/llvm/Analysis/InlineAdvisor.h (+9-3)
- (modified) llvm/include/llvm/Analysis/InlineCost.h (+3)
- (modified) llvm/lib/Analysis/InlineAdvisor.cpp (+4-2)
- (modified) llvm/lib/Analysis/InlineCost.cpp (+5)
- (modified) llvm/lib/Transforms/IPO/Inliner.cpp (+15)
- (modified) llvm/lib/Transforms/IPO/ModuleInliner.cpp (+18-1)
- (modified) llvm/test/Transforms/Inline/inline-history-noinline.ll (+1-1)
- (added) llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll (+109)
``````````diff
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 871a6e97861e29..2880553817ebd8 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -74,7 +74,8 @@ class InlineAdvisor;
class InlineAdvice {
public:
InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
- OptimizationRemarkEmitter &ORE, bool IsInliningRecommended);
+ OptimizationRemarkEmitter &ORE, bool IsInliningRecommended,
+ std::optional<int> InliningCost = std::nullopt);
InlineAdvice(InlineAdvice &&) = delete;
InlineAdvice(const InlineAdvice &) = delete;
@@ -108,6 +109,7 @@ class InlineAdvice {
/// Get the inlining recommendation.
bool isInliningRecommended() const { return IsInliningRecommended; }
+ std::optional<int> inliningCost() const { return InliningCost; }
const DebugLoc &getOriginalCallSiteDebugLoc() const { return DLoc; }
const BasicBlock *getOriginalCallSiteBasicBlock() const { return Block; }
@@ -129,6 +131,7 @@ class InlineAdvice {
const BasicBlock *const Block;
OptimizationRemarkEmitter &ORE;
const bool IsInliningRecommended;
+ const std::optional<int> InliningCost;
private:
void markRecorded() {
@@ -145,8 +148,11 @@ class DefaultInlineAdvice : public InlineAdvice {
DefaultInlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
std::optional<InlineCost> OIC,
OptimizationRemarkEmitter &ORE, bool EmitRemarks = true)
- : InlineAdvice(Advisor, CB, ORE, OIC.has_value()), OriginalCB(&CB),
- OIC(OIC), EmitRemarks(EmitRemarks) {}
+ : InlineAdvice(Advisor, CB, ORE, OIC.has_value(),
+ OIC && OIC->isVariable()
+ ? std::optional<int>(OIC->getCost())
+ : std::nullopt),
+ OriginalCB(&CB), OIC(OIC), EmitRemarks(EmitRemarks) {}
private:
void recordUnsuccessfulInliningImpl(const InlineResult &Result) override;
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index c5978ce54fc18b..1190308dba4193 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -58,6 +58,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
const char FunctionInlineCostMultiplierAttributeName[] =
"function-inline-cost-multiplier";
+/// Cost of call site accumulation added after inlining.
+const char FunctionInlineAdditionalCostAttributeName[] =
+ "function-inline-additional-cost";
const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize";
} // namespace InlineConstants
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index c6907cb128bb47..9706dd212d7bc5 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -175,10 +175,12 @@ DefaultInlineAdvisor::getAdviceImpl(CallBase &CB) {
InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB,
OptimizationRemarkEmitter &ORE,
- bool IsInliningRecommended)
+ bool IsInliningRecommended,
+ std::optional<int> InliningCost)
: Advisor(Advisor), Caller(CB.getCaller()), Callee(CB.getCalledFunction()),
DLoc(CB.getDebugLoc()), Block(CB.getParent()), ORE(ORE),
- IsInliningRecommended(IsInliningRecommended) {}
+ IsInliningRecommended(IsInliningRecommended), InliningCost(InliningCost) {
+}
void InlineAdvice::recordInlineStatsIfNeeded() {
if (Advisor->ImportedFunctionsStats)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d2c329ba748e58..e45423f2130d8a 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1017,6 +1017,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
InlineConstants::FunctionInlineCostMultiplierAttributeName))
Cost *= *AttrCostMult;
+ if (std::optional<int> AttrAdditonalCost = getStringFnAttrAsInt(
+ CandidateCall,
+ InlineConstants::FunctionInlineAdditionalCostAttributeName))
+ Cost += *AttrAdditonalCost;
+
if (std::optional<int> AttrThreshold =
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 23ee23eb047f58..977bbcd35f73e8 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -376,6 +376,11 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
getStringFnAttrAsInt(
*CB, InlineConstants::FunctionInlineCostMultiplierAttributeName)
.value_or(1);
+ int CBInliningAdditionalCost =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+ .value_or(0);
+ std::optional<int> InliningCost = Advice->inliningCost();
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -435,6 +440,16 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
InlineConstants::FunctionInlineCostMultiplierAttributeName,
itostr(CBCostMult * IntraSCCCostMultiplier));
ICB->addFnAttr(NewCBCostMult);
+ } else if (InliningCost && *InliningCost > 0) {
+ // Similar to hot call site thresholds that can cause Inliner to
+ // inline numerous functions causing compile time issues, a
+ // linear accumulator was created to mitigate the problem.
+ Attribute NewCBAdditionalCost = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineAdditionalCostAttributeName,
+ itostr(CBInliningAdditionalCost +
+ (*InliningCost - CBInliningAdditionalCost) / 16));
+ ICB->addFnAttr(NewCBAdditionalCost);
}
}
}
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index dbc733826944b9..c196be9d6dd163 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -225,6 +225,11 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
Advice->recordUnattemptedInlining();
continue;
}
+ int CBInliningAdditionalCost =
+ getStringFnAttrAsInt(
+ *CB, InlineConstants::FunctionInlineAdditionalCostAttributeName)
+ .value_or(0);
+ std::optional<int> InliningCost = Advice->inliningCost();
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
@@ -265,8 +270,20 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
NewCallee = ICB->getCalledFunction();
}
if (NewCallee)
- if (!NewCallee->isDeclaration())
+ if (!NewCallee->isDeclaration()) {
Calls->push({ICB, NewHistoryID});
+ if (InliningCost && *InliningCost > 0) {
+ // Similar to hot call site thresholds that can cause Inliner to
+ // inline numerous functions causing compile time issues, a linear
+ // accumulator was created to mitigate the problem.
+ Attribute NewCBAdditionalCost = Attribute::get(
+ M.getContext(),
+ InlineConstants::FunctionInlineAdditionalCostAttributeName,
+ itostr(CBInliningAdditionalCost +
+ (*InliningCost - CBInliningAdditionalCost) / 16));
+ ICB->addFnAttr(NewCBAdditionalCost);
+ }
+ }
}
}
diff --git a/llvm/test/Transforms/Inline/inline-history-noinline.ll b/llvm/test/Transforms/Inline/inline-history-noinline.ll
index 742bd25ecd9bb9..fbe633fc3c797b 100644
--- a/llvm/test/Transforms/Inline/inline-history-noinline.ll
+++ b/llvm/test/Transforms/Inline/inline-history-noinline.ll
@@ -29,4 +29,4 @@ define internal void @a() {
ret void
}
-; CHECK: [[NOINLINE]] = { noinline }
+; CHECK: [[NOINLINE]] = { noinline {{.*}}}
diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
new file mode 100644
index 00000000000000..a1730d76cd547c
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-hot-callsite-limit.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; This tests that a hot callsite gets the (higher) inlinehint-threshold even without
+; inline hints and gets inlined because the cost is less than that threshold.
+; RUN: opt < %s -passes=inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+; RUN: opt < %s -passes=module-inline -inline-threshold=0 -locally-hot-callsite-threshold=30 -S | FileCheck %s
+
+; Due to the hot call site, foo0 inlined foo1, foo2, and foo3,
+; but foo4 is not inlined due to the accumulated cost.
+
+declare void @bar(ptr)
+
+define void @foo0(ptr %p) {
+; CHECK-LABEL: define void @foo0(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[HEADER:.*:]]
+; CHECK-NEXT: [[I_I2:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I_I1:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT: call void @bar(ptr [[I_I]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT: call void @bar(ptr [[I_I1]])
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT: call void @bar(ptr [[I_I2]])
+; CHECK-NEXT: call void @foo4(ptr [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I2]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I1]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I_I]])
+; CHECK-NEXT: br label %[[LOOP]]
+;
+header:
+ br label %loop
+
+loop:
+ call void @foo1(ptr %p)
+ br label %loop
+}
+
+define void @foo1(ptr %p) {
+; CHECK-LABEL: define void @foo1(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo2(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo2(ptr %p)
+ ret void
+}
+
+define void @foo2(ptr %p) {
+; CHECK-LABEL: define void @foo2(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo3(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo3(ptr %p)
+ ret void
+}
+
+define void @foo3(ptr %p) {
+; CHECK-LABEL: define void @foo3(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo4(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo4(ptr %p)
+ ret void
+}
+
+define void @foo4(ptr %p) {
+; CHECK-LABEL: define void @foo4(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @foo5(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @foo5(ptr %p)
+ ret void
+}
+
+define void @foo5(ptr %p) {
+; CHECK-LABEL: define void @foo5(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: call void @bar(ptr [[I]])
+; CHECK-NEXT: ret void
+;
+ %i = alloca i32
+ call void @bar(ptr %i)
+ call void @bar(ptr %i)
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/111104
More information about the llvm-commits
mailing list