[llvm] [Sample Profile] Check hot callsite threshold when inlining a function with a sample profile (PR #93286)
William Junda Huang via llvm-commits
llvm-commits at lists.llvm.org
Fri May 24 02:51:46 PDT 2024
https://github.com/huangjd created https://github.com/llvm/llvm-project/pull/93286
Currently if a callsite is hot as determined by the sample profile, it is unconditionally inlined barring invalid cases (such as recursion). Inline cost check should still apply because a function's hotness and its inline cost are two different things.
For example if a function is calling another very large function multiple times (at different code paths), the large function should not be inlined even if its hot.
>From 7fadab699144313c1cb39662734c1f16c309437a Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Fri, 24 May 2024 05:42:27 -0400
Subject: [PATCH] [Sample Profile] Check hot callsite threshold when inlining a
function with a sample profile
Currently if a callsite is hot as determined by the sample profile, it
is unconditionally inlined barring invalid cases (such as recursion).
Inline cost check should still apply because a function's hotness and
its inline cost are two different things.
For example if a function is calling another very large function
multiple times (at different code paths), the large function should not
be inlined even if its hot.
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 8 ++-
.../Inputs/inline-hot-callsite-threshold.prof | 3 +
.../inline-hot-callsite-threshold.ll | 61 +++++++++++++++++++
.../SampleProfile/pseudo-probe-inline.ll | 2 +-
llvm/test/Transforms/SampleProfile/remarks.ll | 4 +-
5 files changed, 72 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof
create mode 100644 llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 0920179fb76b7..10d6e2282ac2a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1391,10 +1391,12 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
return InlineCost::getAlways("preinliner");
}
- // For old FDO inliner, we inline the call site as long as cost is not
- // "Never". The cost-benefit check is done earlier.
+ // For old FDO inliner, we inline the call site if it is below hot threshold,
+ // even if the function is hot based on sample profile data. This is to
+ // prevent huge functions from being inlined.
if (!CallsitePrioritizedInline) {
- return InlineCost::get(Cost.getCost(), INT_MAX);
+ return InlineCost::get(Cost.getCost(),
+ Params.HotCallSiteThreshold.value_or(INT_MAX));
}
// Otherwise only use the cost from call analyzer, but overwite threshold with
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof
new file mode 100644
index 0000000000000..d1c0408210f49
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof
@@ -0,0 +1,3 @@
+foo:100:100
+ 1: bar:100
+ 1:100
diff --git a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll
new file mode 100644
index 0000000000000..0584e1b5ee679
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=100 2>&1 | FileCheck %s
+
+; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100)
+; CHECK: define dso_local noundef i32 @foo(i32 noundef %0)
+; CHECK-NOT: %2 = tail call noundef i32 @bar(i32 noundef %0)
+; CHECK-NEXT: %2 = icmp sgt i32 %0, 1
+; CHECK-NEXT: br i1 %2, label %3, label %bar.exit
+
+; Manually lower cost threshold for hot function inlining, so that the function
+; is not inlined even profile indicates it as hot.
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=1 2>&1 | FileCheck %s --check-prefix=COST
+
+; COST-NOT: remark
+; COST: define dso_local noundef i32 @foo(i32 noundef %0)
+; COST-NEXT: %2 = tail call noundef i32 @bar(i32 noundef %0)
+
+define dso_local noundef i32 @bar(i32 noundef %0) #0 !dbg !10 {
+ %2 = icmp sgt i32 %0, 1
+ br i1 %2, label %3, label %15
+3: ; preds = %1
+ %4 = add nsw i32 %0, -2
+ %5 = mul i32 %4, %4
+ %6 = add i32 %5, %0
+ %7 = zext nneg i32 %4 to i33
+ %8 = add nsw i32 %0, -3
+ %9 = zext i32 %8 to i33
+ %10 = mul i33 %7, %9
+ %11 = lshr i33 %10, 1
+ %12 = trunc nuw i33 %11 to i32
+ %13 = xor i32 %12, -1
+ %14 = add i32 %6, %13
+ br label %15
+15: ; preds = %3, %1
+ %16 = phi i32 [ 0, %1 ], [ %14, %3 ]
+ ret i32 %16
+}
+
+define dso_local noundef i32 @foo(i32 noundef %0) #1 !dbg !20 {
+ %2 = tail call noundef i32 @bar(i32 noundef %0), !dbg !24
+ ret i32 %2
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" }
+attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "use-sample-profile" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
+!1 = !DIFile(filename: "a.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 1, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!11 = !DIFile(filename: "a.cc", directory: ".")
+!12 = !DISubroutineType(types: !13)
+!13 = !{!14, !14}
+!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !11, file: !11, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!23 = !DILocation(line: 0, scope: !20)
+!24 = !DILocation(line: 6, column: 12, scope: !20)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index 18cbd857d97bb..2cd9abf0e11e9 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -98,7 +98,7 @@ if.end:
;YAML-NEXT: - String: '(cost='
;YAML-NEXT: - Cost: '15'
;YAML-NEXT: - String: ', threshold='
-;YAML-NEXT: - Threshold: '2147483647'
+;YAML-NEXT: - Threshold: '3000'
;YAML-NEXT: - String: ')'
;YAML-NEXT: - String: ' at callsite '
;YAML-NEXT: - String: foo
diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 997e02bb5b544..9c0143ae65ca7 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -22,7 +22,7 @@
; We are expecting foo() to be inlined in main() (almost all the cycles are
; spent inside foo).
-; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
+; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=3000) at callsite main:0:21;
; CHECK: remark: remarks.cc:9:19: 'rand' inlined into 'main' to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -51,7 +51,7 @@
;YAML-NEXT: - String: '(cost='
;YAML-NEXT: - Cost: '130'
;YAML-NEXT: - String: ', threshold='
-;YAML-NEXT: - Threshold: '2147483647'
+;YAML-NEXT: - Threshold: '3000'
;YAML-NEXT: - String: ')'
;YAML-NEXT: - String: ' at callsite '
;YAML-NEXT: - String: main
More information about the llvm-commits
mailing list