[llvm] [Sample Profile] Check hot callsite threshold when inlining a function with a sample profile (PR #93286)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 24 02:52:04 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: William Junda Huang (huangjd)

<details>
<summary>Changes</summary>


Currently if a callsite is hot as determined by the sample profile, it is unconditionally inlined barring invalid cases (such as recursion). Inline cost check should still apply because a function's hotness and its inline cost are two different things.
For example if a function is calling another very large function multiple times (at different code paths), the large function should not be inlined even if its hot.

---
Full diff: https://github.com/llvm/llvm-project/pull/93286.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/IPO/SampleProfile.cpp (+5-3) 
- (added) llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof (+3) 
- (added) llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll (+61) 
- (modified) llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll (+1-1) 
- (modified) llvm/test/Transforms/SampleProfile/remarks.ll (+2-2) 


``````````diff
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 0920179fb76b7..10d6e2282ac2a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1391,10 +1391,12 @@ SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
       return InlineCost::getAlways("preinliner");
   }
 
-  // For old FDO inliner, we inline the call site as long as cost is not
-  // "Never". The cost-benefit check is done earlier.
+  // For old FDO inliner, we inline the call site if it is below hot threshold,
+  // even if the function is hot based on sample profile data. This is to
+  // prevent huge functions from being inlined.
   if (!CallsitePrioritizedInline) {
-    return InlineCost::get(Cost.getCost(), INT_MAX);
+    return InlineCost::get(Cost.getCost(),
+                           Params.HotCallSiteThreshold.value_or(INT_MAX));
   }
 
   // Otherwise only use the cost from call analyzer, but overwite threshold with
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof
new file mode 100644
index 0000000000000..d1c0408210f49
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-hot-callsite-threshold.prof
@@ -0,0 +1,3 @@
+foo:100:100
+ 1: bar:100
+  1:100
diff --git a/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll
new file mode 100644
index 0000000000000..0584e1b5ee679
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/inline-hot-callsite-threshold.ll
@@ -0,0 +1,61 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=100 2>&1 | FileCheck %s
+
+; CHECK: remark: a.cc:6:12: 'bar' inlined into 'foo' to match profiling context with (cost={{.*}}, threshold=100)
+; CHECK:     define dso_local noundef i32 @foo(i32 noundef %0)
+; CHECK-NOT:   %2 = tail call noundef i32 @bar(i32 noundef %0)
+; CHECK-NEXT:  %2 = icmp sgt i32 %0, 1
+; CHECK-NEXT:  br i1 %2, label %3, label %bar.exit
+
+; Manually lower cost threshold for hot function inlining, so that the function
+; is not inlined even profile indicates it as hot.
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-hot-callsite-threshold.prof -S -pass-remarks=sample-profile -hot-callsite-threshold=1 2>&1 | FileCheck %s --check-prefix=COST
+
+; COST-NOT:  remark
+; COST: define dso_local noundef i32 @foo(i32 noundef %0)
+; COST-NEXT: %2 = tail call noundef i32 @bar(i32 noundef %0)
+
+define dso_local noundef i32 @bar(i32 noundef %0) #0 !dbg !10 {
+  %2 = icmp sgt i32 %0, 1
+  br i1 %2, label %3, label %15
+3:                                                ; preds = %1
+  %4 = add nsw i32 %0, -2
+  %5 = mul i32 %4, %4
+  %6 = add i32 %5, %0
+  %7 = zext nneg i32 %4 to i33
+  %8 = add nsw i32 %0, -3
+  %9 = zext i32 %8 to i33
+  %10 = mul i33 %7, %9
+  %11 = lshr i33 %10, 1
+  %12 = trunc nuw i33 %11 to i32
+  %13 = xor i32 %12, -1
+  %14 = add i32 %6, %13
+  br label %15
+15:                                               ; preds = %3, %1
+  %16 = phi i32 [ 0, %1 ], [ %14, %3 ]
+  ret i32 %16
+}
+
+define dso_local noundef i32 @foo(i32 noundef %0) #1 !dbg !20 {
+  %2 = tail call noundef i32 @bar(i32 noundef %0), !dbg !24
+  ret i32 %2
+}
+
+attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable  "use-sample-profile" }
+attributes #1 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable  "use-sample-profile" }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug)
+!1 = !DIFile(filename: "a.cc", directory: ".")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 1, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!11 = !DIFile(filename: "a.cc", directory: ".")
+!12 = !DISubroutineType(types: !13)
+!13 = !{!14, !14}
+!14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!20 = distinct !DISubprogram(name: "foo", linkageName: "foo", scope: !11, file: !11, line: 5, type: !12, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!23 = !DILocation(line: 0, scope: !20)
+!24 = !DILocation(line: 6, column: 12, scope: !20)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index 18cbd857d97bb..2cd9abf0e11e9 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -98,7 +98,7 @@ if.end:
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '15'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '2147483647'
+;YAML-NEXT:    - Threshold:       '3000'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          foo
diff --git a/llvm/test/Transforms/SampleProfile/remarks.ll b/llvm/test/Transforms/SampleProfile/remarks.ll
index 997e02bb5b544..9c0143ae65ca7 100644
--- a/llvm/test/Transforms/SampleProfile/remarks.ll
+++ b/llvm/test/Transforms/SampleProfile/remarks.ll
@@ -22,7 +22,7 @@
 
 ; We are expecting foo() to be inlined in main() (almost all the cycles are
 ; spent inside foo).
-; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=2147483647) at callsite main:0:21;
+; CHECK: remark: remarks.cc:13:21: '_Z3foov' inlined into 'main' to match profiling context with (cost=130, threshold=3000) at callsite main:0:21;
 ; CHECK: remark: remarks.cc:9:19: 'rand' inlined into 'main' to match profiling context with (cost=always): always inline attribute at callsite _Z3foov:6:19 @ main:0:21;
 
 ; The back edge for the loop is the hottest edge in the loop subgraph.
@@ -51,7 +51,7 @@
 ;YAML-NEXT:    - String:          '(cost='
 ;YAML-NEXT:    - Cost:            '130'
 ;YAML-NEXT:    - String:          ', threshold='
-;YAML-NEXT:    - Threshold:       '2147483647'
+;YAML-NEXT:    - Threshold:       '3000'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:    - String:          ' at callsite '
 ;YAML-NEXT:    - String:          main

``````````

</details>


https://github.com/llvm/llvm-project/pull/93286


More information about the llvm-commits mailing list