[llvm] MemProf: Add minimum count threshold for inlining of promoted calls (PR #148001)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 10 09:59:39 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lto

Author: Teresa Johnson (teresajohnson)

<details>
<summary>Changes</summary>

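Allow users to set the minimum absolute count for inlining of indirect
calls promoted during cloning. Promoted direct calls whose profile count
is below the threshold are marked noinline; the default threshold of 0
disables the check. This is primarily meant to enable generation of the
synthetic VP metadata introduced in PR #141164 when profiling
memprof-optimized binaries.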


---
Full diff: https://github.com/llvm/llvm-project/pull/148001.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp (+9) 
- (modified) llvm/test/ThinLTO/X86/memprof-icp-recursive.ll (+44-7) 


``````````diff
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c0f84456d2b27..3a7c3cd773b0a 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts(
     "memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
     cl::desc("Allow cloning of contexts having recursive cycles"));
 
+// Set the minimum absolute count threshold for inlining of indirect calls
+// promoted during cloning.
+static cl::opt<unsigned> ICPInlineMinimumCountThreshold(
+    "memprof-icp-inline-minimum-count-threshold", cl::init(0), cl::Hidden,
+    cl::desc("Minimum absolute count for promoted target to be inlinable"));
+
 namespace llvm {
 cl::opt<bool> EnableMemProfContextDisambiguation(
     "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -5573,6 +5579,9 @@ void MemProfContextDisambiguation::performICP(
                                  .getCallee());
         }
         DirectCall.setCalledFunction(TargetToUse);
+        if (ICPInlineMinimumCountThreshold &&
+            Candidate.Count < ICPInlineMinimumCountThreshold)
+          DirectCall.setIsNoInline();
         ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
                  << ore::NV("Call", CBClone) << " in clone "
                  << ore::NV("Caller", CBClone->getFunction())
diff --git a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
index f8dcd80d4e141..9e2f6e6130829 100644
--- a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
+++ b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
@@ -54,7 +54,39 @@
 ; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
 ; RUN:  --check-prefix=REMARKS
 
-; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE
+
+;; Next, add a threshold to prevent inlining of small count promoted calls.
+; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
+; RUN:	-memprof-icp-inline-minimum-count-threshold=3 \
+; RUN:	-enable-memprof-indirect-call-support=true \
+; RUN:  -memprof-allow-recursive-callsites \
+; RUN:  -supports-hot-cold-new \
+; RUN:  -r=%t/foo.o,_Z3fooR2B0j,plx \
+; RUN:  -r=%t/foo.o,_ZN2B03barEj, \
+; RUN:  -r=%t/foo.o,_ZN1B3barEj, \
+; RUN:  -r=%t/main.o,_Z3fooR2B0j, \
+; RUN:  -r=%t/main.o,_Znwm, \
+; RUN:  -r=%t/main.o,_ZdlPvm, \
+; RUN:  -r=%t/main.o,_Z8externalPi, \
+; RUN:  -r=%t/main.o,main,plx \
+; RUN:  -r=%t/main.o,_ZN2B03barEj,plx \
+; RUN:  -r=%t/main.o,_ZN1B3barEj,plx \
+; RUN:  -r=%t/main.o,_ZTV1B,plx \
+; RUN:  -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
+; RUN:  -r=%t/main.o,_ZTS1B,plx \
+; RUN:  -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
+; RUN:  -r=%t/main.o,_ZTS2B0,plx \
+; RUN:  -r=%t/main.o,_ZTI2B0,plx \
+; RUN:  -r=%t/main.o,_ZTI1B,plx \
+; RUN:  -r=%t/main.o,_ZTV2B0,plx \
+; RUN:	-thinlto-threads=1 \
+; RUN:  -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN:  -pass-remarks=. -save-temps \
+; RUN:  -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
+; RUN:  --check-prefix=REMARKS
+
+; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE
 
 ; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
 ; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
@@ -98,12 +130,14 @@
 ; IR:   %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
 ; IR:   br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
 ; IR: if.true.direct_targ:
-; IR:   call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
+; IR-INLINE:   call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
+; IR-NOINLINE:   call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]]
 ; IR: if.false.orig_indirect:
 ; IR:   %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
 ; IR:   br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
 ; IR: if.true.direct_targ1:
-; IR:   call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
+; IR-INLINE:   call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
+; IR-NOINLINE:   call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]]
 ; IR: if.false.orig_indirect2:
 ; IR:   call {{.*}} %0
 
@@ -114,17 +148,20 @@
 ; IR:   %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
 ; IR:   br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
 ; IR: if.true.direct_targ:
-; IR:   call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
+; IR-INLINE:   call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
+; IR-NOINLINE:   call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
 ; IR: if.false.orig_indirect:
 ; IR:   %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
 ; IR:   br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
 ; IR: if.true.direct_targ1:
-; IR:   call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
+; IR-INLINE:   call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
+; IR-NOINLINE:   call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
 ; IR: if.false.orig_indirect2:
 ; IR:   call {{.*}} %0
 
-; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
-; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
+; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
+; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold"
+; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline }
 
 ;--- foo.ll
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"

``````````

</details>


https://github.com/llvm/llvm-project/pull/148001


More information about the llvm-commits mailing list