[llvm] 838701a - MemProf: Add minimum count threshold for inlining of promoted calls (#148001)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 13:48:20 PDT 2025
Author: Teresa Johnson
Date: 2025-07-10T13:48:16-07:00
New Revision: 838701a5403efbaf6e25254377a6f033acee6681
URL: https://github.com/llvm/llvm-project/commit/838701a5403efbaf6e25254377a6f033acee6681
DIFF: https://github.com/llvm/llvm-project/commit/838701a5403efbaf6e25254377a6f033acee6681.diff
LOG: MemProf: Add minimum count threshold for inlining of promoted calls (#148001)
Allow users to set the minimum absolute count for inlining of indirect
calls promoted during cloning. This is primarily meant to enable
generation of the synthetic VP metadata introduced in PR #141164 when
profiling memprof-optimized binaries.
Added:
Modified:
llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index c0f84456d2b27..3bf1ebb226d37 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -181,6 +181,12 @@ static cl::opt<bool> AllowRecursiveContexts(
"memprof-allow-recursive-contexts", cl::init(true), cl::Hidden,
cl::desc("Allow cloning of contexts having recursive cycles"));
+// Set the minimum absolute count threshold for allowing inlining of indirect
+// calls promoted during cloning.
+static cl::opt<unsigned> MemProfICPNoInlineThreshold(
+ "memprof-icp-noinline-threshold", cl::init(2), cl::Hidden,
+ cl::desc("Minimum absolute count for promoted target to be inlinable"));
+
namespace llvm {
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
@@ -5573,6 +5579,15 @@ void MemProfContextDisambiguation::performICP(
.getCallee());
}
DirectCall.setCalledFunction(TargetToUse);
+ // During matching we generate synthetic VP metadata for indirect calls
+ // not already having any, from the memprof profile's callee GUIDs. If
+ // we subsequently promote and inline those callees, we currently lose
+ // the ability to generate this synthetic VP metadata. Optionally apply
+ // a noinline attribute to promoted direct calls, where the threshold is
+ // set to capture synthetic VP metadata targets which get a count of 1.
+ if (MemProfICPNoInlineThreshold &&
+ Candidate.Count < MemProfICPNoInlineThreshold)
+ DirectCall.setIsNoInline();
ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
<< ore::NV("Call", CBClone) << " in clone "
<< ore::NV("Caller", CBClone->getFunction())
diff --git a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
index f8dcd80d4e141..3394efd52a3ba 100644
--- a/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
+++ b/llvm/test/ThinLTO/X86/memprof-icp-recursive.ll
@@ -54,7 +54,40 @@
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
; RUN: --check-prefix=REMARKS
-; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefix=IR
+; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-INLINE
+
+;; Next, raise the threshold to 3 so the promoted calls, which have count 2,
+;; are marked noinline (under the default threshold of 2 they stay inlinable).
+; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
+; RUN: -memprof-icp-noinline-threshold=3 \
+; RUN: -enable-memprof-indirect-call-support=true \
+; RUN: -memprof-allow-recursive-callsites \
+; RUN: -supports-hot-cold-new \
+; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
+; RUN: -r=%t/foo.o,_ZN2B03barEj, \
+; RUN: -r=%t/foo.o,_ZN1B3barEj, \
+; RUN: -r=%t/main.o,_Z3fooR2B0j, \
+; RUN: -r=%t/main.o,_Znwm, \
+; RUN: -r=%t/main.o,_ZdlPvm, \
+; RUN: -r=%t/main.o,_Z8externalPi, \
+; RUN: -r=%t/main.o,main,plx \
+; RUN: -r=%t/main.o,_ZN2B03barEj,plx \
+; RUN: -r=%t/main.o,_ZN1B3barEj,plx \
+; RUN: -r=%t/main.o,_ZTV1B,plx \
+; RUN: -r=%t/main.o,_ZTVN10__cxxabiv120__si_class_type_infoE,plx \
+; RUN: -r=%t/main.o,_ZTS1B,plx \
+; RUN: -r=%t/main.o,_ZTVN10__cxxabiv117__class_type_infoE,plx \
+; RUN: -r=%t/main.o,_ZTS2B0,plx \
+; RUN: -r=%t/main.o,_ZTI2B0,plx \
+; RUN: -r=%t/main.o,_ZTI1B,plx \
+; RUN: -r=%t/main.o,_ZTV2B0,plx \
+; RUN: -thinlto-threads=1 \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats \
+; RUN: -pass-remarks=. -save-temps \
+; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS \
+; RUN: --check-prefix=REMARKS
+
+; RUN: llvm-dis %t.out.2.4.opt.bc -o - | FileCheck %s --check-prefixes=IR,IR-NOINLINE
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
; REMARKS: call in clone main assigned to call function clone _Z3fooR2B0j.memprof.1
@@ -98,12 +131,14 @@
; IR: %[[R1:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
; IR: br i1 %[[R1]], label %if.true.direct_targ, label %if.false.orig_indirect
; IR: if.true.direct_targ:
-; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
+; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD:[0-9]+]]
+; IR-NOINLINE: call {{.*}} @_ZN1B3barEj(ptr null, i32 0) #[[NOINLINE:[0-9]+]]
; IR: if.false.orig_indirect:
; IR: %[[R2:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
; IR: br i1 %[[R2]], label %if.true.direct_targ1, label %if.false.orig_indirect2
; IR: if.true.direct_targ1:
-; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
+; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[NOTCOLD]]
+; IR-NOINLINE: call {{.*}} @_ZN2B03barEj(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect2:
; IR: call {{.*}} %0
@@ -114,17 +149,20 @@
; IR: %[[R3:[0-9]+]] = icmp eq ptr %0, @_ZN1B3barEj
; IR: br i1 %[[R3]], label %if.true.direct_targ, label %if.false.orig_indirect
; IR: if.true.direct_targ:
-; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
+; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD:[0-9]+]]
+; IR-NOINLINE: call {{.*}} @_ZN1B3barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect:
; IR: %[[R4:[0-9]+]] = icmp eq ptr %0, @_ZN2B03barEj
; IR: br i1 %[[R4]], label %if.true.direct_targ1, label %if.false.orig_indirect2
; IR: if.true.direct_targ1:
-; IR: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
+; IR-INLINE: call {{.*}} @_Znwm(i64 noundef 4) #[[COLD]]
+; IR-NOINLINE: call {{.*}} @_ZN2B03barEj.memprof.1(ptr null, i32 0) #[[NOINLINE]]
; IR: if.false.orig_indirect2:
; IR: call {{.*}} %0
-; IR: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
-; IR: attributes #[[COLD]] = {{.*}} "memprof"="cold"
+; IR-INLINE: attributes #[[NOTCOLD]] = {{.*}} "memprof"="notcold"
+; IR-INLINE: attributes #[[COLD]] = {{.*}} "memprof"="cold"
+; IR-NOINLINE: attributes #[[NOINLINE]] = { noinline }
;--- foo.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
More information about the llvm-commits mailing list