[llvm] [ICP] Add a few tunings to indirect-call-promotion (PR #149892)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 21 13:53:47 PDT 2025
https://github.com/xur-llvm updated https://github.com/llvm/llvm-project/pull/149892
>From 93af9449e3bebff17323a480323ee025c766fafe Mon Sep 17 00:00:00 2001
From: Rong Xu <xur at google.com>
Date: Mon, 21 Jul 2025 11:10:05 -0700
Subject: [PATCH 1/2] [PSI] Ensure hot and cold counter thresholds are
distinct
If hot and cold thresholds are identical, both isColdCount() and
isHotCount() can return true due to inclusive checks in these
functions. This is undesirable.
This patch ensures distinct hot and cold counter thresholds.
An alternative fix is to make the check in isColdCount() exclusive.
But that requires many test case changes.
---
llvm/lib/Analysis/ProfileSummaryInfo.cpp | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37a4eb7e..f1c3155f2f141 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
ColdCountThreshold =
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
- assert(ColdCountThreshold <= HotCountThreshold &&
- "Cold count threshold cannot exceed hot count threshold!");
+ // When the hot and cold thresholds are identical, we would classify
+ // a count value as both hot and cold since we are doing an inclusive check
+ // (see ::is{Hot|Cold}Count()). To avoid this undesirable overlap, ensure the
+ // thresholds are distinct.
+ if (HotCountThreshold == ColdCountThreshold) {
+ if (ColdCountThreshold > 0)
+ (*ColdCountThreshold)--;
+ else
+ (*HotCountThreshold)++;
+ }
+ assert(ColdCountThreshold < HotCountThreshold &&
+ "Cold count threshold should be less than hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
>From ae13963fbc0fe3e3c6f76b33f08ca6db9bbda9a1 Mon Sep 17 00:00:00 2001
From: Rong Xu <xur at google.com>
Date: Mon, 21 Jul 2025 11:24:42 -0700
Subject: [PATCH 2/2] [ICP] Add a few tunings to indirect-call-promotion
Indirect-call promotion (ICP) has been adjusted with the following
tunings, both of which are enabled by default:
(1) Candidate functions can now be ICP'd even if only a declaration
is present.
(2) All non-cold candidate functions are now considered by ICP.
Previously, only hot targets were considered.
These changes are expected to improve performance, with some large
Google benchmarks showing up to a 0.3% improvement.
---
.../Instrumentation/IndirectCallPromotion.cpp | 39 +++++++++++++++++--
llvm/test/ThinLTO/X86/memprof-icp.ll | 1 +
2 files changed, 37 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 854db0ff6864c..63e4a8d958476 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -80,6 +80,19 @@ static cl::opt<unsigned>
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
cl::desc("Skip Callsite up to this number for this compilation"));
+// ICP the candidate function even when only a declaration is present.
+static cl::opt<bool> ICPAllowDeclOnly(
+ "icp-allow-decl-only", cl::init(true), cl::Hidden,
+ cl::desc("Promote the target candidate even when the defintion "
+ " is not available"));
+
+// ICP all non-cold candidate functions. When it's false, only ICP hot
+// functions.
+static cl::opt<bool>
+ ICPAllowWarmFunc("icp-allow-warm-func", cl::init(true), cl::Hidden,
+ cl::desc("Promote the target candidate even if it is not "
+ "hot"));
+
// Set if the pass is called in LTO optimization. The difference for LTO mode
// is the pass won't prefix the source module name to the internal linkage
// symbols.
@@ -477,7 +490,7 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
// the case where the symbol is globally dead in the binary and removed by
// ThinLTO.
Function *TargetFunction = Symtab->getFunction(Target);
- if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
+ if (TargetFunction == nullptr) {
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
@@ -486,6 +499,17 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
});
break;
}
+ if (!ICPAllowDeclOnly && TargetFunction->isDeclaration()) {
+ LLVM_DEBUG(
+ dbgs() << " Not promote: target definition is not available\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
+ << "Do not promote indirect call: target with md5sum "
+ << ore::NV("target md5sum", Target)
+ << " definition not available";
+ });
+ break;
+ }
const char *Reason = nullptr;
if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
@@ -822,9 +846,18 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
uint64_t TotalCount;
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
CB, TotalCount, NumCandidates);
- if (!NumCandidates ||
- (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
+ if (!NumCandidates)
continue;
+ if (PSI && PSI->hasProfileSummary()) {
+ // Don't perform if ICPAllowWarmFunc is true AND the count is cold, OR
+ // ICPAllowWarmFunc is false AND the count is NOT hot.
+ if ((ICPAllowWarmFunc && PSI->isColdCount(TotalCount)) ||
+ (!ICPAllowWarmFunc && !PSI->isHotCount(TotalCount))) {
+ LLVM_DEBUG(dbgs() << " TotalCount=" << TotalCount
+ << " is not large enough.\n");
+ continue;
+ }
+ }
auto PromotionCandidates = getPromotionCandidatesForCallSite(
*CB, ICallProfDataRef, TotalCount, NumCandidates);
diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll
index dbc532ee52828..87e5435a4229d 100644
--- a/llvm/test/ThinLTO/X86/memprof-icp.ll
+++ b/llvm/test/ThinLTO/X86/memprof-icp.ll
@@ -229,6 +229,7 @@
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -import-instr-limit=0 \
; RUN: -memprof-require-definition-for-promotion \
+; RUN: -icp-allow-decl-only=false \
; RUN: -enable-memprof-indirect-call-support=true \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
More information about the llvm-commits
mailing list