[llvm] [SampleProfileMatcher] Add direct basename early matching for orphan functions (PR #184409)
Wei Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 16:56:54 PDT 2026
https://github.com/apolloww updated https://github.com/llvm/llvm-project/pull/184409
>From d91b127742cbae8caf814c9e681c7b4573f9a66d Mon Sep 17 00:00:00 2001
From: Wei Wang <apollo.mobility at gmail.com>
Date: Tue, 3 Mar 2026 10:16:54 -0800
Subject: [PATCH 1/2] [SampleProfileMatcher] Add direct basename matching for
orphan functions
---
.../Transforms/IPO/SampleProfileMatcher.h | 3 +
.../Transforms/IPO/SampleProfileMatcher.cpp | 144 +++++++++++++++---
...o-probe-stale-profile-direct-basename.prof | 3 +
...udo-probe-stale-profile-direct-basename.ll | 83 ++++++++++
4 files changed, 213 insertions(+), 20 deletions(-)
create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof
create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index c695784641b4e..9c9c301d609ff 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -238,6 +238,9 @@ class SampleProfileMatcher {
// which are supposed to be new functions. We use them as the targets for
// call graph matching.
void findFunctionsWithoutProfile();
+ // Match orphan IR functions to unused top-level profile entries by demangled
+ // basename, without requiring a matched caller in the call graph.
+ void matchFunctionsWithoutProfileByBasename();
void reportOrPersistProfileStats();
};
} // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index b9fb7a3ae4b5b..4278ad0dc5a66 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
@@ -25,6 +26,9 @@ using namespace sampleprof;
#define DEBUG_TYPE "sample-profile-matcher"
+// Counts the orphan IR functions that the direct basename matching step paired
+// with a profile entry.
+STATISTIC(NumDirectProfileMatch,
+ "Number of functions matched by demangled basename");
+
namespace llvm {
static cl::opt<unsigned> FuncProfileSimilarityThreshold(
@@ -728,6 +732,121 @@ void SampleProfileMatcher::findFunctionsWithoutProfile() {
}
}
+// Returns the base function name of the mangled name \p FName, i.e. the name
+// with namespaces, templates, and parameter types stripped. \p Demangler is
+// used as scratch state for the demangling. An empty string is returned when
+// \p FName cannot be demangled or yields no base name.
+static std::string getDemangledBaseName(ItaniumPartialDemangler &Demangler,
+                                        StringRef FName) {
+  // partialDemangle() is handed a C string, so take a null-terminated copy.
+  const std::string Mangled = FName.str();
+  if (Demangler.partialDemangle(Mangled.c_str()))
+    return std::string();
+
+  // The demangler API mirrors __cxa_demangle: the buffer argument must come
+  // from malloc (or be nullptr), and the caller has to free() the buffer that
+  // is handed back.
+  size_t Length = 0;
+  char *Buffer = Demangler.getFunctionBaseName(nullptr, &Length);
+  std::string BaseName;
+  if (Buffer && Length)
+    BaseName.assign(Buffer, Length);
+  free(Buffer);
+
+  // The reported size may cover trailing characters, so strip any trailing
+  // spaces and null bytes before handing the name back.
+  while (!BaseName.empty()) {
+    const char Last = BaseName.back();
+    if (Last != ' ' && Last != '\0')
+      break;
+    BaseName.pop_back();
+  }
+  return BaseName;
+}
+
+
+// Match orphan IR functions (FunctionsWithoutProfile) to profile entries by
+// demangled basename, without requiring a matched caller in the call graph.
+//
+// A pair is only formed when the basename is unique on both sides: exactly one
+// orphan and exactly one NameTable entry demangle to it. Matches are recorded
+// in FuncToProfileNameMap and the matched profiles are flattened into
+// FlattenedProfiles. Does nothing when there are no orphans, when
+// LoadFuncProfileforCGMatching is off, or when the reader has no name table.
+void SampleProfileMatcher::matchFunctionsWithoutProfileByBasename() {
+  if (FunctionsWithoutProfile.empty() || !LoadFuncProfileforCGMatching)
+    return;
+  auto *NameTable = Reader.getNameTable();
+  if (!NameTable)
+    return;
+
+  ItaniumPartialDemangler Demangler;
+
+  // Build a map from demangled basename to orphan function. Only keep
+  // basenames that map to exactly one orphan — ambiguous basenames like
+  // "get" or "operator()" would produce false positives.
+  StringMap<Function *> OrphansByBaseName;
+  StringSet<> AmbiguousBaseNames;
+  for (auto &[FuncId, Func] : FunctionsWithoutProfile) {
+    std::string BaseName = getDemangledBaseName(Demangler, Func->getName());
+    if (BaseName.empty() || AmbiguousBaseNames.count(BaseName))
+      continue;
+    auto [It, Inserted] = OrphansByBaseName.try_emplace(BaseName, Func);
+    if (!Inserted) {
+      // More than one orphan shares this basename — mark ambiguous.
+      OrphansByBaseName.erase(It);
+      AmbiguousBaseNames.insert(BaseName);
+    }
+  }
+  if (OrphansByBaseName.empty())
+    return;
+
+  // Scan the profile NameTable for candidates whose demangled basename matches
+  // a unique orphan. Only keep 1:1 basename matches (exactly one profile
+  // candidate). Maps basename -> profile FunctionId; entries with multiple
+  // candidates are removed.
+  StringMap<FunctionId> CandidateByBaseName;
+  for (auto &ProfileFuncId : *NameTable) {
+    StringRef ProfName = ProfileFuncId.stringRef();
+    if (ProfName.empty())
+      continue;
+
+    // Quick substring pre-filter: skip the demangler entirely unless some
+    // still-unambiguous orphan basename occurs in the mangled profile name.
+    bool MayMatch = false;
+    for (auto &[BaseName, _] : OrphansByBaseName) {
+      if (!AmbiguousBaseNames.count(BaseName) && ProfName.contains(BaseName)) {
+        MayMatch = true;
+        break;
+      }
+    }
+    if (!MayMatch)
+      continue;
+
+    // Demangle at most once per profile name, then look the basename up
+    // directly instead of re-demangling for every orphan basename that merely
+    // happens to be a substring. The basename must still occur verbatim in the
+    // mangled name and must not have been marked ambiguous.
+    std::string ProfBaseName = getDemangledBaseName(Demangler, ProfName);
+    if (ProfBaseName.empty() || AmbiguousBaseNames.count(ProfBaseName) ||
+        !ProfName.contains(ProfBaseName) ||
+        !OrphansByBaseName.count(ProfBaseName))
+      continue;
+
+    auto [It, Inserted] =
+        CandidateByBaseName.try_emplace(ProfBaseName, ProfileFuncId);
+    if (!Inserted) {
+      // More than one profile entry shares this basename — mark ambiguous.
+      CandidateByBaseName.erase(It);
+      AmbiguousBaseNames.insert(ProfBaseName);
+    }
+  }
+  if (CandidateByBaseName.empty())
+    return;
+
+  // Load candidate profiles on demand, match, and flatten.
+  // NOTE(review): the result of Reader.read() is not inspected here — confirm
+  // whether a failed load needs explicit handling.
+  DenseSet<StringRef> ToLoad;
+  for (auto &[BaseName, ProfId] : CandidateByBaseName)
+    ToLoad.insert(ProfId.stringRef());
+  Reader.read(ToLoad);
+
+  unsigned MatchCount = 0;
+  SampleProfileMap NewlyLoadedProfiles;
+  for (auto &[BaseName, ProfId] : CandidateByBaseName) {
+    // Only profile entries that are still unused are eligible.
+    if (!isProfileUnused(ProfId))
+      continue;
+    Function *OrphanFunc = OrphansByBaseName.lookup(BaseName);
+    if (!OrphanFunc)
+      continue;
+
+    FuncToProfileNameMap[OrphanFunc] = ProfId;
+    if (const auto *FS = Reader.getSamplesFor(ProfId.stringRef()))
+      NewlyLoadedProfiles.create(FS->getFunction()).merge(*FS);
+    MatchCount++;
+    LLVM_DEBUG(dbgs() << "Direct basename match: " << OrphanFunc->getName()
+                      << " (IR) -> " << ProfId << " (Profile)"
+                      << " [basename: " << BaseName << "]\n");
+  }
+
+  // Flatten newly loaded profiles so inlined callees are available for
+  // subsequent LCS-based CG matching.
+  if (!NewlyLoadedProfiles.empty())
+    ProfileConverter::flattenProfile(NewlyLoadedProfiles, FlattenedProfiles,
+                                     FunctionSamples::ProfileIsCS);
+
+  NumDirectProfileMatch += MatchCount;
+  LLVM_DEBUG(dbgs() << "Direct basename matching found " << MatchCount
+                    << " matches\n");
+}
+
+
bool SampleProfileMatcher::functionMatchesProfileHelper(
const Function &IRFunc, const FunctionId &ProfFunc) {
// The value is in the range [0, 1]. The bigger the value is, the more similar
@@ -737,25 +856,8 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
// Match the functions if they have the same base name(after demangling) and
// skip the similarity check.
ItaniumPartialDemangler Demangler;
- // Helper lambda to demangle and get the base name. If the demangling failed,
- // return an empty string.
- auto GetBaseName = [&](StringRef FName) {
- auto FunctionName = FName.str();
- if (Demangler.partialDemangle(FunctionName.c_str()))
- return std::string();
- size_t BaseNameSize = 0;
- // The demangler API follows the __cxa_demangle one, and thus needs a
- // pointer that originates from malloc (or nullptr) and the caller is
- // responsible for free()-ing the buffer.
- char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
- std::string Result = (BaseNamePtr && BaseNameSize)
- ? std::string(BaseNamePtr, BaseNameSize)
- : std::string();
- free(BaseNamePtr);
- return Result;
- };
- auto IRBaseName = GetBaseName(IRFunc.getName());
- auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
+ auto IRBaseName = getDemangledBaseName(Demangler, IRFunc.getName());
+ auto ProfBaseName = getDemangledBaseName(Demangler, ProfFunc.stringRef());
if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
<< ProfFunc << "(Profile) share the same base name: "
@@ -886,8 +988,10 @@ void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
void SampleProfileMatcher::runOnModule() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
- if (SalvageUnusedProfile)
+ if (SalvageUnusedProfile) {
findFunctionsWithoutProfile();
+ matchFunctionsWithoutProfileByBasename();
+ }
// Process the matching in top-down order so that the caller matching result
// can be used to the callee matching.
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof
new file mode 100644
index 0000000000000..0eee70934991d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof
@@ -0,0 +1,3 @@
+_Z3fooi:52:52
+ 1: 52
+ !CFGChecksum: 4294967295000
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
new file mode 100644
index 0000000000000..9564513d55fa9
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
@@ -0,0 +1,83 @@
+; Test direct basename matching for orphan functions.
+;
+; When an IR function's signature changes (e.g., parameter type change),
+; the mangled name changes but the base function name stays the same.
+; If the function's caller has no profile (e.g., fully inlined in a
+; different TU), CG matching can't discover the rename. The direct
+; basename matching step should pair the orphan IR function with the
+; unused profile entry by demangled basename.
+;
+; IR has _Z3fool (foo(long)) — orphan, called only from caller() which
+; has no profile. Profile has _Z3fooi (foo(int)) — unused top-level entry.
+; Direct basename matching should match _Z3fool -> _Z3fooi.
+
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-stale-profile-direct-basename.prof -o %t.prof
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
+
+; Verify that _Z3fool is identified as orphan.
+; CHECK: Function _Z3fool is not in profile or profile symbol list.
+
+; Verify direct basename matching finds the match (not CG matching).
+; CHECK: Direct basename match: _Z3fool (IR) -> _Z3fooi (Profile) [basename: foo]
+; CHECK: Direct basename matching found 1 matches
+
+; Verify that stale profile matching runs for _Z3fool using the matched profile.
+; CHECK: Run stale profile matching for _Z3fool
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at x = dso_local global i32 0, align 4
+
+; _Z3fool (foo(long)) — the renamed function (orphan, no profile).
+define dso_local void @_Z3fool(i64 noundef %y) #0 !dbg !11 {
+entry:
+ %y.addr = alloca i64, align 8
+ store i64 %y, ptr %y.addr, align 8
+ call void @llvm.pseudoprobe(i64 5326982120444056491, i64 1, i32 0, i64 -1), !dbg !14
+ %0 = load i64, ptr %y.addr, align 8, !dbg !14
+ %1 = load volatile i32, ptr @x, align 4, !dbg !15
+ %conv = sext i32 %1 to i64, !dbg !15
+ %add = add nsw i64 %conv, %0, !dbg !15
+ %conv1 = trunc i64 %add to i32, !dbg !15
+ store volatile i32 %conv1, ptr @x, align 4, !dbg !15
+ ret void, !dbg !16
+}
+
+; caller() — calls _Z3fool but has NO profile. CG matching can't
+; discover the rename because this function is never matched.
+define dso_local void @caller() #0 !dbg !17 {
+entry:
+ call void @llvm.pseudoprobe(i64 -7421642274262752513, i64 1, i32 0, i64 -1), !dbg !20
+ call void @_Z3fool(i64 noundef 42), !dbg !21
+ ret void, !dbg !22
+}
+
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #2
+
+attributes #0 = { "use-sample-profile" }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.pseudo_probe_desc = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: None)
+!1 = !DIFile(filename: "test_direct_basename.cpp", directory: "/home")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 7, !"uwtable", i32 2}
+!9 = !{i64 5326982120444056491, i64 4294967295, !"_Z3fool"}
+!10 = !{i64 -7421642274262752513, i64 4294967295, !"caller"}
+!11 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!12 = !DISubroutineType(types: !13)
+!13 = !{}
+!14 = !DILocation(line: 4, column: 9, scope: !11)
+!15 = !DILocation(line: 4, column: 6, scope: !11)
+!16 = !DILocation(line: 5, column: 1, scope: !11)
+!17 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!20 = !DILocation(line: 8, column: 3, scope: !17)
+!21 = !DILocation(line: 9, column: 3, scope: !17)
+!22 = !DILocation(line: 10, column: 1, scope: !17)
>From ee62e093b96da8fb2fd37c4efe3273612eea0b51 Mon Sep 17 00:00:00 2001
From: Wei Wang <apollo.mobility at gmail.com>
Date: Mon, 9 Mar 2026 16:50:44 -0700
Subject: [PATCH 2/2] simplify test
---
...udo-probe-stale-profile-direct-basename.ll | 56 +++++--------------
1 file changed, 14 insertions(+), 42 deletions(-)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
index 9564513d55fa9..66bbf2278accd 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
@@ -1,12 +1,5 @@
; Test direct basename matching for orphan functions.
;
-; When an IR function's signature changes (e.g., parameter type change),
-; the mangled name changes but the base function name stays the same.
-; If the function's caller has no profile (e.g., fully inlined in a
-; different TU), CG matching can't discover the rename. The direct
-; basename matching step should pair the orphan IR function with the
-; unused profile entry by demangled basename.
-;
; IR has _Z3fool (foo(long)) — orphan, called only from caller() which
; has no profile. Profile has _Z3fooi (foo(int)) — unused top-level entry.
; Direct basename matching should match _Z3fool -> _Z3fooi.
@@ -16,57 +9,37 @@
; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-stale-profile-direct-basename.prof -o %t.prof
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
-; Verify that _Z3fool is identified as orphan.
; CHECK: Function _Z3fool is not in profile or profile symbol list.
-
-; Verify direct basename matching finds the match (not CG matching).
; CHECK: Direct basename match: _Z3fool (IR) -> _Z3fooi (Profile) [basename: foo]
; CHECK: Direct basename matching found 1 matches
-
-; Verify that stale profile matching runs for _Z3fool using the matched profile.
; CHECK: Run stale profile matching for _Z3fool
-
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
- at x = dso_local global i32 0, align 4
-
-; _Z3fool (foo(long)) — the renamed function (orphan, no profile).
-define dso_local void @_Z3fool(i64 noundef %y) #0 !dbg !11 {
+define dso_local void @_Z3fool(i64 %y) #0 !dbg !11 {
entry:
- %y.addr = alloca i64, align 8
- store i64 %y, ptr %y.addr, align 8
call void @llvm.pseudoprobe(i64 5326982120444056491, i64 1, i32 0, i64 -1), !dbg !14
- %0 = load i64, ptr %y.addr, align 8, !dbg !14
- %1 = load volatile i32, ptr @x, align 4, !dbg !15
- %conv = sext i32 %1 to i64, !dbg !15
- %add = add nsw i64 %conv, %0, !dbg !15
- %conv1 = trunc i64 %add to i32, !dbg !15
- store volatile i32 %conv1, ptr @x, align 4, !dbg !15
- ret void, !dbg !16
+ ret void, !dbg !15
}
-; caller() — calls _Z3fool but has NO profile. CG matching can't
-; discover the rename because this function is never matched.
-define dso_local void @caller() #0 !dbg !17 {
+define dso_local void @caller() #0 !dbg !16 {
entry:
- call void @llvm.pseudoprobe(i64 -7421642274262752513, i64 1, i32 0, i64 -1), !dbg !20
- call void @_Z3fool(i64 noundef 42), !dbg !21
- ret void, !dbg !22
+ call void @llvm.pseudoprobe(i64 -7421642274262752513, i64 1, i32 0, i64 -1), !dbg !18
+ call void @_Z3fool(i64 0), !dbg !19
+ ret void, !dbg !20
}
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #2
+declare void @llvm.pseudoprobe(i64, i64, i32, i64)
attributes #0 = { "use-sample-profile" }
-attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3}
!llvm.pseudo_probe_desc = !{!9, !10}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: None)
-!1 = !DIFile(filename: "test_direct_basename.cpp", directory: "/home")
+!1 = !DIFile(filename: "test.cpp", directory: "/tmp")
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = !{i32 7, !"uwtable", i32 2}
!9 = !{i64 5326982120444056491, i64 4294967295, !"_Z3fool"}
@@ -74,10 +47,9 @@ attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessib
!11 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
!12 = !DISubroutineType(types: !13)
!13 = !{}
-!14 = !DILocation(line: 4, column: 9, scope: !11)
-!15 = !DILocation(line: 4, column: 6, scope: !11)
-!16 = !DILocation(line: 5, column: 1, scope: !11)
-!17 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
-!20 = !DILocation(line: 8, column: 3, scope: !17)
-!21 = !DILocation(line: 9, column: 3, scope: !17)
-!22 = !DILocation(line: 10, column: 1, scope: !17)
+!14 = !DILocation(line: 4, column: 1, scope: !11)
+!15 = !DILocation(line: 5, column: 1, scope: !11)
+!16 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!18 = !DILocation(line: 8, column: 1, scope: !16)
+!19 = !DILocation(line: 9, column: 1, scope: !16)
+!20 = !DILocation(line: 10, column: 1, scope: !16)
More information about the llvm-commits
mailing list