[llvm] [SampleProfileMatcher] Add direct basename early matching for orphan functions (PR #184409)

Wei Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 9 16:56:54 PDT 2026


https://github.com/apolloww updated https://github.com/llvm/llvm-project/pull/184409

From d91b127742cbae8caf814c9e681c7b4573f9a66d Mon Sep 17 00:00:00 2001
From: Wei Wang <apollo.mobility at gmail.com>
Date: Tue, 3 Mar 2026 10:16:54 -0800
Subject: [PATCH 1/2] [SampleProfileMatcher] Add direct basename matching for
 orphan functions

---
 .../Transforms/IPO/SampleProfileMatcher.h     |   3 +
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 144 +++++++++++++++---
 ...o-probe-stale-profile-direct-basename.prof |   3 +
 ...udo-probe-stale-profile-direct-basename.ll |  83 ++++++++++
 4 files changed, 213 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index c695784641b4e..9c9c301d609ff 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -238,6 +238,9 @@ class SampleProfileMatcher {
   // which are supposed to be new functions. We use them as the targets for
   // call graph matching.
   void findFunctionsWithoutProfile();
+  // Match orphan IR functions to unused top-level profile entries by demangled
+  // basename, without requiring a matched caller in the call graph.
+  void matchFunctionsWithoutProfileByBasename();
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index b9fb7a3ae4b5b..4278ad0dc5a66 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/IPO/SampleProfileMatcher.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/MDBuilder.h"
@@ -25,6 +26,9 @@ using namespace sampleprof;
 
 #define DEBUG_TYPE "sample-profile-matcher"
 
+STATISTIC(NumDirectProfileMatch,
+          "Number of functions matched by demangled basename");
+
 namespace llvm {
 
 static cl::opt<unsigned> FuncProfileSimilarityThreshold(
@@ -728,6 +732,121 @@ void SampleProfileMatcher::findFunctionsWithoutProfile() {
   }
 }
 
+// Demangle \p FName and return the base function name (stripping namespaces,
+// templates, and parameter types). Returns an empty string on failure.
+static std::string getDemangledBaseName(ItaniumPartialDemangler &Demangler,
+                                        StringRef FName) {
+  auto FunctionName = FName.str();
+  if (Demangler.partialDemangle(FunctionName.c_str()))
+    return std::string();
+  size_t BaseNameSize = 0;
+  // The demangler API follows the __cxa_demangle one, and thus needs a
+  // pointer that originates from malloc (or nullptr) and the caller is
+  // responsible for free()-ing the buffer.
+  char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
+  std::string Result = (BaseNamePtr && BaseNameSize)
+                           ? std::string(BaseNamePtr, BaseNameSize)
+                           : std::string();
+  free(BaseNamePtr);
+  // Trim trailing whitespace/null — getFunctionBaseName may include trailing
+  // characters in the reported size.
+  while (!Result.empty() && (Result.back() == ' ' || Result.back() == '\0'))
+    Result.pop_back();
+  return Result;
+}
+
+void SampleProfileMatcher::matchFunctionsWithoutProfileByBasename() {
+  if (FunctionsWithoutProfile.empty() || !LoadFuncProfileforCGMatching)
+    return;
+  auto *NameTable = Reader.getNameTable();
+  if (!NameTable)
+    return;
+
+  ItaniumPartialDemangler Demangler;
+
+  // Build a map from demangled basename to orphan function. Only keep
+  // basenames that map to exactly one orphan — ambiguous basenames like
+  // "get" or "operator()" would produce false positives.
+  StringMap<Function *> OrphansByBaseName;
+  StringSet<> AmbiguousBaseNames;
+  for (auto &[FuncId, Func] : FunctionsWithoutProfile) {
+    std::string BaseName = getDemangledBaseName(Demangler, Func->getName());
+    if (BaseName.empty() || AmbiguousBaseNames.count(BaseName))
+      continue;
+    auto [It, Inserted] = OrphansByBaseName.try_emplace(BaseName, Func);
+    if (!Inserted) {
+      // More than one orphan shares this basename — mark ambiguous.
+      OrphansByBaseName.erase(It);
+      AmbiguousBaseNames.insert(BaseName);
+    }
+  }
+  if (OrphansByBaseName.empty())
+    return;
+
+  // Scan the profile NameTable for candidates whose demangled basename matches
+  // a unique orphan. Use a quick substring check to avoid demangling every
+  // entry. Only keep 1:1 basename matches (exactly one profile candidate).
+  // Maps basename -> profile FunctionId; entries with multiple candidates are
+  // removed.
+  StringMap<FunctionId> CandidateByBaseName;
+  for (auto &ProfileFuncId : *NameTable) {
+    StringRef ProfName = ProfileFuncId.stringRef();
+    if (ProfName.empty())
+      continue;
+    for (auto &[BaseName, _] : OrphansByBaseName) {
+      if (AmbiguousBaseNames.count(BaseName) || !ProfName.contains(BaseName))
+        continue;
+      std::string ProfBaseName = getDemangledBaseName(Demangler, ProfName);
+      if (ProfBaseName != BaseName)
+        continue;
+      auto [It, Inserted] =
+          CandidateByBaseName.try_emplace(BaseName, ProfileFuncId);
+      if (!Inserted) {
+        // More than one profile entry shares this basename — mark ambiguous.
+        CandidateByBaseName.erase(It);
+        AmbiguousBaseNames.insert(BaseName);
+      }
+      break;
+    }
+  }
+  if (CandidateByBaseName.empty())
+    return;
+
+  // Load candidate profiles on demand, match, and flatten.
+  DenseSet<StringRef> ToLoad;
+  for (auto &[BaseName, ProfId] : CandidateByBaseName)
+    ToLoad.insert(ProfId.stringRef());
+  Reader.read(ToLoad);
+
+  unsigned MatchCount = 0;
+  SampleProfileMap NewlyLoadedProfiles;
+  for (auto &[BaseName, ProfId] : CandidateByBaseName) {
+    if (!isProfileUnused(ProfId))
+      continue;
+    Function *OrphanFunc = OrphansByBaseName.lookup(BaseName);
+    if (!OrphanFunc)
+      continue;
+
+    FuncToProfileNameMap[OrphanFunc] = ProfId;
+    if (const auto *FS = Reader.getSamplesFor(ProfId.stringRef()))
+      NewlyLoadedProfiles.create(FS->getFunction()).merge(*FS);
+    MatchCount++;
+    LLVM_DEBUG(dbgs() << "Direct basename match: " << OrphanFunc->getName()
+                      << " (IR) -> " << ProfId << " (Profile)"
+                      << " [basename: " << BaseName << "]\n");
+  }
+
+  // Flatten newly loaded profiles so inlined callees are available for
+  // subsequent LCS-based CG matching.
+  if (!NewlyLoadedProfiles.empty())
+    ProfileConverter::flattenProfile(NewlyLoadedProfiles, FlattenedProfiles,
+                                     FunctionSamples::ProfileIsCS);
+
+  NumDirectProfileMatch += MatchCount;
+  LLVM_DEBUG(dbgs() << "Direct basename matching found " << MatchCount
+                    << " matches\n");
+}
+
 bool SampleProfileMatcher::functionMatchesProfileHelper(
     const Function &IRFunc, const FunctionId &ProfFunc) {
   // The value is in the range [0, 1]. The bigger the value is, the more similar
@@ -737,25 +856,8 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
   // Match the functions if they have the same base name(after demangling) and
   // skip the similarity check.
   ItaniumPartialDemangler Demangler;
-  // Helper lambda to demangle and get the base name. If the demangling failed,
-  // return an empty string.
-  auto GetBaseName = [&](StringRef FName) {
-    auto FunctionName = FName.str();
-    if (Demangler.partialDemangle(FunctionName.c_str()))
-      return std::string();
-    size_t BaseNameSize = 0;
-    // The demangler API follows the __cxa_demangle one, and thus needs a
-    // pointer that originates from malloc (or nullptr) and the caller is
-    // responsible for free()-ing the buffer.
-    char *BaseNamePtr = Demangler.getFunctionBaseName(nullptr, &BaseNameSize);
-    std::string Result = (BaseNamePtr && BaseNameSize)
-                             ? std::string(BaseNamePtr, BaseNameSize)
-                             : std::string();
-    free(BaseNamePtr);
-    return Result;
-  };
-  auto IRBaseName = GetBaseName(IRFunc.getName());
-  auto ProfBaseName = GetBaseName(ProfFunc.stringRef());
+  auto IRBaseName = getDemangledBaseName(Demangler, IRFunc.getName());
+  auto ProfBaseName = getDemangledBaseName(Demangler, ProfFunc.stringRef());
   if (!IRBaseName.empty() && IRBaseName == ProfBaseName) {
     LLVM_DEBUG(dbgs() << "The functions " << IRFunc.getName() << "(IR) and "
                       << ProfFunc << "(Profile) share the same base name: "
@@ -886,8 +988,10 @@ void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
 void SampleProfileMatcher::runOnModule() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
-  if (SalvageUnusedProfile)
+  if (SalvageUnusedProfile) {
     findFunctionsWithoutProfile();
+    matchFunctionsWithoutProfileByBasename();
+  }
 
   // Process the matching in top-down order so that the caller matching result
   // can be used to the callee matching.
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof
new file mode 100644
index 0000000000000..0eee70934991d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-direct-basename.prof
@@ -0,0 +1,3 @@
+_Z3fooi:52:52
+ 1: 52
+ !CFGChecksum: 4294967295000
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
new file mode 100644
index 0000000000000..9564513d55fa9
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
@@ -0,0 +1,83 @@
+; Test direct basename matching for orphan functions.
+;
+; When an IR function's signature changes (e.g., parameter type change),
+; the mangled name changes but the base function name stays the same.
+; If the function's caller has no profile (e.g., fully inlined in a
+; different TU), CG matching can't discover the rename. The direct
+; basename matching step should pair the orphan IR function with the
+; unused profile entry by demangled basename.
+;
+; IR has _Z3fool (foo(long)) — orphan, called only from caller() which
+; has no profile. Profile has _Z3fooi (foo(int)) — unused top-level entry.
+; Direct basename matching should match _Z3fool -> _Z3fooi.
+
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-stale-profile-direct-basename.prof -o %t.prof
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
+
+; Verify that _Z3fool is identified as orphan.
+; CHECK: Function _Z3fool is not in profile or profile symbol list.
+
+; Verify direct basename matching finds the match (not CG matching).
+; CHECK: Direct basename match: _Z3fool (IR) -> _Z3fooi (Profile) [basename: foo]
+; CHECK: Direct basename matching found 1 matches
+
+; Verify that stale profile matching runs for _Z3fool using the matched profile.
+; CHECK: Run stale profile matching for _Z3fool
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4
+
+; _Z3fool (foo(long)) — the renamed function (orphan, no profile).
+define dso_local void @_Z3fool(i64 noundef %y) #0 !dbg !11 {
+entry:
+  %y.addr = alloca i64, align 8
+  store i64 %y, ptr %y.addr, align 8
+  call void @llvm.pseudoprobe(i64 5326982120444056491, i64 1, i32 0, i64 -1), !dbg !14
+  %0 = load i64, ptr %y.addr, align 8, !dbg !14
+  %1 = load volatile i32, ptr @x, align 4, !dbg !15
+  %conv = sext i32 %1 to i64, !dbg !15
+  %add = add nsw i64 %conv, %0, !dbg !15
+  %conv1 = trunc i64 %add to i32, !dbg !15
+  store volatile i32 %conv1, ptr @x, align 4, !dbg !15
+  ret void, !dbg !16
+}
+
+; caller() — calls _Z3fool but has NO profile. CG matching can't
+; discover the rename because this function is never matched.
+define dso_local void @caller() #0 !dbg !17 {
+entry:
+  call void @llvm.pseudoprobe(i64 -7421642274262752513, i64 1, i32 0, i64 -1), !dbg !20
+  call void @_Z3fool(i64 noundef 42), !dbg !21
+  ret void, !dbg !22
+}
+
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #2
+
+attributes #0 = { "use-sample-profile" }
+attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.pseudo_probe_desc = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: None)
+!1 = !DIFile(filename: "test_direct_basename.cpp", directory: "/home")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !{i32 7, !"uwtable", i32 2}
+!9 = !{i64 5326982120444056491, i64 4294967295, !"_Z3fool"}
+!10 = !{i64 -7421642274262752513, i64 4294967295, !"caller"}
+!11 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!12 = !DISubroutineType(types: !13)
+!13 = !{}
+!14 = !DILocation(line: 4, column: 9, scope: !11)
+!15 = !DILocation(line: 4, column: 6, scope: !11)
+!16 = !DILocation(line: 5, column: 1, scope: !11)
+!17 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!20 = !DILocation(line: 8, column: 3, scope: !17)
+!21 = !DILocation(line: 9, column: 3, scope: !17)
+!22 = !DILocation(line: 10, column: 1, scope: !17)

From ee62e093b96da8fb2fd37c4efe3273612eea0b51 Mon Sep 17 00:00:00 2001
From: Wei Wang <apollo.mobility at gmail.com>
Date: Mon, 9 Mar 2026 16:50:44 -0700
Subject: [PATCH 2/2] simplify test

---
 ...udo-probe-stale-profile-direct-basename.ll | 56 +++++--------------
 1 file changed, 14 insertions(+), 42 deletions(-)

diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
index 9564513d55fa9..66bbf2278accd 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-direct-basename.ll
@@ -1,12 +1,5 @@
 ; Test direct basename matching for orphan functions.
 ;
-; When an IR function's signature changes (e.g., parameter type change),
-; the mangled name changes but the base function name stays the same.
-; If the function's caller has no profile (e.g., fully inlined in a
-; different TU), CG matching can't discover the rename. The direct
-; basename matching step should pair the orphan IR function with the
-; unused profile entry by demangled basename.
-;
 ; IR has _Z3fool (foo(long)) — orphan, called only from caller() which
 ; has no profile. Profile has _Z3fooi (foo(int)) — unused top-level entry.
 ; Direct basename matching should match _Z3fool -> _Z3fooi.
@@ -16,57 +9,37 @@
 ; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-stale-profile-direct-basename.prof -o %t.prof
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
 
-; Verify that _Z3fool is identified as orphan.
 ; CHECK: Function _Z3fool is not in profile or profile symbol list.
-
-; Verify direct basename matching finds the match (not CG matching).
 ; CHECK: Direct basename match: _Z3fool (IR) -> _Z3fooi (Profile) [basename: foo]
 ; CHECK: Direct basename matching found 1 matches
-
-; Verify that stale profile matching runs for _Z3fool using the matched profile.
 ; CHECK: Run stale profile matching for _Z3fool
 
-
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-@x = dso_local global i32 0, align 4
-
-; _Z3fool (foo(long)) — the renamed function (orphan, no profile).
-define dso_local void @_Z3fool(i64 noundef %y) #0 !dbg !11 {
+define dso_local void @_Z3fool(i64 %y) #0 !dbg !11 {
 entry:
-  %y.addr = alloca i64, align 8
-  store i64 %y, ptr %y.addr, align 8
   call void @llvm.pseudoprobe(i64 5326982120444056491, i64 1, i32 0, i64 -1), !dbg !14
-  %0 = load i64, ptr %y.addr, align 8, !dbg !14
-  %1 = load volatile i32, ptr @x, align 4, !dbg !15
-  %conv = sext i32 %1 to i64, !dbg !15
-  %add = add nsw i64 %conv, %0, !dbg !15
-  %conv1 = trunc i64 %add to i32, !dbg !15
-  store volatile i32 %conv1, ptr @x, align 4, !dbg !15
-  ret void, !dbg !16
+  ret void, !dbg !15
 }
 
-; caller() — calls _Z3fool but has NO profile. CG matching can't
-; discover the rename because this function is never matched.
-define dso_local void @caller() #0 !dbg !17 {
+define dso_local void @caller() #0 !dbg !16 {
 entry:
-  call void @llvm.pseudoprobe(i64 -7421642274262752513, i64 1, i32 0, i64 -1), !dbg !20
-  call void @_Z3fool(i64 noundef 42), !dbg !21
-  ret void, !dbg !22
+  call void @llvm.pseudoprobe(i64 -7421642274262752513, i64 1, i32 0, i64 -1), !dbg !18
+  call void @_Z3fool(i64 0), !dbg !19
+  ret void, !dbg !20
 }
 
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #2
+declare void @llvm.pseudoprobe(i64, i64, i32, i64)
 
 attributes #0 = { "use-sample-profile" }
-attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!2, !3}
 !llvm.pseudo_probe_desc = !{!9, !10}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, nameTableKind: None)
-!1 = !DIFile(filename: "test_direct_basename.cpp", directory: "/home")
+!1 = !DIFile(filename: "test.cpp", directory: "/tmp")
 !2 = !{i32 2, !"Debug Info Version", i32 3}
 !3 = !{i32 7, !"uwtable", i32 2}
 !9 = !{i64 5326982120444056491, i64 4294967295, !"_Z3fool"}
@@ -74,10 +47,9 @@ attributes #2 = { nocallback nofree nosync nounwind willreturn memory(inaccessib
 !11 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !12, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
 !12 = !DISubroutineType(types: !13)
 !13 = !{}
-!14 = !DILocation(line: 4, column: 9, scope: !11)
-!15 = !DILocation(line: 4, column: 6, scope: !11)
-!16 = !DILocation(line: 5, column: 1, scope: !11)
-!17 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
-!20 = !DILocation(line: 8, column: 3, scope: !17)
-!21 = !DILocation(line: 9, column: 3, scope: !17)
-!22 = !DILocation(line: 10, column: 1, scope: !17)
+!14 = !DILocation(line: 4, column: 1, scope: !11)
+!15 = !DILocation(line: 5, column: 1, scope: !11)
+!16 = distinct !DISubprogram(name: "caller", linkageName: "caller", scope: !1, file: !1, line: 7, type: !12, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
+!18 = !DILocation(line: 8, column: 1, scope: !16)
+!19 = !DILocation(line: 9, column: 1, scope: !16)
+!20 = !DILocation(line: 10, column: 1, scope: !16)



More information about the llvm-commits mailing list