[llvm-branch-commits] [llvm] [BOLT] Name similarity function matching (PR #95884)

shaw young via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jun 26 11:11:06 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884

>From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH 1/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 73 ++++++++++++++++++++------
 1 file changed, 56 insertions(+), 17 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses name similarity to match functions that were not matched by name.
   uint64_t MatchedWithDemangledName = 0;
-  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
-    std::unordered_map<std::string, BinaryFunction *> NameToBinaryFunction;
-    NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
 
-    for (auto &[_, BF] : BC.getBinaryFunctions()) {
+  if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+    auto DemangleName = [&](const char* String) {
       int Status = 0;
-      char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+      char *DemangledName = abi::__cxa_demangle(String,
                                                 nullptr, nullptr, &Status);
-      if (Status == 0)
-        NameToBinaryFunction[std::string(DemangledName)] = &BF;
+      return Status == 0 ? new std::string(DemangledName) : nullptr;
+    };
+
+    auto DeriveNameSpace = [&](std::string DemangledName) {
+      size_t LParen = std::string(DemangledName).find("(");
+      std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+      size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+      return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator);
+    };
+
+    std::unordered_map<std::string, std::vector<BinaryFunction *>> NamespaceToBFs;
+    NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+    for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+      std::string* DemangledName = DemangleName(BF->getOneName().str().c_str());
+      if (!DemangledName)
+        continue;
+      std::string Namespace = DeriveNameSpace(*DemangledName);
+      auto It = NamespaceToBFs.find(Namespace);
+      if (It == NamespaceToBFs.end())
+        NamespaceToBFs[Namespace] = {BF};
+      else
+        It->second.push_back(BF);
     }
 
     for (auto YamlBF : YamlBP.Functions) {
       if (YamlBF.Used)
         continue;
-      int Status = 0;
-      char *DemangledName =
-          abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
-      if (Status != 0)
+      std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+      if (!YamlBFDemangledName)
         continue;
-      auto It = NameToBinaryFunction.find(DemangledName);
-      if (It == NameToBinaryFunction.end())
+      std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+      auto It = NamespaceToBFs.find(Namespace);
+      if (It == NamespaceToBFs.end())
         continue;
-      BinaryFunction *BF = It->second;
-      matchProfileToFunction(YamlBF, *BF);
-      ++MatchedWithDemangledName;
+      std::vector<BinaryFunction *> BFs = It->second;
+
+      unsigned MinEditDistance = UINT_MAX;
+      BinaryFunction *ClosestNameBF = nullptr;
+
+      for (BinaryFunction *BF : BFs) {
+        if (ProfiledFunctions.count(BF))
+          continue;
+        std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str());
+        if (!BFDemangledName)
+          continue;
+        unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+        if (BFEditDistance < MinEditDistance) {
+          MinEditDistance = BFEditDistance;
+          ClosestNameBF = BF;
+        }
+      }
+
+      if (ClosestNameBF &&
+        MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+        matchProfileToFunction(YamlBF, *ClosestNameBF);
+        ++MatchedWithDemangledName;
+      }
     }
   }
 
+  outs() << MatchedWithDemangledName  << ": functions matched by name similarity\n";
+
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

>From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 23:45:27 -0700
Subject: [PATCH 2/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index c9f6d88f0b13a..cf4a5393df8f4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     }
   }
 
-  outs() << MatchedWithDemangledName  << ": functions matched by name similarity\n";
-
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name

>From 9e6bb260197ca22219887c9158d1d19529301064 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 11:14:15 -0700
Subject: [PATCH 3/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 50 +++++++++++++-------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index cf4a5393df8f4..6aef9ea566858 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -11,13 +11,13 @@
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Passes/MCF.h"
 #include "bolt/Profile/ProfileYAMLMapping.h"
+#include "bolt/Utils/NameResolver.h"
 #include "bolt/Utils/Utils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/edit_distance.h"
+#include "llvm/Demangle/Demangle.h"
 #include "llvm/Support/CommandLine.h"
 
-#include <cxxabi.h>
-
 using namespace llvm;
 
 namespace opts {
@@ -426,43 +426,40 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   uint64_t MatchedWithDemangledName = 0;
 
   if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-    auto DemangleName = [&](const char* String) {
-      int Status = 0;
-      char *DemangledName = abi::__cxa_demangle(String,
-                                                nullptr, nullptr, &Status);
-      return Status == 0 ? new std::string(DemangledName) : nullptr;
+    auto DemangleName = [&](std::string &FunctionName) {
+      StringRef RestoredName = NameResolver::restore(FunctionName);
+      return demangle(RestoredName);
     };
 
+    ItaniumPartialDemangler ItaniumPartialDemangler;
     auto DeriveNameSpace = [&](std::string DemangledName) {
-      size_t LParen = std::string(DemangledName).find("(");
-      std::string FunctionName = std::string(DemangledName).substr(0, LParen);
-      size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
-      return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator);
+      std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
+      size_t BufferSize = Buffer.size();
+      char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName(
+          &Buffer[0], &BufferSize);
+      return NameSpace ? std::string(NameSpace) : std::string("");
     };
 
-    std::unordered_map<std::string, std::vector<BinaryFunction *>> NamespaceToBFs;
+    std::unordered_map<std::string, std::vector<BinaryFunction *>>
+        NamespaceToBFs;
+
     NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
 
     for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
-      std::string* DemangledName = DemangleName(BF->getOneName().str().c_str());
-      if (!DemangledName)
-        continue;
-      std::string Namespace = DeriveNameSpace(*DemangledName);
+      std::string DemangledName = BF->getDemangledName();
+      std::string Namespace = DeriveNameSpace(DemangledName);
       auto It = NamespaceToBFs.find(Namespace);
       if (It == NamespaceToBFs.end())
         NamespaceToBFs[Namespace] = {BF};
       else
         It->second.push_back(BF);
     }
-
     for (auto YamlBF : YamlBP.Functions) {
       if (YamlBF.Used)
         continue;
-      std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
-      if (!YamlBFDemangledName)
-        continue;
-      std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
-      auto It = NamespaceToBFs.find(Namespace);
+      std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
+      std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
+      auto It = NamespaceToBFs.find(YamlBFNamespace);
       if (It == NamespaceToBFs.end())
         continue;
       std::vector<BinaryFunction *> BFs = It->second;
@@ -473,10 +470,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       for (BinaryFunction *BF : BFs) {
         if (ProfiledFunctions.count(BF))
           continue;
-        std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str());
-        if (!BFDemangledName)
+        if (BF->size() != YamlBF.NumBasicBlocks)
           continue;
-        unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+        std::string BFDemangledName = BF->getDemangledName();
+        unsigned BFEditDistance =
+            StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
         if (BFEditDistance < MinEditDistance) {
           MinEditDistance = BFEditDistance;
           ClosestNameBF = BF;
@@ -484,7 +482,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       }
 
       if (ClosestNameBF &&
-        MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+          MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
         matchProfileToFunction(YamlBF, *ClosestNameBF);
         ++MatchedWithDemangledName;
       }

>From 669afca2beb4b1f70cd3f8aabeaf8e227161751b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 14:13:13 -0700
Subject: [PATCH 4/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 82139af2bd34a..0cf12c9765959 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -426,7 +426,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   uint64_t MatchedWithDemangledName = 0;
 
   if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-    outs() << "starting name similarity matching\n";
     auto DemangleName = [&](std::string &FunctionName) {
       StringRef RestoredName = NameResolver::restore(FunctionName);
       return demangle(RestoredName);

>From 9c021f883374e82012d6aa7228c2d99bd368e7d8 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 14:17:22 -0700
Subject: [PATCH 5/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 0cf12c9765959..c0a17a475bae0 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -423,7 +423,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       matchProfileToFunction(YamlBF, *BF);
 
   // Uses name similarity to match functions that were not matched by name.
-  uint64_t MatchedWithDemangledName = 0;
+  uint64_t MatchedWithNameSimilarity = 0;
 
   if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
     auto DemangleName = [&](std::string &FunctionName) {
@@ -490,7 +490,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       if (ClosestNameBF &&
           MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
         matchProfileToFunction(YamlBF, *ClosestNameBF);
-        ++MatchedWithDemangledName;
+        ++MatchedWithNameSimilarity;
       }
     }
   }
@@ -500,6 +500,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
+  if (opts::Verbosity >= 2) {
+    outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
+           << " functions with similar names\n";
+  }
+
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");

>From 9fc1899e6f84a6c133e941e69f112717d15eefad Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 15:11:05 -0700
Subject: [PATCH 6/7] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index c0a17a475bae0..60899ead52f85 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -486,7 +486,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
         }
       }
 
-
       if (ClosestNameBF &&
           MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
         matchProfileToFunction(YamlBF, *ClosestNameBF);

>From d687bc035e5f279a8af526381a8af7acb2fc67bf Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 24 Jun 2024 16:33:14 -0700
Subject: [PATCH 7/7] Added test

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp        |  3 +-
 .../name-similarity-function-matching.test    | 64 +++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 bolt/test/X86/name-similarity-function-matching.test

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 60899ead52f85..cbbb0f96f358b 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -458,7 +458,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
         It->second.push_back(BF);
     }
 
-    size_t I = 0; size_t N = YamlBP.Functions.size();
     for (auto YamlBF : YamlBP.Functions) {
       if (YamlBF.Used)
         continue;
@@ -487,7 +486,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       }
 
       if (ClosestNameBF &&
-          MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+          MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
         matchProfileToFunction(YamlBF, *ClosestNameBF);
         ++MatchedWithNameSimilarity;
       }
diff --git a/bolt/test/X86/name-similarity-function-matching.test b/bolt/test/X86/name-similarity-function-matching.test
new file mode 100644
index 0000000000000..1480a2165c389
--- /dev/null
+++ b/bolt/test/X86/name-similarity-function-matching.test
@@ -0,0 +1,64 @@
+## Tests that YAMLProfileReader matches profiles to functions whose names are within the edit-distance threshold.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
+# RUN:   --print-cfg --name-similarity-function-matching-threshold=1 2>&1 --funcs=main --profile-ignore-hash=0 | FileCheck %s
+
+# CHECK: BOLT-INFO: matched 1 functions with similar names
+
+#--- main.s
+.globl main
+.type	main, @function
+main:
+  .cfi_startproc
+.LBB00:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  subq    $16, %rsp
+  testq   %rax, %rax
+  js      .LBB03
+.LBB01:
+  jne     .LBB04
+.LBB02:
+  nop
+.LBB03:
+  xorl    %eax, %eax
+  addq    $16, %rsp
+  popq    %rbp
+  retq
+.LBB04:
+  xorl    %eax, %eax
+  addq    $16, %rsp
+  popq    %rbp
+  retq
+## For relocations against .text
+.LBB05:
+  call exit
+  .cfi_endproc
+  .size	main, .-main
+
+#--- yaml
+---
+header:
+  profile-version: 1
+  binary-name:     'hashing-based-function-matching.s.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:            main2
+    fid:             0
+    hash:            0x0000000000000001
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+...



More information about the llvm-branch-commits mailing list