[llvm] Added feature in llvm-profdata merge to filter functions from the profile (PR #78378)

William Junda Huang via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 01:42:19 PST 2024


https://github.com/huangjd updated https://github.com/llvm/llvm-project/pull/78378

>From c5224825387c9b5e7b2c2bb38ccc56ac19fdb1f6 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 16 Jan 2024 20:49:21 -0500
Subject: [PATCH 1/3] Added feature in llvm-profdata merge to filter functions
 from the profile

--function=<regex>     Keep only functions whose names match the regex
--no-function=<regex>  Remove functions whose names match the regex

If both options are specified and a function name matches both regexes,
--no-function takes precedence and the function is removed.
---
 llvm/include/llvm/ProfileData/SampleProf.h    |  2 +
 .../tools/llvm-profdata/merge-filter.test     | 62 +++++++++++++++++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 56 ++++++++++++++++-
 3 files changed, 117 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profdata/merge-filter.test

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 66aaf602d0e1d9..8ac84d4b933f20 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -1330,6 +1330,8 @@ class SampleProfileMap
   }
 
   size_t erase(const key_type &Key) { return base_type::erase(Key); }
+
+  iterator erase(iterator It) { return base_type::erase(It); }
 };
 
 using NameFunctionSamples = std::pair<hash_code, const FunctionSamples *>;
diff --git a/llvm/test/tools/llvm-profdata/merge-filter.test b/llvm/test/tools/llvm-profdata/merge-filter.test
new file mode 100644
index 00000000000000..8b24cbd6d54110
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/merge-filter.test
@@ -0,0 +1,62 @@
+Test llvm-profdata merge with function filters.
+
+RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --function="_Z3.*" | FileCheck %s --check-prefix=CHECK-FILTER1
+RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER1
+CHECK-FILTER1: _Z3bari:20301:1437
+CHECK-NEXT:  1: 1437
+CHECK-NEXT: _Z3fooi:7711:610
+CHECK-NEXT:  1: 610
+CHECK-NOT: main
+
+RUN: llvm-profdata merge --sample %p/Inputs/sample-profile.proftext --text --function="_Z3.*" --no-function="fooi$" | FileCheck %s --check-prefix=CHECK-FILTER2
+CHECK-FILTER2: _Z3bari:20301:1437
+CHECK-NEXT:  1: 1437
+CHECK-NOT: main
+CHECK-NOT: _Z3fooi
+
+RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" | FileCheck %s --check-prefix=CHECK-FILTER3
+RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER3
+CHECK-FILTER3: foo
+CHECK-NEXT: # Func Hash:
+CHECK-NEXT: 10
+CHECK-NEXT: # Num Counters:
+CHECK-NEXT: 2
+CHECK-NEXT: # Counter Values:
+CHECK-NEXT: 499500
+CHECK-NEXT: 179900
+CHECK-NEXT: 
+CHECK-NEXT: foo2
+CHECK-NEXT: # Func Hash:
+CHECK-NEXT: 10
+CHECK-NEXT: # Num Counters:
+CHECK-NEXT: 2
+CHECK-NEXT: # Counter Values:
+CHECK-NEXT: 500500
+CHECK-NEXT: 180100
+
+RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" --no-function="^foo$" | FileCheck %s --check-prefix=CHECK-FILTER4
+CHECK-FILTER4: foo2
+CHECK-NEXT: # Func Hash:
+CHECK-NEXT: 10
+CHECK-NEXT: # Num Counters:
+CHECK-NEXT: 2
+CHECK-NEXT: # Counter Values:
+CHECK-NEXT: 500500
+CHECK-NEXT: 180100
+
+RUN: llvm-profdata merge --sample %p/Inputs/cs-sample.proftext --text --function="main.*@.*_Z5funcBi" | FileCheck %s --check-prefix=CHECK-FILTER5
+CHECK-FILTER5: [main:3.1 @ _Z5funcBi:1 @ _Z8funcLeafi]:500853:20
+CHECK-NEXT:  0: 15
+CHECK-NEXT:  1: 15
+CHECK-NEXT:  3: 74946
+CHECK-NEXT:  4: 74941 _Z3fibi:82359
+CHECK-NEXT:  10: 23324
+CHECK-NEXT:  11: 23327 _Z3fibi:25228
+CHECK-NEXT:  15: 11
+CHECK-NEXT:  !Attributes: 1
+CHECK-NEXT: [main:3.1 @ _Z5funcBi]:120:19
+CHECK-NEXT:  0: 19
+CHECK-NEXT:  1: 19 _Z8funcLeafi:20
+CHECK-NEXT:  3: 12
+CHECK-NEXT:  !Attributes: 1
+
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 12b81d411cfa91..4c7bf90adb9226 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/VirtualFileSystem.h"
@@ -132,9 +133,11 @@ cl::opt<std::string>
                    cl::sub(MergeSubcommand));
 cl::opt<std::string> FuncNameFilter(
     "function",
-    cl::desc("Details for matching functions. For overlapping CSSPGO, this "
-             "takes a function name with calling context."),
-    cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand));
+    cl::desc("Only functions matching the filter are shown in the output. For "
+             "overlapping CSSPGO, this takes a function name with calling "
+             "context."),
+    cl::sub(ShowSubcommand), cl::sub(OverlapSubcommand),
+    cl::sub(MergeSubcommand));
 
 // TODO: Consider creating a template class (e.g., MergeOption, ShowOption) to
 // factor out the common cl::sub in cl::opt constructor for subcommand-specific
@@ -244,6 +247,10 @@ cl::opt<uint64_t> TemporalProfMaxTraceLength(
     cl::sub(MergeSubcommand),
     cl::desc("The maximum length of a single temporal profile trace "
              "(default: 10000)"));
+cl::opt<std::string> FuncNameNegativeFilter(
+    "no-function", cl::init(""),
+    cl::sub(MergeSubcommand),
+    cl::desc("Exclude functions matching the filter from the output."));
 
 cl::opt<FailureMode>
     FailMode("failure-mode", cl::init(failIfAnyAreInvalid),
@@ -760,6 +767,45 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
   });
 }
 
+static StringRef
+getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
+  return Val.first();
+}
+
+static std::string
+getFuncName(const SampleProfileMap::value_type &Val) {
+  return Val.second.getContext().toString();
+}
+
+template <typename T>
+static void filterFunctions(T &ProfileMap) {
+  bool hasFilter = !FuncNameFilter.empty();
+  bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
+  if (hasFilter || hasNegativeFilter) {
+    size_t Count = ProfileMap.size();
+
+    llvm::Regex Pattern(FuncNameFilter);
+    llvm::Regex NegativePattern(FuncNameNegativeFilter);
+    std::string Error;
+    if (hasFilter && !Pattern.isValid(Error))
+      exitWithError(Error);
+    if (hasNegativeFilter && !NegativePattern.isValid(Error))
+      exitWithError(Error);
+
+    for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
+      auto Tmp = I++;
+      const auto &FuncName = getFuncName(*Tmp);
+      // Negative filter has higher precedence than positive filter.
+      if ((hasNegativeFilter && NegativePattern.match(FuncName)) ||
+          (hasFilter && !Pattern.match(FuncName)))
+        ProfileMap.erase(Tmp);
+    }
+
+    llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions"
+                 << " in the original profile are filtered.\n";
+  }
+}
+
 static void writeInstrProfile(StringRef OutputFilename,
                               ProfileFormat OutputFormat,
                               InstrProfWriter &Writer) {
@@ -879,6 +925,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
       (NumErrors > 0 && FailMode == failIfAnyAreInvalid))
     exitWithError("no profile can be merged");
 
+  filterFunctions(Contexts[0]->Writer.getProfileData());
+
   writeInstrProfile(OutputFilename, OutputFormat, Contexts[0]->Writer);
 }
 
@@ -1459,6 +1507,8 @@ static void mergeSampleProfile(const WeightedFileVector &Inputs,
     ProfileIsCS = FunctionSamples::ProfileIsCS = false;
   }
 
+  filterFunctions(ProfileMap);
+
   auto WriterOrErr =
       SampleProfileWriter::create(OutputFilename, FormatMap[OutputFormat]);
   if (std::error_code EC = WriterOrErr.getError())

>From 6fa2d95b1b74ef0f9f2f0e32eddc1f258d28fecf Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Wed, 17 Jan 2024 19:15:26 -0500
Subject: [PATCH 2/3] Coding style cleanup; added documentation

---
 llvm/docs/CommandGuide/llvm-profdata.rst   | 10 +++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp | 47 +++++++++++-----------
 2 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst
index f5e3c13ffbc8e6..acf016a6dbcd70 100644
--- a/llvm/docs/CommandGuide/llvm-profdata.rst
+++ b/llvm/docs/CommandGuide/llvm-profdata.rst
@@ -217,6 +217,16 @@ OPTIONS
  The maximum number of functions in a single temporal profile trace. Longer
  traces will be truncated. The default value is 1000.
 
+.. option:: --function=<string>
+
+ Only keep functions matching the regex in the output; all others are erased
+ from the profile.
+
+.. option:: --no-function=<string>
+
+ Remove functions matching the regex from the profile. If both --function and
+ --no-function are specified and a function matches both, it is removed.
+
 EXAMPLES
 ^^^^^^^^
 Basic Usage
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 4c7bf90adb9226..99b80830069dee 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -777,33 +777,32 @@ getFuncName(const SampleProfileMap::value_type &Val) {
   return Val.second.getContext().toString();
 }
 
-template <typename T>
-static void filterFunctions(T &ProfileMap) {
+template <typename T> static void filterFunctions(T &ProfileMap) {
   bool hasFilter = !FuncNameFilter.empty();
   bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
-  if (hasFilter || hasNegativeFilter) {
-    size_t Count = ProfileMap.size();
-
-    llvm::Regex Pattern(FuncNameFilter);
-    llvm::Regex NegativePattern(FuncNameNegativeFilter);
-    std::string Error;
-    if (hasFilter && !Pattern.isValid(Error))
-      exitWithError(Error);
-    if (hasNegativeFilter && !NegativePattern.isValid(Error))
-      exitWithError(Error);
-
-    for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
-      auto Tmp = I++;
-      const auto &FuncName = getFuncName(*Tmp);
-      // Negative filter has higher precedence than positive filter.
-      if ((hasNegativeFilter && NegativePattern.match(FuncName)) ||
-          (hasFilter && !Pattern.match(FuncName)))
-        ProfileMap.erase(Tmp);
-    }
+  if (!hasFilter && !hasNegativeFilter)
+    return;
 
-    llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions"
-                 << " in the original profile are filtered.\n";
-  }
+  size_t Count = ProfileMap.size();
+  llvm::Regex Pattern(FuncNameFilter);
+  llvm::Regex NegativePattern(FuncNameNegativeFilter);
+  std::string Error;
+  if (hasFilter && !Pattern.isValid(Error))
+    exitWithError(Error);
+  if (hasNegativeFilter && !NegativePattern.isValid(Error))
+    exitWithError(Error);
+
+  for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
+    auto Tmp = I++;
+    const auto &FuncName = getFuncName(*Tmp);
+    // Negative filter has higher precedence than positive filter.
+    if ((hasNegativeFilter && NegativePattern.match(FuncName)) ||
+        (hasFilter && !Pattern.match(FuncName)))
+      ProfileMap.erase(Tmp);
+  }
+
+  llvm::dbgs() << Count - ProfileMap.size() << " of " << Count << " functions "
+               << "in the original profile are filtered.\n";
 }
 
 static void writeInstrProfile(StringRef OutputFilename,

>From 979e27ab6f4e23522c40a8bdbf5ccda48b2a3067 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 23 Jan 2024 04:41:54 -0500
Subject: [PATCH 3/3] Add filter support for MD5 profile

---
 .../tools/llvm-profdata/merge-filter.test     |  7 ++++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 24 ++++++++++++++++---
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/llvm/test/tools/llvm-profdata/merge-filter.test b/llvm/test/tools/llvm-profdata/merge-filter.test
index 8b24cbd6d54110..5c47c6a75a7c40 100644
--- a/llvm/test/tools/llvm-profdata/merge-filter.test
+++ b/llvm/test/tools/llvm-profdata/merge-filter.test
@@ -14,6 +14,13 @@ CHECK-NEXT:  1: 1437
 CHECK-NOT: main
 CHECK-NOT: _Z3fooi
 
+RUN: llvm-profdata merge --sample --extbinary --use-md5 -output=%t.0.profdata %p/Inputs/sample-profile.proftext
+RUN: llvm-profdata merge --sample %t.0.profdata --text --function="_Z3fooi" | FileCheck %s --check-prefix=CHECK-FILTER-MD5
+CHECK-FILTER-MD5: 1228452328526475178:7711:610
+CHECK-NEXT:  1: 610
+CHECK-NOT: 15822663052811949562
+CHECK-NOT: 3727899762981752933
+
 RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --function="foo" | FileCheck %s --check-prefix=CHECK-FILTER3
 RUN: llvm-profdata merge --instr %p/Inputs/basic.proftext --text --no-function="main" | FileCheck %s --check-prefix=CHECK-FILTER3
 CHECK-FILTER3: foo
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 99b80830069dee..d00ce49701463e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -777,12 +777,22 @@ getFuncName(const SampleProfileMap::value_type &Val) {
   return Val.second.getContext().toString();
 }
 
-template <typename T> static void filterFunctions(T &ProfileMap) {
+template <typename T>
+static void filterFunctions(T &ProfileMap) {
   bool hasFilter = !FuncNameFilter.empty();
   bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
   if (!hasFilter && !hasNegativeFilter)
     return;
 
+  // If filter starts with '?' it is MSVC mangled name, not a regex.
+  llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
+  if (hasFilter && FuncNameFilter[0] == '?' &&
+      ProbablyMSVCMangledName.match(FuncNameFilter))
+    FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
+  if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
+      ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
+    FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
+
   size_t Count = ProfileMap.size();
   llvm::Regex Pattern(FuncNameFilter);
   llvm::Regex NegativePattern(FuncNameNegativeFilter);
@@ -792,12 +802,20 @@ template <typename T> static void filterFunctions(T &ProfileMap) {
   if (hasNegativeFilter && !NegativePattern.isValid(Error))
     exitWithError(Error);
 
+  // Handle MD5 profile, so it is still able to match using the original name.
+  std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
+  std::string NegativeMD5Name =
+      std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
+
   for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
     auto Tmp = I++;
     const auto &FuncName = getFuncName(*Tmp);
     // Negative filter has higher precedence than positive filter.
-    if ((hasNegativeFilter && NegativePattern.match(FuncName)) ||
-        (hasFilter && !Pattern.match(FuncName)))
+    if ((hasNegativeFilter &&
+         (NegativePattern.match(FuncName) ||
+          (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
+        (hasFilter && !(Pattern.match(FuncName) ||
+                        (FunctionSamples::UseMD5 && MD5Name == FuncName))))
       ProfileMap.erase(Tmp);
   }
 



More information about the llvm-commits mailing list