[llvm] [Sample Profile] Expand functionality of llvm-profdata function filter (PR #101615)

David Li via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 12 13:51:29 PDT 2024


================
@@ -824,59 +829,108 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
   });
 }
 
-static StringRef
-getFuncName(const StringMap<InstrProfWriter::ProfilingData>::value_type &Val) {
-  return Val.first();
-}
+// Limitation: Wildcard may cause unexpected regex match, for example,
+// "foo.*bar:1 @ baz" may match "foo:1 @ bar:1 @ baz". The user should specify
+// regex pattern in a way not to match strings that are not valid mangled names.
+static void filterFunctions(SampleProfileMap &Profiles,
+                            std::string FilterString, bool Inverse) {
+  // Checking all call targets is very slow; only do this if FilterString can
+  // ever match a call target.
+  bool MatchCallTargets = (FilterString.find(" @@ ") != std::string::npos);
+
+  // Searching inlined callsites recursively is extremely slow; only do this
+  // if FilterString has more than one part delimited by " @ " (except for a
+  // CSSPGO top-level function, which we check for later).
+  bool SearchInlinedCallsites = (FilterString.find(" @ ") != std::string::npos);
+
+  uint64_t MD5 = 0;
+
+  // If FilterString is a quoted string, treat it as an escaped regex;
+  // otherwise treat it as a literal match.
+  if (FilterString[0] == '\"') {
+    if (FilterString.size() < 2 || FilterString.back() != '\"')
+      exitWithError("missing terminating '\"' character");
+    FilterString = FilterString.substr(1, FilterString.length() - 2);
+
+    // If the pattern is "\[.*\]", it is a CSSPGO top-level function.
+    if (FilterString[0] == '\\' && FilterString[1] == '[' &&
+        FilterString[FilterString.size() - 2] == '\\' &&
+        FilterString[FilterString.size() - 1] == ']')
+      SearchInlinedCallsites = false;
+  } else {
+    // If the pattern is "[.*]", it is a CSSPGO top-level function.
+    if (FilterString[0] == '[' && FilterString[FilterString.size() - 1] == ']')
+      SearchInlinedCallsites = false;
+
+    // Handle MD5 profile as well if possible. Obviously it only makes sense if
+    // FilterString only matches top level function and is plain text only.
+    if (!SearchInlinedCallsites &&
+        !std::all_of(FilterString.begin(), FilterString.end(), ::isdigit)) {
+      std::list<SampleContextFrameVector> CSNameTable;
+      MD5 = SampleContext(FilterString, CSNameTable).getHashCode();
+    }
+
+    // Mangled name can contain `?` (MSVC), `.` (LLVM suffix), or `[]` (CSSPGO).
+    FilterString = "^" + llvm::Regex::escape(FilterString) + "$";
+  }
+
+  llvm::Regex Re(FilterString);
+  if (std::string Error; !Re.isValid(Error))
+    exitWithError(Error);
+
+  for (auto FS = Profiles.begin(); FS != Profiles.end();) {
+    std::string CanonicalName = FS->second.getContext().toString();
+    if (FS->second.getContext().hasContext())
+      CanonicalName = "[" + CanonicalName + "]";
+    if ((Re.match(CanonicalName) ||
+         (FunctionSamples::UseMD5 &&
+          FS->second.getContext().getHashCode() == MD5)) != Inverse) {
+      FS = Profiles.erase(FS);
+      continue;
+    }
+    // Perform expensive recursive search if the user specifies such pattern.
+    if (MatchCallTargets || SearchInlinedCallsites)
+      FS->second.removeCallTargetsAndCallsites(Re, CanonicalName, Inverse);
+    FS++;
+  }
+}
+
+static void filterFunctions(StringMap<InstrProfWriter::ProfilingData> &Profiles,
+                            std::string FilterString, bool Inverse) {
+  // If FilterString is a quoted string, treat it as an escaped regex;
+  // otherwise treat it as a literal match.
+  if (FilterString[0] == '\"') {
+    if (FilterString.size() < 2 || FilterString.back() != '\"')
+      exitWithError("missing terminating '\"' character");
+    FilterString = FilterString.substr(1, FilterString.length() - 2);
+  } else
+    FilterString = "^" + llvm::Regex::escape(FilterString) + "$";
+
+  llvm::Regex Re(FilterString);
+  if (std::string Error; !Re.isValid(Error))
+    exitWithError(Error);
 
-static std::string
-getFuncName(const SampleProfileMap::value_type &Val) {
-  return Val.second.getContext().toString();
+  for (auto ProfileIt = Profiles.begin(); ProfileIt != Profiles.end();) {
+    auto Tmp = ProfileIt++;
+    if (Re.match(Tmp->first()) != Inverse)
+      Profiles.erase(Tmp);
+  }
 }
 
-template <typename T>
-static void filterFunctions(T &ProfileMap) {
+template <typename T> static void filterFunctions(T &Profiles) {
   bool hasFilter = !FuncNameFilter.empty();
   bool hasNegativeFilter = !FuncNameNegativeFilter.empty();
   if (!hasFilter && !hasNegativeFilter)
     return;
 
-  // If filter starts with '?' it is MSVC mangled name, not a regex.
-  llvm::Regex ProbablyMSVCMangledName("[?@$_0-9A-Za-z]+");
-  if (hasFilter && FuncNameFilter[0] == '?' &&
-      ProbablyMSVCMangledName.match(FuncNameFilter))
-    FuncNameFilter = llvm::Regex::escape(FuncNameFilter);
-  if (hasNegativeFilter && FuncNameNegativeFilter[0] == '?' &&
-      ProbablyMSVCMangledName.match(FuncNameNegativeFilter))
-    FuncNameNegativeFilter = llvm::Regex::escape(FuncNameNegativeFilter);
-
-  size_t Count = ProfileMap.size();
-  llvm::Regex Pattern(FuncNameFilter);
-  llvm::Regex NegativePattern(FuncNameNegativeFilter);
-  std::string Error;
-  if (hasFilter && !Pattern.isValid(Error))
-    exitWithError(Error);
-  if (hasNegativeFilter && !NegativePattern.isValid(Error))
-    exitWithError(Error);
+  size_t Count = Profiles.size();
 
-  // Handle MD5 profile, so it is still able to match using the original name.
-  std::string MD5Name = std::to_string(llvm::MD5Hash(FuncNameFilter));
-  std::string NegativeMD5Name =
-      std::to_string(llvm::MD5Hash(FuncNameNegativeFilter));
-
-  for (auto I = ProfileMap.begin(); I != ProfileMap.end();) {
-    auto Tmp = I++;
-    const auto &FuncName = getFuncName(*Tmp);
-    // Negative filter has higher precedence than positive filter.
-    if ((hasNegativeFilter &&
-         (NegativePattern.match(FuncName) ||
-          (FunctionSamples::UseMD5 && NegativeMD5Name == FuncName))) ||
-        (hasFilter && !(Pattern.match(FuncName) ||
-                        (FunctionSamples::UseMD5 && MD5Name == FuncName))))
-      ProfileMap.erase(Tmp);
-  }
+  if (!FuncNameFilter.empty())
----------------
david-xl wrote:

Is it possible to do a refactoring change first to minimize diffs?

https://github.com/llvm/llvm-project/pull/101615


More information about the llvm-commits mailing list