[llvm] [llvm-gsymutil] Add support for merged functions lookup differentiation (PR #122409)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 9 18:50:27 PST 2025


https://github.com/alx32 created https://github.com/llvm/llvm-project/pull/122409

This update introduces the ability to filter merged functions during lookups based on regex patterns derived from call site information in a previous call to `llvm-gsymutil`. The regex patterns, extracted from call sites, can then be passed to subsequent calls using the `--merged-functions-filter` option along with `--merged-functions` and `--address` (or `--addresses-from-stdin`). This allows for precise filtering of functions during lookups, giving accurate results for call stacks that contain merged functions. 







>From 7110a4198100405fa204cbadc5f68e3442b3f937 Mon Sep 17 00:00:00 2001
From: Alex Borcan <alexborcan at fb.com>
Date: Thu, 9 Jan 2025 18:47:05 -0800
Subject: [PATCH] [llvm-gsymutil] Add support for merged functions lookup
 differentiation

---
 .../llvm/DebugInfo/GSYM/CallSiteInfo.h        |  9 +++
 .../llvm/DebugInfo/GSYM/LookupResult.h        |  5 ++
 llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp      | 17 ++++++
 llvm/lib/DebugInfo/GSYM/LookupResult.cpp      | 10 ++++
 .../macho-gsym-merged-callsites-dsym.yaml     | 44 ++++++++++++++
 llvm/tools/llvm-gsymutil/Opts.td              |  5 ++
 llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp    | 59 ++++++++++++++++++-
 7 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 55f7322029d0fa..b09364c74db043 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -49,6 +49,15 @@ struct CallSiteInfo {
   /// Bitwise OR of CallSiteInfo::Flags values
   uint8_t Flags = CallSiteInfo::Flags::None;
 
+  /// Returns true when ReturnOffset, MatchRegex and Flags all compare equal.
+  bool operator==(const CallSiteInfo &RHS) const {
+    return ReturnOffset == RHS.ReturnOffset && MatchRegex == RHS.MatchRegex &&
+           Flags == RHS.Flags;
+  }
+
+  /// Inequality comparison for CallSiteInfo; the exact negation of operator==.
+  bool operator!=(const CallSiteInfo &RHS) const { return !(*this == RHS); }
+
   /// Decode a CallSiteInfo object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 9ccc96fbb4d5c6..c4d8a8cc1795eb 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -49,6 +49,9 @@ struct LookupResult {
   /// deepest inline function will appear at index zero in the source locations
   /// array, and the concrete function will appear at the end of the array.
   SourceLocations Locations;
+  /// Function name regex patterns of the call site matching the lookup address.
+  std::vector<StringRef> CallSiteFuncRegex;
+
   std::string getSourceFile(uint32_t Index) const;
 };
 
@@ -59,6 +62,8 @@ inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
     return false;
   if (LHS.FuncName != RHS.FuncName)
     return false;
+  if (LHS.CallSiteFuncRegex != RHS.CallSiteFuncRegex)
+    return false;
   return LHS.Locations == RHS.Locations;
 }
 
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 785a8da64abe4c..41cf5f926cce75 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -301,6 +301,23 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
         InlineInfoData = InfoData;
         break;
 
+      case InfoType::CallSiteInfo:
+        if (auto CSIC = CallSiteInfoCollection::decode(InfoData)) {
+          // Use the first call site whose return offset equals the lookup
+          // address expressed relative to FuncAddr.
+          for (const auto &CS : CSIC->CallSites) {
+            if (CS.ReturnOffset == Addr - FuncAddr) {
+              // Record the regex strings this call site refers to.
+              for (uint32_t RegexOffset : CS.MatchRegex)
+                LR.CallSiteFuncRegex.push_back(GR.getString(RegexOffset));
+              break;
+            }
+          }
+        } else {
+          return CSIC.takeError();
+        }
+        break;
+
       default:
         break;
     }
diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
index 0ac0be6fda8f6a..e3f13cd7abdb8a 100644
--- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -68,6 +68,16 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
     if (IsInlined)
       OS << " [inlined]";
   }
+
+  if (!LR.CallSiteFuncRegex.empty()) {
+    OS << "\n      +CallSites:";
+    for (size_t i = 0; i < LR.CallSiteFuncRegex.size(); ++i) {
+      if (i > 0)
+        OS << ",";
+      OS << LR.CallSiteFuncRegex[i];
+    }
+  }
+
   OS << '\n';
   return OS;
 }
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index 5001ffdeab9e20..dd09fa936d1990 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -42,6 +42,50 @@
 # CHECK-MERGED-CALLSITES-NEXT:   0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2]
 # CHECK-MERGED-CALLSITES-NEXT:   0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
 
+
+### Check that we can correctly resolve merged functions using callstacks:
+### Resolve two callstacks containing merged functions.
+### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`.
+### The callstacks resolve differently based on the merged functions filter.
+###     0x00000001000003d0  =>  0x000000010000037c  =>  0x000000010000035c  =>  0x0000000100000340
+###     0x00000001000003e8  =========================>  0x000000010000035c  =>  0x0000000100000340
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s
+# CHECK-C1:       0x00000001000003d0: main + 32 @ /tmp/tst/out/merged_funcs_test.cpp:63
+# CHECK-C1-NEXT:      +CallSites:function2_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s
+# CHECK-C2:       0x000000010000037c: function_inlined + 8 @ /tmp/tst/out/merged_funcs_test.cpp:35 [inlined]
+# CHECK-C2-NEXT:                   function2_copy2 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:48
+# CHECK-C2-NEXT:     +CallSites:function3_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy1" | FileCheck --check-prefix=CHECK-C3 %s
+# CHECK-C3:       Found 1 function at address 0x000000010000035c:
+# CHECK-C3-NEXT:     0x000000010000035c: function3_copy1 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:28
+# CHECK-C3-NEXT:        +CallSites:function4_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy1" | FileCheck --check-prefix=CHECK-C4 %s
+# CHECK-C4:       Found 1 function at address 0x0000000100000340:
+# CHECK-C4-NEXT:     0x0000000100000340: function4_copy1 + 8 @ /tmp/tst/out/merged_funcs_test.cpp:14
+
+### ----------------------------------------------------------------------------------------------------------------------------------
+### Resolve the 2nd call stack - its 2nd and 3rd addresses also appear in the 1st call stack, but they resolve to different functions because of the filter
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003e8 | FileCheck --check-prefix=CHECK-C5 %s
+# CHECK-C5:       Found 1 function at address 0x00000001000003e8:
+# CHECK-C5-NEXT:     0x00000001000003e8: main + 56 @ /tmp/tst/out/merged_funcs_test.cpp:64
+# CHECK-C5-NEXT:        +CallSites:function3_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s
+# CHECK-C6:       Found 1 function at address 0x000000010000035c:
+# CHECK-C6-NEXT:     0x000000010000035c: function3_copy2 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:28
+# CHECK-C6-NEXT:        +CallSites:function4_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy2" | FileCheck --check-prefix=CHECK-C7 %s
+# CHECK-C7:       Found 1 function at address 0x0000000100000340:
+# CHECK-C7-NEXT:     0x0000000100000340: function4_copy2 + 8 @ /tmp/tst/out/merged_funcs_test.cpp:14
+
+
 #--- merged_funcs_test.cpp
 #define ATTRIB extern "C" __attribute__((noinline))
 volatile int global_result = 0;
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 89cd3ce6fc4138..15bc064ba6f2cc 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -46,3 +46,8 @@ def addresses_from_stdin :
 defm json_summary_file :
   Eq<"json-summary-file",
      "Output a categorized summary of errors into the JSON file specified.">;
+defm merged_functions_filter : 
+  Eq<"merged-functions-filter", 
+     "When used with --address/--addresses-from-stdin and --merged-functions,\n"
+     "filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
+     "Can be specified multiple times.">;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 654da68bb69600..84934976be2c89 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -101,6 +101,7 @@ static bool LookupAddressesFromStdin;
 static bool UseMergedFunctions = false;
 static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
+static std::vector<std::string> MergedFunctionsFilters;
 
 static void parseArgs(int argc, char **argv) {
   GSYMUtilOptTable Tbl;
@@ -194,6 +195,24 @@ static void parseArgs(int argc, char **argv) {
   }
 
   LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
+
+  for (const llvm::opt::Arg *A :
+       Args.filtered(OPT_merged_functions_filter_EQ)) {
+    MergedFunctionsFilters.push_back(A->getValue());
+    // Validate the filter is only used with correct flags
+    if (LookupAddresses.empty() && !LookupAddressesFromStdin) {
+      llvm::errs() << ToolName
+                   << ": --merged-functions-filter can only be used with "
+                      "--address/--addresses-from-stdin\n";
+      std::exit(1);
+    }
+    if (!UseMergedFunctions) {
+      llvm::errs()
+          << ToolName
+          << ": --merged-functions-filter requires --merged-functions\n";
+      std::exit(1);
+    }
+  }
 }
 
 /// @}
@@ -510,9 +529,43 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
   if (UseMergedFunctions) {
     if (auto Results = Gsym.lookupAll(Addr)) {
-      OS << "Found " << Results->size() << " functions at address "
-         << HEX64(Addr) << ":\n";
+      // If we have filters, count matching results first
+      size_t NumMatching = Results->size();
+      if (!MergedFunctionsFilters.empty()) {
+        NumMatching = 0;
+        for (const auto &Result : *Results) {
+          bool Matches = false;
+          for (const auto &Filter : MergedFunctionsFilters) {
+            Regex Pattern(Filter);
+            if (Pattern.match(Result.FuncName)) {
+              Matches = true;
+              break;
+            }
+          }
+          if (Matches)
+            NumMatching++;
+        }
+      }
+
+      OS << "Found " << NumMatching << " function"
+         << (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr)
+         << ":\n";
+
       for (size_t i = 0; i < Results->size(); ++i) {
+        // Skip if doesn't match any filter
+        if (!MergedFunctionsFilters.empty()) {
+          bool Matches = false;
+          for (const auto &Filter : MergedFunctionsFilters) {
+            Regex Pattern(Filter);
+            if (Pattern.match(Results->at(i).FuncName)) {
+              Matches = true;
+              break;
+            }
+          }
+          if (!Matches)
+            continue;
+        }
+
         OS << "   " << Results->at(i);
 
         if (i != Results->size() - 1)
@@ -529,6 +582,8 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
           OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
         }
       }
+      // Don't print call site info if --merged-functions is not specified.
+      Result->CallSiteFuncRegex.clear();
       OS << Result.get();
     } else {
       if (Verbose)



More information about the llvm-commits mailing list