[llvm] [llvm-gsymutil] Add support for merged functions lookup differentiation (PR #122409)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 18:51:30 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-debuginfo
Author: None (alx32)
<details>
<summary>Changes</summary>
This update introduces the ability to filter merged functions during lookups based on regex patterns derived from call site information in a previous call to `llvm-gsymutil`. The regex patterns, extracted from call sites, can then be passed to subsequent calls using the `--merged-functions-filter` option along with `--merged-functions` and `--address` (or `--addresses-from-stdin`). This allows for precise filtering of functions during lookups, giving accurate results for call stacks that contain merged functions.
---
Full diff: https://github.com/llvm/llvm-project/pull/122409.diff
7 Files Affected:
- (modified) llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h (+9)
- (modified) llvm/include/llvm/DebugInfo/GSYM/LookupResult.h (+5)
- (modified) llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp (+17)
- (modified) llvm/lib/DebugInfo/GSYM/LookupResult.cpp (+10)
- (modified) llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml (+44)
- (modified) llvm/tools/llvm-gsymutil/Opts.td (+5)
- (modified) llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp (+57-2)
``````````diff
diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 55f7322029d0fa..b09364c74db043 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -49,6 +49,15 @@ struct CallSiteInfo {
/// Bitwise OR of CallSiteInfo::Flags values
uint8_t Flags = CallSiteInfo::Flags::None;
+ /// Equality comparison operator for CallSiteInfo: compares ReturnOffset, MatchRegex and Flags.
+ bool operator==(const CallSiteInfo &RHS) const {
+ return ReturnOffset == RHS.ReturnOffset && MatchRegex == RHS.MatchRegex &&
+ Flags == RHS.Flags;
+ }
+
+ /// Inequality comparison operator for CallSiteInfo.
+ bool operator!=(const CallSiteInfo &RHS) const { return !(*this == RHS); }
+
/// Decode a CallSiteInfo object from a binary data stream.
///
/// \param Data The binary stream to read the data from.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 9ccc96fbb4d5c6..c4d8a8cc1795eb 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -49,6 +49,9 @@ struct LookupResult {
/// deepest inline function will appear at index zero in the source locations
/// array, and the concrete function will appear at the end of the array.
SourceLocations Locations;
+ /// Function name regex patterns for call site
+ std::vector<StringRef> CallSiteFuncRegex;
+
std::string getSourceFile(uint32_t Index) const;
};
@@ -59,6 +62,8 @@ inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
return false;
if (LHS.FuncName != RHS.FuncName)
return false;
+ if (LHS.CallSiteFuncRegex != RHS.CallSiteFuncRegex)
+ return false;
return LHS.Locations == RHS.Locations;
}
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 785a8da64abe4c..41cf5f926cce75 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -301,6 +301,23 @@ FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
InlineInfoData = InfoData;
break;
+ case InfoType::CallSiteInfo:
+ if (auto CSIC = CallSiteInfoCollection::decode(InfoData)) {
+ // Find matching call site based on relative offset
+ for (const auto &CS : CSIC->CallSites) {
+ // Check if the call site matches the lookup address
+ if (CS.ReturnOffset == Addr - FuncAddr) {
+ // Get regex patterns
+ for (uint32_t RegexOffset : CS.MatchRegex) {
+ LR.CallSiteFuncRegex.push_back(GR.getString(RegexOffset));
+ }
+ break;
+ }
+ }
+ } else {
+ return CSIC.takeError();
+ }
+
default:
break;
}
diff --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
index 0ac0be6fda8f6a..e3f13cd7abdb8a 100644
--- a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -68,6 +68,16 @@ raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
if (IsInlined)
OS << " [inlined]";
}
+
+ if (!LR.CallSiteFuncRegex.empty()) {
+ OS << "\n +CallSites:";
+ for (size_t i = 0; i < LR.CallSiteFuncRegex.size(); ++i) {
+ if (i > 0)
+ OS << ",";
+ OS << LR.CallSiteFuncRegex[i];
+ }
+ }
+
OS << '\n';
return OS;
}
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index 5001ffdeab9e20..dd09fa936d1990 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -42,6 +42,50 @@
# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2]
# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
+
+### Check that we can correctly resolve merged functions using callstacks:
+### Resolve two callstacks containing merged functions.
+### We use the value obtained from `CallSites:[FILTER]` to pass to the next call to `llvm-gsymutil` via `--merged-functions-filter`.
+### The callstacks resolve differently based on the merged functions filter.
+### 0x00000001000003d0 => 0x000000010000037c => 0x000000010000035c => 0x0000000100000340
+### 0x00000001000003e8 =========================> 0x000000010000035c => 0x0000000100000340
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003d0 | FileCheck --check-prefix=CHECK-C1 %s
+# CHECK-C1: 0x00000001000003d0: main + 32 @ /tmp/tst/out/merged_funcs_test.cpp:63
+# CHECK-C1-NEXT: +CallSites:function2_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000037c --merged-functions-filter="function2_copy2" | FileCheck --check-prefix=CHECK-C2 %s
+# CHECK-C2: 0x000000010000037c: function_inlined + 8 @ /tmp/tst/out/merged_funcs_test.cpp:35 [inlined]
+# CHECK-C2-NEXT: function2_copy2 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:48
+# CHECK-C2-NEXT: +CallSites:function3_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy1" | FileCheck --check-prefix=CHECK-C3 %s
+# CHECK-C3: Found 1 function at address 0x000000010000035c:
+# CHECK-C3-NEXT: 0x000000010000035c: function3_copy1 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:28
+# CHECK-C3-NEXT: +CallSites:function4_copy1
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy1" | FileCheck --check-prefix=CHECK-C4 %s
+# CHECK-C4: Found 1 function at address 0x0000000100000340:
+# CHECK-C4-NEXT: 0x0000000100000340: function4_copy1 + 8 @ /tmp/tst/out/merged_funcs_test.cpp:14
+
+### ----------------------------------------------------------------------------------------------------------------------------------
+### Resolve the 2nd call stack - the 2nd and 3rd addresses are the same but they resolve to a different function because of the filter
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x00000001000003e8 | FileCheck --check-prefix=CHECK-C5 %s
+# CHECK-C5: Found 1 function at address 0x00000001000003e8:
+# CHECK-C5-NEXT: 0x00000001000003e8: main + 56 @ /tmp/tst/out/merged_funcs_test.cpp:64
+# CHECK-C5-NEXT: +CallSites:function3_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x000000010000035c --merged-functions-filter="function3_copy2" | FileCheck --check-prefix=CHECK-C6 %s
+# CHECK-C6: Found 1 function at address 0x000000010000035c:
+# CHECK-C6-NEXT: 0x000000010000035c: function3_copy2 + 16 @ /tmp/tst/out/merged_funcs_test.cpp:28
+# CHECK-C6-NEXT: +CallSites:function4_copy2
+
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym --merged-functions --address=0x0000000100000340 --merged-functions-filter="function4_copy2" | FileCheck --check-prefix=CHECK-C7 %s
+# CHECK-C7: Found 1 function at address 0x0000000100000340:
+# CHECK-C7-NEXT: 0x0000000100000340: function4_copy2 + 8 @ /tmp/tst/out/merged_funcs_test.cpp:14
+
+
#--- merged_funcs_test.cpp
#define ATTRIB extern "C" __attribute__((noinline))
volatile int global_result = 0;
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 89cd3ce6fc4138..15bc064ba6f2cc 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -46,3 +46,8 @@ def addresses_from_stdin :
defm json_summary_file :
Eq<"json-summary-file",
"Output a categorized summary of errors into the JSON file specified.">;
+defm merged_functions_filter :
+ Eq<"merged-functions-filter",
+ "When used with --address/--addresses-from-stdin and --merged-functions,\n"
+ "filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
+ "Can be specified multiple times.">;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 654da68bb69600..84934976be2c89 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -101,6 +101,7 @@ static bool LookupAddressesFromStdin;
static bool UseMergedFunctions = false;
static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;
+static std::vector<std::string> MergedFunctionsFilters;
static void parseArgs(int argc, char **argv) {
GSYMUtilOptTable Tbl;
@@ -194,6 +195,24 @@ static void parseArgs(int argc, char **argv) {
}
LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
+
+ for (const llvm::opt::Arg *A :
+ Args.filtered(OPT_merged_functions_filter_EQ)) {
+ MergedFunctionsFilters.push_back(A->getValue());
+ // Validate the filter is only used with correct flags
+ if (LookupAddresses.empty() && !LookupAddressesFromStdin) {
+ llvm::errs() << ToolName
+ << ": --merged-functions-filter can only be used with "
+ "--address/--addresses-from-stdin\n";
+ std::exit(1);
+ }
+ if (!UseMergedFunctions) {
+ llvm::errs()
+ << ToolName
+ << ": --merged-functions-filter requires --merged-functions\n";
+ std::exit(1);
+ }
+ }
}
/// @}
@@ -510,9 +529,43 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
- OS << "Found " << Results->size() << " functions at address "
- << HEX64(Addr) << ":\n";
+ // If we have filters, count matching results first
+ size_t NumMatching = Results->size();
+ if (!MergedFunctionsFilters.empty()) {
+ NumMatching = 0;
+ for (const auto &Result : *Results) {
+ bool Matches = false;
+ for (const auto &Filter : MergedFunctionsFilters) {
+ Regex Pattern(Filter);
+ if (Pattern.match(Result.FuncName)) {
+ Matches = true;
+ break;
+ }
+ }
+ if (Matches)
+ NumMatching++;
+ }
+ }
+
+ OS << "Found " << NumMatching << " function"
+ << (NumMatching != 1 ? "s" : "") << " at address " << HEX64(Addr)
+ << ":\n";
+
for (size_t i = 0; i < Results->size(); ++i) {
+ // Skip if doesn't match any filter
+ if (!MergedFunctionsFilters.empty()) {
+ bool Matches = false;
+ for (const auto &Filter : MergedFunctionsFilters) {
+ Regex Pattern(Filter);
+ if (Pattern.match(Results->at(i).FuncName)) {
+ Matches = true;
+ break;
+ }
+ }
+ if (!Matches)
+ continue;
+ }
+
OS << " " << Results->at(i);
if (i != Results->size() - 1)
@@ -529,6 +582,8 @@ static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
}
}
+ // Don't print call site info if --merged-functions is not specified.
+ Result->CallSiteFuncRegex.clear();
OS << Result.get();
} else {
if (Verbose)
``````````
</details>
https://github.com/llvm/llvm-project/pull/122409
More information about the llvm-commits
mailing list