[llvm] [GSYM] Add support for querying merged functions in llvm-gsymutil (PR #120991)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 23 10:53:50 PST 2024


https://github.com/alx32 created https://github.com/llvm/llvm-project/pull/120991

Adds the ability to look up and display all merged functions for an address in llvm-gsymutil.

Now, when `--merged-functions` is used in combination with `--address/--addresses-from-stdin`, lookup results will contain information about merged functions, if available.

To support printing merged function information when using the `--verbose` option, the `LookupResult` data structure also had to be extended with pointers to the raw function data and raw merged function data. This is because merged functions share the same address range, so it's not easy to look up the raw merged function data for a particular `LookupResult` that is based on a merged function.

>From 4bf853e74422fb541a8cc3c276213a3e162c0646 Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Sun, 22 Dec 2024 21:27:29 -0800
Subject: [PATCH] [GSYM] Add support for querying merged functions in
 llvm-gsymutil

Adds the ability to look up and display all merged functions for an address in
llvm-gsymutil.

Now, when `--merged-functions` is used in combination with `--address/--addresses-from-stdin`, lookup results will contain information about merged functions, if available.

To support printing merged function information when using the `--verbose` option, the `LookupResult` data structure also had to be extended with pointers to the raw function data and raw merged function data. This is because merged functions share the same address range, so it's not easy to look up the raw merged function data for a particular `LookupResult` that is based on a merged function.
---
 llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 11 ++++
 .../llvm/DebugInfo/GSYM/LookupResult.h        | 12 +++++
 .../llvm/DebugInfo/GSYM/MergedFunctionsInfo.h | 12 +++++
 llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp      |  6 +++
 llvm/lib/DebugInfo/GSYM/GsymReader.cpp        | 35 +++++++++++++
 .../DebugInfo/GSYM/MergedFunctionsInfo.cpp    | 51 ++++++++++++++++---
 .../ARM_AArch64/macho-merged-funcs-dwarf.yaml | 10 ++++
 llvm/tools/llvm-gsymutil/Opts.td              |  5 +-
 llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp    | 44 ++++++++++++++--
 9 files changed, 174 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 3d532588a70234..9f7a827f42be5e 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -132,6 +132,17 @@ class GsymReader {
   /// for failing to lookup the address.
   llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
 
+  /// Look up all merged functions for a given address.
+  ///
+  /// This function performs a lookup for the specified address and then
+  /// retrieves additional LookupResults from any merged functions associated
+  /// with the primary LookupResult.
+  ///
+  /// \param Addr The address to look up.
+  /// \returns A vector of LookupResult objects, where the first element is the
+  /// primary result, followed by results for any merged functions.
+  llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
+
   /// Get a string from the string table.
   ///
   /// \param Offset The string table offset for the string to retrieve.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 9ccc96fbb4d5c6..fbfdf7db2dee94 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -11,6 +11,7 @@
 
 #include "llvm/ADT/AddressRanges.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataExtractor.h"
 #include <inttypes.h>
 #include <vector>
 
@@ -50,6 +51,17 @@ struct LookupResult {
   /// array, and the concrete function will appear at the end of the array.
   SourceLocations Locations;
   std::string getSourceFile(uint32_t Index) const;
+
+  /// Optional DataExtractor containing the merged functions data.
+  /// This is only populated during lookups if merged function information
+  /// was present. This is an optimization to avoid parsing the
+  /// MergedFunctionsInfo data unless needed.
+  std::optional<DataExtractor> MergedFunctionsData;
+
+  /// The binary data used to decode the FunctionInfo from which this
+  /// LookupResult was created. This can be used to re-decode the entire
+  /// FunctionInfo if desired.
+  std::optional<DataExtractor> FunctionInfoData;
 };
 
 inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
index b68f9b6098d9e6..203fb13cada102 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -31,6 +31,18 @@ struct MergedFunctionsInfo {
   /// \returns A boolean indicating if this FunctionInfo is valid.
   bool isValid() { return !MergedFunctions.empty(); }
 
+  /// Get a vector of DataExtractor objects for the functions in this
+  /// MergedFunctionsInfo object.
+  ///
+  /// \param Data The binary stream to read the data from. This object must have
+  /// the data for the MergedFunctionsInfo object starting at offset zero. The
+  /// data can contain more data than needed.
+  ///
+  /// \returns An llvm::Expected containing a vector of DataExtractor objects on
+  /// success, or an error object if parsing fails.
+  static llvm::Expected<std::vector<DataExtractor>>
+  getFuncsDataExtractors(DataExtractor &Data);
+
   /// Decode an MergedFunctionsInfo object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from. This object must have
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index dd754c701f6240..2addf3c98e3545 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -241,6 +241,7 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
                                                   uint64_t Addr) {
   LookupResult LR;
   LR.LookupAddr = Addr;
+  LR.FunctionInfoData = Data;
   uint64_t Offset = 0;
   LR.FuncRange = {FuncAddr, FuncAddr + Data.getU32(&Offset)};
   uint32_t NameOffset = Data.getU32(&Offset);
@@ -289,6 +290,11 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
           return ExpectedLE.takeError();
         break;
 
+      case InfoType::MergedFunctionsInfo:
+        // Store the merged functions data for later parsing, if needed.
+        LR.MergedFunctionsData = InfoData;
+        break;
+
       case InfoType::InlineInfo:
         // We will parse the inline info after our line table, but only if
         // we have a line entry.
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index fa5476db191ec4..c0d66cb5e8faeb 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -342,6 +342,41 @@ llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
     return ExpectedData.takeError();
 }
 
+llvm::Expected<std::vector<LookupResult>>
+GsymReader::lookupAll(uint64_t Addr) const {
+  std::vector<LookupResult> Results;
+
+  // First perform a lookup to get the primary function info result.
+  auto MainResult = lookup(Addr);
+  if (!MainResult)
+    return MainResult.takeError();
+
+  // Copy what the merged lookups need before MainResult is moved from.
+  std::optional<DataExtractor> MergedFuncsData =
+      MainResult->MergedFunctionsData;
+  const uint64_t FuncAddr = MainResult->FuncRange.start();
+
+  // Add the main result as the first entry.
+  Results.push_back(std::move(*MainResult));
+
+  // Without merged functions data the primary result is the only result.
+  if (!MergedFuncsData)
+    return Results;
+
+  auto MergedFuncExtractors =
+      MergedFunctionsInfo::getFuncsDataExtractors(*MergedFuncsData);
+  if (!MergedFuncExtractors)
+    return MergedFuncExtractors.takeError();
+
+  for (DataExtractor &MergedData : *MergedFuncExtractors) {
+    auto MergedResult = FunctionInfo::lookup(MergedData, *this, FuncAddr, Addr);
+    if (!MergedResult)
+      return MergedResult.takeError();
+    Results.push_back(std::move(*MergedResult));
+  }
+  return Results;
+}
+
 void GsymReader::dump(raw_ostream &OS) {
   const auto &Header = getHeader();
   // Dump the GSYM header.
diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
index 4efae2262271db..7988c1248a4bde 100644
--- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -32,22 +32,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
   return Error::success();
 }
 
-llvm::Expected<MergedFunctionsInfo>
-MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
-  MergedFunctionsInfo MFI;
+llvm::Expected<std::vector<DataExtractor>>
+MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
+  std::vector<DataExtractor> Results;
   uint64_t Offset = 0;
+
+  // Ensure there is enough data to read the function count.
+  if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+    return createStringError(
+        std::errc::io_error,
+        "unable to read the function count at offset 0x%8.8" PRIx64, Offset);
+
   uint32_t Count = Data.getU32(&Offset);
 
   for (uint32_t i = 0; i < Count; ++i) {
+    // Ensure there is enough data to read the function size.
+    if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+      return createStringError(
+          std::errc::io_error,
+          "unable to read size of function %u at offset 0x%8.8" PRIx64, i,
+          Offset);
+
     uint32_t FnSize = Data.getU32(&Offset);
-    DataExtractor FnData(Data.getData().substr(Offset, FnSize),
+
+    // Ensure there is enough data for the function content.
+    if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
+      return createStringError(
+          std::errc::io_error,
+          "function data is truncated for function %u at offset 0x%8.8" PRIx64
+          ", expected size %u",
+          i, Offset, FnSize);
+
+    // Extract the function data.
+    Results.emplace_back(Data.getData().substr(Offset, FnSize),
                          Data.isLittleEndian(), Data.getAddressSize());
-    llvm::Expected<FunctionInfo> FI =
-        FunctionInfo::decode(FnData, BaseAddr + Offset);
+
+    Offset += FnSize;
+  }
+  return Results;
+}
+
+llvm::Expected<MergedFunctionsInfo>
+MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
+  MergedFunctionsInfo MFI;
+  auto FuncExtractorsOrError = MFI.getFuncsDataExtractors(Data);
+
+  if (!FuncExtractorsOrError)
+    return FuncExtractorsOrError.takeError();
+
+  for (DataExtractor &FuncData : *FuncExtractorsOrError) {
+    llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FuncData, BaseAddr);
     if (!FI)
       return FI.takeError();
     MFI.MergedFunctions.push_back(std::move(*FI));
-    Offset += FnSize;
   }
 
   return MFI;
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
index 94a162c5f2120d..2d8c4a63261e7d 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
@@ -64,6 +64,16 @@
 # CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
 # CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
 
+## Test the lookup functionality for merged functions:
+# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
+# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
+ 
+# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
+# CHECK-MERGED-LOOKUP:    0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5
+# CHECK-MERGED-LOOKUP:    0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_02.cpp:5
+# CHECK-MERGED-LOOKUP:    0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_03.cpp:5
+ 
+# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml{{[/\\]}}out/file_01.cpp:5
 
 
 --- !mach-o
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index d61b418d2d8439..89cd3ce6fc4138 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -17,7 +17,10 @@ defm convert :
   Eq<"convert",
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
-  FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+  FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n"
+                         "Without this option one function per unique address range will be emitted.\n"
+                         "When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n"
+                         "Without this option only one function will be displayed.">;
 def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
 defm callsites_yaml_file :
   Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index aed4ae7c615fd1..ffec7689b61f92 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -98,7 +98,7 @@ static uint64_t SegmentSize;
 static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
-static bool StoreMergedFunctionInfo = false;
+static bool UseMergedFunctions = false;
 static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
 
@@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) {
   }
 
   LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
-  StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
+  UseMergedFunctions = Args.hasArg(OPT_merged_functions);
 
   if (Args.hasArg(OPT_callsites_yaml_file_EQ)) {
     CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ);
@@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
   // functions in the first FunctionInfo with that address range. Do this right
   // after loading the DWARF data so we don't have to deal with functions from
   // the symbol table.
-  if (StoreMergedFunctionInfo)
+  if (UseMergedFunctions)
     Gsym.prepareMergedFunctions(Out);
 
   // Get the UUID and convert symbol table to GSYM.
@@ -507,9 +507,45 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
   return Error::success();
 }
 
+// Print every function (primary plus any merged ones) that contains Addr.
+static void doLookupMergedFunctions(GsymReader &Gsym, uint64_t Addr,
+                                    raw_ostream &OS) {
+  if (auto Results = Gsym.lookupAll(Addr)) {
+    OS << "Found " << Results->size() << " functions at address " << HEX64(Addr)
+       << ":\n";
+    for (size_t i = 0; i < Results->size(); ++i) {
+      LookupResult &LR = (*Results)[i];
+      if (Verbose && LR.FunctionInfoData) {
+        if (auto FI = FunctionInfo::decode(*LR.FunctionInfoData,
+                                           LR.FuncRange.start())) {
+          OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
+          Gsym.dump(OS, *FI);
+          OS << "\nLookupResults for " << HEX64(Addr) << ":\n";
+        } else {
+          // A failed Expected must be handled before it is destroyed.
+          logAllUnhandledErrors(FI.takeError(), OS, "error: ");
+        }
+      }
+      OS << "   " << LR << (i + 1 != Results->size() ? "\n" : "");
+    }
+  } else {
+    if (Verbose)
+      OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
+    OS << HEX64(Addr) << ": ";
+    logAllUnhandledErrors(Results.takeError(), OS, "error: ");
+  }
+  if (Verbose)
+    OS << "\n";
+}
+
 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
+  if (UseMergedFunctions) {
+    doLookupMergedFunctions(Gsym, Addr, OS);
+    return;
+  }
+
   if (auto Result = Gsym.lookup(Addr)) {
-    // If verbose is enabled dump the full function info for the address.
+    // If verbose is enabled, dump the full function info for the address.
     if (Verbose) {
       if (auto FI = Gsym.getFunctionInfo(Addr)) {
         OS << "FunctionInfo for " << HEX64(Addr) << ":\n";



More information about the llvm-commits mailing list