[llvm] 6f28b4b - [GSYM] Add support for querying merged functions in llvm-gsymutil (#120991)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 6 11:55:31 PST 2025


Author: alx32
Date: 2025-01-06T11:55:27-08:00
New Revision: 6f28b4b5e960e1c4eeebad18b48e667df1e806a8

URL: https://github.com/llvm/llvm-project/commit/6f28b4b5e960e1c4eeebad18b48e667df1e806a8
DIFF: https://github.com/llvm/llvm-project/commit/6f28b4b5e960e1c4eeebad18b48e667df1e806a8.diff

LOG: [GSYM] Add support for querying merged functions in llvm-gsymutil (#120991)

Adds the ability to look up and display all merged functions for an
address in llvm-gsymutil.

Now, when `--merged-functions` is used in combination with
`--address/--addresses-from-stdin`, lookup results will contain
information about merged functions, if available.

To support printing merged function information when using the
`--verbose` option, the `LookupResult` data structure also had to be
extended with pointers to the raw function data and raw merged function
data. This is because merged functions share the same address range, so
it's not easy to look up the raw merged function data for a particular
`LookupResult` that is based on a merged function.

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
    llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
    llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
    llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
    llvm/lib/DebugInfo/GSYM/GsymReader.cpp
    llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
    llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
    llvm/tools/llvm-gsymutil/Opts.td
    llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index fd4ac3164c686d..187642257cc522 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -187,13 +187,17 @@ struct FunctionInfo {
   ///
   /// \param Addr The address to lookup.
   ///
+  /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
+  /// non-null, will be set to the raw data of the MergedFunctionInfo, if
+  /// present.
+  ///
   /// \returns An LookupResult or an error describing the issue that was
   /// encountered during decoding. An error should only be returned if the
   /// address is not contained in the FunctionInfo or if the data is corrupted.
-  static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
-                                             const GsymReader &GR,
-                                             uint64_t FuncAddr,
-                                             uint64_t Addr);
+  static llvm::Expected<LookupResult>
+  lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
+         uint64_t Addr,
+         std::optional<DataExtractor> *MergedFuncsData = nullptr);
 
   uint64_t startAddress() const { return Range.start(); }
   uint64_t endAddress() const { return Range.end(); }

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 3d532588a70234..ee7929ae850fd0 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -127,10 +127,29 @@ class GsymReader {
   /// is much faster for lookups.
   ///
   /// \param Addr A virtual address from the orignal object file to lookup.
+  ///
+  /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
+  /// non-null, will be set to the raw data of the MergedFunctionInfo, if
+  /// present.
+  ///
   /// \returns An expected LookupResult that contains only the information
   /// needed for the current address, or an error object that indicates reason
   /// for failing to lookup the address.
-  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
+  llvm::Expected<LookupResult>
+  lookup(uint64_t Addr,
+         std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
+
+  /// Lookup all merged functions for a given address.
+  ///
+  /// This function performs a lookup for the specified address and then
+  /// retrieves additional LookupResults from any merged functions associated
+  /// with the primary LookupResult.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \returns A vector of LookupResult objects, where the first element is the
+  /// primary result, followed by results for any merged functions
+  llvm::Expected<std::vector<LookupResult>> lookupAll(uint64_t Addr) const;
 
   /// Get a string from the string table.
   ///

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
index b68f9b6098d9e6..203fb13cada102 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -31,6 +31,18 @@ struct MergedFunctionsInfo {
   /// \returns A boolean indicating if this FunctionInfo is valid.
   bool isValid() { return !MergedFunctions.empty(); }
 
+  /// Get a vector of DataExtractor objects for the functions in this
+  /// MergedFunctionsInfo object.
+  ///
+  /// \param Data The binary stream to read the data from. This object must have
+  /// the data for the MergedFunctionsInfo object starting at offset zero. The
+  /// data can contain more data than needed.
+  ///
+  /// \returns An llvm::Expected containing a vector of DataExtractor objects on
+  /// success, or an error object if parsing fails.
+  static llvm::Expected<std::vector<DataExtractor>>
+  getFuncsDataExtractors(DataExtractor &Data);
+
   /// Decode an MergedFunctionsInfo object from a binary data stream.
   ///
   /// \param Data The binary stream to read the data from. This object must have

diff  --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index dd754c701f6240..785a8da64abe4c 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -235,10 +235,10 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
   return FuncInfoOffset;
 }
 
-llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
-                                                  const GsymReader &GR,
-                                                  uint64_t FuncAddr,
-                                                  uint64_t Addr) {
+llvm::Expected<LookupResult>
+FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
+                     uint64_t FuncAddr, uint64_t Addr,
+                     std::optional<DataExtractor> *MergedFuncsData) {
   LookupResult LR;
   LR.LookupAddr = Addr;
   uint64_t Offset = 0;
@@ -289,6 +289,12 @@ llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
           return ExpectedLE.takeError();
         break;
 
+      case InfoType::MergedFunctionsInfo:
+        // Store the merged functions data for later parsing, if needed.
+        if (MergedFuncsData)
+          *MergedFuncsData = InfoData;
+        break;
+
       case InfoType::InlineInfo:
         // We will parse the inline info after our line table, but only if
         // we have a line entry.

diff  --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index fa5476db191ec4..0a5bb7caaee8c9 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -334,14 +334,52 @@ GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
     return ExpectedData.takeError();
 }
 
-llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
+llvm::Expected<LookupResult>
+GsymReader::lookup(uint64_t Addr,
+                   std::optional<DataExtractor> *MergedFunctionsData) const {
   uint64_t FuncStartAddr = 0;
   if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
-    return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);
+    return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
+                                MergedFunctionsData);
   else
     return ExpectedData.takeError();
 }
 
+llvm::Expected<std::vector<LookupResult>>
+GsymReader::lookupAll(uint64_t Addr) const {
+  std::vector<LookupResult> Results;
+  std::optional<DataExtractor> MergedFunctionsData;
+
+  // First perform a lookup to get the primary function info result.
+  auto MainResult = lookup(Addr, &MergedFunctionsData);
+  if (!MainResult)
+    return MainResult.takeError();
+
+  // Add the main result as the first entry.
+  Results.push_back(std::move(*MainResult));
+
+  // Now process any merged functions data that was found during the lookup.
+  if (MergedFunctionsData) {
+    // Get data extractors for each merged function.
+    auto ExpectedMergedFuncExtractors =
+        MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
+    if (!ExpectedMergedFuncExtractors)
+      return ExpectedMergedFuncExtractors.takeError();
+
+    // Process each merged function data.
+    for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
+      if (auto FI = FunctionInfo::lookup(MergedData, *this,
+                                         MainResult->FuncRange.start(), Addr)) {
+        Results.push_back(std::move(*FI));
+      } else {
+        return FI.takeError();
+      }
+    }
+  }
+
+  return Results;
+}
+
 void GsymReader::dump(raw_ostream &OS) {
   const auto &Header = getHeader();
   // Dump the GSYM header.

diff  --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
index 4efae2262271db..d2c28f38799d3e 100644
--- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -35,22 +35,59 @@ llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
 llvm::Expected<MergedFunctionsInfo>
 MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
   MergedFunctionsInfo MFI;
+  auto FuncExtractorsOrError = MFI.getFuncsDataExtractors(Data);
+
+  if (!FuncExtractorsOrError)
+    return FuncExtractorsOrError.takeError();
+
+  for (DataExtractor &FuncData : *FuncExtractorsOrError) {
+    llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FuncData, BaseAddr);
+    if (!FI)
+      return FI.takeError();
+    MFI.MergedFunctions.push_back(std::move(*FI));
+  }
+
+  return MFI;
+}
+
+llvm::Expected<std::vector<DataExtractor>>
+MergedFunctionsInfo::getFuncsDataExtractors(DataExtractor &Data) {
+  std::vector<DataExtractor> Results;
   uint64_t Offset = 0;
+
+  // Ensure there is enough data to read the function count.
+  if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+    return createStringError(
+        std::errc::io_error,
+        "unable to read the function count at offset 0x%8.8" PRIx64, Offset);
+
   uint32_t Count = Data.getU32(&Offset);
 
   for (uint32_t i = 0; i < Count; ++i) {
+    // Ensure there is enough data to read the function size.
+    if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+      return createStringError(
+          std::errc::io_error,
+          "unable to read size of function %u at offset 0x%8.8" PRIx64, i,
+          Offset);
+
     uint32_t FnSize = Data.getU32(&Offset);
-    DataExtractor FnData(Data.getData().substr(Offset, FnSize),
+
+    // Ensure there is enough data for the function content.
+    if (!Data.isValidOffsetForDataOfSize(Offset, FnSize))
+      return createStringError(
+          std::errc::io_error,
+          "function data is truncated for function %u at offset 0x%8.8" PRIx64
+          ", expected size %u",
+          i, Offset, FnSize);
+
+    // Extract the function data.
+    Results.emplace_back(Data.getData().substr(Offset, FnSize),
                          Data.isLittleEndian(), Data.getAddressSize());
-    llvm::Expected<FunctionInfo> FI =
-        FunctionInfo::decode(FnData, BaseAddr + Offset);
-    if (!FI)
-      return FI.takeError();
-    MFI.MergedFunctions.push_back(std::move(*FI));
+
     Offset += FnSize;
   }
-
-  return MFI;
+  return Results;
 }
 
 bool operator==(const MergedFunctionsInfo &LHS,

diff  --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
index 94a162c5f2120d..bcd3d7847da459 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
@@ -64,6 +64,16 @@
 # CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:10
 # CHECK-GSYM-KEEP-NEXT:       0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml{{[/\\]}}out/file_0{{[1-3]}}.cpp:6
 
+## Test the lookup functionality for merged functions:
+# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 --merged-functions | FileCheck --check-prefix=CHECK-MERGED-LOOKUP %s
+# RUN: llvm-gsymutil --verify %t.keep.gSYM --address 0x248 | FileCheck --check-prefix=CHECK-NORMAL-LOOKUP %s
+ 
+# CHECK-MERGED-LOOKUP: Found 3 functions at address 0x0000000000000248:
+# CHECK-MERGED-LOOKUP-NEXT:       0x0000000000000248: my_func_02 @ /tmp/test_gsym_yaml/out/file_02.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT-NEXT:  0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
+# CHECK-MERGED-LOOKUP-NEXT-NEXT:  0x0000000000000248: my_func_03 @ /tmp/test_gsym_yaml/out/file_03.cpp:5
+ 
+# CHECK-NORMAL-LOOKUP: 0x0000000000000248: my_func_01 @ /tmp/test_gsym_yaml/out/file_01.cpp:5
 
 
 --- !mach-o

diff  --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index d61b418d2d8439..89cd3ce6fc4138 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -17,7 +17,10 @@ defm convert :
   Eq<"convert",
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
-  FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+  FF<"merged-functions", "When used with --convert, encodes merged function information for functions in debug info that have matching address ranges.\n"
+                         "Without this option one function per unique address range will be emitted.\n"
+                         "When used with --address/--addresses-from-stdin, all merged functions for a particular address will be displayed.\n"
+                         "Without this option only one function will be displayed.">;
 def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
 defm callsites_yaml_file :
   Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;

diff  --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index aed4ae7c615fd1..e6562b9ebf4049 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -98,7 +98,7 @@ static uint64_t SegmentSize;
 static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
-static bool StoreMergedFunctionInfo = false;
+static bool UseMergedFunctions = false;
 static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
 
@@ -181,7 +181,7 @@ static void parseArgs(int argc, char **argv) {
   }
 
   LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
-  StoreMergedFunctionInfo = Args.hasArg(OPT_merged_functions);
+  UseMergedFunctions = Args.hasArg(OPT_merged_functions);
 
   if (Args.hasArg(OPT_callsites_yaml_file_EQ)) {
     CallSiteYamlPath = Args.getLastArgValue(OPT_callsites_yaml_file_EQ);
@@ -380,7 +380,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
   // functions in the first FunctionInfo with that address range. Do this right
   // after loading the DWARF data so we don't have to deal with functions from
   // the symbol table.
-  if (StoreMergedFunctionInfo)
+  if (UseMergedFunctions)
     Gsym.prepareMergedFunctions(Out);
 
   // Get the UUID and convert symbol table to GSYM.
@@ -508,24 +508,39 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
 }
 
 static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
-  if (auto Result = Gsym.lookup(Addr)) {
-    // If verbose is enabled dump the full function info for the address.
-    if (Verbose) {
-      if (auto FI = Gsym.getFunctionInfo(Addr)) {
-        OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
-        Gsym.dump(OS, *FI);
-        OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
+  auto logError = [Addr, &OS](Error E) {
+    OS << HEX64(Addr) << ": ";
+    logAllUnhandledErrors(std::move(E), OS, "error: ");
+  };
+
+  if (UseMergedFunctions) {
+    if (auto Results = Gsym.lookupAll(Addr)) {
+      OS << "Found " << Results->size() << " functions at address "
+         << HEX64(Addr) << ":\n";
+      for (size_t i = 0; i < Results->size(); ++i) {
+        OS << "   " << Results->at(i);
+
+        if (i != Results->size() - 1)
+          OS << "\n";
       }
     }
-    OS << Result.get();
-  } else {
-    if (Verbose)
-      OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
-    OS << HEX64(Addr) << ": ";
-    logAllUnhandledErrors(Result.takeError(), OS, "error: ");
+  } else { /* UseMergedFunctions == false */
+    if (auto Result = Gsym.lookup(Addr)) {
+      OS << Result.get();
+    } else {
+      logError(Result.takeError());
+      return;
+    }
   }
-  if (Verbose)
+
+  if (Verbose) {
+    if (auto FI = Gsym.getFunctionInfo(Addr)) {
+      OS << "FunctionInfo for " << HEX64(Addr) << ":\n";
+      Gsym.dump(OS, *FI);
+      OS << "\nLookupResult for " << HEX64(Addr) << ":\n";
+    }
     OS << "\n";
+  }
 }
 
 int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {


        


More information about the llvm-commits mailing list