[llvm] 558de0e - [llvm-gsymutil] Add option to load callsites from DWARF (#119913)

via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 17 08:51:34 PST 2024


Author: alx32
Date: 2024-12-17T08:51:30-08:00
New Revision: 558de0e1f993f413a9c8b93d969b28b651c6e437

URL: https://github.com/llvm/llvm-project/commit/558de0e1f993f413a9c8b93d969b28b651c6e437
DIFF: https://github.com/llvm/llvm-project/commit/558de0e1f993f413a9c8b93d969b28b651c6e437.diff

LOG: [llvm-gsymutil] Add option to load callsites from DWARF (#119913)

This change adds support for loading GSYM call site information from
DWARF. Previously, call site information could only be loaded from
YAML.

For testing, we add a second run to the `macho-gsym-merged-callsites-dsym`
test that loads call site info from DWARF rather than YAML.

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
    llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
    llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
    llvm/tools/llvm-gsymutil/Opts.td
    llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 198c0ddc265826..d4f39ec0dc2875 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -33,14 +33,17 @@ class OutputAggregator;
 /// allows this class to be unit tested.
 class DwarfTransformer {
 public:
-
   /// Create a DWARF transformer.
   ///
   /// \param D The DWARF to use when converting to GSYM.
   ///
   /// \param G The GSYM creator to populate with the function information
   /// from the debug info.
-  DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {}
+  ///
+  /// \param LDCS Flag to indicate whether we should load the call site
+  /// information from DWARF `DW_TAG_call_site` entries
+  DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
+      : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
 
   /// Extract the DWARF from the supplied object file and convert it into the
   /// Gsym format in the GsymCreator object that is passed in. Returns an
@@ -83,8 +86,16 @@ class DwarfTransformer {
   /// \param Die The DWARF debug info entry to parse.
   void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die);
 
+  /// Parse call site information from DWARF
+  ///
+  /// \param CUI   The compile unit info for the current CU.
+  /// \param Die   The DWARFDie for the function.
+  /// \param FI    The FunctionInfo for the function being populated.
+  void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);
+
   DWARFContext &DICtx;
   GsymCreator &Gsym;
+  bool LoadDwarfCallSites;
 
   friend class DwarfTransformerTest;
 };

diff  --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 520baa5ac720e8..568af5ee8e3ae0 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -543,6 +543,11 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
           FI.Inline = std::nullopt;
         }
       }
+
+      // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
+      if (LoadDwarfCallSites)
+        parseCallSiteInfoFromDwarf(CUI, Die, FI);
+
       Gsym.addFunctionInfo(std::move(FI));
     }
   } break;
@@ -553,6 +558,63 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
     handleDie(Out, CUI, ChildDie);
 }
 
+void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
+                                                  FunctionInfo &FI) {
+  // Collect the DW_TAG_call_site DIEs that are direct children of Die (nested
+  // DIEs are not visited) and record each one as a CallSiteInfo on FI:
+  // - DW_AT_call_return_pc gives the return address of the call.
+  // - DW_AT_call_origin might point to a DIE of the function being called.
+  // Only the return offset and, when available, the callee name are
+  // extracted. CUI is currently unused here.
+
+  CallSiteInfoCollection CSIC;
+
+  for (DWARFDie Child : Die.children()) {
+    if (Child.getTag() != dwarf::DW_TAG_call_site)
+      continue;
+
+    CallSiteInfo CSI;
+    // DW_AT_call_return_pc is an address; skip the call site if it is missing
+    // or lies outside FI's range, otherwise store it relative to FI's start.
+    auto ReturnPC =
+        dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
+    if (!ReturnPC || !FI.Range.contains(*ReturnPC))
+      continue;
+
+    CSI.ReturnOffset = *ReturnPC - FI.startAddress();
+
+    // Attempt to get the callee name from DW_AT_call_origin. If present, the
+    // value returned by Gsym.insertString is appended to CSI.MatchRegex.
+    if (DWARFDie OriginDie =
+            Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
+
+      // Prefer the linkage name, falling back to the short name.
+      if (const char *LinkName = OriginDie.getLinkageName()) {
+        uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false);
+        CSI.MatchRegex.push_back(LinkNameOff);
+      } else if (const char *ShortName = OriginDie.getShortName()) {
+        uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false);
+        CSI.MatchRegex.push_back(ShortNameOff);
+      }
+    }
+
+    // For now, we won't attempt to deduce InternalCall/ExternalCall flags
+    // from DWARF, so every call site is recorded with no flags set.
+    CSI.Flags = CallSiteInfo::Flags::None;
+
+    CSIC.CallSites.push_back(CSI);
+  }
+
+  if (!CSIC.CallSites.empty()) {
+    if (!FI.CallSites)
+      FI.CallSites = CallSiteInfoCollection();
+    // Append the call sites parsed from DWARF to any already present on FI.
+    FI.CallSites->CallSites.insert(FI.CallSites->CallSites.end(),
+                                   CSIC.CallSites.begin(),
+                                   CSIC.CallSites.end());
+  }
+}
+
 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
   size_t NumBefore = Gsym.getNumFunctionInfos();
   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {

diff  --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index caf77467bbb938..001145c01398c5 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -8,9 +8,11 @@
 # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
 
 # RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym
 
 # Dump the GSYM file and check the output for callsite information
 # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
 
 # CHECK-MERGED-CALLSITES:      FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 0x[[#%x,FUNC4_1_END:]]) "function4_copy1"
 # CHECK-MERGED-CALLSITES:      ++ Merged FunctionInfos[0]:

diff  --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 00f903c5211f39..d61b418d2d8439 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -18,6 +18,7 @@ defm convert :
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
   FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
 defm callsites_yaml_file :
   Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
 defm arch :

diff  --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 42900159ce9667..aed4ae7c615fd1 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -99,6 +99,7 @@ static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
 static bool StoreMergedFunctionInfo = false;
+static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
 
 static void parseArgs(int argc, char **argv) {
@@ -191,6 +192,8 @@ static void parseArgs(int argc, char **argv) {
       std::exit(1);
     }
   }
+
+  LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
 }
 
 /// @}
@@ -365,7 +368,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
 
   // Make a DWARF transformer object and populate the ranges of the code
   // so we don't end up adding invalid functions to GSYM data.
-  DwarfTransformer DT(*DICtx, Gsym);
+  DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
   if (!TextRanges.empty())
     Gsym.SetValidTextRanges(TextRanges);
 


        


More information about the llvm-commits mailing list