[llvm] [llvm-gsymutil] Add option to load callsites from DWARF (PR #119913)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 16 14:05:25 PST 2024


https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/119913

>From 67ece329fa68e93b2b47dcc7f3c0d2ea88deef5c Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 13 Dec 2024 10:30:25 -0800
Subject: [PATCH 1/3] [llvm-gsymutil] Add option to load callsites from DWARF

---
 .../llvm/DebugInfo/GSYM/DwarfTransformer.h    | 15 ++++-
 llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp  | 56 +++++++++++++++++++
 .../macho-gsym-merged-callsites-dsym.yaml     |  2 +
 llvm/tools/llvm-gsymutil/Opts.td              |  1 +
 llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp    |  5 +-
 5 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 198c0ddc265826..a6db47a6b397e7 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -33,14 +33,17 @@ class OutputAggregator;
 /// allows this class to be unit tested.
 class DwarfTransformer {
 public:
-
   /// Create a DWARF transformer.
   ///
   /// \param D The DWARF to use when converting to GSYM.
   ///
   /// \param G The GSYM creator to populate with the function information
   /// from the debug info.
-  DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {}
+  ///
+  /// \param LDCS Flag to indicate weather we should load the call site
+  /// information from DWARF `DW_TAG_call_site` entries
+  DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
+      : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
 
   /// Extract the DWARF from the supplied object file and convert it into the
   /// Gsym format in the GsymCreator object that is passed in. Returns an
@@ -83,8 +86,16 @@ class DwarfTransformer {
   /// \param Die The DWARF debug info entry to parse.
   void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die);
 
+  /// Parse call site information from DWARF
+  ///
+  /// \param CUI   The compile unit info for the current CU.
+  /// \param Die   The DWARFDie for the function.
+  /// \param FI    The FunctionInfo for the function being populated.
+  void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);
+
   DWARFContext &DICtx;
   GsymCreator &Gsym;
+  bool LoadDwarfCallSites;
 
   friend class DwarfTransformerTest;
 };
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 520baa5ac720e8..9b024d50f2a108 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -543,6 +543,11 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
           FI.Inline = std::nullopt;
         }
       }
+
+      // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
+      if (LoadDwarfCallSites)
+        parseCallSiteInfoFromDwarf(CUI, Die, FI);
+
       Gsym.addFunctionInfo(std::move(FI));
     }
   } break;
@@ -553,6 +558,57 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
     handleDie(Out, CUI, ChildDie);
 }
 
+void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
+                                                  FunctionInfo &FI) {
+  // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE.
+  // DWARF specification:
+  // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset.
+  // - DW_AT_call_origin might point to a DIE of the function being called.
+  // For simplicity, we will just extract return_offset and possibly target name
+  // if available.
+
+  CallSiteInfoCollection CSIC;
+
+  for (DWARFDie Child : Die.children()) {
+    if (Child.getTag() == dwarf::DW_TAG_call_site) {
+      CallSiteInfo CSI;
+      // DW_AT_call_return_pc: the return PC (address). We'll convert it to
+      // offset relative to FI's start.
+      uint64_t ReturnPC =
+          dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc), 0);
+      if (ReturnPC < FI.startAddress() || ReturnPC >= FI.endAddress())
+        continue;
+
+      CSI.ReturnOffset = ReturnPC - FI.startAddress();
+
+      // Attempt to get function name from DW_AT_call_origin. If present, we can
+      // insert it as a match regex.
+      if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie(
+              dwarf::DW_AT_call_origin)) {
+        if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
+          uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
+          CSI.MatchRegex.push_back(NameOff);
+        }
+      }
+
+      // For now, we won't attempt to deduce InternalCall/ExternalCall flags
+      // from DWARF.
+      CSI.Flags = CallSiteInfo::Flags::None;
+
+      CSIC.CallSites.push_back(CSI);
+    }
+  }
+
+  if (!CSIC.CallSites.empty()) {
+    if (!FI.CallSites)
+      FI.CallSites = CallSiteInfoCollection();
+    // Append parsed DWARF callsites:
+    FI.CallSites->CallSites.insert(FI.CallSites->CallSites.end(),
+                                   CSIC.CallSites.begin(),
+                                   CSIC.CallSites.end());
+  }
+}
+
 Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
   size_t NumBefore = Gsym.getNumFunctionInfos();
   auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index a98d4db0a791b7..4cecc79c72b4b3 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -4,9 +4,11 @@
 # RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
 
 # RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym
 
 # Dump the GSYM file and check the output for callsite information
 # RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
 
 # CHECK-MERGED-CALLSITES:      FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 0x[[#%x,FUNC4_1_END:]]) "function4_copy1"
 # CHECK-MERGED-CALLSITES:      ++ Merged FunctionInfos[0]:
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 00f903c5211f39..d61b418d2d8439 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -18,6 +18,7 @@ defm convert :
      "Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
 def merged_functions :
   FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
 defm callsites_yaml_file :
   Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
 defm arch :
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 42900159ce9667..aed4ae7c615fd1 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -99,6 +99,7 @@ static bool Quiet;
 static std::vector<uint64_t> LookupAddresses;
 static bool LookupAddressesFromStdin;
 static bool StoreMergedFunctionInfo = false;
+static bool LoadDwarfCallSites = false;
 static std::string CallSiteYamlPath;
 
 static void parseArgs(int argc, char **argv) {
@@ -191,6 +192,8 @@ static void parseArgs(int argc, char **argv) {
       std::exit(1);
     }
   }
+
+  LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
 }
 
 /// @}
@@ -365,7 +368,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
 
   // Make a DWARF transformer object and populate the ranges of the code
   // so we don't end up adding invalid functions to GSYM data.
-  DwarfTransformer DT(*DICtx, Gsym);
+  DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
   if (!TextRanges.empty())
     Gsym.SetValidTextRanges(TextRanges);
 

>From d7ab22a2485609c8d2f12d9af075ca1809521dbb Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Sun, 15 Dec 2024 21:07:21 -0800
Subject: [PATCH 2/3] Address Feedback Nr.1

---
 .../llvm/DebugInfo/GSYM/DwarfTransformer.h    |  2 +-
 llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp  | 45 ++++++++++---------
 2 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index a6db47a6b397e7..d4f39ec0dc2875 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -40,7 +40,7 @@ class DwarfTransformer {
   /// \param G The GSYM creator to populate with the function information
   /// from the debug info.
   ///
-  /// \param LDCS Flag to indicate weather we should load the call site
+  /// \param LDCS Flag to indicate whether we should load the call site
   /// information from DWARF `DW_TAG_call_site` entries
   DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
       : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 9b024d50f2a108..1ad82f00da94ce 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -570,33 +570,34 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
   CallSiteInfoCollection CSIC;
 
   for (DWARFDie Child : Die.children()) {
-    if (Child.getTag() == dwarf::DW_TAG_call_site) {
-      CallSiteInfo CSI;
-      // DW_AT_call_return_pc: the return PC (address). We'll convert it to
-      // offset relative to FI's start.
-      uint64_t ReturnPC =
-          dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc), 0);
-      if (ReturnPC < FI.startAddress() || ReturnPC >= FI.endAddress())
-        continue;
+    if (Child.getTag() != dwarf::DW_TAG_call_site)
+      continue;
 
-      CSI.ReturnOffset = ReturnPC - FI.startAddress();
+    CallSiteInfo CSI;
+    // DW_AT_call_return_pc: the return PC (address). We'll convert it to
+    // offset relative to FI's start.
+    auto ReturnPC = dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc));
+    if (!ReturnPC || *ReturnPC < FI.startAddress() ||
+        *ReturnPC >= FI.endAddress())
+      continue;
 
-      // Attempt to get function name from DW_AT_call_origin. If present, we can
-      // insert it as a match regex.
-      if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie(
-              dwarf::DW_AT_call_origin)) {
-        if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
-          uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
-          CSI.MatchRegex.push_back(NameOff);
-        }
+    CSI.ReturnOffset = *ReturnPC - FI.startAddress();
+
+    // Attempt to get function name from DW_AT_call_origin. If present, we can
+    // insert it as a match regex.
+    if (DWARFDie OriginDie =
+            Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
+      if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
+        uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
+        CSI.MatchRegex.push_back(NameOff);
       }
+    }
 
-      // For now, we won't attempt to deduce InternalCall/ExternalCall flags
-      // from DWARF.
-      CSI.Flags = CallSiteInfo::Flags::None;
+    // For now, we won't attempt to deduce InternalCall/ExternalCall flags
+    // from DWARF.
+    CSI.Flags = CallSiteInfo::Flags::None;
 
-      CSIC.CallSites.push_back(CSI);
-    }
+    CSIC.CallSites.push_back(CSI);
   }
 
   if (!CSIC.CallSites.empty()) {

>From 751a615d82cbe62e97ae10a06c2c1599dc8bf41c Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Mon, 16 Dec 2024 14:05:12 -0800
Subject: [PATCH 3/3] Address Feedback Nr.2

---
 llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 1ad82f00da94ce..b9d9373439a319 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -576,7 +576,8 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
     CallSiteInfo CSI;
     // DW_AT_call_return_pc: the return PC (address). We'll convert it to
     // offset relative to FI's start.
-    auto ReturnPC = dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc));
+    auto ReturnPC =
+        dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
     if (!ReturnPC || *ReturnPC < FI.startAddress() ||
         *ReturnPC >= FI.endAddress())
       continue;
@@ -587,9 +588,14 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
     // insert it as a match regex.
     if (DWARFDie OriginDie =
             Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
-      if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
-        uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
-        CSI.MatchRegex.push_back(NameOff);
+
+      // Prefer the linkage (mangled) name if available, else the short name.
+      if (const char *LinkName = OriginDie.getLinkageName()) {
+        uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false);
+        CSI.MatchRegex.push_back(LinkNameOff);
+      } else if (const char *ShortName = OriginDie.getShortName()) {
+        uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false);
+        CSI.MatchRegex.push_back(ShortNameOff);
       }
     }
 



More information about the llvm-commits mailing list