[llvm] [llvm-gsymutil] Add option to load callsites from DWARF (PR #119913)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 16 14:05:25 PST 2024
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/119913
>From 67ece329fa68e93b2b47dcc7f3c0d2ea88deef5c Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 13 Dec 2024 10:30:25 -0800
Subject: [PATCH 1/3] [llvm-gsymutil] Add option to load callsites from DWARF
---
.../llvm/DebugInfo/GSYM/DwarfTransformer.h | 15 ++++-
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 56 +++++++++++++++++++
.../macho-gsym-merged-callsites-dsym.yaml | 2 +
llvm/tools/llvm-gsymutil/Opts.td | 1 +
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 5 +-
5 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 198c0ddc265826..a6db47a6b397e7 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -33,14 +33,17 @@ class OutputAggregator;
/// allows this class to be unit tested.
class DwarfTransformer {
public:
-
/// Create a DWARF transformer.
///
/// \param D The DWARF to use when converting to GSYM.
///
/// \param G The GSYM creator to populate with the function information
/// from the debug info.
- DwarfTransformer(DWARFContext &D, GsymCreator &G) : DICtx(D), Gsym(G) {}
+ ///
+ /// \param LDCS Flag to indicate weather we should load the call site
+ /// information from DWARF `DW_TAG_call_site` entries
+ DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
+ : DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
/// Extract the DWARF from the supplied object file and convert it into the
/// Gsym format in the GsymCreator object that is passed in. Returns an
@@ -83,8 +86,16 @@ class DwarfTransformer {
/// \param Die The DWARF debug info entry to parse.
void handleDie(OutputAggregator &Strm, CUInfo &CUI, DWARFDie Die);
+ /// Parse call site information from DWARF
+ ///
+ /// \param CUI The compile unit info for the current CU.
+ /// \param Die The DWARFDie for the function.
+ /// \param FI The FunctionInfo for the function being populated.
+ void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);
+
DWARFContext &DICtx;
GsymCreator &Gsym;
+ bool LoadDwarfCallSites;
friend class DwarfTransformerTest;
};
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 520baa5ac720e8..9b024d50f2a108 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -543,6 +543,11 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
FI.Inline = std::nullopt;
}
}
+
+ // If dwarf-callsites flag is set, parse DW_TAG_call_site DIEs.
+ if (LoadDwarfCallSites)
+ parseCallSiteInfoFromDwarf(CUI, Die, FI);
+
Gsym.addFunctionInfo(std::move(FI));
}
} break;
@@ -553,6 +558,57 @@ void DwarfTransformer::handleDie(OutputAggregator &Out, CUInfo &CUI,
handleDie(Out, CUI, ChildDie);
}
+void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
+ FunctionInfo &FI) {
+ // Parse all DW_TAG_call_site DIEs that are children of this subprogram DIE.
+ // DWARF specification:
+ // - DW_TAG_call_site can have DW_AT_call_return_pc for return address offset.
+ // - DW_AT_call_origin might point to a DIE of the function being called.
+ // For simplicity, we will just extract return_offset and possibly target name
+ // if available.
+
+ CallSiteInfoCollection CSIC;
+
+ for (DWARFDie Child : Die.children()) {
+ if (Child.getTag() == dwarf::DW_TAG_call_site) {
+ CallSiteInfo CSI;
+ // DW_AT_call_return_pc: the return PC (address). We'll convert it to
+ // offset relative to FI's start.
+ uint64_t ReturnPC =
+ dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc), 0);
+ if (ReturnPC < FI.startAddress() || ReturnPC >= FI.endAddress())
+ continue;
+
+ CSI.ReturnOffset = ReturnPC - FI.startAddress();
+
+ // Attempt to get function name from DW_AT_call_origin. If present, we can
+ // insert it as a match regex.
+ if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie(
+ dwarf::DW_AT_call_origin)) {
+ if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
+ uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
+ CSI.MatchRegex.push_back(NameOff);
+ }
+ }
+
+ // For now, we won't attempt to deduce InternalCall/ExternalCall flags
+ // from DWARF.
+ CSI.Flags = CallSiteInfo::Flags::None;
+
+ CSIC.CallSites.push_back(CSI);
+ }
+ }
+
+ if (!CSIC.CallSites.empty()) {
+ if (!FI.CallSites)
+ FI.CallSites = CallSiteInfoCollection();
+ // Append parsed DWARF callsites:
+ FI.CallSites->CallSites.insert(FI.CallSites->CallSites.end(),
+ CSIC.CallSites.begin(),
+ CSIC.CallSites.end());
+ }
+}
+
Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
size_t NumBefore = Gsym.getNumFunctionInfos();
auto getDie = [&](DWARFUnit &DwarfUnit) -> DWARFDie {
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
index a98d4db0a791b7..4cecc79c72b4b3 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -4,9 +4,11 @@
# RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --dwarf-callsites -o %t/dwarf_call_sites_dSYM.gsym
# Dump the GSYM file and check the output for callsite information
# RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
+# RUN: llvm-gsymutil %t/dwarf_call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 0x[[#%x,FUNC4_1_END:]]) "function4_copy1"
# CHECK-MERGED-CALLSITES: ++ Merged FunctionInfos[0]:
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 00f903c5211f39..d61b418d2d8439 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -18,6 +18,7 @@ defm convert :
"Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
def merged_functions :
FF<"merged-functions", "Encode merged function information for functions in debug info that have matching address ranges.\nWithout this option one function per unique address range will be emitted.">;
+def dwarf_callsites : FF<"dwarf-callsites", "Load call site info from DWARF, if available">;
defm callsites_yaml_file :
Eq<"callsites-yaml-file", "Load call site info from YAML file. Useful for testing.">, Flags<[HelpHidden]>;
defm arch :
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 42900159ce9667..aed4ae7c615fd1 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -99,6 +99,7 @@ static bool Quiet;
static std::vector<uint64_t> LookupAddresses;
static bool LookupAddressesFromStdin;
static bool StoreMergedFunctionInfo = false;
+static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;
static void parseArgs(int argc, char **argv) {
@@ -191,6 +192,8 @@ static void parseArgs(int argc, char **argv) {
std::exit(1);
}
}
+
+ LoadDwarfCallSites = Args.hasArg(OPT_dwarf_callsites);
}
/// @}
@@ -365,7 +368,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
// Make a DWARF transformer object and populate the ranges of the code
// so we don't end up adding invalid functions to GSYM data.
- DwarfTransformer DT(*DICtx, Gsym);
+ DwarfTransformer DT(*DICtx, Gsym, LoadDwarfCallSites);
if (!TextRanges.empty())
Gsym.SetValidTextRanges(TextRanges);
>From d7ab22a2485609c8d2f12d9af075ca1809521dbb Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Sun, 15 Dec 2024 21:07:21 -0800
Subject: [PATCH 2/3] Address Feedback Nr.1
---
.../llvm/DebugInfo/GSYM/DwarfTransformer.h | 2 +-
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 45 ++++++++++---------
2 files changed, 24 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index a6db47a6b397e7..d4f39ec0dc2875 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -40,7 +40,7 @@ class DwarfTransformer {
/// \param G The GSYM creator to populate with the function information
/// from the debug info.
///
- /// \param LDCS Flag to indicate weather we should load the call site
+ /// \param LDCS Flag to indicate whether we should load the call site
/// information from DWARF `DW_TAG_call_site` entries
DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false)
: DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS) {}
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 9b024d50f2a108..1ad82f00da94ce 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -570,33 +570,34 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
CallSiteInfoCollection CSIC;
for (DWARFDie Child : Die.children()) {
- if (Child.getTag() == dwarf::DW_TAG_call_site) {
- CallSiteInfo CSI;
- // DW_AT_call_return_pc: the return PC (address). We'll convert it to
- // offset relative to FI's start.
- uint64_t ReturnPC =
- dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc), 0);
- if (ReturnPC < FI.startAddress() || ReturnPC >= FI.endAddress())
- continue;
+ if (Child.getTag() != dwarf::DW_TAG_call_site)
+ continue;
- CSI.ReturnOffset = ReturnPC - FI.startAddress();
+ CallSiteInfo CSI;
+ // DW_AT_call_return_pc: the return PC (address). We'll convert it to
+ // offset relative to FI's start.
+ auto ReturnPC = dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc));
+ if (!ReturnPC || *ReturnPC < FI.startAddress() ||
+ *ReturnPC >= FI.endAddress())
+ continue;
- // Attempt to get function name from DW_AT_call_origin. If present, we can
- // insert it as a match regex.
- if (DWARFDie OriginDie = Child.getAttributeValueAsReferencedDie(
- dwarf::DW_AT_call_origin)) {
- if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
- uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
- CSI.MatchRegex.push_back(NameOff);
- }
+ CSI.ReturnOffset = *ReturnPC - FI.startAddress();
+
+ // Attempt to get function name from DW_AT_call_origin. If present, we can
+ // insert it as a match regex.
+ if (DWARFDie OriginDie =
+ Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
+ if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
+ uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
+ CSI.MatchRegex.push_back(NameOff);
}
+ }
- // For now, we won't attempt to deduce InternalCall/ExternalCall flags
- // from DWARF.
- CSI.Flags = CallSiteInfo::Flags::None;
+ // For now, we won't attempt to deduce InternalCall/ExternalCall flags
+ // from DWARF.
+ CSI.Flags = CallSiteInfo::Flags::None;
- CSIC.CallSites.push_back(CSI);
- }
+ CSIC.CallSites.push_back(CSI);
}
if (!CSIC.CallSites.empty()) {
>From 751a615d82cbe62e97ae10a06c2c1599dc8bf41c Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Mon, 16 Dec 2024 14:05:12 -0800
Subject: [PATCH 3/3] Address Feedback Nr.2
---
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 14 ++++++++++----
1 file changed, 10 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 1ad82f00da94ce..b9d9373439a319 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -576,7 +576,8 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
CallSiteInfo CSI;
// DW_AT_call_return_pc: the return PC (address). We'll convert it to
// offset relative to FI's start.
- auto ReturnPC = dwarf::toAddress(Child.find(dwarf::DW_AT_call_return_pc));
+ auto ReturnPC =
+ dwarf::toAddress(Child.findRecursively(dwarf::DW_AT_call_return_pc));
if (!ReturnPC || *ReturnPC < FI.startAddress() ||
*ReturnPC >= FI.endAddress())
continue;
@@ -587,9 +588,14 @@ void DwarfTransformer::parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die,
// insert it as a match regex.
if (DWARFDie OriginDie =
Child.getAttributeValueAsReferencedDie(dwarf::DW_AT_call_origin)) {
- if (auto Name = OriginDie.getName(DINameKind::ShortName)) {
- uint32_t NameOff = Gsym.insertString(Name, /*Copy=*/false);
- CSI.MatchRegex.push_back(NameOff);
+
+ // Prefer the linkage (mangled) name if available, else the short name.
+ if (const char *LinkName = OriginDie.getLinkageName()) {
+ uint32_t LinkNameOff = Gsym.insertString(LinkName, /*Copy=*/false);
+ CSI.MatchRegex.push_back(LinkNameOff);
+ } else if (const char *ShortName = OriginDie.getShortName()) {
+ uint32_t ShortNameOff = Gsym.insertString(ShortName, /*Copy=*/false);
+ CSI.MatchRegex.push_back(ShortNameOff);
}
}
More information about the llvm-commits
mailing list