[llvm] [llvm-gsymutil] Fix dumping of call sites for merged functions (PR #119759)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 12 17:47:38 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-debuginfo
Author: None (alx32)
<details>
<summary>Changes</summary>
Currently, when dumping the contents of a GSYM there are three issues:
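- Callsite information is not displayed for merged functions. This is caused by a bug in `CallSiteInfoLoader::buildFunctionMap`: `Func.MergedFunctions` is copied by value instead of being bound by reference, so the merged functions that get enumerated (and whose addresses are stored in the function map) belong to the copy rather than to the original.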
- There is no variable indent for printing callsite info, so when callsites are printed for a merged function, their indentation differs from that of the merged function's other info. To address this, we add a configurable indent for printing callsite info.
- Callsite info is printed right after merged function info. This means that if a merged function also has callsite information, the parent's callsite info appears right after the merged function's callsite info, which is confusing. To address this, we print the callsite info first, then the merged functions info.
This change addresses all three of the issues above.
Example of old vs new:
<img width="1074" alt="image" src="https://github.com/user-attachments/assets/d039ad69-fa79-4abb-9816-eda9cc2eda53" />
---
Patch is 59.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119759.diff
4 Files Affected:
- (modified) llvm/include/llvm/DebugInfo/GSYM/GsymReader.h (+5-1)
- (modified) llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp (+1-1)
- (modified) llvm/lib/DebugInfo/GSYM/GsymReader.cpp (+8-5)
- (added) llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml (+1525)
``````````diff
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 72b7f3e7bfc42e..3d532588a70234 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -199,7 +199,11 @@ class GsymReader {
/// \param OS The output stream to dump to.
///
/// \param CSIC The CallSiteInfoCollection object to dump.
- void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC);
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item from within MergedFunctionsInfo.
+ void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent = 0);
/// Dump a LineTable object.
///
diff --git a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
index cf4c64e5e85ca6..85b41e28991316 100644
--- a/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/CallSiteInfo.cpp
@@ -181,7 +181,7 @@ StringMap<FunctionInfo *> CallSiteInfoLoader::buildFunctionMap() {
StringMap<FunctionInfo *> FuncMap;
for (auto &Func : Funcs) {
FuncMap.try_emplace(GCreator.getString(Func.Name), &Func);
- if (auto MFuncs = Func.MergedFunctions)
+ if (auto &MFuncs = Func.MergedFunctions)
for (auto &MFunc : MFuncs->MergedFunctions)
FuncMap.try_emplace(GCreator.getString(MFunc.Name), &MFunc);
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 7979f1f5d51928..fa5476db191ec4 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -406,13 +406,13 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
if (FI.Inline)
dump(OS, *FI.Inline, Indent);
+ if (FI.CallSites)
+ dump(OS, *FI.CallSites, Indent);
+
if (FI.MergedFunctions) {
assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
dump(OS, *FI.MergedFunctions);
}
-
- if (FI.CallSites)
- dump(OS, *FI.CallSites);
}
void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
@@ -454,10 +454,13 @@ void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
}
}
-void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC) {
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent) {
+ OS.indent(Indent);
OS << "CallSites (by relative return offset):\n";
for (const auto &CS : CSIC.CallSites) {
- OS.indent(2);
+ OS.indent(Indent);
+ OS << " ";
dump(OS, CS);
OS << "\n";
}
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
new file mode 100644
index 00000000000000..2b4d09c99a0d09
--- /dev/null
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-gsym-merged-callsites-dsym.yaml
@@ -0,0 +1,1525 @@
+## Test that reconstructs a dSYM file from YAML and generates a gsym from it. The gsym has callsite info and merged functions.
+
+# RUN: split-file %s %t
+# RUN: yaml2obj %t/merged_callsites.dSYM.yaml -o %t/merged_callsites.dSYM
+
+# RUN: llvm-gsymutil --convert=%t/merged_callsites.dSYM --merged-functions --callsites-yaml-file=%t/callsites.yaml -o %t/call_sites_dSYM.gsym
+
+# Dump the GSYM file and check the output for callsite information
+# RUN: llvm-gsymutil %t/call_sites_dSYM.gsym | FileCheck --check-prefix=CHECK-MERGED-CALLSITES %s
+
+# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC4_1:]]: [0x[[#%x,FUNC4_1_START:]] - 0x[[#%x,FUNC4_1_END:]]) "function4_copy1"
+# CHECK-MERGED-CALLSITES: ++ Merged FunctionInfos[0]:
+# CHECK-MERGED-CALLSITES-NEXT: [0x[[#%x,FUNC4_2_START:]] - 0x[[#%x,FUNC4_2_END:]]) "function4_copy2"
+
+# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC3_1:]]: [0x[[#%x,FUNC3_1_START:]] - 0x[[#%x,FUNC3_1_END:]]) "function3_copy1"
+# CHECK-MERGED-CALLSITES: CallSites (by relative return offset):
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function4_copy1]
+# CHECK-MERGED-CALLSITES: ++ Merged FunctionInfos[0]:
+# CHECK-MERGED-CALLSITES-NEXT: [0x[[#%x,FUNC3_2_START:]] - 0x[[#%x,FUNC3_2_END:]]) "function3_copy2"
+# CHECK-MERGED-CALLSITES: CallSites (by relative return offset):
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function4_copy2]
+
+# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC2_1:]]: [0x[[#%x,FUNC2_1_START:]] - 0x[[#%x,FUNC2_1_END:]]) "function2_copy1"
+# CHECK-MERGED-CALLSITES: CallSites (by relative return offset):
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy1]
+# CHECK-MERGED-CALLSITES: ++ Merged FunctionInfos[0]:
+# CHECK-MERGED-CALLSITES-NEXT: [0x[[#%x,FUNC2_2_START:]] - 0x[[#%x,FUNC2_2_END:]]) "function2_copy2"
+# CHECK-MERGED-CALLSITES: CallSites (by relative return offset):
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy1]
+
+# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,FUNC1:]]: [0x[[#%x,FUNC1_START:]] - 0x[[#%x,FUNC1_END:]]) "function1"
+# CHECK-MERGED-CALLSITES: CallSites (by relative return offset):
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
+
+# CHECK-MERGED-CALLSITES: FunctionInfo @ 0x[[#%x,MAIN:]]: [0x[[#%x,MAIN_START:]] - 0x[[#%x,MAIN_END:]]) "main"
+# CHECK-MERGED-CALLSITES: CallSites (by relative return offset):
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function1]
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy2]
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function4_copy2]
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function3_copy2]
+# CHECK-MERGED-CALLSITES-NEXT: 0x[[#%.4x,]] Flags[None] MatchRegex[function2_copy1]
+
+
+#--- repro-script.sh
+#!/bin/bash
+set -ex
+
+# Set TOOLCHAIN_DIR to point to the llvm bin directory
+TOOLCHAIN_DIR="llvm-project/build/Debug/bin" # Replace with the actual path to your LLVM bin directory
+SCRIPT_DIR="$(cd "$(dirname "$0")"; pwd)"
+OUT_DIR="$SCRIPT_DIR/out"
+cd $SCRIPT_DIR
+rm -rf "$OUT_DIR" && mkdir -p "$OUT_DIR"
+
+cat > "$OUT_DIR/merged_funcs_test.cpp" << EOF
+#define ATTRIB extern "C" __attribute__((noinline))
+volatile int global_result = 0;
+
+ATTRIB int function4_copy1(int a) {
+ int b = a * 4;
+ int result = b + 4;
+ global_result = result;
+ return result;
+}
+
+ATTRIB int function4_copy2(int a) {
+ int b = a * 4;
+ int result = b + 4;
+ global_result = result;
+ return result;
+}
+
+ATTRIB int function3_copy1(int a) {
+ int b = a + 3;
+ int result = function4_copy1(b);
+ global_result = result;
+ return result;
+}
+
+ATTRIB int function3_copy2(int a) {
+ int b = a + 3;
+ int result = function4_copy2(b);
+ global_result = result;
+ return result;
+}
+
+extern "C" inline int function_inlined(int a) {
+ int b = a + 3;
+ int result = function3_copy1(b);
+ global_result = result;
+ return result;
+}
+
+ATTRIB int function2_copy1(int a) {
+ int b = a - 2;
+ int result = function_inlined(b);
+ global_result = result;
+ return result;
+}
+
+ATTRIB int function2_copy2(int a) {
+ int b = a - 2;
+ int result = function_inlined(b);
+ global_result = result;
+ return result;
+}
+
+ATTRIB int function1(int a) {
+ int b = a + 1;
+ int result = function2_copy1(b);
+ global_result = result;
+ return result;
+}
+
+int main() {
+ int sum = 0;
+ sum += function1(1);
+ sum += function2_copy2(3);
+ sum += function4_copy2(4);
+ sum += function3_copy2(41);
+ sum += function2_copy1(11);
+ return sum;
+}
+EOF
+
+# Compile merged_funcs_test.cpp to merged_funcs_test.o with flags -g -O3 for MachO / arm64
+"$TOOLCHAIN_DIR/clang++" --target=arm64-apple-macos11 -c -g -gdwarf-4 -fno-unwind-tables \
+ -mllvm -emit-func-debug-line-table-offsets -fno-exceptions -mno-outline \
+ -O3 "$OUT_DIR/merged_funcs_test.cpp" -o "$OUT_DIR/merged_funcs_test.o"
+
+# Link using ld64.lld directly with flags "--icf=all --keep-icf-stabs"
+"$TOOLCHAIN_DIR/ld64.lld" \
+ -arch arm64 \
+ -platform_version macos 11.0.0 11.0.0 \
+ -o "$OUT_DIR/merged_funcs_test.exe" \
+ "$OUT_DIR/merged_funcs_test.o" \
+ -dead_strip \
+ --icf=all \
+ --keep-icf-stabs
+
+# Create merged_funcs_test.dSYM from merged_funcs_test.exe
+"$TOOLCHAIN_DIR/dsymutil" --flat "$OUT_DIR/merged_funcs_test.exe" --verify-dwarf=none -o "$OUT_DIR/merged_funcs_test.dSYM"
+"$TOOLCHAIN_DIR/obj2yaml" "$OUT_DIR/merged_funcs_test.dSYM" -o "$OUT_DIR/merged_funcs_test.dSYM.yaml"
+
+
+
+
+#--- callsites.yaml
+functions:
+ - name: function3_copy1
+ callsites:
+ - return_offset: 0x10
+ match_regex: ["function4_copy1"]
+ - name: function3_copy2
+ callsites:
+ - return_offset: 0x10
+ match_regex: ["function4_copy2"]
+ - name: function2_copy1
+ callsites:
+ - return_offset: 0x10
+ match_regex: ["function3_copy1"]
+ - name: function2_copy2
+ callsites:
+ - return_offset: 0x10
+ match_regex: ["function3_copy1"]
+ - name: function1
+ callsites:
+ - return_offset: 0x10
+ match_regex: ["function2_copy1"]
+ - name: main
+ callsites:
+ - return_offset: 0x14
+ match_regex: ["function1"]
+ - return_offset: 0x20
+ match_regex: ["function2_copy2"]
+ - return_offset: 0x2c
+ match_regex: ["function4_copy2"]
+ - return_offset: 0x38
+ match_regex: ["function3_copy2"]
+ - return_offset: 0x48
+ match_regex: ["function2_copy1"]
+
+
+
+#--- merged_callsites.dSYM.yaml
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x100000C
+ cpusubtype: 0x0
+ filetype: 0xA
+ ncmds: 8
+ sizeofcmds: 1472
+ flags: 0x0
+ reserved: 0x0
+LoadCommands:
+ - cmd: LC_UUID
+ cmdsize: 24
+ uuid: 4C4C442F-5555-3144-A1E4-99C5508F990D
+ - cmd: LC_BUILD_VERSION
+ cmdsize: 24
+ platform: 1
+ minos: 720896
+ sdk: 720896
+ ntools: 0
+ - cmd: LC_SYMTAB
+ cmdsize: 24
+ symoff: 4096
+ nsyms: 10
+ stroff: 4256
+ strsize: 156
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __PAGEZERO
+ vmaddr: 0
+ vmsize: 4294967296
+ fileoff: 0
+ filesize: 0
+ maxprot: 0
+ initprot: 0
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __TEXT
+ vmaddr: 4294967296
+ vmsize: 16384
+ fileoff: 0
+ filesize: 0
+ maxprot: 5
+ initprot: 5
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x100000338
+ size: 208
+ offset: 0x0
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: CFFAEDFE0C000001000000000A00000008000000C005000000000000000000001B000000180000004C4C442F55553144A1E499C5508F990D32000000180000000100000000000B0000000B00000000000200000018000000001000000A000000A01000009C00000019000000480000005F5F504147455A45524F00000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000019000000980000005F5F54455854000000000000000000000000000001000000
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __DATA
+ vmaddr: 4294983680
+ vmsize: 16384
+ fileoff: 0
+ filesize: 0
+ maxprot: 3
+ initprot: 3
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __common
+ segname: __DATA
+ addr: 0x100004000
+ size: 4
+ offset: 0x0
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x1
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 4295000064
+ vmsize: 4096
+ fileoff: 4096
+ filesize: 316
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 952
+ segname: __DWARF
+ vmaddr: 4295004160
+ vmsize: 4096
+ fileoff: 8192
+ filesize: 3640
+ maxprot: 7
+ initprot: 3
+ nsects: 11
+ flags: 0
+ Sections:
+ - sectname: __debug_line
+ segname: __DWARF
+ addr: 0x100009000
+ size: 327
+ offset: 0x2000
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_aranges
+ segname: __DWARF
+ addr: 0x100009147
+ size: 48
+ offset: 0x2147
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_loc
+ segname: __DWARF
+ addr: 0x100009177
+ size: 1026
+ offset: 0x2177
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content
+ - sectname: __debug_info
+ segname: __DWARF
+ addr: 0x100009579
+ size: 923
+ offset: 0x2579
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_frame
+ segname: __DWARF
+ addr: 0x100009914
+ size: 272
+ offset: 0x2914
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content
+ - sectname: __debug_abbrev
+ segname: __DWARF
+ addr: 0x100009A24
+ size: 260
+ offset: 0x2A24
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_str
+ segname: __DWARF
+ addr: 0x100009B28
+ size: 317
+ offset: 0x2B28
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __apple_namespac
+ segname: __DWARF
+ addr: 0x100009C65
+ size: 36
+ offset: 0x2C65
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF
+ - sectname: __apple_names
+ segname: __DWARF
+ addr: 0x100009C89
+ size: 316
+ offset: 0x2C89
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content
+ - sectname: __apple_types
+ segname: __DWARF
+ addr: 0x100009DC5
+ size: 79
+ offset: 0x2DC5
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/119759
More information about the llvm-commits
mailing list