[llvm] [gSYM] Add support merged functions in gSYM format (PR #101604)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 2 19:47:28 PDT 2024
https://github.com/alx32 updated https://github.com/llvm/llvm-project/pull/101604
>From 652bc99ccdec70f3996e2d383d444f6ebb250a5b Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Mon, 29 Jul 2024 17:07:04 -0700
Subject: [PATCH 1/2] [gSYM] Add support merged functions in gSYM format
This patch introduces support for storing debug info for merged functions in the GSYM debug info. It allows GSYM to represent multiple functions that share the same address range, which occurs when multiple functions are merged during linker ICF (identical code folding).
The core of this functionality is the new `MergedFunctionsInfo` class, which is integrated into the existing `FunctionInfo` structure. During GSYM creation, functions with identical address ranges are now grouped together, with one function serving as the "master" and the others becoming "merged" functions. This organization is preserved in the GSYM format and can be read back and displayed when dumping GSYM information.
Old readers will only see the master function; the other "merged" functions will not be processed.
Note: This patch just adds the functionality to the gSYM format - additional changes to the gSYM format and algorithmic changes to the logic of existing tooling are needed to take advantage of this data.
---
.../llvm/DebugInfo/GSYM/FunctionInfo.h | 11 +-
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 9 +
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 20 +-
.../llvm/DebugInfo/GSYM/MergedFunctionsInfo.h | 61 ++
llvm/lib/DebugInfo/GSYM/CMakeLists.txt | 1 +
llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 39 +-
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 38 +
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 24 +-
.../DebugInfo/GSYM/MergedFunctionsInfo.cpp | 57 ++
.../ARM_AArch64/macho-merged-funcs-dwarf.yaml | 740 ++++++++++++++++++
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 5 +
11 files changed, 993 insertions(+), 12 deletions(-)
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
create mode 100644 llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
create mode 100644 llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 12788b0d4feea..71209b6b5c9cd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -14,6 +14,7 @@
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
#include <cstdint>
@@ -90,6 +91,7 @@ struct FunctionInfo {
uint32_t Name; ///< String table offset in the string table.
std::optional<LineTable> OptLineTable;
std::optional<InlineInfo> Inline;
+ std::optional<MergedFunctionsInfo> MergedFunctions;
/// If we encode a FunctionInfo during segmenting so we know its size, we can
/// cache that encoding here so we don't need to re-encode it when saving the
/// GSYM file.
@@ -140,9 +142,16 @@ struct FunctionInfo {
/// \param O The binary stream to write the data to at the current file
/// position.
///
+ /// \param NoPadding Directly write the FunctionInfo data, without any padding
+ /// By default, FunctionInfo will be 4-byte aligned by padding with
+ /// 0's at the start. This is OK since the function will return the offset of
+ /// actual data in the stream. However when writing FunctionInfo's as a
+ /// stream, the padding will break the decoding of the data - since the offset
+ /// where the FunctionInfo starts is not kept in this scenario.
+ ///
/// \returns An error object that indicates failure or the offset of the
/// function info that was successfully written into the stream.
- llvm::Expected<uint64_t> encode(FileWriter &O) const;
+ llvm::Expected<uint64_t> encode(FileWriter &O, bool NoPadding = false) const;
/// Encode this function info into the internal byte cache and return the size
/// in bytes.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 855a275725c4a..48808fb7b71e1 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -352,6 +352,15 @@ class GsymCreator {
/// \param FI The function info object to emplace into our functions list.
void addFunctionInfo(FunctionInfo &&FI);
+ /// Organize merged FunctionInfo's
+ ///
+ /// This method processes the list of function infos (Funcs) to identify and
+ /// group functions that have identical address ranges.
+ ///
+ /// \param Out Output stream to report information about how merged
+ /// FunctionInfo's were handled.
+ void prepareMergedFunctions(OutputAggregator &Out);
+
/// Finalize the data in the GSYM creator prior to saving the data out.
///
/// Finalize must be called after all FunctionInfo objects have been added
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index c1bdc68d808c7..89f8c043b9151 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -166,7 +166,20 @@ class GsymReader {
/// \param OS The output stream to dump to.
///
/// \param FI The object to dump.
- void dump(raw_ostream &OS, const FunctionInfo &FI);
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item within MergedFunctionsInfo.
+ void dump(raw_ostream &OS, const FunctionInfo &FI, uint32_t Indent = 0);
+
+ /// Dump a MergedFunctionsInfo object.
+ ///
+ /// This function will dump a MergedFunctionsInfo object - basically by
+ /// dumping the contained FunctionInfo objects with indentation.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param MFI The object to dump.
+ void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
/// Dump a LineTable object.
///
@@ -177,7 +190,10 @@ class GsymReader {
/// \param OS The output stream to dump to.
///
/// \param LT The object to dump.
- void dump(raw_ostream &OS, const LineTable <);
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item from within MergedFunctionsInfo.
+ void dump(raw_ostream &OS, const LineTable <, uint32_t Indent = 0);
/// Dump a InlineInfo object.
///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
new file mode 100644
index 0000000000000..1cb5e0a9e557a
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -0,0 +1,61 @@
+//===- MergedFunctionsInfo.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_MERGEDFUNCTIONSINFO_H
+#define LLVM_DEBUGINFO_GSYM_MERGEDFUNCTIONSINFO_H
+
+#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
+#include "llvm/Support/Error.h"
+#include <stdint.h>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+
+namespace gsym {
+
+class GsymReader;
+struct FunctionInfo;
+/// Container for the FunctionInfo objects that were merged into another
+/// FunctionInfo. It is stored in FunctionInfo::MergedFunctions and is encoded
+/// and decoded as part of that FunctionInfo's data.
+struct MergedFunctionsInfo {
+ /// The merged FunctionInfo objects, in the order they are encoded.
+ std::vector<FunctionInfo> MergedFunctions;
+
+ /// Remove all merged functions from this object.
+ void clear() { MergedFunctions.clear(); }
+
+ /// Query if a MergedFunctionsInfo object is valid.
+ ///
+ /// \returns A boolean indicating if this FunctionInfo is valid.
+ bool isValid() { return true; }
+
+ /// Decode an MergedFunctionsInfo object from a binary data stream.
+ ///
+ /// \param Data The binary stream to read the data from. This object must have
+ /// the data for the MergedFunctionsInfo object starting at offset zero. The
+ /// data can contain more data than needed.
+ ///
+ /// \param BaseAddr The base address to use when decoding all address ranges.
+ ///
+ /// \returns An MergedFunctionsInfo or an error describing the issue that was
+ /// encountered during decoding.
+ static llvm::Expected<MergedFunctionsInfo> decode(DataExtractor &Data,
+ uint64_t BaseAddr);
+
+ /// Encode this MergedFunctionsInfo object into FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to at the current file
+ /// position.
+ /// \returns An error object that indicates success or failure for the
+ /// encoding process.
+ llvm::Error encode(FileWriter &O) const;
+};
+
+bool operator==(const MergedFunctionsInfo &LHS, const MergedFunctionsInfo &RHS);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_MERGEDFUNCTIONSINFO_H
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index 5447030e903da..be90bfdaa7fd2 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
InlineInfo.cpp
LineTable.cpp
LookupResult.cpp
+ MergedFunctionsInfo.cpp
ObjectFileTransformer.cpp
ExtractRanges.cpp
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 07303d551af50..2cd85ef2398f9 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -22,7 +22,8 @@ using namespace gsym;
enum InfoType : uint32_t {
EndOfList = 0u,
LineTableInfo = 1u,
- InlineInfo = 2u
+ InlineInfo = 2u,
+ MergedFunctionsInfo = 3u,
};
raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const FunctionInfo &FI) {
@@ -86,6 +87,14 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
return II.takeError();
break;
+ case InfoType::MergedFunctionsInfo:
+ if (Expected<MergedFunctionsInfo> MI =
+ MergedFunctionsInfo::decode(InfoData, BaseAddr))
+ FI.MergedFunctions = std::move(MI.get());
+ else
+ return MI.takeError();
+ break;
+
default:
return createStringError(std::errc::io_error,
"0x%8.8" PRIx64 ": unsupported InfoType %u",
@@ -111,12 +120,14 @@ uint64_t FunctionInfo::cacheEncoding() {
return EncodingCache.size();
}
-llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
+llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
+ bool NoPadding) const {
if (!isValid())
return createStringError(std::errc::invalid_argument,
"attempted to encode invalid FunctionInfo object");
- // Align FunctionInfo data to a 4 byte alignment.
- Out.alignTo(4);
+ // Align FunctionInfo data to a 4 byte alignment, if padding is allowed
+ if (NoPadding == false)
+ Out.alignTo(4);
const uint64_t FuncInfoOffset = Out.tell();
// Check if we have already encoded this function info into EncodingCache.
// This will be non empty when creating segmented GSYM files as we need to
@@ -170,13 +181,31 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}
+ // Write out the merged functions info if we have any.
+ if (MergedFunctions) {
+ Out.writeU32(InfoType::MergedFunctionsInfo);
+ // Write a uint32_t length as zero for now, we will fix this up after
+ // writing the MergedFunctionsInfo out with the number of bytes written.
+ Out.writeU32(0);
+ const auto StartOffset = Out.tell();
+ llvm::Error err = MergedFunctions->encode(Out);
+ if (err)
+ return std::move(err);
+ const auto Length = Out.tell() - StartOffset;
+ if (Length > UINT32_MAX)
+ return createStringError(
+ std::errc::invalid_argument,
+ "MergedFunctionsInfo length is greater than UINT32_MAX");
+ // Fixup the size of the MergedFunctionsInfo data with the correct size.
+ Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ }
+
// Terminate the data chunks with and end of list with zero size
Out.writeU32(InfoType::EndOfList);
Out.writeU32(0);
return FuncInfoOffset;
}
-
llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
const GsymReader &GR,
uint64_t FuncAddr,
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index deedaeea2fe23..06c9846408395 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -189,6 +189,44 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
return ErrorSuccess();
}
+void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
+  // Grouping needs at least two functions to have any effect.
+  if (Funcs.size() < 2)
+    return;
+
+  // After sorting, functions that share an address range sit next to each
+  // other, so a single linear pass is enough to group them.
+  llvm::sort(Funcs);
+
+  // Grouped receives one top-level entry per distinct address range; it is
+  // seeded with the first sorted function.
+  std::vector<FunctionInfo> Grouped;
+  Grouped.emplace_back(std::move(Funcs.front()));
+
+  for (size_t I = 1; I < Funcs.size(); ++I) {
+    FunctionInfo &Candidate = Funcs[I];
+    FunctionInfo &Parent = Grouped.back();
+
+    // A different range starts a new top-level function.
+    if (!(Parent.Range == Candidate.Range)) {
+      Grouped.emplace_back(std::move(Candidate));
+      continue;
+    }
+
+    // Same range as the current top-level function: store the candidate as
+    // one of its merged children, creating the container on first use.
+    if (!Parent.MergedFunctions)
+      Parent.MergedFunctions = MergedFunctionsInfo();
+    Parent.MergedFunctions->MergedFunctions.emplace_back(
+        std::move(Candidate));
+  }
+
+  // Funcs still has its original element count (its entries were only moved
+  // from), so the difference is the number of functions that became children.
+  uint32_t NumMerged = Funcs.size() - Grouped.size();
+  if (NumMerged != 0)
+    Out << "Have " << NumMerged
+        << " merged functions as children of other functions\n";
+
+  // Install the grouped list as the creator's function list.
+  std::swap(Funcs, Grouped);
+}
+
llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
std::lock_guard<std::mutex> Guard(Mutex);
if (Finalized)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 4b1b352466175..ddfc92e1a8a40 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -398,17 +398,33 @@ void GsymReader::dump(raw_ostream &OS) {
}
}
-void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) {
+void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
+ uint32_t Indent) {
+ OS.indent(Indent);
OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
if (FI.OptLineTable)
- dump(OS, *FI.OptLineTable);
+ dump(OS, *FI.OptLineTable, Indent);
if (FI.Inline)
- dump(OS, *FI.Inline);
+ dump(OS, *FI.Inline, Indent);
+
+ if (FI.MergedFunctions) {
+ assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
+ dump(OS, *FI.MergedFunctions);
+ }
+}
+
+void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
+  // Print every merged function under a numbered header line, passing an
+  // indentation of four spaces so it reads as a child of the enclosing
+  // FunctionInfo.
+  uint32_t Idx = 0;
+  for (const FunctionInfo &Merged : MFI.MergedFunctions) {
+    OS << "++ Merged FunctionInfos[" << Idx << "]:\n";
+    dump(OS, Merged, 4);
+    ++Idx;
+  }
+}
-void GsymReader::dump(raw_ostream &OS, const LineTable <) {
+void GsymReader::dump(raw_ostream &OS, const LineTable <, uint32_t Indent) {
+ OS.indent(Indent);
OS << "LineTable:\n";
for (auto &LE: LT) {
+ OS.indent(Indent);
OS << " " << HEX64(LE.Addr) << ' ';
if (LE.File)
dump(OS, getFile(LE.File));
diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
new file mode 100644
index 0000000000000..41e74840510b8
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -0,0 +1,57 @@
+//===- LineTable.cpp --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/Support/DataExtractor.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Encode all merged functions into \p Out.
+///
+/// The layout is a uint32_t count followed, for each merged function, by a
+/// uint32_t byte size and the unpadded FunctionInfo encoding of that size.
+///
+/// \param Out The stream to write to at its current position.
+/// \returns Success, the error produced by a nested FunctionInfo::encode, or
+/// an error if a single encoded function does not fit in a uint32_t size.
+llvm::Error MergedFunctionsInfo::encode(FileWriter &Out) const {
+  Out.writeU32(MergedFunctions.size());
+  for (const auto &F : MergedFunctions) {
+    // Write a uint32_t size placeholder; it is patched below once the size of
+    // the encoded function is known.
+    Out.writeU32(0);
+    const auto StartOffset = Out.tell();
+    // Encode the FunctionInfo with no padding so later we can just read them
+    // one after the other without knowing the offset in the stream for each.
+    llvm::Expected<uint64_t> result = F.encode(Out, /*NoPadding =*/true);
+    if (!result)
+      return result.takeError();
+    const auto Length = Out.tell() - StartOffset;
+    // Same guard FunctionInfo::encode applies to its chunks: never silently
+    // truncate a size that does not fit in the 32-bit length field.
+    if (Length > UINT32_MAX)
+      return createStringError(
+          std::errc::invalid_argument,
+          "merged FunctionInfo length is greater than UINT32_MAX");
+    Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+  }
+  return Error::success();
+}
+
+/// Decode a MergedFunctionsInfo from \p Data.
+///
+/// Reads a uint32_t count, then for each entry a uint32_t byte size followed
+/// by a FunctionInfo encoding of that size.
+///
+/// \param Data The stream holding the encoded object, starting at offset zero.
+/// \param BaseAddr The base address forwarded to each nested
+/// FunctionInfo::decode call.
+/// \returns The decoded object, or the first error reported while decoding a
+/// nested FunctionInfo.
+llvm::Expected<MergedFunctionsInfo>
+MergedFunctionsInfo::decode(DataExtractor &Data, uint64_t BaseAddr) {
+  MergedFunctionsInfo MFI;
+  uint64_t Offset = 0;
+  uint32_t Count = Data.getU32(&Offset);
+
+  for (uint32_t i = 0; i < Count; ++i) {
+    uint32_t FnSize = Data.getU32(&Offset);
+    // Give the nested decoder a view of exactly this function's bytes.
+    DataExtractor FnData(Data.getData().substr(Offset, FnSize),
+                         Data.isLittleEndian(), Data.getAddressSize());
+    // Merged functions are grouped because they have the same address range
+    // as their parent, so they decode against the same BaseAddr. Offset is a
+    // position in the byte stream and must not be added to an address.
+    llvm::Expected<FunctionInfo> FI = FunctionInfo::decode(FnData, BaseAddr);
+    if (!FI)
+      return FI.takeError();
+    MFI.MergedFunctions.push_back(std::move(*FI));
+    Offset += FnSize;
+  }
+
+  return MFI;
+}
+
+/// Two MergedFunctionsInfo objects are equal when their lists of merged
+/// functions compare equal.
+///
+/// The name is qualified so that this defines the operator declared in
+/// namespace llvm::gsym; an unqualified definition here would instead declare
+/// an unrelated operator in the global namespace.
+bool llvm::gsym::operator==(const MergedFunctionsInfo &LHS,
+                            const MergedFunctionsInfo &RHS) {
+  return LHS.MergedFunctions == RHS.MergedFunctions;
+}
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
new file mode 100644
index 0000000000000..3be64524a3d5b
--- /dev/null
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
@@ -0,0 +1,740 @@
+# RUN: yaml2obj %s -o %t.dSYM
+# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.gSYM
+# RUN: llvm-gsymutil --verify --verbose %t.gSYM | FileCheck --check-prefix=CHECK-GSYM %s
+
+## Note: For identical functions, the dSYM / gSYM cannot be counted on to be deterministic.
+## So we can only match the general structure, not exact function names / offsets
+
+
+# CHECK-GSYM: Address Table:
+# CHECK-GSYM-NEXT: INDEX OFFSET16 (ADDRESS)
+# CHECK-GSYM-NEXT: ====== ===============================
+# CHECK-GSYM-NEXT: [ 0] 0x{{[0-9a-fA-F]+}}
+
+# CHECK-GSYM: Address Info Offsets:
+# CHECK-GSYM-NEXT: INDEX Offset
+# CHECK-GSYM-NEXT: ====== ==========
+# CHECK-GSYM-NEXT: [ 0] 0x{{[0-9a-fA-F]+}}
+
+# CHECK-GSYM: Files:
+# CHECK-GSYM-NEXT: INDEX DIRECTORY BASENAME PATH
+# CHECK-GSYM-NEXT: ====== ========== ========== ==============================
+# CHECK-GSYM-NEXT: [ 0] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}}
+# CHECK-GSYM-NEXT: [ 1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
+# CHECK-GSYM-NEXT: [ 2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
+# CHECK-GSYM-NEXT: [ 3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
+
+# CHECK-GSYM: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-NEXT: LineTable:
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-NEXT: ++ Merged FunctionInfos[0]:
+# CHECK-GSYM-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-NEXT: LineTable:
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-NEXT: ++ Merged FunctionInfos[1]:
+# CHECK-GSYM-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-NEXT: LineTable:
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
+# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
+
+
+
+--- !mach-o
+FileHeader:
+ magic: 0xFEEDFACF
+ cputype: 0x100000C
+ cpusubtype: 0x0
+ filetype: 0xA
+ ncmds: 6
+ sizeofcmds: 1168
+ flags: 0x0
+ reserved: 0x0
+LoadCommands:
+ - cmd: LC_UUID
+ cmdsize: 24
+ uuid: 4C4C44EA-5555-3144-A15B-1DD428F7F3A2
+ - cmd: LC_BUILD_VERSION
+ cmdsize: 24
+ platform: 1
+ minos: 720896
+ sdk: 720896
+ ntools: 0
+ - cmd: LC_SYMTAB
+ cmdsize: 24
+ symoff: 4096
+ nsyms: 3
+ stroff: 4144
+ strsize: 38
+ - cmd: LC_SEGMENT_64
+ cmdsize: 152
+ segname: __TEXT
+ vmaddr: 0
+ vmsize: 16384
+ fileoff: 0
+ filesize: 0
+ maxprot: 5
+ initprot: 5
+ nsects: 1
+ flags: 0
+ Sections:
+ - sectname: __text
+ segname: __TEXT
+ addr: 0x248
+ size: 40
+ offset: 0x0
+ align: 2
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x80000400
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: CFFAEDFE0C000001000000000A000000060000009004000000000000000000001B00000018000000
+ - cmd: LC_SEGMENT_64
+ cmdsize: 72
+ segname: __LINKEDIT
+ vmaddr: 16384
+ vmsize: 4096
+ fileoff: 4096
+ filesize: 86
+ maxprot: 1
+ initprot: 1
+ nsects: 0
+ flags: 0
+ - cmd: LC_SEGMENT_64
+ cmdsize: 872
+ segname: __DWARF
+ vmaddr: 20480
+ vmsize: 4096
+ fileoff: 8192
+ filesize: 1405
+ maxprot: 7
+ initprot: 3
+ nsects: 10
+ flags: 0
+ Sections:
+ - sectname: __debug_line
+ segname: __DWARF
+ addr: 0x5000
+ size: 234
+ offset: 0x2000
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_aranges
+ segname: __DWARF
+ addr: 0x50EA
+ size: 144
+ offset: 0x20EA
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_info
+ segname: __DWARF
+ addr: 0x517A
+ size: 324
+ offset: 0x217A
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_frame
+ segname: __DWARF
+ addr: 0x52BE
+ size: 120
+ offset: 0x22BE
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 14000000FFFFFFFF0400080001781E0C1F000000000000001C0000000000000048020000000000002800000000000000440E1000000000001C0000000000000048020000000000002800000000000000440E1000000000001C0000000000000048020000000000002800000000000000440E100000000000
+ - sectname: __debug_abbrev
+ segname: __DWARF
+ addr: 0x5336
+ size: 73
+ offset: 0x2336
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __debug_str
+ segname: __DWARF
+ addr: 0x537F
+ size: 221
+ offset: 0x237F
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ - sectname: __apple_namespac
+ segname: __DWARF
+ addr: 0x545C
+ size: 36
+ offset: 0x245C
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF
+ - sectname: __apple_names
+ segname: __DWARF
+ addr: 0x5480
+ size: 116
+ offset: 0x2480
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 485341480100000003000000030000000C00000000000000010000000100060000000000010000000200000038450B6E36450B6E37450B6E440000005400000064000000D200000001000000060100000000000094000000010000002E00000000000000B7000000010000009A00000000000000
+ - sectname: __apple_types
+ segname: __DWARF
+ addr: 0x54F4
+ size: 101
+ offset: 0x24F4
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 48534148010000000100000001000000180000000000000004000000010006000300050005000B0006000600000000003080880B38000000A30000000300000064000000240000A4283A0CD0000000240000A4283A0C3C010000240000A4283A0C00000000
+ - sectname: __apple_objc
+ segname: __DWARF
+ addr: 0x5559
+ size: 36
+ offset: 0x2559
+ align: 0
+ reloff: 0x0
+ nreloc: 0
+ flags: 0x0
+ reserved1: 0x0
+ reserved2: 0x0
+ reserved3: 0x0
+ content: 485341480100000001000000000000000C000000000000000100000001000600FFFFFFFF
+LinkEditData:
+ NameList:
+ - n_strx: 2
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 584
+ - n_strx: 14
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 584
+ - n_strx: 26
+ n_type: 0xF
+ n_sect: 1
+ n_desc: 0
+ n_value: 584
+ StringTable:
+ - ''
+ - ''
+ - _my_func_01
+ - _my_func_02
+ - _my_func_03
+DWARF:
+ debug_str:
+ - ''
+ - 'clang version 20.0.0git (https://github.com/alx32/llvm-project.git cfb92be0a9cdd0f4595100c5add4e2795a44134e)'
+ - 'out/file_01.cpp'
+ - '/'
+ - '/tmp/test_gsym_yaml'
+ - my_func_01
+ - a
+ - b
+ - int
+ - 'out/file_02.cpp'
+ - my_func_02
+ - 'out/file_03.cpp'
+ - my_func_03
+ debug_abbrev:
+ - ID: 0
+ Table:
+ - Code: 0x1
+ Tag: DW_TAG_compile_unit
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_producer
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_language
+ Form: DW_FORM_data2
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_LLVM_sysroot
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_stmt_list
+ Form: DW_FORM_sec_offset
+ - Attribute: DW_AT_comp_dir
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_data4
+ - Code: 0x2
+ Tag: DW_TAG_subprogram
+ Children: DW_CHILDREN_yes
+ Attributes:
+ - Attribute: DW_AT_low_pc
+ Form: DW_FORM_addr
+ - Attribute: DW_AT_high_pc
+ Form: DW_FORM_data4
+ - Attribute: DW_AT_APPLE_omit_frame_ptr
+ Form: DW_FORM_flag_present
+ - Attribute: DW_AT_frame_base
+ Form: DW_FORM_exprloc
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_decl_file
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_decl_line
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_type
+ Form: DW_FORM_ref_addr
+ - Attribute: DW_AT_external
+ Form: DW_FORM_flag_present
+ - Code: 0x3
+ Tag: DW_TAG_formal_parameter
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_location
+ Form: DW_FORM_exprloc
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_decl_file
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_decl_line
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_type
+ Form: DW_FORM_ref_addr
+ - Code: 0x4
+ Tag: DW_TAG_base_type
+ Children: DW_CHILDREN_no
+ Attributes:
+ - Attribute: DW_AT_name
+ Form: DW_FORM_strp
+ - Attribute: DW_AT_encoding
+ Form: DW_FORM_data1
+ - Attribute: DW_AT_byte_size
+ Form: DW_FORM_data1
+ debug_aranges:
+ - Length: 0x2C
+ Version: 2
+ CuOffset: 0x0
+ AddressSize: 0x8
+ Descriptors:
+ - Address: 0x248
+ Length: 0x28
+ - Length: 0x2C
+ Version: 2
+ CuOffset: 0x6C
+ AddressSize: 0x8
+ Descriptors:
+ - Address: 0x248
+ Length: 0x28
+ - Length: 0x2C
+ Version: 2
+ CuOffset: 0xD8
+ AddressSize: 0x8
+ Descriptors:
+ - Address: 0x248
+ Length: 0x28
+ debug_info:
+ - Length: 0x68
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x21
+ - Value: 0x6E
+ - Value: 0x7E
+ - Value: 0x0
+ - Value: 0x80
+ - Value: 0x248
+ - Value: 0x28
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x248
+ - Value: 0x28
+ - Value: 0x1
+ - Value: 0x1
+ BlockData: [ 0x6F ]
+ - Value: 0x94
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0x64
+ - Value: 0x1
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x2
+ BlockData: [ 0x91, 0xC ]
+ - Value: 0x9F
+ - Value: 0x1
+ - Value: 0x3
+ - Value: 0x64
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x2
+ BlockData: [ 0x91, 0x8 ]
+ - Value: 0xA1
+ - Value: 0x1
+ - Value: 0x4
+ - Value: 0x64
+ - AbbrCode: 0x0
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0xA3
+ - Value: 0x5
+ - Value: 0x4
+ - AbbrCode: 0x0
+ - Length: 0x68
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x21
+ - Value: 0xA7
+ - Value: 0x7E
+ - Value: 0x4E
+ - Value: 0x80
+ - Value: 0x248
+ - Value: 0x28
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x248
+ - Value: 0x28
+ - Value: 0x1
+ - Value: 0x1
+ BlockData: [ 0x6F ]
+ - Value: 0xB7
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0xD0
+ - Value: 0x1
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x2
+ BlockData: [ 0x91, 0xC ]
+ - Value: 0x9F
+ - Value: 0x1
+ - Value: 0x3
+ - Value: 0xD0
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x2
+ BlockData: [ 0x91, 0x8 ]
+ - Value: 0xA1
+ - Value: 0x1
+ - Value: 0x4
+ - Value: 0xD0
+ - AbbrCode: 0x0
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0xA3
+ - Value: 0x5
+ - Value: 0x4
+ - AbbrCode: 0x0
+ - Length: 0x68
+ Version: 4
+ AbbrevTableID: 0
+ AbbrOffset: 0x0
+ AddrSize: 8
+ Entries:
+ - AbbrCode: 0x1
+ Values:
+ - Value: 0x1
+ - Value: 0x21
+ - Value: 0xC2
+ - Value: 0x7E
+ - Value: 0x9C
+ - Value: 0x80
+ - Value: 0x248
+ - Value: 0x28
+ - AbbrCode: 0x2
+ Values:
+ - Value: 0x248
+ - Value: 0x28
+ - Value: 0x1
+ - Value: 0x1
+ BlockData: [ 0x6F ]
+ - Value: 0xD2
+ - Value: 0x1
+ - Value: 0x2
+ - Value: 0x13C
+ - Value: 0x1
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x2
+ BlockData: [ 0x91, 0xC ]
+ - Value: 0x9F
+ - Value: 0x1
+ - Value: 0x3
+ - Value: 0x13C
+ - AbbrCode: 0x3
+ Values:
+ - Value: 0x2
+ BlockData: [ 0x91, 0x8 ]
+ - Value: 0xA1
+ - Value: 0x1
+ - Value: 0x4
+ - Value: 0x13C
+ - AbbrCode: 0x0
+ - AbbrCode: 0x4
+ Values:
+ - Value: 0xA3
+ - Value: 0x5
+ - Value: 0x4
+ - AbbrCode: 0x0
+ debug_line:
+ - Length: 74
+ Version: 4
+ PrologueLength: 39
+ MinInstLength: 1
+ MaxOpsPerInst: 1
+ DefaultIsStmt: 1
+ LineBase: 251
+ LineRange: 14
+ OpcodeBase: 13
+ StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+ IncludeDirs:
+ - out
+ Files:
+ - Name: file_01.cpp
+ DirIdx: 1
+ ModTime: 0
+ Length: 0
+ Opcodes:
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 9
+ SubOpcode: DW_LNE_set_address
+ Data: 584
+ - Opcode: 0x16
+ Data: 0
+ - Opcode: DW_LNS_set_column
+ Data: 7
+ - Opcode: DW_LNS_set_prologue_end
+ Data: 0
+ - Opcode: 0xBC
+ Data: 0
+ - Opcode: 0x4C
+ Data: 0
+ - Opcode: 0x49
+ Data: 0
+ - Opcode: 0x4D
+ Data: 0
+ - Opcode: 0x49
+ Data: 0
+ - Opcode: DW_LNS_set_column
+ Data: 5
+ - Opcode: DW_LNS_set_epilogue_begin
+ Data: 0
+ - Opcode: 0x46
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 8
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 1
+ SubOpcode: DW_LNE_end_sequence
+ Data: 0
+ - Length: 74
+ Version: 4
+ PrologueLength: 39
+ MinInstLength: 1
+ MaxOpsPerInst: 1
+ DefaultIsStmt: 1
+ LineBase: 251
+ LineRange: 14
+ OpcodeBase: 13
+ StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+ IncludeDirs:
+ - out
+ Files:
+ - Name: file_02.cpp
+ DirIdx: 1
+ ModTime: 0
+ Length: 0
+ Opcodes:
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 9
+ SubOpcode: DW_LNE_set_address
+ Data: 584
+ - Opcode: 0x16
+ Data: 0
+ - Opcode: DW_LNS_set_column
+ Data: 7
+ - Opcode: DW_LNS_set_prologue_end
+ Data: 0
+ - Opcode: 0xBC
+ Data: 0
+ - Opcode: 0x4C
+ Data: 0
+ - Opcode: 0x49
+ Data: 0
+ - Opcode: 0x4D
+ Data: 0
+ - Opcode: 0x49
+ Data: 0
+ - Opcode: DW_LNS_set_column
+ Data: 5
+ - Opcode: DW_LNS_set_epilogue_begin
+ Data: 0
+ - Opcode: 0x46
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 8
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 1
+ SubOpcode: DW_LNE_end_sequence
+ Data: 0
+ - Length: 74
+ Version: 4
+ PrologueLength: 39
+ MinInstLength: 1
+ MaxOpsPerInst: 1
+ DefaultIsStmt: 1
+ LineBase: 251
+ LineRange: 14
+ OpcodeBase: 13
+ StandardOpcodeLengths: [ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1 ]
+ IncludeDirs:
+ - out
+ Files:
+ - Name: file_03.cpp
+ DirIdx: 1
+ ModTime: 0
+ Length: 0
+ Opcodes:
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 9
+ SubOpcode: DW_LNE_set_address
+ Data: 584
+ - Opcode: 0x16
+ Data: 0
+ - Opcode: DW_LNS_set_column
+ Data: 7
+ - Opcode: DW_LNS_set_prologue_end
+ Data: 0
+ - Opcode: 0xBC
+ Data: 0
+ - Opcode: 0x4C
+ Data: 0
+ - Opcode: 0x49
+ Data: 0
+ - Opcode: 0x4D
+ Data: 0
+ - Opcode: 0x49
+ Data: 0
+ - Opcode: DW_LNS_set_column
+ Data: 5
+ - Opcode: DW_LNS_set_epilogue_begin
+ Data: 0
+ - Opcode: 0x46
+ Data: 0
+ - Opcode: DW_LNS_advance_pc
+ Data: 8
+ - Opcode: DW_LNS_extended_op
+ ExtLen: 1
+ SubOpcode: DW_LNE_end_sequence
+ Data: 0
+...
+
+
+###################################################################################
+############################# yaml generation script ##############################
+# #!/bin/bash
+# set -ex
+# # Global variable for LLVM toolchain path
+# TOOLCHAIN_PATH=[...]
+# # Switch to the script directory
+# cd "$(dirname "$0")"
+# # Create and clean 'out' directory
+# mkdir -p out
+# rm -rf out/*
+# # Generate C++ files
+# for i in {1..3}; do
+# cat << EOF > out/file_0${i}.cpp
+# extern "C" int my_func_0${i}(
+# int a,
+# int b)
+# {
+# return
+# a
+# *
+# a
+# +
+# b;
+# }
+# EOF
+# done
+# # Compile to object files with debug info
+# for i in {1..3}; do
+# ${TOOLCHAIN_PATH}/bin/clang++ \
+# -fno-unwind-tables -fno-asynchronous-unwind-tables \
+# -target arm64-apple-macos -c -g out/file_0${i}.cpp -o out/file_0${i}.o
+# done
+# # Link object files to dylib using lld directly
+# ${TOOLCHAIN_PATH}/bin/ld64.lld \
+# -dylib \
+# -arch arm64 \
+# -platform_version macos 11.0 11.0 \
+# -o out/libmyfuncs.dylib \
+# --icf=all --keep-icf-stabs \
+# -o out/mydylib out/file_01.o out/file_02.o out/file_03.o
+# # Create dsym with --flat option
+# "$TOOLCHAIN_PATH/bin/dsymutil" --flat out/mydylib -o out/mydylib.dSYM
+# # Convert dsym to yaml
+# "$TOOLCHAIN_PATH/bin/obj2yaml" out/mydylib.dSYM > out/mydylib.yaml
+###################################################################################
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 00a24cdb33fe1..2bb8ba9baf4aa 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -357,6 +357,11 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
if (auto Err = DT.convert(ThreadCount, Out))
return Err;
+ // Organize overlapping functions as children of top-level functions. Do this
+ // right after loading the DWARF data so we don't have to deal with functions
+ // from the symbol table.
+ Gsym.prepareMergedFunctions(Out);
+
// Get the UUID and convert symbol table to GSYM.
if (auto Err = ObjectFileTransformer::convert(Obj, Out, Gsym))
return Err;
>From 6eece1db2dba3c887c8d045b6a247230c44aee2b Mon Sep 17 00:00:00 2001
From: Alex B <alexborcan at meta.com>
Date: Fri, 2 Aug 2024 16:19:00 -0700
Subject: [PATCH 2/2] Add flag to control merged functions in gSYM and address
other feedback
---
.../llvm/DebugInfo/GSYM/MergedFunctionsInfo.h | 2 +-
.../DebugInfo/GSYM/MergedFunctionsInfo.cpp | 2 +-
.../ARM_AArch64/macho-merged-funcs-dwarf.yaml | 102 ++++++++++--------
llvm/tools/llvm-gsymutil/Opts.td | 2 +
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 12 ++-
5 files changed, 68 insertions(+), 52 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
index 1cb5e0a9e557a..11b45059c581a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -29,7 +29,7 @@ struct MergedFunctionsInfo {
/// Query if a MergedFunctionsInfo object is valid.
///
/// \returns A boolean indicating if this FunctionInfo is valid.
- bool isValid() { return true; }
+ bool isValid() { return !MergedFunctions.empty(); }
/// Decode an MergedFunctionsInfo object from a binary data stream.
///
diff --git a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
index 41e74840510b8..19d7bff8c932c 100644
--- a/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/MergedFunctionsInfo.cpp
@@ -1,4 +1,4 @@
-//===- LineTable.cpp --------------------------------------------*- C++ -*-===//
+//===- MergedFunctionsInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
index 3be64524a3d5b..c539d3b7a2210 100644
--- a/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
+++ b/llvm/test/tools/llvm-gsymutil/ARM_AArch64/macho-merged-funcs-dwarf.yaml
@@ -1,58 +1,68 @@
# RUN: yaml2obj %s -o %t.dSYM
-# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.gSYM
-# RUN: llvm-gsymutil --verify --verbose %t.gSYM | FileCheck --check-prefix=CHECK-GSYM %s
+
+## Verify that we don't keep merged functions by default
+# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.default.gSYM
+# RUN: llvm-gsymutil --verify --verbose %t.default.gSYM | FileCheck --check-prefix=CHECK-GSYM-DEFAULT %s
+
+## Verify that we keep merged functions when specifying --store-merged-function-info
+# RUN: llvm-gsymutil --convert %t.dSYM --out-file=%t.keep.gSYM --store-merged-function-info
+# RUN: llvm-gsymutil --verify --verbose %t.keep.gSYM | FileCheck --check-prefix=CHECK-GSYM-KEEP %s
## Note: For identical functions, the dSYM / gSYM cannot be counted on to be deterministic.
## So we can only match the general structure, not exact function names / offsets
-# CHECK-GSYM: Address Table:
-# CHECK-GSYM-NEXT: INDEX OFFSET16 (ADDRESS)
-# CHECK-GSYM-NEXT: ====== ===============================
-# CHECK-GSYM-NEXT: [ 0] 0x{{[0-9a-fA-F]+}}
+# CHECK-GSYM-DEFAULT-NOT: Merged FunctionInfos
+# CHECK-GSYM-DEFAULT: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+
+
+# CHECK-GSYM-KEEP: Address Table:
+# CHECK-GSYM-KEEP-NEXT: INDEX OFFSET16 (ADDRESS)
+# CHECK-GSYM-KEEP-NEXT: ====== ===============================
+# CHECK-GSYM-KEEP-NEXT: [ 0] 0x{{[0-9a-fA-F]+}}
-# CHECK-GSYM: Address Info Offsets:
-# CHECK-GSYM-NEXT: INDEX Offset
-# CHECK-GSYM-NEXT: ====== ==========
-# CHECK-GSYM-NEXT: [ 0] 0x{{[0-9a-fA-F]+}}
+# CHECK-GSYM-KEEP: Address Info Offsets:
+# CHECK-GSYM-KEEP-NEXT: INDEX Offset
+# CHECK-GSYM-KEEP-NEXT: ====== ==========
+# CHECK-GSYM-KEEP-NEXT: [ 0] 0x{{[0-9a-fA-F]+}}
-# CHECK-GSYM: Files:
-# CHECK-GSYM-NEXT: INDEX DIRECTORY BASENAME PATH
-# CHECK-GSYM-NEXT: ====== ========== ========== ==============================
-# CHECK-GSYM-NEXT: [ 0] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}}
-# CHECK-GSYM-NEXT: [ 1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
-# CHECK-GSYM-NEXT: [ 2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
-# CHECK-GSYM-NEXT: [ 3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
+# CHECK-GSYM-KEEP: Files:
+# CHECK-GSYM-KEEP-NEXT: INDEX DIRECTORY BASENAME PATH
+# CHECK-GSYM-KEEP-NEXT: ====== ========== ========== ==============================
+# CHECK-GSYM-KEEP-NEXT: [ 0] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}}
+# CHECK-GSYM-KEEP-NEXT: [ 1] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
+# CHECK-GSYM-KEEP-NEXT: [ 2] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
+# CHECK-GSYM-KEEP-NEXT: [ 3] 0x{{[0-9a-fA-F]+}} 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp
-# CHECK-GSYM: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
-# CHECK-GSYM-NEXT: LineTable:
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
-# CHECK-GSYM-NEXT: ++ Merged FunctionInfos[0]:
-# CHECK-GSYM-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
-# CHECK-GSYM-NEXT: LineTable:
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
-# CHECK-GSYM-NEXT: ++ Merged FunctionInfos[1]:
-# CHECK-GSYM-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
-# CHECK-GSYM-NEXT: LineTable:
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
-# CHECK-GSYM-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-KEEP: FunctionInfo @ 0x{{[0-9a-fA-F]+}}: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-KEEP-NEXT: LineTable:
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-KEEP-NEXT: ++ Merged FunctionInfos[0]:
+# CHECK-GSYM-KEEP-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-KEEP-NEXT: LineTable:
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
+# CHECK-GSYM-KEEP-NEXT: ++ Merged FunctionInfos[1]:
+# CHECK-GSYM-KEEP-NEXT: [0x{{[0-9a-fA-F]+}} - 0x{{[0-9a-fA-F]+}}) "my_func_0{{[1-3]}}"
+# CHECK-GSYM-KEEP-NEXT: LineTable:
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:5
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:7
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:9
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:8
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:11
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:10
+# CHECK-GSYM-KEEP-NEXT: 0x{{[0-9a-fA-F]+}} /tmp/test_gsym_yaml/out/file_0{{[1-3]}}.cpp:6
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 3aabc8029ccbe..d02e3a22a62b2 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -16,6 +16,8 @@ def verbose : FF<"verbose", "Enable verbose logging and encoding details">;
defm convert :
Eq<"convert",
"Convert the specified file to the GSYM format.\nSupported files include ELF and mach-o files that will have their debug info (DWARF) and symbol table converted">;
+def store_merged_function_info :
+ FF<"store-merged-function-info", "Store all functions with overlapping address ranges in the GSYM, instead of keeping only one per range (default).">;
defm arch :
Eq<"arch",
"Process debug information for the specified CPU architecture only.\nArchitectures may be specified by name or by number.\nThis option can be specified multiple times, once for each desired architecture">;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 2bb8ba9baf4aa..2e96b1aea2015 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -95,6 +95,7 @@ static uint64_t SegmentSize;
static bool Quiet;
static std::vector<uint64_t> LookupAddresses;
static bool LookupAddressesFromStdin;
+static bool StoreMergedFunctionInfo;
static void parseArgs(int argc, char **argv) {
GSYMUtilOptTable Tbl;
@@ -175,6 +176,7 @@ static void parseArgs(int argc, char **argv) {
}
LookupAddressesFromStdin = Args.hasArg(OPT_addresses_from_stdin);
+ StoreMergedFunctionInfo = Args.hasArg(OPT_store_merged_function_info);
}
/// @}
@@ -357,10 +359,12 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
if (auto Err = DT.convert(ThreadCount, Out))
return Err;
- // Organize overlapping functions as children of top-level functions. Do this
- // right after loading the DWARF data so we don't have to deal with functions
- // from the symbol table.
- Gsym.prepareMergedFunctions(Out);
+ // If enabled, store functions that share an identical address range as
+ // merged functions of the first FunctionInfo with that range. Do this right
+ // after loading the DWARF data so we don't have to deal with functions from
+ // the symbol table.
+ if (StoreMergedFunctionInfo)
+ Gsym.prepareMergedFunctions(Out);
// Get the UUID and convert symbol table to GSYM.
if (auto Err = ObjectFileTransformer::convert(Obj, Out, Gsym))
More information about the llvm-commits
mailing list