[llvm] Make GSYM 64 bit safe and add a new version 2 of the GSYM files. (PR #189119)
Roy Shi via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 30 09:57:37 PDT 2026
https://github.com/royitaqi updated https://github.com/llvm/llvm-project/pull/189119
>From 6ac553ed22bfd58c21256cf7f2e9f55d8fee963e Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 10:49:41 -0700
Subject: [PATCH 01/45] Create V2 classes
---
.../llvm/DebugInfo/GSYM/GsymCreatorV2.h | 496 ++++++++++++++
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 428 ++++++++++++
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 130 ++++
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 627 ++++++++++++++++++
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 558 ++++++++++++++++
5 files changed, 2239 insertions(+)
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
create mode 100644 llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
create mode 100644 llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
new file mode 100644
index 0000000000000..5316e2f131553
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
@@ -0,0 +1,496 @@
+//===- GsymCreatorV2.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATORV2_H
+#define LLVM_DEBUGINFO_GSYM_GSYMCREATORV2_H
+
+#include "llvm/Support/Compiler.h"
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "llvm/ADT/AddressRanges.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace gsym {
+class FileWriter;
+class OutputAggregator;
+
+/// GsymCreatorV2 is used to emit GSYM V2 data to a stand alone file or section
+/// within a file.
+///
+/// The GsymCreatorV2 is designed to be used in 3 stages:
+/// - Create FunctionInfo objects and add them
+/// - Finalize the GsymCreatorV2 object
+/// - Save to file or section
+///
+/// The first stage involves creating FunctionInfo objects from another source
+/// of information like compiler debug info metadata, DWARF or Breakpad files.
+/// Any strings in the FunctionInfo or contained information, like InlineInfo
+/// or LineTable objects, should get the string table offsets by calling
+/// GsymCreatorV2::insertString(...). Any file indexes that are needed should be
+/// obtained by calling GsymCreatorV2::insertFile(...). All of the function calls
+/// in GsymCreatorV2 are thread safe. This allows multiple threads to create and
+/// add FunctionInfo objects while parsing debug information.
+///
+/// Once all of the FunctionInfo objects have been added, the
+/// GsymCreatorV2::finalize(...) must be called prior to saving. This function
+/// will sort the FunctionInfo objects, finalize the string table, and do any
+/// other passes on the information needed to prepare the information to be
+/// saved.
+///
+/// Once the object has been finalized, it can be saved to a file or section.
+///
+/// ENCODING
+///
+/// GSYM files are designed to be memory mapped into a process as shared, read
+/// only data, and used as is.
+///
+/// The GSYM file format when in a stand alone file consists of:
+/// - Header
+/// - Address Table
+/// - Function Info Offsets
+/// - File Table
+/// - String Table
+/// - Function Info Data
+///
+/// HEADER
+///
+/// The header is fully described in "llvm/DebugInfo/GSYM/HeaderV2.h".
+///
+/// ADDRESS TABLE
+///
+/// The address table immediately follows the header in the file and consists
+/// of Header.NumAddresses address offsets. These offsets are sorted and can be
+/// binary searched for efficient lookups. Addresses in the address table are
+/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
+/// This allows the address table to contain 8, 16, or 32 bit offsets, so the
+/// address table does not require full 64 bit addresses for each address.
+/// The resulting GSYM size is smaller and causes fewer pages to be touched
+/// during address lookups when the address table is smaller. The size of the
+/// address offsets in the address table is specified in the header in
+/// Header.AddrOffSize. The first offset in the address table is aligned to
+/// Header.AddrOffSize alignment to ensure efficient access when loaded into
+/// memory.
+///
+/// FUNCTION INFO OFFSETS TABLE
+///
+/// The function info offsets table immediately follows the address table and
+/// consists of Header.NumAddresses 32 bit file offsets: one for each address
+/// in the address table. This data is aligned to a 4 byte boundary. The
+/// offsets in this table are the relative offsets from the start offset of the
+/// GSYM header and point to the function info data for each address in the
+/// address table. Keeping this data separate from the address table helps to
+/// reduce the number of pages that are touched when address lookups occur on a
+/// GSYM file.
+///
+/// FILE TABLE
+///
+/// The file table immediately follows the function info offsets table. The
+/// encoding of the FileTable is:
+///
+/// struct FileTable {
+/// uint32_t Count;
+/// FileEntry Files[];
+/// };
+///
+/// The file table starts with a 32 bit count of the number of files that are
+/// used in all of the function info, followed by that number of FileEntry
+/// structures. The file table is aligned to a 4 byte boundary. Each file in
+/// the file table is represented with a FileEntry structure.
+/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
+///
+/// STRING TABLE
+///
+/// The string table follows the file table in stand alone GSYM files and
+/// contains all strings for everything contained in the GSYM file. Any string
+/// data should be added to the string table and any references to strings
+/// inside GSYM information must be stored as 32 bit string table offsets into
+/// this string table. The string table always starts with an empty string at
+/// offset zero and is followed by any strings needed by the GSYM information.
+/// The start of the string table is not aligned to any boundary.
+///
+/// FUNCTION INFO DATA
+///
+/// The function info data is the payload that contains information about the
+/// address that is being looked up. It contains all of the encoded
+/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
+/// entry in the Function Info Offsets Table. For details on the exact encoding
+/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
+class GsymCreatorV2 {
+ // Private member variables require Mutex protections
+ mutable std::mutex Mutex;
+ std::vector<FunctionInfo> Funcs;
+ StringTableBuilder StrTab;
+ StringSet<> StringStorage;
+ DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
+ // Needed for mapping string offsets back to the string stored in \a StrTab.
+ DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
+ std::vector<llvm::gsym::FileEntry> Files;
+ std::vector<uint8_t> UUID;
+ std::optional<AddressRanges> ValidTextRanges;
+ std::optional<uint64_t> BaseAddress;
+ bool IsSegment = false;
+ bool Finalized = false;
+ bool Quiet;
+
+
+ /// Get the first function start address.
+ ///
+ /// \returns The start address of the first FunctionInfo or std::nullopt if
+ /// there are no function infos.
+ std::optional<uint64_t> getFirstFunctionAddress() const;
+
+ /// Get the last function address.
+ ///
+ /// \returns The start address of the last FunctionInfo or std::nullopt if
+ /// there are no function infos.
+ std::optional<uint64_t> getLastFunctionAddress() const;
+
+ /// Get the base address to use for this GSYM file.
+ ///
+ /// \returns The base address to put into the header and to use when creating
+ /// the address offset table or std::nullopt if there are no valid
+ /// function infos or if the base address wasn't specified.
+ std::optional<uint64_t> getBaseAddress() const;
+
+ /// Get the size of an address offset in the address offset table.
+ ///
+ /// GSYM files store offsets from the base address in the address offset table
+ /// and we store the size of the address offsets in the GSYM header. This
+ /// function will calculate the size in bytes of these address offsets based
+ /// on the current contents of the GSYM file.
+ ///
+ /// \returns The size in bytes of the address offsets.
+ uint8_t getAddressOffsetSize() const;
+
+ /// Get the maximum address offset for the current address offset size.
+ ///
+ /// This is used when creating the address offset table to ensure we have
+ /// values that are in range so we don't end up truncating address offsets
+ /// when creating GSYM files as the code evolves.
+ ///
+ /// \returns The maximum address offset value that will be encoded into a GSYM
+ /// file.
+ uint64_t getMaxAddressOffset() const;
+
+ /// Calculate the byte size of the GSYM header and tables.
+ ///
+ /// This function will calculate the exact size in bytes of the encoded GSYM
+ /// for the following items:
+ /// - The GSYM header
+ /// - The Address offset table
+ /// - The Address info offset table
+ /// - The file table
+ /// - The string table
+ ///
+ /// This is used to help split GSYM files into segments.
+ ///
+ /// \returns Size in bytes of the GSYM header and tables.
+ uint64_t calculateHeaderAndTableSize() const;
+
+ /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
+ ///
+ /// Copy the function info and only the needed files and strings and add a
+ /// converted FunctionInfo into this object. This is used to segment GSYM
+ /// files into separate files while only transferring the files and strings
+ /// that are needed from \a SrcGC.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
+ /// \returns The number of bytes it will take to encode the function info in
+ /// this GsymCreatorV2. This helps calculate the size of the current GSYM
+ /// segment file.
+ uint64_t copyFunctionInfo(const GsymCreatorV2 &SrcGC, size_t FuncInfoIdx);
+
+ /// Copy a string from \a SrcGC into this object.
+ ///
+ /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
+ /// If a string has already been copied, the uniqued string table offset will
+ /// be returned, otherwise the string will be copied and a unique offset will
+ /// be returned.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param StrOff The string table offset from \a SrcGC to copy.
+ /// \returns The new string table offset of the string within this object.
+ uint32_t copyString(const GsymCreatorV2 &SrcGC, uint32_t StrOff);
+
+ /// Copy a file from \a SrcGC into this object.
+ ///
+ /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
+ /// consist of two string table entries, one for the directory and one for the
+ /// filename, this function will copy any needed strings to ensure the file is
+ /// uniqued within this object. If a file already exists in this GSYM creator
+ /// the uniqued index will be returned, else the strings will be copied and
+ /// the new file index will be returned.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
+ /// file index of zero will always return zero as the zero is a reserved file
+ /// index that means no file.
+ /// \returns The new file index of the file within this object.
+ uint32_t copyFile(const GsymCreatorV2 &SrcGC, uint32_t FileIdx);
+
+ /// Inserts a FileEntry into the file table.
+ ///
+ /// This is used to insert a file entry in a thread safe way into this object.
+ ///
+ /// \param FE A file entry object that contains valid string table offsets
+ /// from this object already.
+ uint32_t insertFileEntry(FileEntry FE);
+
+ /// Fixup any string and file references by updating any file indexes and
+ /// strings offsets in the InlineInfo parameter.
+ ///
+ /// When copying InlineInfo entries, we can simply make a copy of the object
+ /// and then fixup the files and strings for efficiency.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param II The inline info that contains file indexes and string offsets
+ /// that come from \a SrcGC. The entries will be updated by copying any files
+ /// and strings over into this object.
+ void fixupInlineInfo(const GsymCreatorV2 &SrcGC, InlineInfo &II);
+
+ /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
+ ///
+ /// When segmented GSYM files are saved to disk, they will use \a Path as a
+ /// prefix and then have the first function info address appended to the path
+ /// when each segment is saved. Each segmented GSYM file has only the
+ /// strings and files that are needed to save the function infos that are in
+ /// each segment. These smaller files are easy to compress and download
+ /// separately and allow for efficient lookups with very large GSYM files and
+ /// segmenting them allows servers to download only the segments that are
+ /// needed.
+ ///
+ /// \param Path The path prefix to use when saving the GSYM files.
+ /// \param ByteOrder The endianness to use when saving the file.
+ /// \param SegmentSize The size in bytes to segment the GSYM file into.
+ llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
+ uint64_t SegmentSize) const;
+
+ /// Let this creator know that this is a segment of another GsymCreatorV2.
+ ///
+ /// When we have a segment, we know that function infos will be added in
+ /// ascending address range order without having to be finalized. We also
+ /// don't need to sort and unique entries during the finalize function call.
+ void setIsSegment() {
+ IsSegment = true;
+ }
+
+public:
+ LLVM_ABI GsymCreatorV2(bool Quiet = false);
+
+ /// Save a GSYM file to a stand alone file.
+ ///
+ /// \param Path The file path to save the GSYM file to.
+ /// \param ByteOrder The endianness to use when saving the file.
+ /// \param SegmentSize The size in bytes to segment the GSYM file into. If
+ /// this option is set this function will create N segments
+ /// that are all around \a SegmentSize bytes in size. This
+ /// allows a very large GSYM file to be broken up into
+ /// shards. Each GSYM file will have its own file table,
+ /// and string table that only have the files and strings
+ /// needed for the shard. If this argument has no value,
+ /// a single GSYM file that contains all function
+ /// information will be created.
+ /// \returns An error object that indicates success or failure of the save.
+ LLVM_ABI llvm::Error
+ save(StringRef Path, llvm::endianness ByteOrder,
+ std::optional<uint64_t> SegmentSize = std::nullopt) const;
+
+ /// Encode a GSYM into the file writer stream at the current position.
+ ///
+ /// \param O The stream to save the binary data to
+ /// \returns An error object that indicates success or failure of the save.
+ LLVM_ABI llvm::Error encode(FileWriter &O) const;
+
+ /// Insert a string into the GSYM string table.
+ ///
+ /// All strings used by GSYM files must be uniqued by adding them to this
+ /// string pool and using the returned offset for any string values.
+ ///
+ /// \param S The string to insert into the string table.
+ /// \param Copy If true, then make a backing copy of the string. If false,
+ /// the string is owned by another object that will stay around
+ /// long enough for the GsymCreatorV2 to save the GSYM file.
+ /// \returns The unique 32 bit offset into the string table.
+ LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true);
+
+ /// Retrieve a string from the GSYM string table given its offset.
+ ///
+ /// The offset is assumed to be a valid offset into the string table,
+ /// otherwise an assert will be triggered.
+ ///
+ /// \param Offset The offset of the string to retrieve, previously returned by
+ /// insertString.
+ /// \returns The string at the given offset in the string table.
+ LLVM_ABI StringRef getString(uint32_t Offset);
+
+ /// Insert a file into this GSYM creator.
+ ///
+ /// Inserts a file by adding a FileEntry into the "Files" member variable if
+ /// the file has not already been added. The file path is split into
+ /// directory and filename which are both added to the string table. This
+ /// allows paths to be stored efficiently by reusing the directories that are
+ /// common between multiple files.
+ ///
+ /// \param Path The path to the file to insert.
+ /// \param Style The path style for the "Path" parameter.
+ /// \returns The unique file index for the inserted file.
+ LLVM_ABI uint32_t
+ insertFile(StringRef Path, sys::path::Style Style = sys::path::Style::native);
+
+ /// Add a function info to this GSYM creator.
+ ///
+ /// All information in the FunctionInfo object must use the
+ /// GsymCreatorV2::insertString(...) function when creating string table
+ /// offsets for names and other strings.
+ ///
+ /// \param FI The function info object to emplace into our functions list.
+ LLVM_ABI void addFunctionInfo(FunctionInfo &&FI);
+
+ /// Load call site information from a YAML file.
+ ///
+ /// This function reads call site information from a specified YAML file and
+ /// adds it to the GSYM data.
+ ///
+ /// \param YAMLFile The path to the YAML file containing call site
+ /// information.
+ LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile);
+
+ /// Organize merged FunctionInfo's
+ ///
+ /// This method processes the list of function infos (Funcs) to identify and
+ /// group functions with overlapping address ranges.
+ ///
+ /// \param Out Output stream to report information about how merged
+ /// FunctionInfo's were handled.
+ LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
+
+ /// Finalize the data in the GSYM creator prior to saving the data out.
+ ///
+ /// Finalize must be called after all FunctionInfo objects have been added
+ /// and before GsymCreatorV2::save() is called.
+ ///
+ /// \param OS Output stream to report duplicate function infos, overlapping
+ /// function infos, and function infos that were merged or removed.
+ /// \returns An error object that indicates success or failure of the
+ /// finalize.
+ LLVM_ABI llvm::Error finalize(OutputAggregator &OS);
+
+ /// Set the UUID value.
+ ///
+ /// \param UUIDBytes The new UUID bytes.
+ void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
+ UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
+ }
+
+ /// Thread safe iteration over all function infos.
+ ///
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
+ LLVM_ABI void
+ forEachFunctionInfo(std::function<bool(FunctionInfo &)> const &Callback);
+
+ /// Thread safe const iteration over all function infos.
+ ///
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
+ LLVM_ABI void forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const;
+
+ /// Get the current number of FunctionInfo objects contained in this
+ /// object.
+ LLVM_ABI size_t getNumFunctionInfos() const;
+
+ /// Set valid .text address ranges that all functions must be contained in.
+ void SetValidTextRanges(AddressRanges &TextRanges) {
+ ValidTextRanges = TextRanges;
+ }
+
+ /// Get the valid text ranges.
+ const std::optional<AddressRanges> GetValidTextRanges() const {
+ return ValidTextRanges;
+ }
+
+ /// Check if an address is a valid code address.
+ ///
+ /// Any functions whose addresses do not exist within these function bounds
+ /// will not be converted into the final GSYM. This allows the object file
+ /// to figure out the valid file address ranges of all the code sections
+ /// and ensure we don't add invalid functions to the final output. Many
+ /// linkers have issues when dead stripping functions from DWARF debug info
+ /// where they set the DW_AT_low_pc to zero, but newer DWARF has the
+ /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size
+ /// attributes have no relocations that can be applied. This results in DWARF
+ /// where many functions have an DW_AT_low_pc of zero and a valid offset size
+ /// for DW_AT_high_pc. If we extract all valid ranges from an object file
+ /// that are marked with executable permissions, we can properly ensure that
+ /// these functions are removed.
+ ///
+ /// \param Addr An address to check.
+ ///
+ /// \returns True if the address is in the valid text ranges or if no valid
+ /// text ranges have been set, false otherwise.
+ LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const;
+
+ /// Set the base address to use for the GSYM file.
+ ///
+ /// Setting the base address to use for the GSYM file. Object files typically
+ /// get loaded from a base address when the OS loads them into memory. Using
+ /// GSYM files for symbolication becomes easier if the base address in the
+ /// GSYM header is the same address as it allows addresses to be easily slid
+ /// and allows symbolication without needing to find the original base
+ /// address in the original object file.
+ ///
+ /// \param Addr The address to use as the base address of the GSYM file
+ /// when it is saved to disk.
+ void setBaseAddress(uint64_t Addr) {
+ BaseAddress = Addr;
+ }
+
+ /// Whether the transformation should be quiet, i.e. not output warnings.
+ bool isQuiet() const { return Quiet; }
+
+
+ /// Create a segmented GSYM creator starting with function info index
+ /// \a FuncIdx.
+ ///
+ /// This function will create a GsymCreatorV2 object that will encode into
+ /// roughly \a SegmentSize bytes and return it. It is used by the private
+ /// saveSegments(...) function and also is used by the GSYM unit tests to test
+ /// segmenting of GSYM files. The returned GsymCreatorV2 can be finalized and
+ /// encoded.
+ ///
+ /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
+ /// into.
+ /// \param [in,out] FuncIdx The index of the first function info to encode
+ /// into the returned GsymCreatorV2. This index will be updated so it can be
+ /// used in subsequent calls to this function to allow more segments to be
+ /// created.
+ /// \returns An expected unique pointer to a GsymCreatorV2 or an error. The
+ /// returned unique pointer can be NULL if there are no more functions to
+ /// encode.
+ LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreatorV2>>
+ createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_GSYMCREATORV2_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
new file mode 100644
index 0000000000000..de1ed0481cbbd
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -0,0 +1,428 @@
+//===- GsymReaderV2.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADERV2_H
+#define LLVM_DEBUGINFO_GSYM_GSYMREADERV2_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/HeaderV2.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorOr.h"
+#include <inttypes.h>
+#include <memory>
+#include <stdint.h>
+#include <vector>
+
+namespace llvm {
+class MemoryBuffer;
+class raw_ostream;
+
+namespace gsym {
+
+/// GsymReaderV2 is used to read GSYM V2 data from a file or buffer.
+///
+/// This class is optimized for very quick lookups when the endianness matches
+/// the host system. The HeaderV2, address table, address info offsets, and file
+/// table are designed to be mmap'ed as read only into memory and used without
+/// any parsing needed. If the endianness doesn't match, we swap these objects
+/// and tables into GsymReaderV2::SwappedData and then point our header and
+/// ArrayRefs to this swapped internal data.
+///
+/// GsymReaderV2 objects must use one of the static functions to create an
+/// instance: GsymReaderV2::openFile(...) and GsymReaderV2::copyBuffer(...).
+
+class GsymReaderV2 {
+ GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer);
+ llvm::Error parse();
+
+ std::unique_ptr<MemoryBuffer> MemBuffer;
+ StringRef GsymBytes;
+ llvm::endianness Endian;
+ const HeaderV2 *Hdr = nullptr;
+ ArrayRef<uint8_t> AddrOffsets;
+ ArrayRef<uint32_t> AddrInfoOffsets;
+ ArrayRef<FileEntry> Files;
+ StringTable StrTab;
+ /// When the GSYM file's endianness doesn't match the host system then
+ /// we must decode all data structures that need to be swapped into
+ /// local storage and point the ArrayRef objects above to these swapped
+ /// copies.
+ struct SwappedData {
+ HeaderV2 Hdr;
+ std::vector<uint8_t> AddrOffsets;
+ std::vector<uint32_t> AddrInfoOffsets;
+ std::vector<FileEntry> Files;
+ };
+ std::unique_ptr<SwappedData> Swap;
+
+public:
+ LLVM_ABI GsymReaderV2(GsymReaderV2 &&RHS);
+ LLVM_ABI ~GsymReaderV2();
+
+ /// Construct a GsymReaderV2 from a file on disk.
+ ///
+ /// \param Path The file path of the GSYM file to read.
+ /// \returns An expected GsymReaderV2 that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ LLVM_ABI static llvm::Expected<GsymReaderV2> openFile(StringRef Path);
+
+ /// Construct a GsymReaderV2 from a buffer.
+ ///
+ /// \param Bytes A set of bytes that will be copied and owned by the
+ /// returned object on success.
+ /// \returns An expected GsymReaderV2 that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ LLVM_ABI static llvm::Expected<GsymReaderV2> copyBuffer(StringRef Bytes);
+
+ /// Access the GSYM header.
+ /// \returns A native endian version of the GSYM header.
+ LLVM_ABI const HeaderV2 &getHeader() const;
+
+ /// Get the full function info for an address.
+ ///
+ /// This should be called when a client will store a copy of the complete
+ /// FunctionInfo for a given address. For one off lookups, use the lookup()
+ /// function below.
+ ///
+ /// Symbolication server processes might want to parse the entire function
+ /// info for a given address and cache it if the process stays around to
+ /// service many symbolication addresses, like for parsing profiling
+ /// information.
+ ///
+ /// \param Addr A virtual address from the original object file to lookup.
+ ///
+ /// \returns An expected FunctionInfo that contains the function info object
+ /// or an error object that indicates reason for failing to lookup the
+ /// address.
+ LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+
+ /// Get the full function info given an address index.
+ ///
+ /// \param AddrIdx An address index for an address in the address table.
+ ///
+ /// \returns An expected FunctionInfo that contains the function info object
+ /// or an error object that indicates reason for failing to get the function
+ /// info object.
+ LLVM_ABI llvm::Expected<FunctionInfo>
+ getFunctionInfoAtIndex(uint64_t AddrIdx) const;
+
+ /// Lookup an address in a GSYM.
+ ///
+ /// Lookup just the information needed for a specific address \a Addr. This
+ /// function is faster than calling getFunctionInfo() as it will only return
+ /// information that pertains to \a Addr and allows the parsing to skip any
+ /// extra information encoded for other addresses. For example the line table
+ /// parsing can stop when a matching LineEntry has been found, and the
+ /// InlineInfo can stop parsing early once a match has been found and also
+ /// skip information that doesn't match. This avoids memory allocations and
+ /// is much faster for lookups.
+ ///
+ /// \param Addr A virtual address from the original object file to lookup.
+ ///
+ /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
+ /// non-null, will be set to the raw data of the MergedFunctionInfo, if
+ /// present.
+ ///
+ /// \returns An expected LookupResult that contains only the information
+ /// needed for the current address, or an error object that indicates reason
+ /// for failing to lookup the address.
+ LLVM_ABI llvm::Expected<LookupResult>
+ lookup(uint64_t Addr,
+ std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
+
+ /// Lookup all merged functions for a given address.
+ ///
+ /// This function performs a lookup for the specified address and then
+ /// retrieves additional LookupResults from any merged functions associated
+ /// with the primary LookupResult.
+ ///
+ /// \param Addr The address to lookup.
+ ///
+ /// \returns A vector of LookupResult objects, where the first element is the
+ /// primary result, followed by results for any merged functions
+ LLVM_ABI llvm::Expected<std::vector<LookupResult>>
+ lookupAll(uint64_t Addr) const;
+
+ /// Get a string from the string table.
+ ///
+ /// \param Offset The string table offset for the string to retrieve.
+ /// \returns The string from the string table.
+ StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+
+ /// Look up a file entry by its file index.
+ ///
+ /// File indexes stored in the FunctionInfo data are converted back into
+ /// files with this function. It can be used for iteration, but is more
+ /// commonly used for random access when doing lookups.
+ ///
+ /// \param Index An index into the file table.
+ /// \returns An optional FileEntry that will be valid if the file index is
+ /// valid, or std::nullopt if the file index is out of bounds.
+ std::optional<FileEntry> getFile(uint32_t Index) const {
+ if (Index >= Files.size())
+ return std::nullopt;
+ return Files[Index];
+ }
+
+ /// Dump the entire Gsym data contained in this object.
+ ///
+ /// \param OS The output stream to dump to.
+ LLVM_ABI void dump(raw_ostream &OS);
+
+ /// Dump a FunctionInfo object.
+ ///
+ /// This function will convert any string table indexes and file indexes
+ /// into human readable format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param FI The object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item within MergedFunctionsInfo.
+ LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
+ uint32_t Indent = 0);
+
+ /// Dump a MergedFunctionsInfo object.
+ ///
+ /// This function will dump a MergedFunctionsInfo object - basically by
+ /// dumping the contained FunctionInfo objects with indentation.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param MFI The object to dump.
+ LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
+
+ /// Dump a CallSiteInfo object.
+ ///
+ /// This function will output the details of a CallSiteInfo object in a
+ /// human-readable format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param CSI The CallSiteInfo object to dump.
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
+
+ /// Dump a CallSiteInfoCollection object.
+ ///
+ /// This function will iterate over a collection of CallSiteInfo objects and
+ /// dump each one.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param CSIC The CallSiteInfoCollection object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item from within MergedFunctionsInfo.
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent = 0);
+
+  /// Dump a LineTable object.
+  ///
+  /// This function will convert any string table indexes and file indexes
+  /// into human readable format.
+  ///
+  /// \param OS The output stream to dump to.
+  ///
+  /// \param LT The object to dump.
+  ///
+  /// \param Indent The indentation as number of spaces. Used when dumping as an
+  /// item from within MergedFunctionsInfo.
+  LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
+                     uint32_t Indent = 0);
+
+ /// Dump an InlineInfo object.
+ ///
+ /// This function will convert any string table indexes and file indexes
+ /// into human readable format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param II The object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used for recursive
+ /// dumping.
+ LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
+ uint32_t Indent = 0);
+
+ /// Dump a FileEntry object.
+ ///
+ /// This function will convert any string table indexes into human readable
+ /// format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param FE The object to dump.
+ LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
+
+ /// Get the number of addresses in this Gsym file, as recorded in the header.
+ uint32_t getNumAddresses() const {
+ return Hdr->NumAddresses;
+ }
+
+ /// Gets an address from the address table.
+ ///
+ /// Addresses are stored as offsets from the gsym::HeaderV2::BaseAddress.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns A resolved virtual address for the address in the address table
+ /// or std::nullopt if Index is out of bounds.
+ LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
+
+protected:
+
+  /// View the address offsets table as an array of a given integer type.
+  ///
+  /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+  /// byte offsets from the gsym::HeaderV2::BaseAddress. Internally the table
+  /// is kept as an array of bytes that are in the correct endianness, so any
+  /// access has to go through an array whose element size matches. This
+  /// templatized helper produces that array from the AddrOffsets member
+  /// variable.
+  ///
+  /// \returns An ArrayRef of an appropriate address offset size.
+  template <class T> ArrayRef<T> getAddrOffsets() const {
+    const T *FirstOffset = reinterpret_cast<const T *>(AddrOffsets.data());
+    const size_t NumOffsets = AddrOffsets.size() / sizeof(T);
+    return ArrayRef<T>(FirstOffset, NumOffsets);
+  }
+
+  /// Resolve one entry of the address table to a virtual address.
+  ///
+  /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+  /// byte address offsets from the gsym::HeaderV2::BaseAddress. Internally the
+  /// table is kept as an array of bytes that are in the correct endianness.
+  /// Extracting an address therefore means reading the address offset with
+  /// the correct size and then adding it to the BaseAddress in the header.
+  ///
+  /// \tparam T The integer type matching the size of one address offset.
+  /// \param Index An index into the AddrOffsets array.
+  /// \returns The virtual address, matching the original object file, for the
+  /// entry at the specified index, or std::nullopt if Index is out of bounds.
+  template <class T>
+  std::optional<uint64_t> addressForIndex(size_t Index) const {
+    const ArrayRef<T> Offsets = getAddrOffsets<T>();
+    if (Index >= Offsets.size())
+      return std::nullopt;
+    return Offsets[Index] + Hdr->BaseAddress;
+  }
+  /// Lookup an address offset in the AddrOffsets table.
+  ///
+  /// Given an address offset, look it up using a binary search of the
+  /// AddrOffsets table.
+  ///
+  /// \param AddrOffset An address offset, that has already been computed by
+  /// subtracting the gsym::HeaderV2::BaseAddress.
+  /// \returns The index of the matching entry, used to extract the
+  /// FunctionInfo data's offset from AddrInfoOffsets, or std::nullopt if none.
+  template <class T>
+  std::optional<uint64_t>
+  getAddressOffsetIndex(const uint64_t AddrOffset) const {
+    ArrayRef<T> AIO = getAddrOffsets<T>();
+    const auto Begin = AIO.begin();
+    const auto End = AIO.end();
+    // An empty table has no entry to match and must never be dereferenced.
+    if (Begin == End)
+      return std::nullopt;
+    auto Iter = std::lower_bound(Begin, End, AddrOffset);
+    // Watch for addresses that fall between the gsym::HeaderV2::BaseAddress and
+    // the first address offset.
+    if (Iter == Begin && AddrOffset < *Begin)
+      return std::nullopt;
+    // lower_bound() returns the first entry that is not less than AddrOffset;
+    // step back one entry when that entry lies beyond AddrOffset.
+    if (Iter == End || AddrOffset < *Iter)
+      --Iter;
+
+    // GSYM files have sorted function infos with the most information (line
+    // table and/or inline info) first in the array of function infos, so
+    // always backup as much as possible as long as the address offset is the
+    // same as the previous entry.
+    while (Iter != Begin && *(Iter - 1) == *Iter)
+      --Iter;
+
+    return std::distance(Begin, Iter);
+  }
+
+ /// Create a GSYM from a memory buffer.
+ ///
+ /// Called by both openFile() and copyBuffer(), this function does all of the
+ /// work of parsing the GSYM file and returning an error.
+ ///
+ /// \param MemBuffer A memory buffer that will transfer ownership into the
+ /// GsymReaderV2.
+ /// \returns An expected GsymReaderV2 that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ LLVM_ABI static llvm::Expected<llvm::gsym::GsymReaderV2>
+ create(std::unique_ptr<MemoryBuffer> &MemBuffer);
+
+ /// Given an address, find the address index.
+ ///
+ /// Binary search the address table and find the matching address index.
+ ///
+ /// \param Addr A virtual address that matches the original object file
+ /// to lookup.
+ /// \returns An index into the address table. This index can be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
+ /// Returns an error if the address isn't in the GSYM with details of why.
+ LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
+
+ /// Given an address index, get the offset for the FunctionInfo.
+ ///
+ /// Looking up an address is done by finding the corresponding address
+ /// index for the address. This index is then used to get the offset of the
+ /// FunctionInfo data that we will decode using this function.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns An optional GSYM data offset for the offset of the FunctionInfo
+ /// that needs to be decoded.
+ LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
+
+ /// Given an address, find the correct function info data and function
+ /// address.
+ ///
+ /// Binary search the address table and find the matching address info
+ /// and make sure that the function info contains the address. GSYM allows
+ /// functions to overlap, and the most debug info is contained in the first
+ /// entries due to the sorting when GSYM files are created. We can have
+ /// multiple function info that start at the same address only if their
+ /// address range doesn't match. So find the first entry that matches \a Addr
+ /// and iterate forward until we find one that contains the address.
+ ///
+ /// \param[in] Addr A virtual address that matches the original object file
+ /// to lookup.
+ ///
+ /// \param[out] FuncStartAddr A virtual address that is the base address of
+ /// the function that is used for decoding the FunctionInfo.
+ ///
+ /// \returns A valid data extractor on success, or an error if we fail to
+ /// find the address in a function info or correctly decode the data.
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
+
+ /// Get the function data and address given an address index.
+ ///
+ /// \param AddrIdx An address index from the address table.
+ ///
+ /// \returns An expected DataExtractor for the function info data, or an
+ /// error object that indicates the reason for failing to look up the
+ /// address.
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_GSYMREADERV2_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
new file mode 100644
index 0000000000000..5152b5322778e
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -0,0 +1,130 @@
+//===- HeaderV2.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_HEADERV2_H
+#define LLVM_DEBUGINFO_GSYM_HEADERV2_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
+
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+class raw_ostream;
+class DataExtractor;
+
+namespace gsym {
+class FileWriter;
+
+constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM'
+constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG', byte swapped GSYM_MAGIC
+constexpr uint32_t GSYM_VERSION_2 = 2; // Version number of the V2 header.
+constexpr size_t GSYM_MAX_UUID_SIZE = 20; // Capacity of HeaderV2::UUID.
+
+/// The GSYM V2 header.
+///
+/// The GSYM header is found at the start of a stand alone GSYM file, or as
+/// the first bytes in a section when GSYM is contained in a section of an
+/// executable file (ELF, mach-o, COFF).
+///
+/// The structure is encoded exactly as it appears in the structure definition
+/// with no gaps between members. Alignment should not change from system to
+/// system as the members were laid out so that they shouldn't align
+/// differently on different architectures.
+///
+/// When endianness of the system loading a GSYM file matches, the file can
+/// be mmap'ed in and a pointer to the header can be cast to the first bytes
+/// of the file (stand alone GSYM file) or section data (GSYM in a section).
+/// When endianness is swapped, the HeaderV2::decode() function should be used
+/// to decode the header.
+struct HeaderV2 {
+ /// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file
+ /// is a GSYM file by scanning the first 4 bytes of a file or section.
+ /// This value might appear byte swapped, in which case it reads GSYM_CIGAM.
+ uint32_t Magic;
+ /// The version number determines how the header is decoded and how each
+ /// InfoType in FunctionInfo is encoded/decoded. As version numbers increase,
+ /// "Magic" and "Version" members should always appear at offset zero and 4
+ /// respectively to ensure clients figure out if they can parse the format.
+ uint16_t Version;
+ /// The size in bytes of each address offset in the address offsets table.
+ uint8_t AddrOffSize;
+ /// The size in bytes of the UUID encoded in the "UUID" member.
+ uint8_t UUIDSize;
+ /// The 64 bit base address that all address offsets in the address offsets
+ /// table are relative to. Storing a full 64 bit address allows our address
+ /// offsets table to be smaller on disk.
+ uint64_t BaseAddress;
+ /// The number of addresses stored in the address offsets table.
+ uint32_t NumAddresses;
+ /// The file relative offset of the start of the string table for strings
+ /// contained in the GSYM file. If the GSYM is contained in a stand alone
+ /// file this will be the file offset of the start of the string table. If
+ /// the GSYM is contained in a section within an executable file, this can
+ /// be the offset of the first string used in the GSYM file and can possibly
+ /// span one or more executable string tables. This allows the strings to
+ /// share string tables in an ELF or mach-o file.
+ uint32_t StrtabOffset;
+ /// The size in bytes of the string table. For a stand alone GSYM file, this
+ /// will be the exact size in bytes of the string table. When the GSYM data
+ /// is in a section within an executable file, this size can span one or more
+ /// sections that contain strings. This allows any strings that are already
+ /// stored in the executable file to be re-used, and any extra strings could
+ /// be added to another string table and the string table offset and size
+ /// can be set to span all needed string tables.
+ uint32_t StrtabSize;
+ /// The UUID of the original executable file. This is stored to allow
+ /// matching a GSYM file to an executable file when symbolication is
+ /// required. Only the first "UUIDSize" bytes of the UUID are valid. Any
+ /// bytes in the UUID value that appear after the first UUIDSize bytes should
+ /// be set to zero.
+ uint8_t UUID[GSYM_MAX_UUID_SIZE];
+
+ /// Check if a header is valid and return an error if anything is wrong.
+ ///
+ /// This function can be used prior to encoding a header to ensure it is
+ /// valid, or after decoding a header to ensure it is valid and supported.
+ ///
+ /// Check a correctly byte swapped header for errors:
+ /// - check magic value
+ /// - check that version number is supported
+ /// - check that the address offset size is supported
+ /// - check that the UUID size is valid
+ ///
+ /// \returns An error if anything is wrong in the header, or Error::success()
+ /// if there are no errors.
+ LLVM_ABI llvm::Error checkForError() const;
+
+ /// Decode an object from a binary data stream.
+ ///
+ /// \param Data The binary stream to read the data from. This object must
+ /// have the data for the object starting at offset zero. The data
+ /// can contain more data than needed.
+ ///
+ /// \returns A HeaderV2 or an error describing the issue that was
+ /// encountered during decoding.
+ LLVM_ABI static llvm::Expected<HeaderV2> decode(DataExtractor &Data);
+
+ /// Encode this object into FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to at the current file
+ /// position.
+ ///
+ /// \returns An error object that indicates success or failure of the
+ /// encoding process.
+ LLVM_ABI llvm::Error encode(FileWriter &O) const;
+};
+
+LLVM_ABI bool operator==(const HeaderV2 &LHS, const HeaderV2 &RHS);
+LLVM_ABI raw_ostream &operator<<(raw_ostream &OS, const llvm::gsym::HeaderV2 &H);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_HEADERV2_H
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
new file mode 100644
index 0000000000000..946057b2e6072
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -0,0 +1,627 @@
+//===- GsymCreatorV2.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/HeaderV2.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+GsymCreatorV2::GsymCreatorV2(bool Quiet)
+ : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
+ insertFile(StringRef()); // Reserve file index 0 for the empty file entry.
+}
+
+uint32_t GsymCreatorV2::insertFile(StringRef Path, llvm::sys::path::Style Style) {
+  // Intern the directory and then the base name as two separate statements
+  // before building the FileEntry. Nesting the insertString() calls inside
+  // the constructor's argument list would leave their relative order
+  // unspecified, because argument evaluation order is not fixed.
+  const llvm::StringRef DirPath = llvm::sys::path::parent_path(Path, Style);
+  const uint32_t Dir = insertString(DirPath);
+  const llvm::StringRef BaseName = llvm::sys::path::filename(Path, Style);
+  const uint32_t Base = insertString(BaseName);
+  return insertFileEntry(FileEntry(Dir, Base));
+}
+
+uint32_t GsymCreatorV2::insertFileEntry(FileEntry FE) {
+  std::lock_guard<std::mutex> Lock(Mutex);
+  // Offer the next free index; an equal entry inserted earlier keeps its own.
+  const auto Inserted =
+      FileEntryToIndex.insert(std::make_pair(FE, Files.size()));
+  if (Inserted.second)
+    Files.emplace_back(FE);
+  return Inserted.first->second;
+}
+
+uint32_t GsymCreatorV2::copyFile(const GsymCreatorV2 &SrcGC, uint32_t FileIdx) {
+  // File index zero is reserved for a FileEntry with no directory and no
+  // filename. Any other file and we need to copy the strings for the directory
+  // and filename.
+  if (FileIdx == 0)
+    return 0;
+  const FileEntry SrcFE = SrcGC.Files[FileIdx];
+  // Copy the strings for the file and then add the newly converted file entry.
+  // copyString() maps string offset zero (the empty string) to zero without
+  // consulting SrcGC.StringOffsetMap, so neither an empty directory nor an
+  // empty base name ends up dereferencing a failed map lookup. The directory
+  // is copied first to keep the order of insertion into StrTab unchanged.
+  const uint32_t Dir = copyString(SrcGC, SrcFE.Dir);
+  const uint32_t Base = copyString(SrcGC, SrcFE.Base);
+  return insertFileEntry(FileEntry(Dir, Base));
+}
+
+llvm::Error GsymCreatorV2::save(StringRef Path, llvm::endianness ByteOrder,
+                                std::optional<uint64_t> SegmentSize) const {
+  if (SegmentSize) // A segment size hands the whole job to saveSegments().
+    return saveSegments(Path, ByteOrder, *SegmentSize);
+  std::error_code OpenError;
+  raw_fd_ostream Stream(Path, OpenError);
+  if (OpenError)
+    return llvm::errorCodeToError(OpenError);
+  FileWriter Writer(Stream, ByteOrder);
+  return encode(Writer);
+}
+
+// Encode the finalized contents into \p O in this order: header, address
+// offsets, address info offsets, file table, string table, function infos.
+llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  if (Funcs.empty())
+    return createStringError(std::errc::invalid_argument,
+                             "no functions to encode");
+  if (!Finalized)
+    return createStringError(std::errc::invalid_argument,
+                             "GsymCreatorV2 wasn't finalized prior to encoding");
+
+  if (Funcs.size() > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument,
+                             "too many FunctionInfos");
+
+  std::optional<uint64_t> BaseAddress = getBaseAddress();
+  // Base address should be valid if we have any functions.
+  if (!BaseAddress)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid base address");
+  HeaderV2 Hdr;
+  Hdr.Magic = GSYM_MAGIC;
+  Hdr.Version = GSYM_VERSION_2;
+  Hdr.AddrOffSize = getAddressOffsetSize();
+  Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
+  Hdr.BaseAddress = *BaseAddress;
+  Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
+  Hdr.StrtabOffset = 0; // We will fix this up later.
+  Hdr.StrtabSize = 0;   // We will fix this up later.
+  memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
+  if (UUID.size() > sizeof(Hdr.UUID))
+    return createStringError(std::errc::invalid_argument,
+                             "invalid UUID size %u", (uint32_t)UUID.size());
+  // Copy the UUID value if we have one.
+  if (UUID.size() > 0)
+    memcpy(Hdr.UUID, UUID.data(), UUID.size());
+  // Write out the header.
+  llvm::Error Err = Hdr.encode(O);
+  if (Err)
+    return Err;
+
+  const uint64_t MaxAddressOffset = getMaxAddressOffset();
+  // Write out the address offsets.
+  O.alignTo(Hdr.AddrOffSize);
+  for (const auto &FuncInfo : Funcs) {
+    uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
+    // Verify that the address offset byte size was calculated correctly by
+    // checking the current address offset is in range. Code changes have
+    // introduced bugs here before, so catch them during testing.
+    assert(AddrOffset <= MaxAddressOffset);
+    (void)MaxAddressOffset;
+    switch (Hdr.AddrOffSize) {
+    case 1:
+      O.writeU8(static_cast<uint8_t>(AddrOffset));
+      break;
+    case 2:
+      O.writeU16(static_cast<uint16_t>(AddrOffset));
+      break;
+    case 4:
+      O.writeU32(static_cast<uint32_t>(AddrOffset));
+      break;
+    case 8:
+      O.writeU64(AddrOffset);
+      break;
+    }
+  }
+
+  // Write out all zeros for the AddrInfoOffsets.
+  O.alignTo(4);
+  const off_t AddrInfoOffsetsOffset = O.tell();
+  for (size_t i = 0, n = Funcs.size(); i < n; ++i)
+    O.writeU32(0);
+
+  // Write out the file table
+  O.alignTo(4);
+  assert(!Files.empty());
+  assert(Files[0].Dir == 0);
+  assert(Files[0].Base == 0);
+  size_t NumFiles = Files.size();
+  if (NumFiles > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument, "too many files");
+  O.writeU32(static_cast<uint32_t>(NumFiles));
+  for (const auto &File : Files) {
+    O.writeU32(File.Dir);
+    O.writeU32(File.Base);
+  }
+
+  // Write out the string table.
+  const off_t StrtabOffset = O.tell();
+  StrTab.write(O.get_stream());
+  const off_t StrtabSize = O.tell() - StrtabOffset;
+  // The header stores the string table offset and size as 32-bit values, so
+  // reject either one exceeding 32-bit max before they are narrowed below.
+  if (StrtabOffset > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument,
+                             "string table offset exceeded 32-bit max");
+  if (StrtabSize > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument,
+                             "string table size exceeded 32-bit max");
+
+  // Write out the address infos for each function info.
+  std::vector<uint32_t> AddrInfoOffsets;
+  for (const auto &FuncInfo : Funcs) {
+    if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) {
+      // Verify that the address info offsets do not exceed 32-bit max.
+      uint64_t Offset = OffsetOrErr.get();
+      if (Offset > UINT32_MAX)
+        return createStringError(std::errc::invalid_argument,
+                                 "address info offset exceeded 32-bit max");
+      AddrInfoOffsets.push_back(static_cast<uint32_t>(Offset));
+    } else
+      return OffsetOrErr.takeError();
+  }
+  // Fixup the string table offset and size in the header
+  O.fixup32(static_cast<uint32_t>(StrtabOffset), offsetof(HeaderV2, StrtabOffset));
+  O.fixup32(static_cast<uint32_t>(StrtabSize), offsetof(HeaderV2, StrtabSize));
+
+  // Fixup all address info offsets
+  uint64_t Offset = 0;
+  for (auto AddrInfoOffset : AddrInfoOffsets) {
+    O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
+    Offset += 4;
+  }
+  return ErrorSuccess();
+}
+
+llvm::Error GsymCreatorV2::loadCallSitesFromYAML(StringRef YAMLFile) {
+  // Delegate to a CallSiteInfoLoader, which is handed this creator and its
+  // function infos, to load the call site information from the YAML file.
+  return CallSiteInfoLoader(*this, Funcs).loadYAML(YAMLFile);
+}
+
+void GsymCreatorV2::prepareMergedFunctions(OutputAggregator &Out) {
+ // Nothing to do if we have less than 2 functions.
+ if (Funcs.size() < 2)
+ return;
+
+ // Sort the function infos by address range first, preserving input order.
+ llvm::stable_sort(Funcs);
+ std::vector<FunctionInfo> TopLevelFuncs;
+
+ // Add the first function info to the top level functions.
+ TopLevelFuncs.emplace_back(std::move(Funcs.front()));
+
+ // Now if the next function info has the same address range as the top level,
+ // then merge it into the top level function, otherwise add it to the top
+ // level.
+ for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
+ FunctionInfo &TopFunc = TopLevelFuncs.back();
+ FunctionInfo &MatchFunc = Funcs[Idx];
+ if (TopFunc.Range == MatchFunc.Range) {
+ // Both have the same range - add the 2nd func as a child of the 1st func
+ if (!TopFunc.MergedFunctions)
+ TopFunc.MergedFunctions = MergedFunctionsInfo();
+ // Avoid adding duplicate functions to MergedFunctions. Since functions
+ // are already ordered within the Funcs array, we can just check equality
+ // against the last function in the merged array.
+ else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
+ continue;
+ TopFunc.MergedFunctions->MergedFunctions.emplace_back(
+ std::move(MatchFunc));
+ } else
+ // No match, add the function as a top-level function
+ TopLevelFuncs.emplace_back(std::move(MatchFunc));
+ }
+
+ uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
+ // Report how many were folded. NOTE(review): skipped duplicates count too.
+ if (mergedCount != 0)
+ Out << "Have " << mergedCount
+ << " merged functions as children of other functions\n";
+
+ std::swap(Funcs, TopLevelFuncs);
+}
+
+llvm::Error GsymCreatorV2::finalize(OutputAggregator &Out) {
+ std::lock_guard<std::mutex> Guard(Mutex);
+ if (Finalized)
+ return createStringError(std::errc::invalid_argument, "already finalized");
+ Finalized = true;
+
+ // Don't let the string table indexes change by finalizing in order.
+ StrTab.finalizeInOrder();
+
+ // Remove duplicate function infos that have both entries from debug info
+ // (DWARF or Breakpad) and entries from the SymbolTable.
+ //
+ // Also handle overlapping functions. Usually there shouldn't be any, but they
+ // can and do happen in some rare cases.
+ //
+ //      (a)          (b)         (c)
+ //     ^  ^       ^            ^
+ //     |X |Y      |X ^         |X
+ //     |  |       |  |Y        |  ^
+ //     |  |       |  v         v  |Y
+ //     v  v       v               v
+ //
+ // In (a) and (b), Y is ignored and X will be reported for the full range.
+ // In (c), both functions will be included in the result and lookups for an
+ // address in the intersection will return Y because of binary search.
+ //
+ // Note that in case of (b), we cannot include Y in the result because then
+ // we wouldn't find any function for range (end of Y, end of X)
+ // with binary search.
+
+ const auto NumBefore = Funcs.size();
+ // Only sort and unique if this isn't a segment. If this is a segment we
+ // already finalized the main GsymCreatorV2 with all of the function infos
+ // and then the already sorted and uniqued function infos were added to this
+ // object.
+ if (!IsSegment) {
+ if (NumBefore > 1) {
+ // Sort function infos so we can emit sorted functions. Use stable sort to
+ // ensure determinism.
+ llvm::stable_sort(Funcs);
+ std::vector<FunctionInfo> FinalizedFuncs;
+ FinalizedFuncs.reserve(Funcs.size());
+ FinalizedFuncs.emplace_back(std::move(Funcs.front()));
+ for (size_t Idx=1; Idx < NumBefore; ++Idx) {
+ FunctionInfo &Prev = FinalizedFuncs.back();
+ FunctionInfo &Curr = Funcs[Idx];
+ // Empty ranges won't intersect, but we still need to
+ // catch the case where we have multiple symbols at the
+ // same address and coalesce them.
+ const bool ranges_equal = Prev.Range == Curr.Range;
+ if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
+ // Overlapping ranges or empty identical ranges.
+ if (ranges_equal) {
+ // Same address range. Check if one is from debug
+ // info and the other is from a symbol table. If
+ // so, then keep the one with debug info. Our
+ // sorting guarantees that entries with matching
+ // address ranges that have debug info are last in
+ // the sort.
+ if (!(Prev == Curr)) {
+ if (Prev.hasRichInfo() && Curr.hasRichInfo())
+ Out.Report(
+ "Duplicate address ranges with different debug info.",
+ [&](raw_ostream &OS) {
+ OS << "warning: same address range contains "
+ "different debug "
+ << "info. Removing:\n"
+ << Prev << "\nIn favor of this one:\n"
+ << Curr << "\n";
+ });
+
+ // We want to swap the current entry with the previous since
+ // later entries with the same range always have more debug info
+ // or different debug info.
+ std::swap(Prev, Curr);
+ }
+ } else {
+ Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
+ // print warnings about overlaps
+ OS << "warning: function ranges overlap:\n"
+ << Prev << "\n"
+ << Curr << "\n";
+ });
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ } else {
+ if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
+ // Symbols on macOS don't have address ranges, so if the range
+ // doesn't match and the size is zero, then we replace the empty
+ // symbol function info with the current one.
+ std::swap(Prev, Curr);
+ } else {
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ }
+ }
+ std::swap(Funcs, FinalizedFuncs);
+ }
+ // If our last function info entry doesn't have a size and if we have valid
+ // text ranges, we should set the size of the last entry since any search for
+ // a high address might match our last entry. By fixing up this size, we can
+ // help ensure we don't cause lookups to always return the last symbol that
+ // has no size when doing lookups.
+ if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
+ if (auto Range =
+ ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
+ Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
+ }
+ }
+ Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
+ << Funcs.size() << " total\n";
+ }
+ return Error::success();
+}
+
+uint32_t GsymCreatorV2::copyString(const GsymCreatorV2 &SrcGC, uint32_t StrOff) {
+  // Offset zero always names the empty string, so there is nothing to copy.
+  if (StrOff != 0)
+    return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
+  return 0;
+}
+
+uint32_t GsymCreatorV2::insertString(StringRef S, bool Copy) {
+ if (S.empty())
+ return 0; // The empty string is offset zero and is never recorded below.
+
+ // The hash can be calculated outside the lock.
+ CachedHashStringRef CHStr(S);
+ std::lock_guard<std::mutex> Guard(Mutex);
+ if (Copy) {
+ // We need to provide backing storage for the string if requested
+ // since StringTableBuilder stores references to strings. Any string
+ // that comes from a section in an object file doesn't need to be
+ // copied, but any string created by code will need to be copied.
+ // This allows GsymCreatorV2 to be really fast when parsing DWARF and
+ // other object files as most strings don't need to be copied.
+ if (!StrTab.contains(CHStr))
+ CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
+ CHStr.hash()};
+ }
+ const uint32_t StrOff = StrTab.add(CHStr);
+ // Save a mapping of string offsets to the cached string reference in case
+ // we need to segment the GSYM file and copy strings from one string table to
+ // another.
+ StringOffsetMap.try_emplace(StrOff, CHStr);
+ return StrOff;
+}
+
+StringRef GsymCreatorV2::getString(uint32_t Offset) {
+  const auto Found = StringOffsetMap.find(Offset); // Filled by insertString.
+  assert(Found != StringOffsetMap.end() &&
+         "GsymCreatorV2::getString expects a valid offset as parameter.");
+  return Found->second.val();
+}
+
+void GsymCreatorV2::addFunctionInfo(FunctionInfo &&FI) {
+  std::lock_guard<std::mutex> Lock(Mutex); // Appends to Funcs are serialized.
+  Funcs.push_back(std::move(FI));
+}
+
+void GsymCreatorV2::forEachFunctionInfo(
+    std::function<bool(FunctionInfo &)> const &Callback) {
+  // Walk Funcs under the mutex until the callback returns false.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  for (FunctionInfo &Info : Funcs)
+    if (!Callback(Info))
+      return;
+}
+
+void GsymCreatorV2::forEachFunctionInfo(
+    std::function<bool(const FunctionInfo &)> const &Callback) const {
+  // Read-only walk of Funcs under the mutex until the callback returns false.
+  std::lock_guard<std::mutex> Lock(Mutex);
+  for (const FunctionInfo &Info : Funcs)
+    if (!Callback(Info))
+      return;
+}
+
+size_t GsymCreatorV2::getNumFunctionInfos() const {
+ std::lock_guard<std::mutex> Guard(Mutex); // Read the size under the mutex.
+ return Funcs.size();
+}
+
+bool GsymCreatorV2::IsValidTextAddress(uint64_t Addr) const {
+  // Every address is accepted while no valid text ranges have been set;
+  // afterwards only addresses contained in those ranges are accepted.
+  return !ValidTextRanges || ValidTextRanges->contains(Addr);
+}
+
+std::optional<uint64_t> GsymCreatorV2::getFirstFunctionAddress() const {
+  // Funcs is only known to be sorted once this object has been finalized, or
+  // when it is a segment: segments receive their function infos from an
+  // already finalized GsymCreatorV2 whose functions were sorted and uniqued.
+  const bool IsSorted = Finalized || IsSegment;
+  if (!IsSorted || Funcs.empty())
+    return std::nullopt;
+  return std::optional<uint64_t>(Funcs.front().startAddress());
+}
+
+std::optional<uint64_t> GsymCreatorV2::getLastFunctionAddress() const {
+  // Funcs is only known to be sorted once this object has been finalized, or
+  // when it is a segment: segments receive their function infos from an
+  // already finalized GsymCreatorV2 whose functions were sorted and uniqued.
+  const bool IsSorted = Finalized || IsSegment;
+  if (!IsSorted || Funcs.empty())
+    return std::nullopt;
+  return std::optional<uint64_t>(Funcs.back().startAddress());
+}
+
+std::optional<uint64_t> GsymCreatorV2::getBaseAddress() const {
+  if (!BaseAddress) // No base address stored: use the first function's.
+    return getFirstFunctionAddress();
+  return BaseAddress;
+}
+
+uint64_t GsymCreatorV2::getMaxAddressOffset() const {
+ switch (getAddressOffsetSize()) { // Byte size of one address offset.
+ case 1: return UINT8_MAX;
+ case 2: return UINT16_MAX;
+ case 4: return UINT32_MAX;
+ case 8: return UINT64_MAX;
+ } // getAddressOffsetSize() only returns 1, 2, 4 or 8.
+ llvm_unreachable("invalid address offset");
+}
+
+uint8_t GsymCreatorV2::getAddressOffsetSize() const {
+  // Smallest of 1, 2, 4 or 8 bytes holding (last function start - base).
+  const std::optional<uint64_t> Base = getBaseAddress();
+  const std::optional<uint64_t> Last = getLastFunctionAddress();
+  if (!Base || !Last)
+    return 1; // Without both addresses the smallest size is reported.
+  const uint64_t Span = *Last - *Base;
+  if (Span <= UINT8_MAX)
+    return 1;
+  if (Span <= UINT16_MAX)
+    return 2;
+  if (Span <= UINT32_MAX)
+    return 4;
+  return 8;
+}
+
+/// Return the combined size in bytes of the header, the address offset table,
+/// the address info offset table, the file table entries and the string
+/// table, based on the current contents of this creator.
+uint64_t GsymCreatorV2::calculateHeaderAndTableSize() const {
+  const size_t FuncCount = Funcs.size();
+  // One address offset per function, each getAddressOffsetSize() bytes wide.
+  const uint64_t AddrOffsetsBytes = FuncCount * getAddressOffsetSize();
+  // One address info offset per function, counted here as a uint32_t each.
+  const uint64_t AddrInfoOffsetsBytes = FuncCount * sizeof(uint32_t);
+  // One FileEntry per file.
+  const uint64_t FileTableBytes = Files.size() * sizeof(FileEntry);
+
+  return sizeof(HeaderV2) + AddrOffsetsBytes + AddrInfoOffsetsBytes +
+         FileTableBytes + StrTab.getSize();
+}
+
+// Rewrite an InlineInfo that was copy constructed from an InlineInfo owned by
+// \a SrcGC so that its name and call file refer to strings and files of this
+// GsymCreatorV2. The same fixup is applied recursively to every child.
+void GsymCreatorV2::fixupInlineInfo(const GsymCreatorV2 &SrcGC, InlineInfo &II) {
+  II.Name = copyString(SrcGC, II.Name);
+  II.CallFile = copyFile(SrcGC, II.CallFile);
+  for (InlineInfo &Child : II.Children) {
+    fixupInlineInfo(SrcGC, Child);
+  }
+}
+
+/// Copy the function info at index \p FuncIdx of \p SrcGC into this creator.
+///
+/// The strings and files referenced by the function's name, line table and
+/// inline info are copied into this creator with copyString()/copyFile(), and
+/// the copied entries are rewritten to use the values those calls return.
+///
+/// \param SrcGC the creator to copy the function info from.
+/// \param FuncIdx index into SrcGC.Funcs of the function info to copy.
+/// \returns the result of FunctionInfo::cacheEncoding() on the appended entry.
+uint64_t GsymCreatorV2::copyFunctionInfo(const GsymCreatorV2 &SrcGC, size_t FuncIdx) {
+  const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
+
+  FunctionInfo DstFI;
+  DstFI.Range = SrcFI.Range;
+  DstFI.Name = copyString(SrcGC, SrcFI.Name);
+  // Copy the line table if there is one.
+  if (SrcFI.OptLineTable) {
+    // Copy the entire line table.
+    DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
+    // Fixup all LineEntry::File entries which are indexes in the file table
+    // from SrcGC and must be converted to file indexes from this
+    // GsymCreatorV2.
+    LineTable &DstLT = DstFI.OptLineTable.value();
+    const size_t NumLines = DstLT.size();
+    for (size_t I = 0; I < NumLines; ++I) {
+      LineEntry &LE = DstLT.get(I);
+      LE.File = copyFile(SrcGC, LE.File);
+    }
+  }
+  // Copy the inline information if needed.
+  if (SrcFI.Inline) {
+    // Make a copy of the source inline information.
+    DstFI.Inline = SrcFI.Inline.value();
+    // Fixup all strings and files in the copied inline information.
+    fixupInlineInfo(SrcGC, *DstFI.Inline);
+  }
+  // NOTE(review): only Range, Name, OptLineTable and Inline are carried over
+  // here. Confirm whether other FunctionInfo members (e.g. CallSites,
+  // MergedFunctions) are intentionally left out of segment copies.
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // DstFI is not used after this point, so move it into the vector instead of
+  // copying its line table and inline info tree a second time.
+  Funcs.emplace_back(std::move(DstFI));
+  return Funcs.back().cacheEncoding();
+}
+
+/// Split the function infos of this creator into segments of roughly
+/// \p SegmentSize bytes and save each one to its own file.
+///
+/// Each segment is written to "<Path>-<hex first function address>". A
+/// segment that reports no first function address is not written.
+///
+/// \returns an error if \p SegmentSize is zero or if creating, finalizing or
+/// saving any segment fails; Error::success() otherwise.
+llvm::Error GsymCreatorV2::saveSegments(StringRef Path,
+                                        llvm::endianness ByteOrder,
+                                        uint64_t SegmentSize) const {
+  if (SegmentSize == 0)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid segment size zero");
+
+  const size_t NumFuncs = Funcs.size();
+  // createSegment() advances FuncIdx past the functions it consumed.
+  for (size_t FuncIdx = 0; FuncIdx < NumFuncs;) {
+    llvm::Expected<std::unique_ptr<GsymCreatorV2>> SegmentOrErr =
+        createSegment(SegmentSize, FuncIdx);
+    if (!SegmentOrErr)
+      return SegmentOrErr.takeError();
+
+    GsymCreatorV2 *Segment = SegmentOrErr->get();
+    if (!Segment)
+      break; // There were no more functions to encode.
+
+    // Don't collect any messages at all.
+    OutputAggregator Out(nullptr);
+    if (llvm::Error Err = Segment->finalize(Out))
+      return Err;
+
+    const std::optional<uint64_t> FirstFuncAddr =
+        Segment->getFirstFunctionAddress();
+    if (!FirstFuncAddr)
+      continue;
+
+    std::string SegmentPath;
+    raw_string_ostream PathStream(SegmentPath);
+    PathStream << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
+    if (llvm::Error Err = Segment->save(SegmentPath, ByteOrder, std::nullopt))
+      return Err;
+  }
+  return Error::success();
+}
+
+/// Build a new segment creator holding function infos copied from this
+/// creator, starting at \p FuncIdx, until the estimated segment size reaches
+/// \p SegmentSize.
+///
+/// \p FuncIdx is advanced past every function that was copied. An empty
+/// unique pointer is returned when \p FuncIdx is already past the last
+/// function, and an error is returned when not even one function info fits.
+llvm::Expected<std::unique_ptr<GsymCreatorV2>>
+GsymCreatorV2::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+  const size_t NumFuncs = Funcs.size();
+  // No function entries left, return an empty unique pointer.
+  if (FuncIdx >= NumFuncs)
+    return std::unique_ptr<GsymCreatorV2>();
+
+  std::unique_ptr<GsymCreatorV2> Segment(new GsymCreatorV2(/*Quiet=*/true));
+  // Mark the new creator as a segment and carry over the base address (when
+  // one is set) and the UUID of this creator.
+  Segment->setIsSegment();
+  if (BaseAddress)
+    Segment->setBaseAddress(*BaseAddress);
+  Segment->setUUID(UUID);
+
+  // Running total of the encoded function info sizes in this segment. The
+  // header and table size is recomputed each iteration because it changes as
+  // function infos are added.
+  uint64_t FuncInfosBytes = 0;
+  while (FuncIdx < NumFuncs) {
+    const uint64_t FixedBytes = Segment->calculateHeaderAndTableSize();
+    if (FixedBytes + FuncInfosBytes >= SegmentSize) {
+      if (FuncInfosBytes != 0)
+        break;
+      return createStringError(std::errc::invalid_argument,
+                               "a segment size of %" PRIu64 " is to small to "
+                               "fit any function infos, specify a larger value",
+                               SegmentSize);
+    }
+    FuncInfosBytes += alignTo(Segment->copyFunctionInfo(*this, FuncIdx), 4);
+    ++FuncIdx;
+  }
+  return std::move(Segment);
+}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
new file mode 100644
index 0000000000000..de074cdde02dd
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -0,0 +1,558 @@
+//===- GsymReaderV2.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace gsym;
+
+// Take ownership of \p Buffer. The byte order starts out as the native one;
+// parse() updates it once the magic value has been inspected.
+GsymReaderV2::GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer)
+ : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
+
+// Defaulted move constructor.
+GsymReaderV2::GsymReaderV2(GsymReaderV2 &&RHS) = default;
+
+// Defaulted destructor.
+GsymReaderV2::~GsymReaderV2() = default;
+
+/// Create a reader from the contents of \p Filename (loaded with
+/// MemoryBuffer::getFileOrSTDIN).
+///
+/// \returns the parsed reader, or an error if the file cannot be read or its
+/// contents fail to parse.
+llvm::Expected<GsymReaderV2> GsymReaderV2::openFile(StringRef Filename) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  if (auto EC = BufferOrErr.getError())
+    return llvm::errorCodeToError(EC);
+  return create(BufferOrErr.get());
+}
+
+/// Create a reader from a private copy of \p Bytes.
+llvm::Expected<GsymReaderV2> GsymReaderV2::copyBuffer(StringRef Bytes) {
+  std::unique_ptr<MemoryBuffer> Copy =
+      MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
+  return create(Copy);
+}
+
+/// Create a reader that takes ownership of \p MemBuffer and parse it.
+///
+/// \returns the parsed reader, or an error if \p MemBuffer is null or the
+/// data fails to parse.
+llvm::Expected<llvm::gsym::GsymReaderV2>
+GsymReaderV2::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
+  if (!MemBuffer)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid memory buffer");
+
+  GsymReaderV2 Reader(std::move(MemBuffer));
+  if (llvm::Error Err = Reader.parse())
+    return std::move(Err);
+  return std::move(Reader);
+}
+
+/// Parse the GSYM data held in MemBuffer and set up the header and table
+/// members of this reader.
+///
+/// The magic value selects between two paths. GSYM_MAGIC means the data is in
+/// host byte order, so the tables are read as ArrayRef views of the buffer.
+/// GSYM_CIGAM means the data is byte swapped, so the header and tables are
+/// decoded into storage owned by Swap and the ArrayRef members are pointed at
+/// those decoded copies.
+///
+/// \returns Error::success() on success, or an error if the header cannot be
+/// read or validated, or if any of the tables cannot be read.
+llvm::Error
+GsymReaderV2::parse() {
+  BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
+  // Check for the magic bytes. This file format is designed to be mmap'ed
+  // into a process and accessed as read only. This is done for performance
+  // and efficiency for symbolicating and parsing GSYM data.
+  if (FileData.readObject(Hdr))
+    return createStringError(std::errc::invalid_argument,
+                             "not enough data for a GSYM header");
+
+  const auto HostByteOrder = llvm::endianness::native;
+  switch (Hdr->Magic) {
+  case GSYM_MAGIC:
+    Endian = HostByteOrder;
+    break;
+  case GSYM_CIGAM:
+    // This is a GSYM file, but not native endianness.
+    Endian = sys::IsBigEndianHost ? llvm::endianness::little
+                                  : llvm::endianness::big;
+    Swap.reset(new SwappedData);
+    break;
+  default:
+    return createStringError(std::errc::invalid_argument,
+                             "not a GSYM file");
+  }
+
+  // Only used on the swapped path below, where the byte order of the data is
+  // the opposite of the host's: the data is little endian exactly when the
+  // host is not.
+  bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
+  // Read a correctly byte swapped header if we need to.
+  if (Swap) {
+    DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+    if (auto ExpectedHdr = HeaderV2::decode(Data))
+      Swap->Hdr = ExpectedHdr.get();
+    else
+      return ExpectedHdr.takeError();
+    Hdr = &Swap->Hdr;
+  }
+
+  // Detect errors in the header and report any that are found. If we make it
+  // past this without errors, we know we have a good magic value, a supported
+  // version number, verified address offset size and a valid UUID size.
+  if (Error Err = Hdr->checkForError())
+    return Err;
+
+  if (!Swap) {
+    // This is the native endianness case that is most common and optimized for
+    // efficient lookups. Here we just grab pointers to the native data and
+    // use ArrayRef objects to allow efficient read only access.
+
+    // Read the address offsets.
+    if (FileData.padToAlignment(Hdr->AddrOffSize) ||
+        FileData.readArray(AddrOffsets,
+                           Hdr->NumAddresses * Hdr->AddrOffSize))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address table");
+
+    // Read the address info offsets.
+    if (FileData.padToAlignment(4) ||
+        FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address info offsets table");
+
+    // Read the file table.
+    uint32_t NumFiles = 0;
+    if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read file table");
+
+    // Get the string table.
+    FileData.setOffset(Hdr->StrtabOffset);
+    if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read string table");
+  } else {
+    // This is the non native endianness case that is not common and not
+    // optimized for lookups. Here we decode the important tables into local
+    // storage and then set the ArrayRef objects to point to these swapped
+    // copies of the read only data so lookups can be as efficient as possible.
+    DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+
+    // Read the address offsets.
+    uint64_t Offset = alignTo(sizeof(HeaderV2), Hdr->AddrOffSize);
+    Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
+    switch (Hdr->AddrOffSize) {
+    case 1:
+      if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    case 2:
+      if (!Data.getU16(&Offset,
+                       reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
+                       Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    case 4:
+      if (!Data.getU32(&Offset,
+                       reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
+                       Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    case 8:
+      if (!Data.getU64(&Offset,
+                       reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
+                       Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    }
+    AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
+
+    // Read the address info offsets.
+    Offset = alignTo(Offset, 4);
+    Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
+    if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
+      AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
+    else
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address info offsets table");
+    // Read the file table.
+    const uint32_t NumFiles = Data.getU32(&Offset);
+    if (NumFiles > 0) {
+      Swap->Files.resize(NumFiles);
+      // Two 32 bit values are decoded per file entry, starting at the Dir
+      // member of the first entry.
+      if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
+        Files = ArrayRef<FileEntry>(Swap->Files);
+      else
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read file table");
+    }
+    // Get the string table.
+    StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
+                                                Hdr->StrtabSize);
+    if (StrTab.Data.empty())
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read string table");
+  }
+  return Error::success();
+}
+
+/// Return the header of this reader.
+///
+/// Readers are only handed out by GsymReaderV2::openFile(...) or
+/// GsymReaderV2::copyBuffer(), both of which leave Hdr pointing at a valid
+/// header, so the assertion is not expected to fire.
+const HeaderV2 &GsymReaderV2::getHeader() const {
+  assert(Hdr);
+  const HeaderV2 &Header = *Hdr;
+  return Header;
+}
+
+/// Return the address stored at \p Index in the address table, dispatching on
+/// the header's address offset size. Returns std::nullopt for an offset size
+/// other than 1, 2, 4 or 8.
+std::optional<uint64_t> GsymReaderV2::getAddress(size_t Index) const {
+  const auto OffsetSize = Hdr->AddrOffSize;
+  if (OffsetSize == 1)
+    return addressForIndex<uint8_t>(Index);
+  if (OffsetSize == 2)
+    return addressForIndex<uint16_t>(Index);
+  if (OffsetSize == 4)
+    return addressForIndex<uint32_t>(Index);
+  if (OffsetSize == 8)
+    return addressForIndex<uint64_t>(Index);
+  return std::nullopt;
+}
+
+/// Return the address info offset stored at \p Index, or std::nullopt when
+/// \p Index is out of range.
+std::optional<uint64_t> GsymReaderV2::getAddressInfoOffset(size_t Index) const {
+  if (Index >= AddrInfoOffsets.size())
+    return std::nullopt;
+  return AddrInfoOffsets[Index];
+}
+
+/// Return the address table index that getAddressOffsetIndex() reports for
+/// \p Addr.
+///
+/// \returns the index, or an error if \p Addr is below the base address, the
+/// header's address offset size is unsupported, or no index is found.
+Expected<uint64_t>
+GsymReaderV2::getAddressIndex(const uint64_t Addr) const {
+  const auto NotInGsym = [Addr]() {
+    return createStringError(std::errc::invalid_argument,
+                             "address 0x%" PRIx64 " is not in GSYM", Addr);
+  };
+
+  if (Addr < Hdr->BaseAddress)
+    return NotInGsym();
+
+  const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
+  std::optional<uint64_t> FoundIdx;
+  switch (Hdr->AddrOffSize) {
+  case 1:
+    FoundIdx = getAddressOffsetIndex<uint8_t>(AddrOffset);
+    break;
+  case 2:
+    FoundIdx = getAddressOffsetIndex<uint16_t>(AddrOffset);
+    break;
+  case 4:
+    FoundIdx = getAddressOffsetIndex<uint32_t>(AddrOffset);
+    break;
+  case 8:
+    FoundIdx = getAddressOffsetIndex<uint64_t>(AddrOffset);
+    break;
+  default:
+    return createStringError(std::errc::invalid_argument,
+                             "unsupported address offset size %u",
+                             Hdr->AddrOffSize);
+  }
+
+  if (!FoundIdx)
+    return NotInGsym();
+  return *FoundIdx;
+}
+
+/// Find the encoded FunctionInfo data whose address range contains \p Addr.
+///
+/// Starting at the index getAddressIndex() returns for \p Addr, this walks
+/// the consecutive entries that share the same start address and returns the
+/// data of the first entry whose size is zero or whose range contains
+/// \p Addr. \p FuncStartAddr is updated by every
+/// getFunctionInfoDataAtIndex() call made along the way.
+llvm::Expected<DataExtractor>
+GsymReaderV2::getFunctionInfoDataForAddress(uint64_t Addr,
+                                            uint64_t &FuncStartAddr) const {
+  Expected<uint64_t> StartIdxOrErr = getAddressIndex(Addr);
+  if (!StartIdxOrErr)
+    return StartIdxOrErr.takeError();
+
+  // Start address shared by the run of entries being scanned; set from the
+  // first entry visited.
+  std::optional<uint64_t> SharedStartAddr;
+  const size_t AddrCount = getNumAddresses();
+  for (uint64_t Idx = *StartIdxOrErr; Idx < AddrCount; ++Idx) {
+    auto DataOrErr = getFunctionInfoDataAtIndex(Idx, FuncStartAddr);
+    // Propagate any error as is.
+    if (!DataOrErr)
+      return DataOrErr;
+
+    if (!SharedStartAddr)
+      SharedStartAddr = FuncStartAddr;
+    else if (*SharedStartAddr != FuncStartAddr)
+      break; // Done with consecutive function entries with same address.
+
+    // The encoding of a FunctionInfo object starts with the function size.
+    // Some symbols on Darwin don't have valid sizes, so an entry with zero
+    // size counts as a match for the address.
+    uint64_t SizeOffset = 0;
+    const uint32_t FuncSize = DataOrErr->getU32(&SizeOffset);
+    if (FuncSize == 0)
+      return DataOrErr;
+    if (AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
+      return DataOrErr;
+  }
+  return createStringError(std::errc::invalid_argument,
+                           "address 0x%" PRIx64 " is not in GSYM", Addr);
+}
+
+/// Return a DataExtractor over the bytes that start at the address info
+/// offset of entry \p AddrIdx and run to the end of the buffer, and store the
+/// entry's address in \p FuncStartAddr.
+///
+/// \returns an error if \p AddrIdx is out of range, the address info offset
+/// yields no bytes, or the address cannot be extracted.
+llvm::Expected<DataExtractor>
+GsymReaderV2::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
+                                         uint64_t &FuncStartAddr) const {
+  if (AddrIdx >= getNumAddresses())
+    return createStringError(std::errc::invalid_argument,
+                             "invalid address index %" PRIu64, AddrIdx);
+
+  const uint32_t InfoOffset = AddrInfoOffsets[AddrIdx];
+  assert((Endian == endianness::big || Endian == endianness::little) &&
+         "Endian must be either big or little");
+  StringRef InfoBytes = MemBuffer->getBuffer().substr(InfoOffset);
+  if (InfoBytes.empty())
+    return createStringError(std::errc::invalid_argument,
+                             "invalid address info offset 0x%" PRIx32,
+                             InfoOffset);
+
+  std::optional<uint64_t> StartAddr = getAddress(AddrIdx);
+  if (!StartAddr)
+    return createStringError(std::errc::invalid_argument,
+                             "failed to extract address[%" PRIu64 "]", AddrIdx);
+  FuncStartAddr = *StartAddr;
+
+  const bool IsLittleEndian = Endian == llvm::endianness::little;
+  return DataExtractor(InfoBytes, IsLittleEndian, 4);
+}
+
+llvm::Expected<FunctionInfo> GsymReaderV2::getFunctionInfo(uint64_t Addr) const {
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
+ return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
+ else
+ return ExpectedData.takeError();
+}
+
+/// Decode and return the FunctionInfo stored at address table index \p Idx,
+/// or the error from locating or decoding its data.
+llvm::Expected<FunctionInfo>
+GsymReaderV2::getFunctionInfoAtIndex(uint64_t Idx) const {
+  uint64_t FuncStartAddr = 0;
+  llvm::Expected<DataExtractor> DataOrErr =
+      getFunctionInfoDataAtIndex(Idx, FuncStartAddr);
+  if (!DataOrErr)
+    return DataOrErr.takeError();
+  return FunctionInfo::decode(*DataOrErr, FuncStartAddr);
+}
+
+/// Look up \p Addr and return the LookupResult produced by
+/// FunctionInfo::lookup() on the matching function info data.
+/// \p MergedFunctionsData is forwarded unchanged to FunctionInfo::lookup().
+llvm::Expected<LookupResult>
+GsymReaderV2::lookup(uint64_t Addr,
+                     std::optional<DataExtractor> *MergedFunctionsData) const {
+  uint64_t FuncStartAddr = 0;
+  llvm::Expected<DataExtractor> DataOrErr =
+      getFunctionInfoDataForAddress(Addr, FuncStartAddr);
+  if (!DataOrErr)
+    return DataOrErr.takeError();
+  return FunctionInfo::lookup(*DataOrErr, *this, FuncStartAddr, Addr,
+                              MergedFunctionsData);
+}
+
+/// Look up \p Addr and return the primary result followed by one result per
+/// merged function found during the primary lookup.
+///
+/// \returns the list of results (the primary result is always first), or the
+/// first error hit while looking up the primary or any merged function.
+llvm::Expected<std::vector<LookupResult>>
+GsymReaderV2::lookupAll(uint64_t Addr) const {
+  std::vector<LookupResult> Results;
+  std::optional<DataExtractor> MergedFunctionsData;
+
+  // First perform a lookup to get the primary function info result.
+  auto MainResult = lookup(Addr, &MergedFunctionsData);
+  if (!MainResult)
+    return MainResult.takeError();
+
+  // Add the main result as the first entry.
+  Results.push_back(std::move(*MainResult));
+
+  // Now process any merged functions data that was found during the lookup.
+  if (MergedFunctionsData) {
+    // *MainResult has been moved from, so take the function start address
+    // from the stored copy. Read it once up front: the push_back calls below
+    // may reallocate Results.
+    const uint64_t FuncStartAddr = Results.front().FuncRange.start();
+
+    // Get data extractors for each merged function.
+    auto ExpectedMergedFuncExtractors =
+        MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
+    if (!ExpectedMergedFuncExtractors)
+      return ExpectedMergedFuncExtractors.takeError();
+
+    // Process each merged function data.
+    for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
+      if (auto FI = FunctionInfo::lookup(MergedData, *this, FuncStartAddr,
+                                         Addr)) {
+        Results.push_back(std::move(*FI));
+      } else {
+        return FI.takeError();
+      }
+    }
+  }
+
+  return Results;
+}
+
+/// Dump the whole GSYM file to \p OS: the header, the address table, the
+/// address info offsets table, the file table, the string table and finally
+/// every function info.
+void GsymReaderV2::dump(raw_ostream &OS) {
+  const auto &Header = getHeader();
+  // Dump the GSYM header.
+  OS << Header << "\n";
+  // Dump the address table.
+  OS << "Address Table:\n";
+  OS << "INDEX OFFSET";
+
+  switch (Hdr->AddrOffSize) {
+  case 1: OS << "8 "; break;
+  case 2: OS << "16"; break;
+  case 4: OS << "32"; break;
+  case 8: OS << "64"; break;
+  default: OS << "??"; break;
+  }
+  OS << " (ADDRESS)\n";
+  OS << "====== =============================== \n";
+  for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+    OS << format("[%4u] ", I);
+    switch (Hdr->AddrOffSize) {
+    case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
+    case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
+    case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
+    // 8 byte offsets need the 64 bit formatter so they are not truncated.
+    case 8: OS << HEX64(getAddrOffsets<uint64_t>()[I]); break;
+    default: break;
+    }
+    OS << " (" << HEX64(*getAddress(I)) << ")\n";
+  }
+  // Dump the address info offsets table.
+  OS << "\nAddress Info Offsets:\n";
+  OS << "INDEX Offset\n";
+  OS << "====== ==========\n";
+  for (uint32_t I = 0; I < Header.NumAddresses; ++I)
+    OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
+  // Dump the file table.
+  OS << "\nFiles:\n";
+  OS << "INDEX DIRECTORY BASENAME PATH\n";
+  OS << "====== ========== ========== ==============================\n";
+  for (uint32_t I = 0; I < Files.size(); ++I) {
+    OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
+       << HEX32(Files[I].Base) << ' ';
+    dump(OS, getFile(I));
+    OS << "\n";
+  }
+  OS << "\n" << StrTab << "\n";
+
+  // Dump every function info; a decode failure is logged to OS instead of
+  // aborting the dump.
+  for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+    OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
+    if (auto FI = getFunctionInfoAtIndex(I))
+      dump(OS, *FI);
+    else
+      logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
+  }
+}
+
+/// Dump \p FI to \p OS: its range and name, then its line table, inline
+/// info, call sites and merged functions, for whichever of those are present.
+void GsymReaderV2::dump(raw_ostream &OS, const FunctionInfo &FI,
+                        uint32_t Indent) {
+  OS.indent(Indent) << FI.Range << " \"" << getString(FI.Name) << "\"\n";
+
+  if (FI.OptLineTable)
+    dump(OS, *FI.OptLineTable, Indent);
+  if (FI.Inline)
+    dump(OS, *FI.Inline, Indent);
+  if (FI.CallSites)
+    dump(OS, *FI.CallSites, Indent);
+
+  if (!FI.MergedFunctions)
+    return;
+  assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
+  dump(OS, *FI.MergedFunctions);
+}
+
+/// Dump each merged function info of \p MFI to \p OS, preceded by a header
+/// line carrying its index and indented by 4.
+void GsymReaderV2::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
+  uint32_t Idx = 0;
+  for (const auto &Merged : MFI.MergedFunctions) {
+    OS << "++ Merged FunctionInfos[" << Idx << "]:\n";
+    dump(OS, Merged, 4);
+    ++Idx;
+  }
+}
+
+/// Dump \p CSI to \p OS: its return offset, its flags joined with " | " (or
+/// "None"), and, when present, its match regex strings joined with ";".
+void GsymReaderV2::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
+  OS << HEX16(CSI.ReturnOffset);
+
+  std::string FlagText;
+  const auto AppendFlag = [&FlagText](const char *Name) {
+    if (!FlagText.empty())
+      FlagText += " | ";
+    FlagText += Name;
+  };
+
+  if (CSI.Flags == CallSiteInfo::Flags::None) {
+    FlagText = "None";
+  } else {
+    if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
+      AppendFlag("InternalCall");
+    if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
+      AppendFlag("ExternalCall");
+  }
+  OS << " Flags[" << FlagText << "]";
+
+  if (CSI.MatchRegex.empty())
+    return;
+
+  OS << " MatchRegex[";
+  // The separator is empty for the first entry and ";" from then on.
+  const char *Separator = "";
+  for (const auto &RegexStr : CSI.MatchRegex) {
+    OS << Separator << getString(RegexStr);
+    Separator = ";";
+  }
+  OS << "]";
+}
+
+/// Dump every call site of \p CSIC to \p OS, one per line, below a heading
+/// and indented by \p Indent.
+void GsymReaderV2::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+                        uint32_t Indent) {
+  OS.indent(Indent) << "CallSites (by relative return offset):\n";
+  for (const auto &CallSite : CSIC.CallSites) {
+    OS.indent(Indent) << " ";
+    dump(OS, CallSite);
+    OS << "\n";
+  }
+}
+
+/// Dump \p LT to \p OS below a "LineTable:" heading, one line entry per
+/// line: the entry's address, its file (when the file index is non-zero) and
+/// its line number, indented by \p Indent.
+void GsymReaderV2::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
+  OS.indent(Indent);
+  OS << "LineTable:\n";
+  for (auto &LE : LT) {
+    OS.indent(Indent);
+    OS << " " << HEX64(LE.Addr) << ' ';
+    if (LE.File)
+      dump(OS, getFile(LE.File));
+    OS << ':' << LE.Line << '\n';
+  }
+}
+
+/// Dump \p II and, recursively, its children to \p OS. The top level entry
+/// (Indent == 0) is preceded by an "InlineInfo:" heading; each nesting level
+/// is indented by two more columns.
+void GsymReaderV2::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
+  if (Indent != 0)
+    OS.indent(Indent);
+  else
+    OS << "InlineInfo:\n";
+
+  OS << II.Ranges << ' ' << getString(II.Name);
+  // The call site is only printed for a non-zero call file that resolves to
+  // a file entry.
+  if (II.CallFile != 0) {
+    if (auto CallFileEntry = getFile(II.CallFile)) {
+      OS << " called from ";
+      dump(OS, CallFileEntry);
+      OS << ':' << II.CallLine;
+    }
+  }
+  OS << '\n';
+
+  for (const auto &Child : II.Children) {
+    dump(OS, Child, Indent + 2);
+  }
+}
+
+/// Dump the path of \p FE to \p OS as directory, separator, basename.
+///
+/// Nothing is printed for the file whose Dir and Base are both zero.
+/// "<invalid-file>" is printed when \p FE has no value or when both of its
+/// strings are empty. The separator is a backslash when the directory
+/// contains backslashes and no forward slashes, and a forward slash
+/// otherwise.
+void GsymReaderV2::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
+  if (!FE) {
+    OS << "<invalid-file>";
+    return;
+  }
+  // If we have the file from index 0, then don't print anything.
+  if (FE->Dir == 0 && FE->Base == 0)
+    return;
+
+  const StringRef Dir = getString(FE->Dir);
+  const StringRef Base = getString(FE->Base);
+  if (Dir.empty() && Base.empty()) {
+    OS << "<invalid-file>";
+    return;
+  }
+
+  if (!Dir.empty()) {
+    const bool UseBackslash = Dir.contains('\\') && !Dir.contains('/');
+    OS << Dir << (UseBackslash ? '\\' : '/');
+  }
+  if (!Base.empty())
+    OS << Base;
+}
>From 746850204dfc9e57988a720bece3836ca5777ec5 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 14:52:30 -0700
Subject: [PATCH 02/45] Update V2 header struct
---
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 58 +++++++++------------
1 file changed, 26 insertions(+), 32 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index 5152b5322778e..44ac8157ae212 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -25,14 +25,19 @@ class FileWriter;
constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM'
constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG'
constexpr uint32_t GSYM_VERSION_2 = 2;
-constexpr size_t GSYM_MAX_UUID_SIZE = 20;
/// The GSYM V2 header.
///
-/// The GSYM header is found at the start of a stand alone GSYM file, or as
+/// The GSYM V2 header is found at the start of a stand alone GSYM file, or as
/// the first bytes in a section when GSYM is contained in a section of an
/// executable file (ELF, mach-o, COFF).
///
+/// The V2 file format consists of the following sections in order:
+/// - Header (this struct, 40 bytes)
+/// - AddrOffsets (address offset table, aligned to AddrOffSize)
+/// - AddrInfoOffsets (address info offset table, aligned to AddrInfoOffSize)
+/// - GlobalData (blob containing FunctionInfos, UUID, string table, etc.)
+///
/// The structure is encoded exactly as it appears in the structure definition
/// with no gaps between members. Alignment should not change from system to
/// system as the members were laid out so that they shouldn't align
@@ -41,50 +46,38 @@ constexpr size_t GSYM_MAX_UUID_SIZE = 20;
/// When endianness of the system loading a GSYM file matches, the file can
/// be mmap'ed in and a pointer to the header can be cast to the first bytes
/// of the file (stand alone GSYM file) or section data (GSYM in a section).
-/// When endianness is swapped, the HeaderV2::decode() function should be used to
-/// decode the header.
+/// When endianness is swapped, the HeaderV2::decode() function should be used
+/// to decode the header.
struct HeaderV2 {
/// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file
/// is a GSYM file by scanning the first 4 bytes of a file or section.
- /// This value might appear byte swapped
+ /// This value might appear byte swapped.
uint32_t Magic;
- /// The version can number determines how the header is decoded and how each
+ /// The version number determines how the header is decoded and how each
/// InfoType in FunctionInfo is encoded/decoded. As version numbers increase,
/// "Magic" and "Version" members should always appear at offset zero and 4
/// respectively to ensure clients figure out if they can parse the format.
uint16_t Version;
/// The size in bytes of each address offset in the address offsets table.
+ /// Valid values are 1, 2, 4, or 8.
uint8_t AddrOffSize;
- /// The size in bytes of the UUID encoded in the "UUID" member.
- uint8_t UUIDSize;
+ /// The size in bytes of each entry in the address info offsets table.
+ /// Valid values are 1, 2, 4, or 8. These offsets point into GlobalData.
+ uint8_t AddrInfoOffSize;
/// The 64 bit base address that all address offsets in the address offsets
/// table are relative to. Storing a full 64 bit address allows our address
/// offsets table to be smaller on disk.
uint64_t BaseAddress;
- /// The number of addresses stored in the address offsets table.
+ /// The number of addresses stored in the address offsets table and the
+ /// address info offsets table.
uint32_t NumAddresses;
- /// The file relative offset of the start of the string table for strings
- /// contained in the GSYM file. If the GSYM in contained in a stand alone
- /// file this will be the file offset of the start of the string table. If
- /// the GSYM is contained in a section within an executable file, this can
- /// be the offset of the first string used in the GSYM file and can possibly
- /// span one or more executable string tables. This allows the strings to
- /// share string tables in an ELF or mach-o file.
- uint32_t StrtabOffset;
- /// The size in bytes of the string table. For a stand alone GSYM file, this
- /// will be the exact size in bytes of the string table. When the GSYM data
- /// is in a section within an executable file, this size can span one or more
- /// sections that contains strings. This allows any strings that are already
- /// stored in the executable file to be re-used, and any extra strings could
- /// be added to another string table and the string table offset and size
- /// can be set to span all needed string tables.
- uint32_t StrtabSize;
- /// The UUID of the original executable file. This is stored to allow
- /// matching a GSYM file to an executable file when symbolication is
- /// required. Only the first "UUIDSize" bytes of the UUID are valid. Any
- /// bytes in the UUID value that appear after the first UUIDSize bytes should
- /// be set to zero.
- uint8_t UUID[GSYM_MAX_UUID_SIZE];
+ /// Reserved for future use. Must be set to zero. Also serve as padding.
+ uint32_t Reserved;
+ /// The file offset of the start of the GlobalData blob. GlobalData contains
+ /// FunctionInfos, UUID, string table, and any other future sections.
+ uint64_t GlobalDataFileOffset;
+ /// The size in bytes of the GlobalData blob.
+ uint64_t GlobalDataFileSize;
/// Check if a header is valid and return an error if anything is wrong.
///
@@ -95,7 +88,8 @@ struct HeaderV2 {
/// - check magic value
/// - check that version number is supported
/// - check that the address offset size is supported
- /// - check that the UUID size is valid
+ /// - check that the address info offset size is supported
+ /// - check that Reserved is zero
///
/// \returns An error if anything is wrong in the header, or Error::success()
/// if there are no errors.
>From 0cd5167c95635c77e3d7f5f8b877b7bfe1113c7e Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 17:21:28 -0700
Subject: [PATCH 03/45] Add new files into CMakeLists.txt
---
llvm/lib/DebugInfo/GSYM/CMakeLists.txt | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index eb610a6b34f51..cd65dab246a4c 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -4,8 +4,10 @@ add_llvm_component_library(LLVMDebugInfoGSYM
FileWriter.cpp
FunctionInfo.cpp
GsymCreator.cpp
+ GsymCreatorV2.cpp
GsymContext.cpp
GsymReader.cpp
+ GsymReaderV2.cpp
InlineInfo.cpp
LineTable.cpp
LookupResult.cpp
>From b2a7c091fa72efe137fec819f3b5ca88f9e0f36c Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 17:27:31 -0700
Subject: [PATCH 04/45] Update V2 header struct per discussion with Greg
---
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 36 ++++++++++++---------
1 file changed, 21 insertions(+), 15 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index 44ac8157ae212..3ec6c0f735206 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -33,9 +33,9 @@ constexpr uint32_t GSYM_VERSION_2 = 2;
/// executable file (ELF, mach-o, COFF).
///
/// The V2 file format consists of the following sections in order:
-/// - Header (this struct, 40 bytes)
-/// - AddrOffsets (address offset table, aligned to AddrOffSize)
-/// - AddrInfoOffsets (address info offset table, aligned to AddrInfoOffSize)
+/// - Header (this struct, 32 bytes)
+/// - AddrOffsets (address offset table)
+/// - AddrInfoOffsets (address info offset table)
/// - GlobalData (blob containing FunctionInfos, UUID, string table, etc.)
///
/// The structure is encoded exactly as it appears in the structure definition
@@ -58,12 +58,8 @@ struct HeaderV2 {
/// "Magic" and "Version" members should always appear at offset zero and 4
/// respectively to ensure clients figure out if they can parse the format.
uint16_t Version;
- /// The size in bytes of each address offset in the address offsets table.
- /// Valid values are 1, 2, 4, or 8.
- uint8_t AddrOffSize;
- /// The size in bytes of each entry in the address info offsets table.
- /// Valid values are 1, 2, 4, or 8. These offsets point into GlobalData.
- uint8_t AddrInfoOffSize;
+ /// Padding for alignment. Must be set to zero.
+ uint16_t Padding;
/// The 64 bit base address that all address offsets in the address offsets
/// table are relative to. Storing a full 64 bit address allows our address
/// offsets table to be smaller on disk.
@@ -71,13 +67,22 @@ struct HeaderV2 {
/// The number of addresses stored in the address offsets table and the
/// address info offsets table.
uint32_t NumAddresses;
- /// Reserved for future use. Must be set to zero. Also serve as padding.
- uint32_t Reserved;
- /// The file offset of the start of the GlobalData blob. GlobalData contains
+ /// The size in bytes of each address offset in the address offsets table.
+ uint8_t AddrOffSize;
+ /// The size in bytes of each entry in the address info offsets table.
+ /// These offsets point into GlobalData.
+ uint8_t AddrInfoOffSize;
+ /// The size in bytes of each string table reference (strp) in FunctionInfo
+ /// and other data structures within GlobalData.
+ uint8_t StrpSize;
+ /// Padding for alignment. Must be set to zero.
+ uint8_t Padding2;
+ /// The file offset of the start of the GlobalData.
+ ///
+ /// GlobalData is a list of GlobalData objects, with the last one having
+ /// GlobalInfoType == GlobalInfoType::EndOfList. GlobalData contains
/// FunctionInfos, UUID, string table, and any other future sections.
uint64_t GlobalDataFileOffset;
- /// The size in bytes of the GlobalData blob.
- uint64_t GlobalDataFileSize;
/// Check if a header is valid and return an error if anything is wrong.
///
@@ -89,7 +94,8 @@ struct HeaderV2 {
/// - check that version number is supported
/// - check that the address offset size is supported
/// - check that the address info offset size is supported
- /// - check that Reserved is zero
+ /// - check that the strp size is supported
+ /// - check that padding fields are zero
///
/// \returns An error if anything is wrong in the header, or Error::success()
/// if there are no errors.
>From a9ac19916bc92649b4e066a5fdbeac252b0e4bb3 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 18:13:09 -0700
Subject: [PATCH 05/45] Update header v2 and add global data struct
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 38 +++++++++++++++++++
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 31 +++++++--------
2 files changed, 51 insertions(+), 18 deletions(-)
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
new file mode 100644
index 0000000000000..74e25da0409bf
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -0,0 +1,38 @@
+//===- GlobalData.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GLOBALDATA_H
+#define LLVM_DEBUGINFO_GSYM_GLOBALDATA_H
+
+#include <cstdint>
+
+namespace llvm {
+namespace gsym {
+
+/// Identifies the kind of section that a GlobalData entry describes.
+///
+/// This is a scoped enumeration so that enumerators such as FunctionInfo do
+/// not leak into the llvm::gsym namespace, where they would hide the
+/// gsym::FunctionInfo struct from unqualified name lookup. Refer to values as
+/// GlobalInfoType::X and use static_cast<uint32_t>() to get the encoded value.
+enum class GlobalInfoType : uint32_t {
+  EndOfList = 0u,       ///< Terminates the GlobalData array.
+  AddrOffsets = 1u,     ///< Address offset table.
+  AddrInfoOffsets = 2u, ///< Address info offset table.
+  StringTable = 3u,     ///< String table.
+  FileTable = 4u,       ///< File table.
+  FunctionInfo = 5u,    ///< FunctionInfo data blob.
+};
+
+/// GlobalData describes a section of data in a GSYM file by its type, file
+/// offset, and size. This is used to support 64-bit GSYM files where data
+/// sections can be located at arbitrary file offsets.
+struct GlobalData {
+ /// The kind of section this entry describes.
+ GlobalInfoType Type;
+ /// The file offset of the section's data, measured from the start of the
+ /// GSYM data.
+ uint64_t FileOffset;
+ /// The size in bytes of the section's data.
+ uint64_t FileSize;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_GLOBALDATA_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index 3ec6c0f735206..2d430d39c7314 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -32,16 +32,15 @@ constexpr uint32_t GSYM_VERSION_2 = 2;
/// the first bytes in a section when GSYM is contained in a section of an
/// executable file (ELF, mach-o, COFF).
///
-/// The V2 file format consists of the following sections in order:
-/// - Header (this struct, 32 bytes)
-/// - AddrOffsets (address offset table)
-/// - AddrInfoOffsets (address info offset table)
-/// - GlobalData (blob containing FunctionInfos, UUID, string table, etc.)
+/// The V2 file format consists of the following GSYM sections in order:
+/// - Header (this struct, 40 bytes)
+/// - GlobalData (a list of GlobalData, each point to one of the following GSYM sections)
+/// - Followed by all the sections mentioned in the GlobalData list at the specified file offsets and sizes, with padding of zeros for alignment.
///
-/// The structure is encoded exactly as it appears in the structure definition
+/// The header structure is encoded exactly as it appears in the structure definition
/// with no gaps between members. Alignment should not change from system to
-/// system as the members were laid out so that they shouldn't align
-/// differently on different architectures.
+/// system as the members are laid out so that they will align the same
+/// on different architectures.
///
/// When endianness of the system loading a GSYM file matches, the file can
/// be mmap'ed in and a pointer to the header can be cast to the first bytes
@@ -51,14 +50,13 @@ constexpr uint32_t GSYM_VERSION_2 = 2;
struct HeaderV2 {
/// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file
/// is a GSYM file by scanning the first 4 bytes of a file or section.
- /// This value might appear byte swapped.
+ /// This value might appear byte swapped when endianness is swapped.
uint32_t Magic;
- /// The version number determines how the header is decoded and how each
- /// InfoType in FunctionInfo is encoded/decoded. As version numbers increase,
+ /// The version number determines how the header is decoded. As version numbers increase,
/// "Magic" and "Version" members should always appear at offset zero and 4
/// respectively to ensure clients figure out if they can parse the format.
uint16_t Version;
- /// Padding for alignment. Must be set to zero.
+ /// Padding for alignment to keep all the "size" fields together. Must be set to zero.
uint16_t Padding;
/// The 64 bit base address that all address offsets in the address offsets
/// table are relative to. Storing a full 64 bit address allows our address
@@ -77,12 +75,9 @@ struct HeaderV2 {
uint8_t StrpSize;
/// Padding for alignment. Must be set to zero.
uint8_t Padding2;
- /// The file offset of the start of the GlobalData.
- ///
- /// GlobalData is a list of GlobalData objects, with the last one having
- /// GlobalInfoType == GlobalInfoType::EndOfList. GlobalData contains
- /// FunctionInfos, UUID, string table, and any other future sections.
- uint64_t GlobalDataFileOffset;
+ /// The starting point of the global data. This is a list of GlobalData objects, with the last one being the
+ /// GlobalInfoType::EndOfList. Each of the GlobalData objects point to a section in the GSYM, e.g. address FunctionInfos, UUID, string table, and any other future sections.
+ uint8_t GlobalData[0];
/// Check if a header is valid and return an error if anything is wrong.
///
>From ab3d54e8c68ed3acc18ebab2f022c1a33ecb9ebf Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 18:16:55 -0700
Subject: [PATCH 06/45] Implement header v2 methods
---
llvm/lib/DebugInfo/GSYM/CMakeLists.txt | 1 +
llvm/lib/DebugInfo/GSYM/HeaderV2.cpp | 125 +++++++++++++++++++++++++
2 files changed, 126 insertions(+)
create mode 100644 llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index cd65dab246a4c..fd3d6d581123c 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_component_library(LLVMDebugInfoGSYM
DwarfTransformer.cpp
Header.cpp
+ HeaderV2.cpp
FileWriter.cpp
FunctionInfo.cpp
GsymCreator.cpp
diff --git a/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp b/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
new file mode 100644
index 0000000000000..4372e20ea9e61
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
@@ -0,0 +1,125 @@
+//===- HeaderV2.cpp ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/HeaderV2.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+// Fixed-width hex formatters for 8, 16, 32 and 64 bit values. The width given
+// to llvm::format_hex includes the leading "0x", so each width is two
+// characters for the prefix plus two hex digits per byte of the value.
+#define HEX8(v) llvm::format_hex(v, 4)
+#define HEX16(v) llvm::format_hex(v, 6)
+#define HEX32(v) llvm::format_hex(v, 10)
+#define HEX64(v) llvm::format_hex(v, 18)
+
+using namespace llvm;
+using namespace gsym;
+
+/// Dump the non-padding fields of \p H to \p OS, one field per line, with
+/// every value printed as fixed-width hex.
+///
+/// \returns \p OS so that stream insertions can be chained.
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const HeaderV2 &H) {
+  // Padding and Padding2 are intentionally not printed.
+  return OS << "HeaderV2:\n"
+            << " Magic = " << HEX32(H.Magic) << "\n"
+            << " Version = " << HEX16(H.Version) << '\n'
+            << " BaseAddress = " << HEX64(H.BaseAddress) << '\n'
+            << " NumAddresses = " << HEX32(H.NumAddresses) << '\n'
+            << " AddrOffSize = " << HEX8(H.AddrOffSize) << '\n'
+            << " AddrInfoOffSize = " << HEX8(H.AddrInfoOffSize) << '\n'
+            << " StrpSize = " << HEX8(H.StrpSize) << '\n';
+}
+
+/// Validate every field of this header.
+///
+/// The checks run in a fixed order and the first failure is reported: magic,
+/// version, address offset size (1, 2, 4 or 8), address info offset size
+/// (4 or 8), strp size (4 or 8), then both padding fields (must be zero).
+///
+/// \returns Error::success() if the header is valid, otherwise a string error
+/// with std::errc::invalid_argument describing the first invalid field.
+llvm::Error HeaderV2::checkForError() const {
+  if (Magic != GSYM_MAGIC)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid GSYM magic 0x%8.8x", Magic);
+
+  if (Version != GSYM_VERSION_2)
+    return createStringError(std::errc::invalid_argument,
+                             "unsupported GSYM version %u", Version);
+
+  // Address offsets may be any power-of-two width up to 8 bytes.
+  const bool AddrOffSizeOK = AddrOffSize == 1 || AddrOffSize == 2 ||
+                             AddrOffSize == 4 || AddrOffSize == 8;
+  if (!AddrOffSizeOK)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid address offset size %u", AddrOffSize);
+
+  // Address info offsets and string table references are 4 or 8 bytes only.
+  if (AddrInfoOffSize != 4 && AddrInfoOffSize != 8)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid address info offset size %u",
+                             AddrInfoOffSize);
+
+  if (StrpSize != 4 && StrpSize != 8)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid strp size %u", StrpSize);
+
+  if (Padding != 0)
+    return createStringError(std::errc::invalid_argument,
+                             "padding must be zero, got %u", Padding);
+
+  if (Padding2 != 0)
+    return createStringError(std::errc::invalid_argument,
+                             "padding2 must be zero, got %u", Padding2);
+
+  return Error::success();
+}
+
+/// Decode a HeaderV2 from offset zero of \p Data and validate it.
+///
+/// \returns The decoded header, or an error if \p Data holds fewer than 24
+/// bytes or the decoded header does not pass checkForError().
+llvm::Expected<HeaderV2> HeaderV2::decode(DataExtractor &Data) {
+  // Encoded size of the fixed header fields:
+  //   Magic(4) + Version(2) + Padding(2) + BaseAddress(8) + NumAddresses(4) +
+  //   AddrOffSize(1) + AddrInfoOffSize(1) + StrpSize(1) + Padding2(1) = 24
+  constexpr uint64_t FixedHeaderSize = 24;
+  uint64_t Cursor = 0;
+  if (!Data.isValidOffsetForDataOfSize(Cursor, FixedHeaderSize))
+    return createStringError(std::errc::invalid_argument,
+                             "not enough data for a gsym::HeaderV2");
+
+  // Read the fields in their encoded order; Cursor is passed to each getter
+  // so that consecutive reads walk through the header.
+  HeaderV2 Hdr;
+  Hdr.Magic = Data.getU32(&Cursor);
+  Hdr.Version = Data.getU16(&Cursor);
+  Hdr.Padding = Data.getU16(&Cursor);
+  Hdr.BaseAddress = Data.getU64(&Cursor);
+  Hdr.NumAddresses = Data.getU32(&Cursor);
+  Hdr.AddrOffSize = Data.getU8(&Cursor);
+  Hdr.AddrInfoOffSize = Data.getU8(&Cursor);
+  Hdr.StrpSize = Data.getU8(&Cursor);
+  Hdr.Padding2 = Data.getU8(&Cursor);
+
+  // Reject headers whose contents are not valid.
+  llvm::Error ValidationErr = Hdr.checkForError();
+  if (ValidationErr)
+    return std::move(ValidationErr);
+  return Hdr;
+}
+
+/// Validate this header and, if it is valid, write its fixed fields to \p O.
+///
+/// Nothing is written when validation fails.
+///
+/// \returns The error from checkForError() if the header is invalid,
+/// otherwise Error::success() after all fields have been written.
+llvm::Error HeaderV2::encode(FileWriter &O) const {
+  llvm::Error ValidationErr = checkForError();
+  if (ValidationErr)
+    return ValidationErr;
+
+  // Identification.
+  O.writeU32(Magic);
+  O.writeU16(Version);
+  O.writeU16(Padding);
+
+  // Address table description.
+  O.writeU64(BaseAddress);
+  O.writeU32(NumAddresses);
+
+  // Per-entry sizes, followed by the trailing padding byte.
+  O.writeU8(AddrOffSize);
+  O.writeU8(AddrInfoOffSize);
+  O.writeU8(StrpSize);
+  O.writeU8(Padding2);
+
+  return Error::success();
+}
+
+/// Compare two headers field by field, including both padding members.
+///
+/// \returns true only if every field of \p LHS equals the matching field of
+/// \p RHS.
+bool llvm::gsym::operator==(const HeaderV2 &LHS, const HeaderV2 &RHS) {
+  if (LHS.Magic != RHS.Magic || LHS.Version != RHS.Version)
+    return false;
+  if (LHS.Padding != RHS.Padding || LHS.Padding2 != RHS.Padding2)
+    return false;
+  if (LHS.BaseAddress != RHS.BaseAddress ||
+      LHS.NumAddresses != RHS.NumAddresses)
+    return false;
+  if (LHS.AddrOffSize != RHS.AddrOffSize ||
+      LHS.AddrInfoOffSize != RHS.AddrInfoOffSize)
+    return false;
+  return LHS.StrpSize == RHS.StrpSize;
+}
>From bd049cab8502045fa010d93aadb62fd78ee6295a Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 19:18:30 -0700
Subject: [PATCH 07/45] Update v2 file design
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 2 +
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 52 +++++++++++++------
2 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index 74e25da0409bf..2a836b21a1636 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -21,6 +21,7 @@ enum GlobalInfoType : uint32_t {
StringTable = 3u,
FileTable = 4u,
FunctionInfo = 5u,
+ UUID = 6u,
};
/// GlobalData describes a section of data in a GSYM file by its type, file
@@ -28,6 +29,7 @@ enum GlobalInfoType : uint32_t {
/// sections can be located at arbitrary file offsets.
struct GlobalData {
GlobalInfoType Type;
+ uint32_t Padding;
uint64_t FileOffset;
uint64_t FileSize;
};
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index 2d430d39c7314..f6072b0da58a0 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -9,6 +9,7 @@
#ifndef LLVM_DEBUGINFO_GSYM_HEADERV2_H
#define LLVM_DEBUGINFO_GSYM_HEADERV2_H
+#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Error.h"
@@ -22,8 +23,6 @@ class DataExtractor;
namespace gsym {
class FileWriter;
-constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM'
-constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG'
constexpr uint32_t GSYM_VERSION_2 = 2;
/// The GSYM V2 header.
@@ -32,21 +31,38 @@ constexpr uint32_t GSYM_VERSION_2 = 2;
/// the first bytes in a section when GSYM is contained in a section of an
/// executable file (ELF, mach-o, COFF).
///
-/// The V2 file format consists of the following GSYM sections in order:
-/// - Header (this struct, 40 bytes)
-/// - GlobalData (a list of GlobalData, each point to one of the following GSYM sections)
-/// - Followed by all the sections mentioned in the GlobalData list at the specified file offsets and sizes, with padding of zeros for alignment.
+/// The V2 file layout is:
///
-/// The header structure is encoded exactly as it appears in the structure definition
-/// with no gaps between members. Alignment should not change from system to
-/// system as the members are laid out so that they will align the same
-/// on different architectures.
+/// [HeaderV2 - 24 bytes fixed]
+/// [GlobalData entries - array of 24-byte entries, terminated by EndOfList]
+/// [... data sections at arbitrary file offsets, zero-padded for alignment]
+///
+/// Each GlobalData entry (see GlobalData.h) describes a section by its type,
+/// file offset, and size. The sections can appear in any order in the file
+/// since each GlobalData entry contains an absolute file offset. The
+/// GlobalData array is terminated by an entry with type EndOfList and all
+/// other fields set to zero.
+///
+/// The GlobalInfoType values are:
+/// EndOfList = 0 (terminates GlobalData array)
+/// AddrOffsets = 1 (address offset table)
+/// AddrInfoOffsets = 2 (address info offset table)
+/// StringTable = 3 (string table)
+/// FileTable = 4 (file table)
+/// FunctionInfo = 5 (FunctionInfo data blob)
+/// UUID = 6 (binary UUID)
+///
+/// The header structure is encoded exactly as it appears in the structure
+/// definition with no gaps between members. Alignment should not change from
+/// system to system as the members are laid out so that they will align the
+/// same on different architectures.
///
/// When endianness of the system loading a GSYM file matches, the file can
/// be mmap'ed in and a pointer to the header can be cast to the first bytes
/// of the file (stand alone GSYM file) or section data (GSYM in a section).
-/// When endianness is swapped, the HeaderV2::decode() function should be used
-/// to decode the header.
+/// The trailing GlobalData array can also be mmap'ed directly: the header and
+/// each entry are 24 bytes, so every entry starts on an 8-byte boundary.
+/// When endianness is swapped, the HeaderV2::decode() function should be used
+/// to decode the header.
struct HeaderV2 {
/// The magic bytes should be set to GSYM_MAGIC. This helps detect if a file
/// is a GSYM file by scanning the first 4 bytes of a file or section.
@@ -56,7 +72,9 @@ struct HeaderV2 {
/// "Magic" and "Version" members should always appear at offset zero and 4
/// respectively to ensure clients figure out if they can parse the format.
uint16_t Version;
- /// Padding for alignment to keep all the "size" fields together. Must be set to zero.
+ /// Padding for alignment of BaseAddress to 8 bytes. Must be zero. Without this padding,
+ /// one of the size fields (AddrOffSize, AddrInfoOffSize, StrpSize) would need
+ /// to be placed here, separating it from the other size fields.
uint16_t Padding;
/// The 64 bit base address that all address offsets in the address offsets
/// table are relative to. Storing a full 64 bit address allows our address
@@ -68,15 +86,17 @@ struct HeaderV2 {
/// The size in bytes of each address offset in the address offsets table.
uint8_t AddrOffSize;
/// The size in bytes of each entry in the address info offsets table.
- /// These offsets point into GlobalData.
uint8_t AddrInfoOffSize;
/// The size in bytes of each string table reference (strp) in FunctionInfo
/// and other data structures within GlobalData.
uint8_t StrpSize;
/// Padding for alignment. Must be set to zero.
uint8_t Padding2;
- /// The starting point of the global data. This is a list of GlobalData objects, with the last one being the
- /// GlobalInfoType::EndOfList. Each of the GlobalData objects point to a section in the GSYM, e.g. address FunctionInfos, UUID, string table, and any other future sections.
+ /// The starting point of the GlobalData array. This is a list of GlobalData
+ /// entries, each describing a section in the GSYM file (e.g. AddrOffsets,
+ /// FunctionInfo, UUID, StringTable). The array is terminated by an entry
+ /// with Type set to EndOfList and FileOffset, FileSize, and Padding all
+ /// set to zero.
uint8_t GlobalData[0];
/// Check if a header is valid and return an error if anything is wrong.
>From 576f63969de1d40c39cccd2066fe434554a0c5aa Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 19:44:30 -0700
Subject: [PATCH 08/45] Implement GsymCreatorV2
---
.../llvm/DebugInfo/GSYM/GsymCreatorV2.h | 104 ++-----
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 271 ++++++++++++------
2 files changed, 210 insertions(+), 165 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
index 5316e2f131553..18c2d90e4bac1 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
@@ -58,80 +58,45 @@ class OutputAggregator;
///
/// ENCODING
///
-/// GSYM files are designed to be memory mapped into a process as shared, read
-/// only data, and used as is.
+/// GSYM V2 files are designed to be memory mapped into a process as shared,
+/// read only data, and used as is.
///
-/// The GSYM file format when in a stand alone file consists of:
-/// - Header
-/// - Address Table
-/// - Function Info Offsets
-/// - File Table
-/// - String Table
-/// - Function Info Data
+/// The V2 file layout is:
///
-/// HEADER
+/// [HeaderV2 - 24 bytes fixed]
+/// [GlobalData entries - array of 24-byte entries, terminated by EndOfList]
+/// [... data sections at file offsets specified by GlobalData entries ...]
///
/// The header is fully described in "llvm/DebugInfo/GSYM/HeaderV2.h".
+/// Each GlobalData entry (see "llvm/DebugInfo/GSYM/GlobalData.h") describes
+/// a data section by its type, file offset, and size. Sections can appear in
+/// any order since each entry carries its own file offset, measured from the
+/// start of the GSYM data. The GlobalData array is terminated by an entry
+/// with type EndOfList and all other fields set to zero.
///
-/// ADDRESS TABLE
+/// The data sections are:
///
-/// The address table immediately follows the header in the file and consists
-/// of Header.NumAddresses address offsets. These offsets are sorted and can be
-/// binary searched for efficient lookups. Addresses in the address table are
-/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
-/// This allows the address table to contain 8, 16, or 32 offsets. This allows
-/// the address table to not require full 64 bit addresses for each address.
-/// The resulting GSYM size is smaller and causes fewer pages to be touched
-/// during address lookups when the address table is smaller. The size of the
-/// address offsets in the address table is specified in the header in
-/// Header.AddrOffSize. The first offset in the address table is aligned to
-/// Header.AddrOffSize alignment to ensure efficient access when loaded into
-/// memory.
+/// - AddrOffsets: Sorted address offset table with Header.NumAddresses
+/// entries, each Header.AddrOffSize bytes. Addresses are stored as offsets
+/// from Header.BaseAddress. Aligned to Header.AddrOffSize.
///
-/// FUNCTION INFO OFFSETS TABLE
+/// - AddrInfoOffsets: File offset table with Header.NumAddresses entries,
+/// each Header.AddrInfoOffSize bytes. Each entry is the file offset (from
+/// the start of the GSYM data) to the corresponding FunctionInfo. Aligned
+/// to Header.AddrInfoOffSize.
///
-/// The function info offsets table immediately follows the address table and
-/// consists of Header.NumAddresses 32 bit file offsets: one for each address
-/// in the address table. This data is aligned to a 4 byte boundary. The
-/// offsets in this table are the relative offsets from the start offset of the
-/// GSYM header and point to the function info data for each address in the
-/// address table. Keeping this data separate from the address table helps to
-/// reduce the number of pages that are touched when address lookups occur on a
-/// GSYM file.
+/// - FileTable: A uint32_t count followed by that many FileEntry structs.
+/// See "llvm/DebugInfo/GSYM/FileEntry.h". Aligned to 4 bytes.
///
-/// FILE TABLE
+/// - StringTable: NULL-terminated strings referenced by offset. Starts with
+/// an empty string at offset zero. No alignment requirement.
///
-/// The file table immediately follows the function info offsets table. The
-/// encoding of the FileTable is:
+/// - FunctionInfo: Encoded FunctionInfo objects. Each entry is pointed to by
+/// the AddrInfoOffsets table. See "llvm/DebugInfo/GSYM/FunctionInfo.h".
+/// Aligned to 4 bytes.
///
-/// struct FileTable {
-/// uint32_t Count;
-/// FileEntry Files[];
-/// };
-///
-/// The file table starts with a 32 bit count of the number of files that are
-/// used in all of the function info, followed by that number of FileEntry
-/// structures. The file table is aligned to a 4 byte boundary, Each file in
-/// the file table is represented with a FileEntry structure.
-/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
-///
-/// STRING TABLE
-///
-/// The string table follows the file table in stand alone GSYM files and
-/// contains all strings for everything contained in the GSYM file. Any string
-/// data should be added to the string table and any references to strings
-/// inside GSYM information must be stored as 32 bit string table offsets into
-/// this string table. The string table always starts with an empty string at
-/// offset zero and is followed by any strings needed by the GSYM information.
-/// The start of the string table is not aligned to any boundary.
-///
-/// FUNCTION INFO DATA
-///
-/// The function info data is the payload that contains information about the
-/// address that is being looked up. It contains all of the encoded
-/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
-/// entry in the Function Info Offsets Table. For details on the exact encoding
-/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
+/// - UUID: Raw UUID bytes of the original executable. Only present if a UUID
+/// was set. No alignment requirement.
class GsymCreatorV2 {
// Private member variables require Mutex protections
mutable std::mutex Mutex;
@@ -189,19 +154,12 @@ class GsymCreatorV2 {
/// file.
uint64_t getMaxAddressOffset() const;
- /// Calculate the byte size of the GSYM header and tables sizes.
- ///
- /// This function will calculate the exact size in bytes of the encocded GSYM
- /// for the following items:
- /// - The GSYM header
- /// - The Address offset table
- /// - The Address info offset table
- /// - The file table
- /// - The string table
+ /// Calculate the byte size of the GSYM V2 header, GlobalData entries, and
+ /// table sections (everything except FunctionInfo data).
///
/// This is used to help split GSYM files into segments.
///
- /// \returns Size in bytes the GSYM header and tables.
+ /// \returns Size in bytes of the header and tables.
uint64_t calculateHeaderAndTableSize() const;
/// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 946057b2e6072..01398c51c1db3 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -7,10 +7,12 @@
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GlobalData.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -77,6 +79,15 @@ llvm::Error GsymCreatorV2::save(StringRef Path, llvm::endianness ByteOrder,
return encode(O);
}
+/// Emit one 24-byte GlobalData record to \p O: the section type as a 32-bit
+/// value, 32 bits of zero padding, then the 64-bit \p FileOffset and the
+/// 64-bit \p FileSize.
+static void writeGlobalDataEntry(FileWriter &O, GlobalInfoType Type,
+                                 uint64_t FileOffset, uint64_t FileSize) {
+  const uint32_t TypeValue = static_cast<uint32_t>(Type);
+  constexpr uint32_t ZeroPadding = 0;
+  O.writeU32(TypeValue);
+  O.writeU32(ZeroPadding);
+  O.writeU64(FileOffset);
+  O.writeU64(FileSize);
+}
+
llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
std::lock_guard<std::mutex> Guard(Mutex);
if (Funcs.empty())
@@ -85,122 +96,191 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
if (!Finalized)
return createStringError(std::errc::invalid_argument,
"GsymCreatorV2 wasn't finalized prior to encoding");
-
if (Funcs.size() > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"too many FunctionInfos");
- std::optional<uint64_t> BaseAddress = getBaseAddress();
- // Base address should be valid if we have any functions.
- if (!BaseAddress)
+ std::optional<uint64_t> BaseAddr = getBaseAddress();
+ if (!BaseAddr)
return createStringError(std::errc::invalid_argument,
"invalid base address");
+
+ const uint8_t AddrOffSize = getAddressOffsetSize();
+
+ // Pre-encode all FunctionInfo objects into a temporary buffer so we know the
+ // total FunctionInfo section size and each function's offset within it.
+ SmallVector<char, 0> FIBuf;
+ raw_svector_ostream FIOS(FIBuf);
+ FileWriter FIFW(FIOS, O.getByteOrder());
+ std::vector<uint64_t> FIRelativeOffsets;
+ for (const auto &FI : Funcs) {
+ if (auto OffOrErr = FI.encode(FIFW))
+ FIRelativeOffsets.push_back(*OffOrErr);
+ else
+ return OffOrErr.takeError();
+ }
+ const uint64_t FISectionSize = FIBuf.size();
+ const uint64_t StringTableSize = StrTab.getSize();
+
+ // Determine StrpSize based on string table size.
+ const uint8_t StrpSize = (StringTableSize > UINT32_MAX) ? 8 : 4;
+
+ // Compute number of GlobalData entries.
+ const bool HasUUID = !UUID.empty();
+ // Sections: AddrOffsets, AddrInfoOffsets, StringTable, FileTable, FunctionInfo
+ // Plus UUID if present, plus EndOfList terminator.
+ const uint32_t NumGlobalDataEntries = 5 + (HasUUID ? 1 : 0) + 1;
+ const uint64_t GlobalDataArraySize =
+ static_cast<uint64_t>(NumGlobalDataEntries) * 24;
+
+ // Plan the file layout. All offsets are relative to the start of the GSYM
+ // data (i.e., the start of the header). We place sections sequentially after
+ // the header and GlobalData entries in a convenient order.
+ constexpr uint64_t HeaderSize = 24;
+ uint64_t CurOffset = HeaderSize + GlobalDataArraySize;
+
+ // AddrOffsets section.
+ CurOffset = llvm::alignTo(CurOffset, AddrOffSize);
+ const uint64_t AddrOffsetsOffset = CurOffset;
+ const uint64_t AddrOffsetsSize = Funcs.size() * AddrOffSize;
+ CurOffset += AddrOffsetsSize;
+
+ // Determine AddrInfoOffSize: if the estimated end of the FunctionInfo section
+ // would exceed UINT32_MAX, use 8-byte offsets.
+ uint8_t AddrInfoOffSize = 4;
+ {
+ // Estimate conservatively with 4-byte AddrInfoOffsets.
+ uint64_t Est = CurOffset;
+ Est = llvm::alignTo(Est, 4);
+ Est += Funcs.size() * 4; // AddrInfoOffsets
+ Est = llvm::alignTo(Est, 4);
+ Est += 4 + Files.size() * sizeof(FileEntry); // FileTable
+ Est += StringTableSize;
+ Est = llvm::alignTo(Est, 4);
+ Est += FISectionSize;
+ if (Est > UINT32_MAX)
+ AddrInfoOffSize = 8;
+ }
+
+ // AddrInfoOffsets section.
+ CurOffset = llvm::alignTo(CurOffset, AddrInfoOffSize);
+ const uint64_t AddrInfoOffsetsOffset = CurOffset;
+ const uint64_t AddrInfoOffsetsSize = Funcs.size() * AddrInfoOffSize;
+ CurOffset += AddrInfoOffsetsSize;
+
+ // FileTable section.
+ CurOffset = llvm::alignTo(CurOffset, 4);
+ const uint64_t FileTableOffset = CurOffset;
+ const uint64_t FileTableSize = 4 + Files.size() * sizeof(FileEntry);
+ CurOffset += FileTableSize;
+
+ // StringTable section (no alignment requirement).
+ const uint64_t StringTableOffset = CurOffset;
+ CurOffset += StringTableSize;
+
+ // FunctionInfo section.
+ CurOffset = llvm::alignTo(CurOffset, 4);
+ const uint64_t FISectionOffset = CurOffset;
+ CurOffset += FISectionSize;
+
+ // UUID section (no alignment requirement).
+ const uint64_t UUIDOffset = CurOffset;
+ const uint64_t UUIDSectionSize = UUID.size();
+
+ // Build and write the header.
HeaderV2 Hdr;
Hdr.Magic = GSYM_MAGIC;
Hdr.Version = GSYM_VERSION_2;
- Hdr.AddrOffSize = getAddressOffsetSize();
- Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
- Hdr.BaseAddress = *BaseAddress;
+ Hdr.Padding = 0;
+ Hdr.BaseAddress = *BaseAddr;
Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
- Hdr.StrtabOffset = 0; // We will fix this up later.
- Hdr.StrtabSize = 0; // We will fix this up later.
- memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
- if (UUID.size() > sizeof(Hdr.UUID))
- return createStringError(std::errc::invalid_argument,
- "invalid UUID size %u", (uint32_t)UUID.size());
- // Copy the UUID value if we have one.
- if (UUID.size() > 0)
- memcpy(Hdr.UUID, UUID.data(), UUID.size());
- // Write out the header.
- llvm::Error Err = Hdr.encode(O);
- if (Err)
+ Hdr.AddrOffSize = AddrOffSize;
+ Hdr.AddrInfoOffSize = AddrInfoOffSize;
+ Hdr.StrpSize = StrpSize;
+ Hdr.Padding2 = 0;
+ if (auto Err = Hdr.encode(O))
return Err;
+ // Write GlobalData entries.
+ writeGlobalDataEntry(O, GlobalInfoType::AddrOffsets,
+ AddrOffsetsOffset, AddrOffsetsSize);
+ writeGlobalDataEntry(O, GlobalInfoType::AddrInfoOffsets,
+ AddrInfoOffsetsOffset, AddrInfoOffsetsSize);
+ writeGlobalDataEntry(O, GlobalInfoType::FileTable,
+ FileTableOffset, FileTableSize);
+ writeGlobalDataEntry(O, GlobalInfoType::StringTable,
+ StringTableOffset, StringTableSize);
+ writeGlobalDataEntry(O, GlobalInfoType::FunctionInfo,
+ FISectionOffset, FISectionSize);
+ if (HasUUID)
+ writeGlobalDataEntry(O, GlobalInfoType::UUID,
+ UUIDOffset, UUIDSectionSize);
+ // EndOfList terminator.
+ writeGlobalDataEntry(O, GlobalInfoType::EndOfList, 0, 0);
+
+ // Write AddrOffsets section.
+ O.alignTo(AddrOffSize);
+ assert(O.tell() == AddrOffsetsOffset);
const uint64_t MaxAddressOffset = getMaxAddressOffset();
- // Write out the address offsets.
- O.alignTo(Hdr.AddrOffSize);
- for (const auto &FuncInfo : Funcs) {
- uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
- // Make sure we calculated the address offsets byte size correctly by
- // verifying the current address offset is within ranges. We have seen bugs
- // introduced when the code changes that can cause problems here so it is
- // good to catch this during testing.
+ for (const auto &FI : Funcs) {
+ uint64_t AddrOffset = FI.startAddress() - *BaseAddr;
assert(AddrOffset <= MaxAddressOffset);
(void)MaxAddressOffset;
- switch (Hdr.AddrOffSize) {
- case 1:
- O.writeU8(static_cast<uint8_t>(AddrOffset));
- break;
- case 2:
- O.writeU16(static_cast<uint16_t>(AddrOffset));
- break;
- case 4:
- O.writeU32(static_cast<uint32_t>(AddrOffset));
- break;
- case 8:
- O.writeU64(AddrOffset);
- break;
+ switch (AddrOffSize) {
+ case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
+ case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
+ case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
+ case 8: O.writeU64(AddrOffset); break;
}
}
- // Write out all zeros for the AddrInfoOffsets.
- O.alignTo(4);
- const off_t AddrInfoOffsetsOffset = O.tell();
- for (size_t i = 0, n = Funcs.size(); i < n; ++i)
- O.writeU32(0);
+ // Write AddrInfoOffsets section. Each entry is the absolute file offset
+ // (from the start of the GSYM data) to the corresponding FunctionInfo.
+ O.alignTo(AddrInfoOffSize);
+ assert(O.tell() == AddrInfoOffsetsOffset);
+ for (uint64_t RelOff : FIRelativeOffsets) {
+ uint64_t AbsOff = FISectionOffset + RelOff;
+ if (AddrInfoOffSize == 4) {
+ if (AbsOff > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument,
+ "addr info offset exceeded 32-bit max");
+ O.writeU32(static_cast<uint32_t>(AbsOff));
+ } else {
+ O.writeU64(AbsOff);
+ }
+ }
- // Write out the file table
+ // Write FileTable section.
O.alignTo(4);
+ assert(O.tell() == FileTableOffset);
assert(!Files.empty());
assert(Files[0].Dir == 0);
assert(Files[0].Base == 0);
- size_t NumFiles = Files.size();
- if (NumFiles > UINT32_MAX)
+ if (Files.size() > UINT32_MAX)
return createStringError(std::errc::invalid_argument, "too many files");
- O.writeU32(static_cast<uint32_t>(NumFiles));
- for (auto File : Files) {
+ O.writeU32(static_cast<uint32_t>(Files.size()));
+ for (const auto &File : Files) {
O.writeU32(File.Dir);
O.writeU32(File.Base);
}
- // Write out the string table.
- const off_t StrtabOffset = O.tell();
+ // Write StringTable section.
+ assert(O.tell() == StringTableOffset);
StrTab.write(O.get_stream());
- const off_t StrtabSize = O.tell() - StrtabOffset;
- std::vector<uint32_t> AddrInfoOffsets;
- // Verify that the size of the string table does not exceed 32-bit max.
- // This means the offsets in the string table will not exceed 32-bit max.
- if (StrtabSize > UINT32_MAX) {
- return createStringError(std::errc::invalid_argument,
- "string table size exceeded 32-bit max");
+ // Write FunctionInfo section (pre-encoded data).
+ O.alignTo(4);
+ assert(O.tell() == FISectionOffset);
+ O.writeData(ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(FIBuf.data()),
+ FIBuf.size()));
+
+ // Write UUID section.
+ if (HasUUID) {
+ assert(O.tell() == UUIDOffset);
+ O.writeData(ArrayRef<uint8_t>(UUID.data(), UUID.size()));
}
- // Write out the address infos for each function info.
- for (const auto &FuncInfo : Funcs) {
- if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) {
- // Verify that the address info offsets do not exceed 32-bit max.
- uint64_t Offset = OffsetOrErr.get();
- if (Offset > UINT32_MAX) {
- return createStringError(std::errc::invalid_argument,
- "address info offset exceeded 32-bit max");
- }
-
- AddrInfoOffsets.push_back(Offset);
- } else
- return OffsetOrErr.takeError();
- }
- // Fixup the string table offset and size in the header
- O.fixup32((uint32_t)StrtabOffset, offsetof(HeaderV2, StrtabOffset));
- O.fixup32((uint32_t)StrtabSize, offsetof(HeaderV2, StrtabSize));
-
- // Fixup all address info offsets
- uint64_t Offset = 0;
- for (auto AddrInfoOffset : AddrInfoOffsets) {
- O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
- Offset += 4;
- }
- return ErrorSuccess();
+ return Error::success();
}
llvm::Error GsymCreatorV2::loadCallSitesFromYAML(StringRef YAMLFile) {
@@ -494,17 +574,24 @@ uint8_t GsymCreatorV2::getAddressOffsetSize() const {
}
uint64_t GsymCreatorV2::calculateHeaderAndTableSize() const {
- uint64_t Size = sizeof(HeaderV2);
+ constexpr uint64_t HeaderSize = 24;
const size_t NumFuncs = Funcs.size();
- // Add size of address offset table
+ // GlobalData entries: 5 sections + UUID (if any) + EndOfList terminator.
+ const uint32_t NumEntries = 5 + (UUID.empty() ? 0 : 1) + 1;
+ uint64_t Size = HeaderSize + NumEntries * 24;
+ // AddrOffsets
+ Size = llvm::alignTo(Size, getAddressOffsetSize());
Size += NumFuncs * getAddressOffsetSize();
- // Add size of address info offsets which are 32 bit integers in version 1.
- Size += NumFuncs * sizeof(uint32_t);
- // Add file table size
- Size += Files.size() * sizeof(FileEntry);
- // Add string table size
+ // AddrInfoOffsets (assume 4-byte entries for estimation)
+ Size = llvm::alignTo(Size, 4);
+ Size += NumFuncs * 4;
+ // FileTable
+ Size = llvm::alignTo(Size, 4);
+ Size += 4 + Files.size() * sizeof(FileEntry);
+ // StringTable
Size += StrTab.getSize();
-
+ // UUID
+ Size += UUID.size();
return Size;
}
>From 9f425e8a25b4ed5fbba264ed56f777c56e704f25 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 20:15:28 -0700
Subject: [PATCH 09/45] Add tests for v2 creator
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 2 +-
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 6 +-
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 61 +--
llvm/unittests/DebugInfo/GSYM/CMakeLists.txt | 1 +
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 372 ++++++++++++++++++
5 files changed, 390 insertions(+), 52 deletions(-)
create mode 100644 llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index 2a836b21a1636..dd5721f024704 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -14,7 +14,7 @@
namespace llvm {
namespace gsym {
-enum GlobalInfoType : uint32_t {
+enum class GlobalInfoType : uint32_t {
EndOfList = 0u,
AddrOffsets = 1u,
AddrInfoOffsets = 2u,
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 01398c51c1db3..7804a96f1616e 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -284,9 +284,9 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
}
llvm::Error GsymCreatorV2::loadCallSitesFromYAML(StringRef YAMLFile) {
- // Use the loader to load call site information from the YAML file.
- CallSiteInfoLoader Loader(*this, Funcs);
- return Loader.loadYAML(YAMLFile);
+ // TODO: Implement V2-specific call site loading.
+ return createStringError(std::errc::not_supported,
+ "call site loading not yet supported in V2");
}
void GsymCreatorV2::prepareMergedFunctions(OutputAggregator &Out) {
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index de074cdde02dd..28452d5ddf385 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -123,11 +123,9 @@ GsymReaderV2::parse() {
return createStringError(std::errc::invalid_argument,
"failed to read file table");
- // Get the string table.
- FileData.setOffset(Hdr->StrtabOffset);
- if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
- return createStringError(std::errc::invalid_argument,
- "failed to read string table");
+ // TODO: V2 reader needs to read string table from GlobalData sections.
+ return createStringError(std::errc::not_supported,
+ "V2 native-endian reader not yet implemented");
} else {
// This is the non native endianness case that is not common and not
// optimized for lookups. Here we decode the important tables into local
@@ -185,12 +183,9 @@ GsymReaderV2::parse() {
return createStringError(std::errc::invalid_argument,
"failed to read file table");
}
- // Get the string table.
- StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
- Hdr->StrtabSize);
- if (StrTab.Data.empty())
- return createStringError(std::errc::invalid_argument,
- "failed to read string table");
+ // TODO: V2 reader needs to read string table from GlobalData sections.
+ return createStringError(std::errc::not_supported,
+ "V2 swapped-endian reader not yet implemented");
}
return Error::success();
@@ -337,47 +332,17 @@ GsymReaderV2::getFunctionInfoAtIndex(uint64_t Idx) const {
llvm::Expected<LookupResult>
GsymReaderV2::lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFunctionsData) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
- MergedFunctionsData);
- else
- return ExpectedData.takeError();
+ // TODO: V2 reader lookup not yet implemented — FunctionInfo::lookup expects
+ // a GsymReader reference, not GsymReaderV2.
+ return createStringError(std::errc::not_supported,
+ "V2 reader lookup not yet implemented");
}
llvm::Expected<std::vector<LookupResult>>
GsymReaderV2::lookupAll(uint64_t Addr) const {
- std::vector<LookupResult> Results;
- std::optional<DataExtractor> MergedFunctionsData;
-
- // First perform a lookup to get the primary function info result.
- auto MainResult = lookup(Addr, &MergedFunctionsData);
- if (!MainResult)
- return MainResult.takeError();
-
- // Add the main result as the first entry.
- Results.push_back(std::move(*MainResult));
-
- // Now process any merged functions data that was found during the lookup.
- if (MergedFunctionsData) {
- // Get data extractors for each merged function.
- auto ExpectedMergedFuncExtractors =
- MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
- if (!ExpectedMergedFuncExtractors)
- return ExpectedMergedFuncExtractors.takeError();
-
- // Process each merged function data.
- for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
- if (auto FI = FunctionInfo::lookup(MergedData, *this,
- MainResult->FuncRange.start(), Addr)) {
- Results.push_back(std::move(*FI));
- } else {
- return FI.takeError();
- }
- }
- }
-
- return Results;
+ // TODO: V2 reader lookupAll not yet implemented.
+ return createStringError(std::errc::not_supported,
+ "V2 reader lookupAll not yet implemented");
}
void GsymReaderV2::dump(raw_ostream &OS) {
diff --git a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt
index 029767471c864..bb45ba03df8b8 100644
--- a/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/unittests/DebugInfo/GSYM/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(DebugInfoGSYMTests
GSYMTest.cpp
+ GSYMV2Test.cpp
)
target_link_libraries(DebugInfoGSYMTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
new file mode 100644
index 0000000000000..13ab384e6a53d
--- /dev/null
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -0,0 +1,372 @@
+//===- llvm/unittest/DebugInfo/GSYMV2Test.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GlobalData.h"
+#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
+#include "llvm/DebugInfo/GSYM/HeaderV2.h"
+#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Testing/Support/Error.h"
+
+#include "gtest/gtest.h"
+#include <string>
+
+using namespace llvm;
+using namespace gsym;
+
+/// Assert that \p Err holds a failure and that every error it carries reports
+/// exactly \p ExpectedMsg as its message. The error is consumed.
+static void checkError(std::string ExpectedMsg, Error Err) {
+  ASSERT_TRUE(bool(Err));
+  handleAllErrors(std::move(Err), [&ExpectedMsg](const ErrorInfoBase &Info) {
+    const std::string ActualMsg = Info.message();
+    EXPECT_EQ(ActualMsg, ExpectedMsg);
+  });
+}
+
+/// Serialize \p GC through a FileWriter that uses \p ByteOrder and return the
+/// bytes that were written, or the error returned by GsymCreatorV2::encode().
+static Expected<SmallString<512>> encodeV2(const GsymCreatorV2 &GC,
+                                           llvm::endianness ByteOrder) {
+  SmallString<512> Bytes;
+  raw_svector_ostream Stream(Bytes);
+  FileWriter Writer(Stream, ByteOrder);
+  Error EncodeErr = GC.encode(Writer);
+  if (EncodeErr)
+    return std::move(EncodeErr);
+  return Bytes;
+}
+
+/// Decode a HeaderV2 from \p Data using a DataExtractor configured for
+/// \p ByteOrder and an 8-byte address size.
+static Expected<HeaderV2> decodeHeaderV2(StringRef Data,
+                                         llvm::endianness ByteOrder) {
+  const bool IsLittleEndian = ByteOrder == llvm::endianness::little;
+  DataExtractor Extractor(Data, IsLittleEndian, /*AddressSize=*/8);
+  return HeaderV2::decode(Extractor);
+}
+
+/// Read one GlobalData entry from \p Data starting at \p Offset.
+///
+/// The fields are read in this order: 32-bit type, 32-bit padding, 64-bit
+/// file offset, 64-bit file size. \p Offset is handed to every read, so on
+/// return it refers to the position after the last field that was read.
+static GlobalData decodeGlobalDataEntry(StringRef Data, uint64_t &Offset,
+                                        llvm::endianness ByteOrder) {
+  const bool IsLittleEndian = ByteOrder == llvm::endianness::little;
+  DataExtractor Extractor(Data, IsLittleEndian, /*AddressSize=*/8);
+  // Keep the reads in on-disk field order; each one advances Offset.
+  const uint32_t RawType = Extractor.getU32(&Offset);
+  const uint32_t RawPadding = Extractor.getU32(&Offset);
+  const uint64_t RawFileOffset = Extractor.getU64(&Offset);
+  const uint64_t RawFileSize = Extractor.getU64(&Offset);
+
+  GlobalData Entry;
+  Entry.Type = static_cast<GlobalInfoType>(RawType);
+  Entry.Padding = RawPadding;
+  Entry.FileOffset = RawFileOffset;
+  Entry.FileSize = RawFileSize;
+  return Entry;
+}
+
+//===----------------------------------------------------------------------===//
+// Encode error tests
+//===----------------------------------------------------------------------===//
+
+TEST(GSYMV2Test, TestEncodeErrorNoFunctions) {
+  // A creator that was never given a FunctionInfo must refuse to encode.
+  GsymCreatorV2 Creator;
+  Expected<SmallString<512>> Encoded =
+      encodeV2(Creator, llvm::endianness::little);
+  checkError("no functions to encode", Encoded.takeError());
+}
+
+TEST(GSYMV2Test, TestEncodeErrorNotFinalized) {
+  // The creator holds one function but finalize() is never called, so
+  // encoding must be rejected.
+  GsymCreatorV2 Creator;
+  const uint32_t FooName = Creator.insertString("foo");
+  Creator.addFunctionInfo(FunctionInfo(0x1000, 0x100, FooName));
+  Expected<SmallString<512>> Encoded =
+      encodeV2(Creator, llvm::endianness::little);
+  checkError("GsymCreatorV2 wasn't finalized prior to encoding",
+             Encoded.takeError());
+}
+
+TEST(GSYMV2Test, TestDoubleFinalize) {
+  // The first finalize() must succeed; a second call on the same creator
+  // must fail with "already finalized".
+  GsymCreatorV2 GC;
+  const uint32_t Name = GC.insertString("foo");
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name));
+  OutputAggregator Null(nullptr);
+  // ASSERT_THAT_ERROR consumes the Error. With ASSERT_FALSE(bool(Err)) an
+  // unexpected failure would return early with the Error still unhandled,
+  // which aborts the process in builds that check for unhandled errors
+  // instead of reporting a normal test failure.
+  ASSERT_THAT_ERROR(GC.finalize(Null), Succeeded());
+  // checkError() itself asserts that the Error is a failure, so no separate
+  // ASSERT_TRUE is needed here.
+  checkError("already finalized", GC.finalize(Null));
+}
+
+//===----------------------------------------------------------------------===//
+// Header and GlobalData structure tests
+//===----------------------------------------------------------------------===//
+
+/// Encode a V2 GSYM and verify the header fields and GlobalData layout.
+///
+/// Two functions are added at \p BaseAddr + 0x00 and \p BaseAddr + 0x20, a
+/// 16-byte UUID is attached when \p HasUUID is set, and the creator is
+/// finalized and encoded using \p ByteOrder. The decoded header is compared
+/// with the expected values, then the GlobalData entries that follow the
+/// header are walked until EndOfList. Every entry must have zero padding;
+/// every section entry must have a non-zero offset, the expected size, and
+/// must lie entirely inside the encoded buffer.
+///
+/// \param ByteOrder Byte order used for both encoding and decoding.
+/// \param BaseAddr Start address of the first function.
+/// \param ExpectedAddrOffSize Expected HeaderV2::AddrOffSize.
+/// \param ExpectedNumAddresses Expected HeaderV2::NumAddresses.
+/// \param HasUUID Whether a UUID is set and a UUID section is expected.
+static void TestV2HeaderAndGlobalData(llvm::endianness ByteOrder,
+                                      uint64_t BaseAddr,
+                                      uint8_t ExpectedAddrOffSize,
+                                      uint32_t ExpectedNumAddresses,
+                                      bool HasUUID) {
+  // GlobalData entries start right after the 24-byte fixed header.
+  constexpr uint64_t GlobalDataStart = 24;
+
+  GsymCreatorV2 GC;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  GC.addFunctionInfo(FunctionInfo(BaseAddr + 0x00, 0x10, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(BaseAddr + 0x20, 0x10, Func2Name));
+
+  uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
+  if (HasUUID)
+    GC.setUUID(UUID);
+
+  OutputAggregator Null(nullptr);
+  // ASSERT_THAT_ERROR consumes the Error, so a finalize() failure is reported
+  // as a test failure rather than leaving an unhandled Error behind when the
+  // helper returns early.
+  ASSERT_THAT_ERROR(GC.finalize(Null), Succeeded());
+
+  auto Result = encodeV2(GC, ByteOrder);
+  ASSERT_THAT_EXPECTED(Result, Succeeded());
+  StringRef Data = *Result;
+
+  // Verify header.
+  auto HdrOrErr = decodeHeaderV2(Data, ByteOrder);
+  ASSERT_THAT_EXPECTED(HdrOrErr, Succeeded());
+  const HeaderV2 &Hdr = *HdrOrErr;
+  EXPECT_EQ(Hdr.Magic, GSYM_MAGIC);
+  EXPECT_EQ(Hdr.Version, GSYM_VERSION_2);
+  EXPECT_EQ(Hdr.Padding, 0u);
+  EXPECT_EQ(Hdr.BaseAddress, BaseAddr);
+  EXPECT_EQ(Hdr.NumAddresses, ExpectedNumAddresses);
+  EXPECT_EQ(Hdr.AddrOffSize, ExpectedAddrOffSize);
+  EXPECT_EQ(Hdr.AddrInfoOffSize, 4u); // Small file, should be 4 bytes.
+  EXPECT_EQ(Hdr.StrpSize, 4u);        // Small string table, should be 4 bytes.
+  EXPECT_EQ(Hdr.Padding2, 0u);
+
+  bool FoundAddrOffsets = false;
+  bool FoundAddrInfoOffsets = false;
+  bool FoundStringTable = false;
+  bool FoundFileTable = false;
+  bool FoundFunctionInfo = false;
+  bool FoundUUID = false;
+  bool FoundEndOfList = false;
+
+  // Walk the GlobalData entries once, stopping at the EndOfList terminator.
+  uint64_t Offset = GlobalDataStart;
+  while (Offset < Data.size() && !FoundEndOfList) {
+    GlobalData GD = decodeGlobalDataEntry(Data, Offset, ByteOrder);
+    EXPECT_EQ(GD.Padding, 0u);
+
+    // Every real section must fit inside the encoded buffer.
+    if (GD.Type != GlobalInfoType::EndOfList) {
+      EXPECT_LE(GD.FileOffset + GD.FileSize, Data.size())
+          << "Section type " << static_cast<uint32_t>(GD.Type)
+          << " extends beyond buffer";
+    }
+
+    switch (GD.Type) {
+    case GlobalInfoType::EndOfList:
+      EXPECT_EQ(GD.FileOffset, 0u);
+      EXPECT_EQ(GD.FileSize, 0u);
+      FoundEndOfList = true;
+      break;
+    case GlobalInfoType::AddrOffsets:
+      EXPECT_EQ(GD.FileSize,
+                ExpectedNumAddresses * (uint64_t)ExpectedAddrOffSize);
+      EXPECT_GT(GD.FileOffset, 0u);
+      FoundAddrOffsets = true;
+      break;
+    case GlobalInfoType::AddrInfoOffsets:
+      EXPECT_EQ(GD.FileSize, ExpectedNumAddresses * 4u); // AddrInfoOffSize=4
+      EXPECT_GT(GD.FileOffset, 0u);
+      FoundAddrInfoOffsets = true;
+      break;
+    case GlobalInfoType::StringTable:
+      EXPECT_GT(GD.FileSize, 0u);
+      EXPECT_GT(GD.FileOffset, 0u);
+      FoundStringTable = true;
+      break;
+    case GlobalInfoType::FileTable:
+      EXPECT_GT(GD.FileSize, 0u);
+      EXPECT_GT(GD.FileOffset, 0u);
+      FoundFileTable = true;
+      break;
+    case GlobalInfoType::FunctionInfo:
+      EXPECT_GT(GD.FileSize, 0u);
+      EXPECT_GT(GD.FileOffset, 0u);
+      FoundFunctionInfo = true;
+      break;
+    case GlobalInfoType::UUID:
+      EXPECT_EQ(GD.FileSize, sizeof(UUID));
+      EXPECT_GT(GD.FileOffset, 0u);
+      FoundUUID = true;
+      break;
+    }
+  }
+
+  EXPECT_TRUE(FoundAddrOffsets);
+  EXPECT_TRUE(FoundAddrInfoOffsets);
+  EXPECT_TRUE(FoundStringTable);
+  EXPECT_TRUE(FoundFileTable);
+  EXPECT_TRUE(FoundFunctionInfo);
+  EXPECT_TRUE(FoundEndOfList);
+  EXPECT_EQ(FoundUUID, HasUUID);
+}
+
+TEST(GSYMV2Test, TestHeaderAndGlobalDataLittle) {
+ // Little-endian encoding with a UUID set; a UUID section is expected.
+ TestV2HeaderAndGlobalData(llvm::endianness::little, 0x1000,
+ /*ExpectedAddrOffSize=*/1,
+ /*ExpectedNumAddresses=*/2,
+ /*HasUUID=*/true);
+}
+
+TEST(GSYMV2Test, TestHeaderAndGlobalDataBig) {
+ // Same checks as the little-endian case, encoded and decoded as big-endian.
+ TestV2HeaderAndGlobalData(llvm::endianness::big, 0x1000,
+ /*ExpectedAddrOffSize=*/1,
+ /*ExpectedNumAddresses=*/2,
+ /*HasUUID=*/true);
+}
+
+TEST(GSYMV2Test, TestHeaderAndGlobalDataNoUUID) {
+ // No UUID is set, so the helper expects no UUID entry in the GlobalData list.
+ TestV2HeaderAndGlobalData(llvm::endianness::little, 0x1000,
+ /*ExpectedAddrOffSize=*/1,
+ /*ExpectedNumAddresses=*/2,
+ /*HasUUID=*/false);
+}
+
+//===----------------------------------------------------------------------===//
+// Address offset size tests
+//===----------------------------------------------------------------------===//
+
+/// Encode two functions, one at \p BaseAddr and one at
+/// \p BaseAddr + \p Func2Offset, and check that the decoded header reports
+/// \p ExpectedAddrOffSize as its AddrOffSize. Encoding and decoding are both
+/// little-endian.
+static void TestV2AddrOffSize(uint64_t BaseAddr, uint64_t Func2Offset,
+                              uint8_t ExpectedAddrOffSize) {
+  GsymCreatorV2 GC;
+  const uint32_t Func1Name = GC.insertString("foo");
+  const uint32_t Func2Name = GC.insertString("bar");
+  GC.addFunctionInfo(FunctionInfo(BaseAddr, 0x10, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(BaseAddr + Func2Offset, 0x10, Func2Name));
+  OutputAggregator Null(nullptr);
+  // ASSERT_THAT_ERROR consumes the Error, so a finalize() failure is reported
+  // as a test failure rather than leaving an unhandled Error behind when the
+  // helper returns early.
+  ASSERT_THAT_ERROR(GC.finalize(Null), Succeeded());
+
+  auto Result = encodeV2(GC, llvm::endianness::little);
+  ASSERT_THAT_EXPECTED(Result, Succeeded());
+
+  auto HdrOrErr = decodeHeaderV2(*Result, llvm::endianness::little);
+  ASSERT_THAT_EXPECTED(HdrOrErr, Succeeded());
+  EXPECT_EQ(HdrOrErr->AddrOffSize, ExpectedAddrOffSize);
+}
+
+TEST(GSYMV2Test, TestAddrOffSize1Byte) {
+ // Second function starts 0x20 past the base; expect 1-byte address offsets.
+ TestV2AddrOffSize(0x1000, 0x20, 1);
+}
+
+TEST(GSYMV2Test, TestAddrOffSize2Byte) {
+ // Offset 0x200 does not fit in one byte; expect 2-byte address offsets.
+ TestV2AddrOffSize(0x1000, 0x200, 2);
+}
+
+TEST(GSYMV2Test, TestAddrOffSize4Byte) {
+ // Offset 0x20000 does not fit in two bytes; expect 4-byte address offsets.
+ TestV2AddrOffSize(0x1000, 0x20000, 4);
+}
+
+TEST(GSYMV2Test, TestAddrOffSize8Byte) {
+ // Offset 0x100000000 does not fit in four bytes; expect 8-byte offsets.
+ TestV2AddrOffSize(0x1000, 0x100000000ULL, 8);
+}
+
+//===----------------------------------------------------------------------===//
+// AddrInfoOffsets verification
+//===----------------------------------------------------------------------===//
+
+TEST(GSYMV2Test, TestAddrInfoOffsetsPointToFunctionInfo) {
+  // Verify that each AddrInfoOffset entry points to a valid location within
+  // the FunctionInfo section and that the entries are strictly increasing.
+  constexpr uint32_t NumFuncs = 3;
+  // GlobalData entries start right after the 24-byte fixed header.
+  constexpr uint64_t GlobalDataStart = 24;
+
+  GsymCreatorV2 GC;
+  const uint32_t Func1Name = GC.insertString("func_a");
+  const uint32_t Func2Name = GC.insertString("func_b");
+  const uint32_t Func3Name = GC.insertString("func_c");
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Func1Name));
+  GC.addFunctionInfo(FunctionInfo(0x1100, 0x100, Func2Name));
+  GC.addFunctionInfo(FunctionInfo(0x1200, 0x100, Func3Name));
+  OutputAggregator Null(nullptr);
+  // ASSERT_THAT_ERROR consumes the Error, so a finalize() failure is reported
+  // as a test failure rather than leaving an unhandled Error behind when the
+  // test returns early.
+  ASSERT_THAT_ERROR(GC.finalize(Null), Succeeded());
+
+  auto Result = encodeV2(GC, llvm::endianness::little);
+  ASSERT_THAT_EXPECTED(Result, Succeeded());
+  StringRef Data = *Result;
+
+  // The table is read below as 32-bit entries, so make sure the header
+  // actually declares 4-byte AddrInfoOffsets before interpreting it that way.
+  auto HdrOrErr = decodeHeaderV2(Data, llvm::endianness::little);
+  ASSERT_THAT_EXPECTED(HdrOrErr, Succeeded());
+  ASSERT_EQ(HdrOrErr->AddrInfoOffSize, 4u);
+
+  // Find the AddrInfoOffsets and FunctionInfo sections from GlobalData.
+  uint64_t Offset = GlobalDataStart;
+  uint64_t AIOffsetsOffset = 0, AIOffsetsSize = 0;
+  uint64_t FIOffset = 0, FISize = 0;
+  while (Offset < Data.size()) {
+    GlobalData GD =
+        decodeGlobalDataEntry(Data, Offset, llvm::endianness::little);
+    if (GD.Type == GlobalInfoType::AddrInfoOffsets) {
+      AIOffsetsOffset = GD.FileOffset;
+      AIOffsetsSize = GD.FileSize;
+    } else if (GD.Type == GlobalInfoType::FunctionInfo) {
+      FIOffset = GD.FileOffset;
+      FISize = GD.FileSize;
+    } else if (GD.Type == GlobalInfoType::EndOfList) {
+      break;
+    }
+  }
+  ASSERT_GT(AIOffsetsOffset, 0u);
+  ASSERT_GT(FIOffset, 0u);
+  // One 32-bit entry per function; also keeps the reads below in bounds.
+  ASSERT_EQ(AIOffsetsSize, NumFuncs * sizeof(uint32_t));
+  ASSERT_LE(AIOffsetsOffset + AIOffsetsSize, Data.size());
+
+  // Each AddrInfoOffset should point within [FIOffset, FIOffset + FISize) and
+  // be greater than the previous one (sorted functions, no overlap).
+  DataExtractor DE(Data, /*IsLittleEndian=*/true, 8);
+  uint64_t AIOffset = AIOffsetsOffset;
+  uint32_t PrevOffset = 0;
+  for (uint32_t I = 0; I < NumFuncs; ++I) {
+    const uint32_t FuncOffset = DE.getU32(&AIOffset);
+    EXPECT_GE(FuncOffset, FIOffset)
+        << "AddrInfoOffset[" << I << "] before FunctionInfo section";
+    EXPECT_LT(FuncOffset, FIOffset + FISize)
+        << "AddrInfoOffset[" << I << "] beyond FunctionInfo section";
+    if (I > 0) {
+      EXPECT_GT(FuncOffset, PrevOffset)
+          << "AddrInfoOffset[" << I << "] not strictly increasing";
+    }
+    PrevOffset = FuncOffset;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// UUID section verification
+//===----------------------------------------------------------------------===//
+
+TEST(GSYMV2Test, TestUUIDSection) {
+  // Encode a creator with an 8-byte UUID and verify that the UUID GlobalData
+  // entry describes a section whose bytes equal the UUID that was set.
+  // GlobalData entries start right after the 24-byte fixed header.
+  constexpr uint64_t GlobalDataStart = 24;
+
+  GsymCreatorV2 GC;
+  const uint32_t Name = GC.insertString("main");
+  GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name));
+  uint8_t UUID[] = {0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22, 0x33, 0x44};
+  GC.setUUID(UUID);
+  OutputAggregator Null(nullptr);
+  // ASSERT_THAT_ERROR consumes the Error, so a finalize() failure is reported
+  // as a test failure rather than leaving an unhandled Error behind when the
+  // test returns early.
+  ASSERT_THAT_ERROR(GC.finalize(Null), Succeeded());
+
+  auto Result = encodeV2(GC, llvm::endianness::little);
+  ASSERT_THAT_EXPECTED(Result, Succeeded());
+  StringRef Data = *Result;
+
+  // Find UUID section.
+  uint64_t Offset = GlobalDataStart;
+  uint64_t UUIDOffset = 0, UUIDSize = 0;
+  while (Offset < Data.size()) {
+    GlobalData GD =
+        decodeGlobalDataEntry(Data, Offset, llvm::endianness::little);
+    if (GD.Type == GlobalInfoType::UUID) {
+      UUIDOffset = GD.FileOffset;
+      UUIDSize = GD.FileSize;
+    } else if (GD.Type == GlobalInfoType::EndOfList) {
+      break;
+    }
+  }
+  ASSERT_EQ(UUIDSize, sizeof(UUID));
+  ASSERT_GT(UUIDOffset, 0u);
+  // StringRef::substr clamps silently, so check the bounds explicitly to get
+  // a clear failure if the section runs past the end of the buffer.
+  ASSERT_LE(UUIDOffset + UUIDSize, Data.size());
+
+  // Verify the UUID bytes match.
+  StringRef UUIDData = Data.substr(UUIDOffset, UUIDSize);
+  EXPECT_EQ(UUIDData,
+            StringRef(reinterpret_cast<const char *>(UUID), sizeof(UUID)));
+}
>From 35302789dd5ab608aaea6e3dc77cf08e1272ae8a Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 20:26:51 -0700
Subject: [PATCH 10/45] Interface GsymReader and implement reader v2
---
.../include/llvm/DebugInfo/GSYM/GsymContext.h | 6 +-
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 61 +++++---
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 9 +-
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 2 +-
llvm/lib/DebugInfo/GSYM/GsymContext.cpp | 4 +-
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 60 ++++----
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 6 +-
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 8 +-
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 144 +++++++++---------
9 files changed, 162 insertions(+), 138 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
index f9382fa8d9577..72a6e4ce3e65a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
@@ -17,7 +17,7 @@ namespace llvm {
namespace gsym {
-class GsymReader;
+class GsymReaderV1;
/// GSYM DI Context
/// This data structure is the top level entity that deals with GSYM
@@ -28,7 +28,7 @@ class GsymReader;
/// the GSYM interfaces directly.
class GsymContext : public DIContext {
public:
- GsymContext(std::unique_ptr<GsymReader> Reader);
+ GsymContext(std::unique_ptr<GsymReaderV1> Reader);
~GsymContext() override;
GsymContext(GsymContext &) = delete;
@@ -56,7 +56,7 @@ class GsymContext : public DIContext {
getLocalsForAddress(object::SectionedAddress Address) override;
private:
- const std::unique_ptr<GsymReader> Reader;
+ const std::unique_ptr<GsymReaderV1> Reader;
};
} // end namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index d13f30e19226c..8709c033e2d89 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -30,20 +30,43 @@ class raw_ostream;
namespace gsym {
-/// GsymReader is used to read GSYM data from a file or buffer.
+/// GsymReader is an abstract interface for reading GSYM data.
+///
+/// This interface provides the methods needed by FunctionInfo::lookup and
+/// InlineInfo::lookup to resolve strings and files during symbolication.
+/// Both GsymReaderV1 and GsymReaderV2 implement this interface.
+///
+/// The interface only declares the string and file accessors below; opening,
+/// parsing and address lookup are left to the concrete reader classes.
+class GsymReader {
+public:
+ /// Virtual so a concrete reader can be destroyed through a GsymReader
+ /// pointer or reference.
+ virtual ~GsymReader() = default;
+
+ /// Get a string from the string table.
+ ///
+ /// \param Offset The string table offset for the string to retrieve.
+ /// \returns The string from the string table.
+ // NOTE(review): the V2 header carries a StrpSize field; confirm that a
+ // 32-bit Offset is sufficient for every string table V2 can describe.
+ virtual StringRef getString(uint32_t Offset) const = 0;
+
+ /// Get a file entry for the supplied file index.
+ ///
+ /// \param Index An index into the file table.
+ /// \returns An optional FileEntry that will be valid if the file index is
+ /// valid, or std::nullopt if the file index is out of bounds.
+ virtual std::optional<FileEntry> getFile(uint32_t Index) const = 0;
+};
+
+/// GsymReaderV1 is used to read GSYM V1 data from a file or buffer.
///
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table is designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
-/// and tables into GsymReader::SwappedData and then point our header and
+/// and tables into GsymReaderV1::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
///
-/// GsymReader objects must use one of the static functions to create an
-/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
+/// GsymReaderV1 objects must use one of the static functions to create an
+/// instance: GsymReaderV1::openFile(...) and GsymReaderV1::copyBuffer(...).
-class GsymReader {
- GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
+class GsymReaderV1 : public GsymReader {
+ GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
std::unique_ptr<MemoryBuffer> MemBuffer;
@@ -67,23 +90,23 @@ class GsymReader {
std::unique_ptr<SwappedData> Swap;
public:
- LLVM_ABI GsymReader(GsymReader &&RHS);
- LLVM_ABI ~GsymReader();
+ LLVM_ABI GsymReaderV1(GsymReaderV1 &&RHS);
+ LLVM_ABI ~GsymReaderV1() override;
- /// Construct a GsymReader from a file on disk.
+ /// Construct a GsymReaderV1 from a file on disk.
///
/// \param Path The file path the GSYM file to read.
- /// \returns An expected GsymReader that contains the object or an error
+ /// \returns An expected GsymReaderV1 that contains the object or an error
/// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<GsymReader> openFile(StringRef Path);
+ LLVM_ABI static llvm::Expected<GsymReaderV1> openFile(StringRef Path);
- /// Construct a GsymReader from a buffer.
+ /// Construct a GsymReaderV1 from a buffer.
///
/// \param Bytes A set of bytes that will be copied and owned by the
/// returned object on success.
- /// \returns An expected GsymReader that contains the object or an error
+ /// \returns An expected GsymReaderV1 that contains the object or an error
/// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
+ LLVM_ABI static llvm::Expected<GsymReaderV1> copyBuffer(StringRef Bytes);
/// Access the GSYM header.
/// \returns A native endian version of the GSYM header.
@@ -158,7 +181,7 @@ class GsymReader {
///
/// \param Offset The string table offset for the string to retrieve.
/// \returns The string from the strin table.
- StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+ StringRef getString(uint32_t Offset) const override { return StrTab[Offset]; }
/// Get the a file entry for the suppplied file index.
///
@@ -169,7 +192,7 @@ class GsymReader {
/// \param Index An index into the file table.
/// \returns An optional FileInfo that will be valid if the file index is
/// valid, or std::nullopt if the file index is out of bounds,
- std::optional<FileEntry> getFile(uint32_t Index) const {
+ std::optional<FileEntry> getFile(uint32_t Index) const override {
if (Index < Files.size())
return Files[Index];
return std::nullopt;
@@ -361,10 +384,10 @@ class GsymReader {
/// work of parsing the GSYM file and returning an error.
///
/// \param MemBuffer A memory buffer that will transfer ownership into the
- /// GsymReader.
- /// \returns An expected GsymReader that contains the object or an error
+ /// GsymReaderV1.
+ /// \returns An expected GsymReaderV1 that contains the object or an error
/// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<llvm::gsym::GsymReader>
+ LLVM_ABI static llvm::Expected<llvm::gsym::GsymReaderV1>
create(std::unique_ptr<MemoryBuffer> &MemBuffer);
/// Given an address, find the address index.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
index de1ed0481cbbd..192c5fee37b01 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/LineEntry.h"
#include "llvm/DebugInfo/GSYM/StringTable.h"
@@ -42,7 +43,7 @@ namespace gsym {
/// GsymReaderV2 objects must use one of the static functions to create an
/// instance: GsymReaderV2::openFile(...) and GsymReaderV2::copyBuffer(...).
-class GsymReaderV2 {
+class GsymReaderV2 : public GsymReader {
GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
@@ -68,7 +69,7 @@ class GsymReaderV2 {
public:
LLVM_ABI GsymReaderV2(GsymReaderV2 &&RHS);
- LLVM_ABI ~GsymReaderV2();
+ LLVM_ABI ~GsymReaderV2() override;
/// Construct a GsymReaderV2 from a file on disk.
///
@@ -158,7 +159,7 @@ class GsymReaderV2 {
///
/// \param Offset The string table offset for the string to retrieve.
/// \returns The string from the strin table.
- StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+ StringRef getString(uint32_t Offset) const override { return StrTab[Offset]; }
/// Get the a file entry for the suppplied file index.
///
@@ -169,7 +170,7 @@ class GsymReaderV2 {
/// \param Index An index into the file table.
/// \returns An optional FileInfo that will be valid if the file index is
/// valid, or std::nullopt if the file index is out of bounds,
- std::optional<FileEntry> getFile(uint32_t Index) const {
+ std::optional<FileEntry> getFile(uint32_t Index) const override {
if (Index < Files.size())
return Files[Index];
return std::nullopt;
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index c663ed68f29c6..7fba8abe99bb5 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -735,7 +735,7 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath,
OutputAggregator &Out) {
Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
- auto Gsym = GsymReader::openFile(GsymPath);
+ auto Gsym = GsymReaderV1::openFile(GsymPath);
if (!Gsym)
return Gsym.takeError();
diff --git a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
index 62b4caa327d87..ac88ca3f94970 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
using namespace llvm::gsym;
GsymContext::~GsymContext() = default;
-GsymContext::GsymContext(std::unique_ptr<GsymReader> Reader)
+GsymContext::GsymContext(std::unique_ptr<GsymReaderV1> Reader)
: DIContext(CK_GSYM), Reader(std::move(Reader)) {}
void GsymContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {}
@@ -119,7 +119,7 @@ GsymContext::getLineInfoForAddressRange(object::SectionedAddress Address,
for (const auto &LineEntry : LT) {
if (StartAddr <= LineEntry.Addr && LineEntry.Addr < EndAddr) {
// Use LineEntry.Addr, LineEntry.File (which is a file index into the
- // files tables from the GsymReader), and LineEntry.Line (source line
+ // files tables from the GsymReaderV1), and LineEntry.Line (source line
// number) to add stuff to the DILineInfoTable
}
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 0a5bb7caaee8c..c217a5cac1fd4 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -1,4 +1,4 @@
-//===- GsymReader.cpp -----------------------------------------------------===//
+//===- GsymReaderV1.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -22,14 +22,14 @@
using namespace llvm;
using namespace gsym;
-GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
+GsymReaderV1::GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer)
: MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
-GsymReader::GsymReader(GsymReader &&RHS) = default;
+GsymReaderV1::GsymReaderV1(GsymReaderV1 &&RHS) = default;
-GsymReader::~GsymReader() = default;
+GsymReaderV1::~GsymReaderV1() = default;
-llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
+llvm::Expected<GsymReaderV1> GsymReaderV1::openFile(StringRef Filename) {
// Open the input file and return an appropriate error if needed.
ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
@@ -39,17 +39,17 @@ llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
return create(BuffOrErr.get());
}
-llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
+llvm::Expected<GsymReaderV1> GsymReaderV1::copyBuffer(StringRef Bytes) {
auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
return create(MemBuffer);
}
-llvm::Expected<llvm::gsym::GsymReader>
-GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
+llvm::Expected<llvm::gsym::GsymReaderV1>
+GsymReaderV1::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
if (!MemBuffer)
return createStringError(std::errc::invalid_argument,
"invalid memory buffer");
- GsymReader GR(std::move(MemBuffer));
+ GsymReaderV1 GR(std::move(MemBuffer));
llvm::Error Err = GR.parse();
if (Err)
return std::move(Err);
@@ -57,7 +57,7 @@ GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
}
llvm::Error
-GsymReader::parse() {
+GsymReaderV1::parse() {
BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
// Check for the magic bytes. This file format is designed to be mmap'ed
// into a process and accessed as read only. This is done for performance
@@ -196,15 +196,15 @@ GsymReader::parse() {
}
-const Header &GsymReader::getHeader() const {
- // The only way to get a GsymReader is from GsymReader::openFile(...) or
- // GsymReader::copyBuffer() and the header must be valid and initialized to
+const Header &GsymReaderV1::getHeader() const {
+ // The only way to get a GsymReaderV1 is from GsymReaderV1::openFile(...) or
+ // GsymReaderV1::copyBuffer() and the header must be valid and initialized to
// a valid pointer value, so the assert below should not trigger.
assert(Hdr);
return *Hdr;
}
-std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
+std::optional<uint64_t> GsymReaderV1::getAddress(size_t Index) const {
switch (Hdr->AddrOffSize) {
case 1: return addressForIndex<uint8_t>(Index);
case 2: return addressForIndex<uint16_t>(Index);
@@ -214,7 +214,7 @@ std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
return std::nullopt;
}
-std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
+std::optional<uint64_t> GsymReaderV1::getAddressInfoOffset(size_t Index) const {
const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
if (Index < NumAddrInfoOffsets)
return AddrInfoOffsets[Index];
@@ -222,7 +222,7 @@ std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
}
Expected<uint64_t>
-GsymReader::getAddressIndex(const uint64_t Addr) const {
+GsymReaderV1::getAddressIndex(const uint64_t Addr) const {
if (Addr >= Hdr->BaseAddress) {
const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
std::optional<uint64_t> AddrOffsetIndex;
@@ -253,7 +253,7 @@ GsymReader::getAddressIndex(const uint64_t Addr) const {
}
llvm::Expected<DataExtractor>
-GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
+GsymReaderV1::getFunctionInfoDataForAddress(uint64_t Addr,
uint64_t &FuncStartAddr) const {
Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
if (!ExpectedAddrIdx)
@@ -296,7 +296,7 @@ GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
}
llvm::Expected<DataExtractor>
-GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
+GsymReaderV1::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
uint64_t &FuncStartAddr) const {
if (AddrIdx >= getNumAddresses())
return createStringError(std::errc::invalid_argument,
@@ -317,7 +317,7 @@ GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
}
-llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
+llvm::Expected<FunctionInfo> GsymReaderV1::getFunctionInfo(uint64_t Addr) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
@@ -326,7 +326,7 @@ llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
}
llvm::Expected<FunctionInfo>
-GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
+GsymReaderV1::getFunctionInfoAtIndex(uint64_t Idx) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
@@ -335,7 +335,7 @@ GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
}
llvm::Expected<LookupResult>
-GsymReader::lookup(uint64_t Addr,
+GsymReaderV1::lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFunctionsData) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
@@ -346,7 +346,7 @@ GsymReader::lookup(uint64_t Addr,
}
llvm::Expected<std::vector<LookupResult>>
-GsymReader::lookupAll(uint64_t Addr) const {
+GsymReaderV1::lookupAll(uint64_t Addr) const {
std::vector<LookupResult> Results;
std::optional<DataExtractor> MergedFunctionsData;
@@ -380,7 +380,7 @@ GsymReader::lookupAll(uint64_t Addr) const {
return Results;
}
-void GsymReader::dump(raw_ostream &OS) {
+void GsymReaderV1::dump(raw_ostream &OS) {
const auto &Header = getHeader();
// Dump the GSYM header.
OS << Header << "\n";
@@ -435,7 +435,7 @@ void GsymReader::dump(raw_ostream &OS) {
}
}
-void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
+void GsymReaderV1::dump(raw_ostream &OS, const FunctionInfo &FI,
uint32_t Indent) {
OS.indent(Indent);
OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
@@ -453,14 +453,14 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
}
}
-void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
+void GsymReaderV1::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
OS << "++ Merged FunctionInfos[" << inx << "]:\n";
dump(OS, MFI.MergedFunctions[inx], 4);
}
}
-void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
+void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
OS << HEX16(CSI.ReturnOffset);
std::string Flags;
@@ -492,7 +492,7 @@ void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
}
}
-void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
uint32_t Indent) {
OS.indent(Indent);
OS << "CallSites (by relative return offset):\n";
@@ -504,7 +504,7 @@ void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
}
}
-void GsymReader::dump(raw_ostream &OS, const LineTable <, uint32_t Indent) {
+void GsymReaderV1::dump(raw_ostream &OS, const LineTable <, uint32_t Indent) {
OS.indent(Indent);
OS << "LineTable:\n";
for (auto &LE: LT) {
@@ -516,7 +516,7 @@ void GsymReader::dump(raw_ostream &OS, const LineTable <, uint32_t Indent) {
}
}
-void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
+void GsymReaderV1::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
if (Indent == 0)
OS << "InlineInfo:\n";
else
@@ -534,7 +534,7 @@ void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
dump(OS, ChildII, Indent + 2);
}
-void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
+void GsymReaderV1::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
if (FE) {
// IF we have the file from index 0, then don't print anything
if (FE->Dir == 0 && FE->Base == 0)
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 3821f53d26b98..1883dcadf7c34 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -779,11 +779,11 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
// - Otherwise, create a DWARFContext.
const auto GsymFile = lookUpGsymFile(BinaryName.str());
if (!GsymFile.empty()) {
- auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
+ auto ReaderOrErr = gsym::GsymReaderV1::openFile(GsymFile);
if (ReaderOrErr) {
- std::unique_ptr<gsym::GsymReader> Reader =
- std::make_unique<gsym::GsymReader>(std::move(*ReaderOrErr));
+ std::unique_ptr<gsym::GsymReaderV1> Reader =
+ std::make_unique<gsym::GsymReaderV1>(std::move(*ReaderOrErr));
Context = std::make_unique<gsym::GsymContext>(std::move(Reader));
}
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 4c08b57fb2f2b..b05c651696bce 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -527,7 +527,7 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
return Error::success();
}
-static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
+static void doLookup(GsymReaderV1 &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
// If we have filters, count matching results first
@@ -661,7 +661,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
std::string InputLine;
std::string CurrentGSYMPath;
- std::optional<Expected<GsymReader>> CurrentGsym;
+ std::optional<Expected<GsymReaderV1>> CurrentGsym;
while (std::getline(std::cin, InputLine)) {
// Strip newline characters.
@@ -674,7 +674,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
llvm::StringRef{StrippedInputLine}.split(' ');
if (GSYMPath != CurrentGSYMPath) {
- CurrentGsym = GsymReader::openFile(GSYMPath);
+ CurrentGsym = GsymReaderV1::openFile(GSYMPath);
if (!*CurrentGsym)
error(GSYMPath, CurrentGsym->takeError());
CurrentGSYMPath = GSYMPath;
@@ -698,7 +698,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
// Dump or access data inside GSYM files
for (const auto &GSYMPath : InputFilenames) {
- auto Gsym = GsymReader::openFile(GSYMPath);
+ auto Gsym = GsymReaderV1::openFile(GSYMPath);
if (!Gsym)
error(GSYMPath, Gsym.takeError());
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index d56007371b2f2..f6e565cf53462 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -1003,9 +1003,9 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
"attempted to encode invalid InlineInfo object");
}
-static void Compare(const GsymCreator &GC, const GsymReader &GR) {
+static void Compare(const GsymCreator &GC, const GsymReaderV1 &GR) {
// Verify that all of the data in a GsymCreator is correctly decoded from
- // a GsymReader. To do this, we iterator over
+ // a GsymReaderV1. To do this, we iterate over each FunctionInfo in GC.
GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool {
auto DecodedFI = GR.getFunctionInfo(FI.Range.start());
EXPECT_TRUE(bool(DecodedFI));
@@ -1023,7 +1023,7 @@ static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
const Header &Hdr = GR->getHeader();
EXPECT_EQ(Hdr.Version, Version);
@@ -1127,21 +1127,21 @@ TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
ArrayRef<uint8_t>(UUID));
}
-static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr,
+static void VerifyFunctionInfo(const GsymReaderV1 &GR, uint64_t Addr,
const FunctionInfo &FI) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
ASSERT_EQ(FI, ExpFI.get());
}
-static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr,
+static void VerifyFunctionInfoError(const GsymReaderV1 &GR, uint64_t Addr,
std::string ErrMessage) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_FALSE(bool(ExpFI));
checkError(ErrMessage, ExpFI.takeError());
}
-TEST(GSYMTest, TestGsymReader) {
+TEST(GSYMTest, TestGsymReaderV1) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
GsymCreator GC;
GC.setUUID(UUID);
@@ -1162,8 +1162,8 @@ TEST(GSYMTest, TestGsymReader) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- if (auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str())) {
- const GsymReader &GR = ExpectedGR.get();
+ if (auto ExpectedGR = GsymReaderV1::copyBuffer(OutStrm.str())) {
+ const GsymReaderV1 &GR = ExpectedGR.get();
VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff is not in GSYM");
FunctionInfo Func1(Func1Addr, FuncSize, Func1Name);
@@ -1228,7 +1228,7 @@ TEST(GSYMTest, TestGsymLookups) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
// Verify inline info is correct when doing lookups.
@@ -1348,7 +1348,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1426,7 +1426,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1534,7 +1534,7 @@ TEST(GSYMTest, TestDWARFStructMethodNoMangled) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1640,7 +1640,7 @@ TEST(GSYMTest, TestDWARFTextRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1669,7 +1669,7 @@ TEST(GSYMTest, TestEmptySymbolEndAddressOfTextRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1839,7 +1839,7 @@ TEST(GSYMTest, TestDWARFInlineInfo) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -2100,7 +2100,7 @@ TEST(GSYMTest, TestDWARFNoLines) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
EXPECT_EQ(GR->getNumAddresses(), 4u);
@@ -2280,7 +2280,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr4) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// Test that the only function that made it was the "main" function.
@@ -2421,7 +2421,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// Test that the only function that made it was the "main" function.
@@ -2500,8 +2500,8 @@ static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
}
// Finalize a GsymCreator, encode it and decode it and return the error or
-// GsymReader that was successfully decoded.
-static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
+// GsymReaderV1 that was successfully decoded.
+static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreator &GC) {
OutputAggregator Null(nullptr);
Error FinalizeErr = GC.finalize(Null);
if (FinalizeErr)
@@ -2513,7 +2513,7 @@ static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
llvm::Error Err = GC.encode(FW);
if (Err)
return std::move(Err);
- return GsymReader::copyBuffer(OutStrm.str());
+ return GsymReaderV1::copyBuffer(OutStrm.str());
}
TEST(GSYMTest, TestGsymSegmenting) {
@@ -2528,15 +2528,15 @@ TEST(GSYMTest, TestGsymSegmenting) {
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
+ Expected<GsymReaderV1> GR = FinalizeEncodeAndDecode(GC);
ASSERT_THAT_EXPECTED(GR, Succeeded());
//GR->dump(outs());
// Create segmented GSYM files where each file contains 1 function. We will
// then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReader objects for each segment to ensure we get
+ // doing lookups on the GsymReaderV1 objects for each segment to ensure we get
// the exact same information. So after all of the code below we will have
- // GsymReader objects that each contain one function. We name the creators
+ // GsymReaderV1 objects that each contain one function. We name the creators
// and readers to match the one and only address they contain.
// GC1000 and GR1000 are for [0x1000-0x1030)
// GC2000 and GR2000 are for [0x2000-0x2030)
@@ -2583,21 +2583,21 @@ TEST(GSYMTest, TestGsymSegmenting) {
ASSERT_TRUE(GC3000.get() != nullptr);
ASSERT_TRUE(GC4000.get() != nullptr);
ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReader for each segment and verify they succeed.
- Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+ // Encode and decode the GsymReaderV1 for each segment and verify they succeed.
+ Expected<GsymReaderV1> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+ Expected<GsymReaderV1> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+ Expected<GsymReaderV1> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+ Expected<GsymReaderV1> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
ASSERT_THAT_EXPECTED(GR4000, Succeeded());
// Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR1000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR1000.
for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR1000->lookup(Addr);
@@ -2612,10 +2612,10 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR2000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR2000.
for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR2000->lookup(Addr);
@@ -2631,10 +2631,10 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR3000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR3000.
for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR3000->lookup(Addr);
@@ -2649,13 +2649,13 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR4000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR4000.
for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReader for that contains 0x4000
+ // Lookup in the GsymReaderV1 that contains 0x4000
auto SegmentLR = GR4000->lookup(Addr);
ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
// Make sure the lookup results match.
@@ -2679,15 +2679,15 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
+ Expected<GsymReaderV1> GR = FinalizeEncodeAndDecode(GC);
ASSERT_THAT_EXPECTED(GR, Succeeded());
//GR->dump(outs());
// Create segmented GSYM files where each file contains 1 function. We will
// then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReader objects for each segment to ensure we get
+ // doing lookups on the GsymReaderV1 objects for each segment to ensure we get
// the exact same information. So after all of the code below we will have
- // GsymReader objects that each contain one function. We name the creators
+ // GsymReaderV1 objects that each contain one function. We name the creators
// and readers to match the one and only address they contain.
// GC1000 and GR1000 are for [0x1000-0x1030)
// GC2000 and GR2000 are for [0x2000-0x2030)
@@ -2734,21 +2734,21 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
ASSERT_TRUE(GC3000.get() != nullptr);
ASSERT_TRUE(GC4000.get() != nullptr);
ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReader for each segment and verify they succeed.
- Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+ // Encode and decode the GsymReaderV1 for each segment and verify they succeed.
+ Expected<GsymReaderV1> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+ Expected<GsymReaderV1> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+ Expected<GsymReaderV1> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+ Expected<GsymReaderV1> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
ASSERT_THAT_EXPECTED(GR4000, Succeeded());
// Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR1000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR1000.
for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR1000->lookup(Addr);
@@ -2763,10 +2763,10 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR2000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR2000.
for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR2000->lookup(Addr);
@@ -2782,10 +2782,10 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR3000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR3000.
for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR3000->lookup(Addr);
@@ -2800,13 +2800,13 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR4000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR4000.
for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReader for that contains 0x4000
+ // Lookup in the GsymReaderV1 that contains 0x4000
auto SegmentLR = GR4000->lookup(Addr);
ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
// Make sure the lookup results match.
@@ -3062,7 +3062,7 @@ TEST(GSYMTest, TestDWARFInlineRangeScopes) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -3290,7 +3290,7 @@ TEST(GSYMTest, TestDWARFEmptyInline) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -3527,7 +3527,7 @@ TEST(GSYMTest, TestFinalizeForLineTables) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -3807,7 +3807,7 @@ TEST(GSYMTest, TestRangeWarnings) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -4009,7 +4009,7 @@ TEST(GSYMTest, TestEmptyRangeWarnings) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4161,7 +4161,7 @@ TEST(GSYMTest, TestEmptyLinkageName) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4322,7 +4322,7 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4642,7 +4642,7 @@ TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 3u);
@@ -4857,7 +4857,7 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
>From 96eb03f90e8bd1ae174d0d89add43cf1b5825914 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 20:36:31 -0700
Subject: [PATCH 11/45] Add tests for reader v2
---
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 355 +++++++++++------
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 386 +++++++++++++++++++
2 files changed, 621 insertions(+), 120 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index 28452d5ddf385..a0e54f066db37 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include "llvm/DebugInfo/GSYM/GlobalData.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -56,145 +57,251 @@ GsymReaderV2::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
return std::move(GR);
}
+/// Walk the GlobalData entries at \p Offset, decoding each 24-byte entry (u32
+/// type, u32 padding, u64 offset, u64 size) with \p DE, and store the offset
+/// and size of the sections the reader needs in the out-parameters. Fails if a
+/// section lies outside the \p BufSize byte buffer or EndOfList is never seen.
+static llvm::Error
+parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
+                       uint64_t BufSize,
+                       uint64_t &AddrOffsetsOff, uint64_t &AddrOffsetsSize,
+                       uint64_t &AddrInfoOffsetsOff, uint64_t &AddrInfoOffsetsSize,
+                       uint64_t &StringTableOff, uint64_t &StringTableSize,
+                       uint64_t &FileTableOff, uint64_t &FileTableSize,
+                       uint64_t &FuncInfoOff, uint64_t &FuncInfoSize) {
+  while (Offset + 24 <= BufSize) {
+    auto Type = static_cast<GlobalInfoType>(DE.getU32(&Offset));
+    (void)DE.getU32(&Offset); // Padding word; its value is ignored.
+    uint64_t FileOffset = DE.getU64(&Offset);
+    uint64_t FileSize = DE.getU64(&Offset);
+    if (Type == GlobalInfoType::EndOfList)
+      return Error::success();
+
+    // Check bounds by subtraction; FileOffset + FileSize could wrap past 2^64.
+    if (FileOffset > BufSize || FileSize > BufSize - FileOffset)
+      return createStringError(std::errc::invalid_argument,
+                               "GlobalData section type %u extends beyond "
+                               "buffer (offset=%" PRIu64 ", size=%" PRIu64
+                               ", bufsize=%" PRIu64 ")",
+                               static_cast<uint32_t>(Type), FileOffset,
+                               FileSize, BufSize);
+
+    switch (Type) {
+    case GlobalInfoType::AddrOffsets:
+      AddrOffsetsOff = FileOffset;
+      AddrOffsetsSize = FileSize;
+      break;
+    case GlobalInfoType::AddrInfoOffsets:
+      AddrInfoOffsetsOff = FileOffset;
+      AddrInfoOffsetsSize = FileSize;
+      break;
+    case GlobalInfoType::StringTable:
+      StringTableOff = FileOffset;
+      StringTableSize = FileSize;
+      break;
+    case GlobalInfoType::FileTable:
+      FileTableOff = FileOffset;
+      FileTableSize = FileSize;
+      break;
+    case GlobalInfoType::FunctionInfo:
+      FuncInfoOff = FileOffset;
+      FuncInfoSize = FileSize;
+      break;
+    case GlobalInfoType::UUID:
+      // UUID is noted but not needed for lookups.
+      break;
+    case GlobalInfoType::EndOfList:
+      llvm_unreachable("handled above");
+    }
+  }
+  return createStringError(std::errc::invalid_argument,
+                           "GlobalData array not terminated by EndOfList");
+}
+
llvm::Error
GsymReaderV2::parse() {
- BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
- // Check for the magic bytes. This file format is designed to be mmap'ed
- // into a process and accessed as read only. This is done for performance
- // and efficiency for symbolicating and parsing GSYM data.
- if (FileData.readObject(Hdr))
+ const StringRef Buf = MemBuffer->getBuffer();
+ const uint64_t BufSize = Buf.size();
+
+ if (BufSize < 24)
return createStringError(std::errc::invalid_argument,
- "not enough data for a GSYM header");
+ "not enough data for a GSYM V2 header");
+ // Check magic to determine endianness.
const auto HostByteOrder = llvm::endianness::native;
- switch (Hdr->Magic) {
- case GSYM_MAGIC:
- Endian = HostByteOrder;
- break;
- case GSYM_CIGAM:
- // This is a GSYM file, but not native endianness.
- Endian = sys::IsBigEndianHost ? llvm::endianness::little
- : llvm::endianness::big;
- Swap.reset(new SwappedData);
- break;
- default:
- return createStringError(std::errc::invalid_argument,
- "not a GSYM file");
+ uint32_t Magic;
+ memcpy(&Magic, Buf.data(), 4);
+
+ switch (Magic) {
+ case GSYM_MAGIC:
+ Endian = HostByteOrder;
+ break;
+ case GSYM_CIGAM:
+ Endian = sys::IsBigEndianHost ? llvm::endianness::little
+ : llvm::endianness::big;
+ Swap.reset(new SwappedData);
+ break;
+ default:
+ return createStringError(std::errc::invalid_argument, "not a GSYM file");
}
- bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
- // Read a correctly byte swapped header if we need to.
+ const bool IsLittleEndian = (Endian == llvm::endianness::little);
+
+ // Decode the header.
+ DataExtractor DE(Buf, IsLittleEndian, 8);
if (Swap) {
- DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
- if (auto ExpectedHdr = HeaderV2::decode(Data))
- Swap->Hdr = ExpectedHdr.get();
- else
+ auto ExpectedHdr = HeaderV2::decode(DE);
+ if (!ExpectedHdr)
return ExpectedHdr.takeError();
+ Swap->Hdr = *ExpectedHdr;
Hdr = &Swap->Hdr;
+ } else {
+ // Native endianness — cast directly from the mmap'd buffer.
+ Hdr = reinterpret_cast<const HeaderV2 *>(Buf.data());
}
- // Detect errors in the header and report any that are found. If we make it
- // past this without errors, we know we have a good magic value, a supported
- // version number, verified address offset size and a valid UUID size.
if (Error Err = Hdr->checkForError())
return Err;
+ // Parse GlobalData entries to find section locations.
+ uint64_t Offset = 24; // Fixed header size.
+ uint64_t AddrOffsetsOff = 0, AddrOffsetsSize = 0;
+ uint64_t AddrInfoOffsetsOff = 0, AddrInfoOffsetsSize = 0;
+ uint64_t StringTableOff = 0, StringTableSize = 0;
+ uint64_t FileTableOff = 0, FileTableSize = 0;
+ uint64_t FuncInfoOff = 0, FuncInfoSize = 0;
+
+ if (auto Err = parseGlobalDataEntries(
+ DE, Offset, BufSize, AddrOffsetsOff, AddrOffsetsSize,
+ AddrInfoOffsetsOff, AddrInfoOffsetsSize, StringTableOff,
+ StringTableSize, FileTableOff, FileTableSize, FuncInfoOff,
+ FuncInfoSize))
+ return Err;
+
+ // Validate required sections are present.
+ if (!AddrOffsetsSize)
+ return createStringError(std::errc::invalid_argument,
+ "missing AddrOffsets section");
+ if (!AddrInfoOffsetsSize)
+ return createStringError(std::errc::invalid_argument,
+ "missing AddrInfoOffsets section");
+ if (!StringTableSize)
+ return createStringError(std::errc::invalid_argument,
+ "missing StringTable section");
+ if (!FileTableSize)
+ return createStringError(std::errc::invalid_argument,
+ "missing FileTable section");
+
+ // Validate AddrOffsets size matches header.
+ if (AddrOffsetsSize !=
+ static_cast<uint64_t>(Hdr->NumAddresses) * Hdr->AddrOffSize)
+ return createStringError(std::errc::invalid_argument,
+ "AddrOffsets section size mismatch");
+
+ // Validate AddrInfoOffsets size matches header.
+ if (AddrInfoOffsetsSize !=
+ static_cast<uint64_t>(Hdr->NumAddresses) * Hdr->AddrInfoOffSize)
+ return createStringError(std::errc::invalid_argument,
+ "AddrInfoOffsets section size mismatch");
+
if (!Swap) {
- // This is the native endianness case that is most common and optimized for
- // efficient lookups. Here we just grab pointers to the native data and
- // use ArrayRef objects to allow efficient read only access.
-
- // Read the address offsets.
- if (FileData.padToAlignment(Hdr->AddrOffSize) ||
- FileData.readArray(AddrOffsets,
- Hdr->NumAddresses * Hdr->AddrOffSize))
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
+ // Native endianness — point ArrayRefs directly into the buffer.
+ AddrOffsets = ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(Buf.data() + AddrOffsetsOff),
+ AddrOffsetsSize);
+
+ if (Hdr->AddrInfoOffSize == 4) {
+ AddrInfoOffsets = ArrayRef<uint32_t>(
+ reinterpret_cast<const uint32_t *>(Buf.data() + AddrInfoOffsetsOff),
+ Hdr->NumAddresses);
+ } else {
+ return createStringError(std::errc::not_supported,
+ "8-byte AddrInfoOffsets not yet supported");
+ }
- // Read the address info offsets.
- if (FileData.padToAlignment(4) ||
- FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
+ // FileTable: first 4 bytes is NumFiles, then FileEntry array.
+ if (FileTableSize < 4)
return createStringError(std::errc::invalid_argument,
- "failed to read address info offsets table");
-
- // Read the file table.
- uint32_t NumFiles = 0;
- if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
+ "FileTable section too small");
+ uint32_t NumFiles;
+ memcpy(&NumFiles, Buf.data() + FileTableOff, 4);
+ if (FileTableSize < 4 + NumFiles * sizeof(FileEntry))
return createStringError(std::errc::invalid_argument,
- "failed to read file table");
-
- // TODO: V2 reader needs to read string table from GlobalData sections.
- return createStringError(std::errc::not_supported,
- "V2 native-endian reader not yet implemented");
-} else {
- // This is the non native endianness case that is not common and not
- // optimized for lookups. Here we decode the important tables into local
- // storage and then set the ArrayRef objects to point to these swapped
- // copies of the read only data so lookups can be as efficient as possible.
- DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
-
- // Read the address offsets.
- uint64_t Offset = alignTo(sizeof(HeaderV2), Hdr->AddrOffSize);
- Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
- switch (Hdr->AddrOffSize) {
+ "FileTable section too small for %u files",
+ NumFiles);
+ Files = ArrayRef<FileEntry>(
+ reinterpret_cast<const FileEntry *>(Buf.data() + FileTableOff + 4),
+ NumFiles);
+
+ // String table.
+ StrTab.Data = Buf.substr(StringTableOff, StringTableSize);
+ } else {
+ // Swapped endianness — decode into local storage.
+
+ // AddrOffsets.
+ uint64_t AOff = AddrOffsetsOff;
+ Swap->AddrOffsets.resize(AddrOffsetsSize);
+ switch (Hdr->AddrOffSize) {
case 1:
- if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
+ if (!DE.getU8(&AOff, Swap->AddrOffsets.data(), Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
- "failed to read address table");
+ "failed to read address table");
break;
case 2:
- if (!Data.getU16(&Offset,
- reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
- Hdr->NumAddresses))
+ if (!DE.getU16(&AOff,
+ reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
+ Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
- "failed to read address table");
+ "failed to read address table");
break;
case 4:
- if (!Data.getU32(&Offset,
- reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
- Hdr->NumAddresses))
+ if (!DE.getU32(&AOff,
+ reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
+ Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
- "failed to read address table");
+ "failed to read address table");
break;
case 8:
- if (!Data.getU64(&Offset,
- reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
- Hdr->NumAddresses))
+ if (!DE.getU64(&AOff,
+ reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
+ Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
- "failed to read address table");
+ "failed to read address table");
+ break;
}
AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
- // Read the address info offsets.
- Offset = alignTo(Offset, 4);
- Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
- if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
+ // AddrInfoOffsets.
+ if (Hdr->AddrInfoOffSize == 4) {
+ uint64_t AIOff = AddrInfoOffsetsOff;
+ Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
+ if (!DE.getU32(&AIOff, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
+ return createStringError(std::errc::invalid_argument,
+ "failed to read address info offsets");
AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
- else
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
- // Read the file table.
- const uint32_t NumFiles = Data.getU32(&Offset);
+ } else {
+ return createStringError(std::errc::not_supported,
+ "8-byte AddrInfoOffsets not yet supported");
+ }
+
+ // FileTable.
+ uint64_t FTOff = FileTableOff;
+ uint32_t NumFiles = DE.getU32(&FTOff);
if (NumFiles > 0) {
Swap->Files.resize(NumFiles);
- if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
- Files = ArrayRef<FileEntry>(Swap->Files);
- else
+ if (!DE.getU32(&FTOff, &Swap->Files[0].Dir, NumFiles * 2))
return createStringError(std::errc::invalid_argument,
"failed to read file table");
+ Files = ArrayRef<FileEntry>(Swap->Files);
}
- // TODO: V2 reader needs to read string table from GlobalData sections.
- return createStringError(std::errc::not_supported,
- "V2 swapped-endian reader not yet implemented");
+
+ // String table — raw bytes, no swapping needed.
+ StrTab.Data = Buf.substr(StringTableOff, StringTableSize);
}
return Error::success();
-
}
const HeaderV2 &GsymReaderV2::getHeader() const {
- // The only way to get a GsymReaderV2 is from GsymReaderV2::openFile(...) or
- // GsymReaderV2::copyBuffer() and the header must be valid and initialized to
- // a valid pointer value, so the assert below should not trigger.
assert(Hdr);
return *Hdr;
}
@@ -254,32 +361,20 @@ GsymReaderV2::getFunctionInfoDataForAddress(uint64_t Addr,
if (!ExpectedAddrIdx)
return ExpectedAddrIdx.takeError();
const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
- // The AddrIdx is the first index of the function info entries that match
- // \a Addr. We need to iterate over all function info objects that start with
- // the same address until we find a range that contains \a Addr.
std::optional<uint64_t> FirstFuncStartAddr;
const size_t NumAddresses = getNumAddresses();
for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
- // If there was an error, return the error.
if (!ExpextedData)
return ExpextedData;
- // Remember the first function start address if it hasn't already been set.
- // If it is already valid, check to see if it matches the first function
- // start address and only continue if it matches.
if (FirstFuncStartAddr.has_value()) {
if (*FirstFuncStartAddr != FuncStartAddr)
- break; // Done with consecutive function entries with same address.
+ break;
} else {
FirstFuncStartAddr = FuncStartAddr;
}
- // Make sure the current function address ranges contains \a Addr.
- // Some symbols on Darwin don't have valid sizes, so if we run into a
- // symbol with zero size, then we have found a match for our address.
- // The first thing the encoding of a FunctionInfo object is the function
- // size.
uint64_t Offset = 0;
uint32_t FuncSize = ExpextedData->getU32(&Offset);
if (FuncSize == 0 ||
@@ -332,24 +427,47 @@ GsymReaderV2::getFunctionInfoAtIndex(uint64_t Idx) const {
llvm::Expected<LookupResult>
GsymReaderV2::lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFunctionsData) const {
- // TODO: V2 reader lookup not yet implemented — FunctionInfo::lookup expects
- // a GsymReader reference, not GsymReaderV2.
- return createStringError(std::errc::not_supported,
- "V2 reader lookup not yet implemented");
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
+ return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
+ MergedFunctionsData);
+ else
+ return ExpectedData.takeError();
}
llvm::Expected<std::vector<LookupResult>>
GsymReaderV2::lookupAll(uint64_t Addr) const {
- // TODO: V2 reader lookupAll not yet implemented.
- return createStringError(std::errc::not_supported,
- "V2 reader lookupAll not yet implemented");
+ std::vector<LookupResult> Results;
+ std::optional<DataExtractor> MergedFunctionsData;
+
+ auto MainResult = lookup(Addr, &MergedFunctionsData);
+ if (!MainResult)
+ return MainResult.takeError();
+
+ Results.push_back(std::move(*MainResult));
+
+ if (MergedFunctionsData) {
+ auto ExpectedMergedFuncExtractors =
+ MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
+ if (!ExpectedMergedFuncExtractors)
+ return ExpectedMergedFuncExtractors.takeError();
+
+ for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
+ if (auto FI = FunctionInfo::lookup(MergedData, *this,
+ MainResult->FuncRange.start(), Addr)) {
+ Results.push_back(std::move(*FI));
+ } else {
+ return FI.takeError();
+ }
+ }
+ }
+
+ return Results;
}
void GsymReaderV2::dump(raw_ostream &OS) {
const auto &Header = getHeader();
- // Dump the GSYM header.
OS << Header << "\n";
- // Dump the address table.
OS << "Address Table:\n";
OS << "INDEX OFFSET";
@@ -373,13 +491,11 @@ void GsymReaderV2::dump(raw_ostream &OS) {
}
OS << " (" << HEX64(*getAddress(I)) << ")\n";
}
- // Dump the address info offsets table.
OS << "\nAddress Info Offsets:\n";
OS << "INDEX Offset\n";
OS << "====== ==========\n";
for (uint32_t I = 0; I < Header.NumAddresses; ++I)
OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
- // Dump the file table.
OS << "\nFiles:\n";
OS << "INDEX DIRECTORY BASENAME PATH\n";
OS << "====== ========== ========== ==============================\n";
@@ -501,7 +617,6 @@ void GsymReaderV2::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent)
void GsymReaderV2::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
if (FE) {
- // IF we have the file from index 0, then don't print anything
if (FE->Dir == 0 && FE->Base == 0)
return;
StringRef Dir = getString(FE->Dir);
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index 13ab384e6a53d..1d4a3f216fe32 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -11,6 +11,7 @@
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GlobalData.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
+#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
#include "llvm/Support/DataExtractor.h"
@@ -370,3 +371,388 @@ TEST(GSYMV2Test, TestUUIDSection) {
EXPECT_EQ(UUIDData, StringRef(reinterpret_cast<const char *>(UUID),
sizeof(UUID)));
}
+
+//===----------------------------------------------------------------------===//
+// Reader V2 tests (without creator — hand-crafted binary)
+//===----------------------------------------------------------------------===//
+
+/// Hand-assemble a minimal V2 GSYM image in native byte order: a 24-byte
+/// header, six GlobalData entries, then the sections. It holds one function
+/// named "main" covering \p FuncSize bytes at \p BaseAddr.
+static SmallString<512> buildMinimalV2Binary(uint64_t BaseAddr,
+ uint32_t FuncSize) {
+ SmallString<512> Str;
+ raw_svector_ostream OS(Str);
+ FileWriter FW(OS, llvm::endianness::native);
+
+ // We'll build: header (24) + GlobalData entries (6 entries * 24 = 144) +
+ // sections. Total GlobalData entries: AddrOffsets, AddrInfoOffsets,
+ // StringTable, FileTable, FunctionInfo, EndOfList = 6.
+ constexpr uint64_t HeaderSize = 24;
+ constexpr uint64_t NumGlobalEntries = 6;
+ constexpr uint64_t GlobalDataSize = NumGlobalEntries * 24;
+ constexpr uint8_t AddrOffSize = 1;
+ constexpr uint8_t AddrInfoOffSize = 4;
+ constexpr uint32_t NumAddresses = 1;
+
+ // Layout sections sequentially after header + GlobalData.
+ uint64_t CurOffset = HeaderSize + GlobalDataSize;
+
+ // AddrOffsets: 1 address * 1 byte.
+ const uint64_t AddrOffsetsOff = CurOffset;
+ const uint64_t AddrOffsetsSize = NumAddresses * AddrOffSize;
+ CurOffset += AddrOffsetsSize;
+
+ // Pad to 4-byte alignment for AddrInfoOffsets.
+ CurOffset = llvm::alignTo(CurOffset, 4);
+ const uint64_t AddrInfoOffsetsOff = CurOffset;
+ const uint64_t AddrInfoOffsetsSize = NumAddresses * AddrInfoOffSize;
+ CurOffset += AddrInfoOffsetsSize;
+
+ // FileTable: 4 bytes (NumFiles=1) + 1 FileEntry (8 bytes) = 12.
+ CurOffset = llvm::alignTo(CurOffset, 4);
+ const uint64_t FileTableOff = CurOffset;
+ const uint64_t FileTableSize = 4 + 8; // 1 file entry (dir=0, base=0).
+ CurOffset += FileTableSize;
+
+ // StringTable: "\0main\0" = 6 bytes.
+ const uint64_t StringTableOff = CurOffset;
+ const char StrTabData[] = "\0main";
+ const uint64_t StringTableSize = sizeof(StrTabData); // includes trailing \0
+ CurOffset += StringTableSize;
+
+ // FunctionInfo: encode a minimal FunctionInfo.
+ CurOffset = llvm::alignTo(CurOffset, 4);
+ const uint64_t FuncInfoOff = CurOffset;
+ // Pre-encode the FunctionInfo into a scratch buffer so the section size
+ // comes from FunctionInfo::encode rather than a hand-computed byte count.
+ // Its name is string table offset 1, where "main" starts in StrTabData
+ // (offset 0 is the leading NUL). Only the range and name are set, so no
+ // line table or inline info is attached to it.
+ SmallString<64> FIBuf;
+ {
+ raw_svector_ostream FIOS(FIBuf);
+ FileWriter FIFW(FIOS, llvm::endianness::native);
+ FunctionInfo FI(BaseAddr, FuncSize, /*Name=*/1); // "main" at strtab offset 1
+ auto OffOrErr = FI.encode(FIFW);
+ assert(OffOrErr && "FunctionInfo encode failed");
+ (void)OffOrErr;
+ }
+ const uint64_t FuncInfoSize = FIBuf.size();
+ const uint64_t AddrInfoOffset = FuncInfoOff; // Points to start of FI section.
+
+ // Write header.
+ FW.writeU32(GSYM_MAGIC); // Magic
+ FW.writeU16(GSYM_VERSION_2); // Version
+ FW.writeU16(0); // Padding
+ FW.writeU64(BaseAddr); // BaseAddress
+ FW.writeU32(NumAddresses); // NumAddresses
+ FW.writeU8(AddrOffSize); // AddrOffSize
+ FW.writeU8(AddrInfoOffSize); // AddrInfoOffSize
+ FW.writeU8(4); // StrpSize
+ FW.writeU8(0); // Padding2
+
+ // GlobalData entries.
+ auto writeGD = [&](GlobalInfoType Type, uint64_t Off, uint64_t Size) {
+ FW.writeU32(static_cast<uint32_t>(Type));
+ FW.writeU32(0); // Padding
+ FW.writeU64(Off);
+ FW.writeU64(Size);
+ };
+ writeGD(GlobalInfoType::AddrOffsets, AddrOffsetsOff, AddrOffsetsSize);
+ writeGD(GlobalInfoType::AddrInfoOffsets, AddrInfoOffsetsOff, AddrInfoOffsetsSize);
+ writeGD(GlobalInfoType::StringTable, StringTableOff, StringTableSize);
+ writeGD(GlobalInfoType::FileTable, FileTableOff, FileTableSize);
+ writeGD(GlobalInfoType::FunctionInfo, FuncInfoOff, FuncInfoSize);
+ writeGD(GlobalInfoType::EndOfList, 0, 0);
+
+ // AddrOffsets section.
+ assert(FW.tell() == AddrOffsetsOff);
+ FW.writeU8(0); // Offset from BaseAddr = 0 for first function.
+
+ // Pad to AddrInfoOffsets.
+ FW.alignTo(4);
+ assert(FW.tell() == AddrInfoOffsetsOff);
+ FW.writeU32(static_cast<uint32_t>(AddrInfoOffset));
+
+ // FileTable.
+ FW.alignTo(4);
+ assert(FW.tell() == FileTableOff);
+ FW.writeU32(1); // NumFiles = 1
+ FW.writeU32(0); // File[0].Dir = 0
+ FW.writeU32(0); // File[0].Base = 0
+
+ // StringTable.
+ assert(FW.tell() == StringTableOff);
+ FW.writeData(
+ ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(StrTabData),
+ StringTableSize));
+
+ // FunctionInfo.
+ FW.alignTo(4);
+ assert(FW.tell() == FuncInfoOff);
+ FW.writeData(
+ ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(FIBuf.data()),
+ FIBuf.size()));
+
+ return Str;
+}
+
+TEST(GSYMV2Test, TestReaderV2ParseHandCrafted) {
+  // Parse a hand-assembled single-function image and check each accessor.
+  const auto Image = buildMinimalV2Binary(0x1000, 0x100);
+  auto Reader = GsymReaderV2::copyBuffer(StringRef(Image.data(), Image.size()));
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  const HeaderV2 &Header = Reader->getHeader();
+  EXPECT_EQ(Header.Magic, GSYM_MAGIC);
+  EXPECT_EQ(Header.Version, GSYM_VERSION_2);
+  EXPECT_EQ(Header.BaseAddress, 0x1000u);
+  EXPECT_EQ(Header.NumAddresses, 1u);
+  EXPECT_EQ(Header.AddrOffSize, 1u);
+  EXPECT_EQ(Header.AddrInfoOffSize, 4u);
+  EXPECT_EQ(Reader->getNumAddresses(), 1u);
+
+  // The only address table entry is the base address itself.
+  auto FirstAddr = Reader->getAddress(0);
+  ASSERT_TRUE(FirstAddr.has_value());
+  EXPECT_EQ(*FirstAddr, 0x1000u);
+
+  // String table offset 1 holds the function name.
+  EXPECT_EQ(Reader->getString(1), "main");
+
+  // File index 0 is the empty file entry (dir and base both zero).
+  auto EmptyFile = Reader->getFile(0);
+  ASSERT_TRUE(EmptyFile.has_value());
+  EXPECT_EQ(EmptyFile->Dir, 0u);
+  EXPECT_EQ(EmptyFile->Base, 0u);
+}
+
+TEST(GSYMV2Test, TestReaderV2GetFunctionInfoHandCrafted) {
+  const auto Image = buildMinimalV2Binary(0x1000, 0x100);
+  auto Reader = GsymReaderV2::copyBuffer(StringRef(Image.data(), Image.size()));
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  // The function's start address decodes to the full FunctionInfo.
+  auto AtStart = Reader->getFunctionInfo(0x1000);
+  ASSERT_THAT_EXPECTED(AtStart, Succeeded());
+  EXPECT_EQ(AtStart->Range, AddressRange(0x1000, 0x1100));
+  EXPECT_EQ(AtStart->Name, 1u); // String table offset of "main".
+  EXPECT_EQ(Reader->getString(AtStart->Name), "main");
+
+  // An interior address resolves to the same function.
+  auto Inside = Reader->getFunctionInfo(0x1050);
+  ASSERT_THAT_EXPECTED(Inside, Succeeded());
+  EXPECT_EQ(Inside->Range, AddressRange(0x1000, 0x1100));
+
+  // An address beyond the function's range is an error.
+  auto Outside = Reader->getFunctionInfo(0x2000);
+  EXPECT_THAT_EXPECTED(Outside, Failed());
+}
+
+TEST(GSYMV2Test, TestReaderV2LookupHandCrafted) {
+  const auto Image = buildMinimalV2Binary(0x1000, 0x100);
+  auto Reader = GsymReaderV2::copyBuffer(StringRef(Image.data(), Image.size()));
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  // Looking up the start address yields the function's name and range.
+  auto AtStart = Reader->lookup(0x1000);
+  ASSERT_THAT_EXPECTED(AtStart, Succeeded());
+  EXPECT_EQ(AtStart->FuncName, "main");
+  EXPECT_EQ(AtStart->FuncRange, AddressRange(0x1000, 0x1100));
+
+  // An interior address resolves to the same function.
+  auto Inside = Reader->lookup(0x1080);
+  ASSERT_THAT_EXPECTED(Inside, Succeeded());
+  EXPECT_EQ(Inside->FuncName, "main");
+
+  // An address beyond the function's range is an error.
+  auto Outside = Reader->lookup(0x2000);
+  EXPECT_THAT_EXPECTED(Outside, Failed());
+}
+
+TEST(GSYMV2Test, TestReaderV2InvalidMagic) {
+  // A 24-byte buffer whose first byte is 'X' and whose remaining bytes are
+  // zero does not start with a GSYM magic, so reading it is expected to fail.
+  const char Bogus[24] = {'X'};
+  auto Reader = GsymReaderV2::copyBuffer(StringRef(Bogus, sizeof(Bogus)));
+  EXPECT_THAT_EXPECTED(Reader, Failed());
+}
+
+TEST(GSYMV2Test, TestReaderV2TooSmall) {
+  // Ten zeroed bytes are shorter than a header, so reading should fail.
+  const char Short[10] = {};
+  auto Reader = GsymReaderV2::copyBuffer(StringRef(Short, sizeof(Short)));
+  EXPECT_THAT_EXPECTED(Reader, Failed());
+}
+
+//===----------------------------------------------------------------------===//
+// Round-trip tests: Creator V2 -> Reader V2
+//===----------------------------------------------------------------------===//
+
+/// Finalize \p GC, encode it in native byte order into a scratch buffer, and
+/// pass that buffer to GsymReaderV2::copyBuffer(). The first error is returned.
+static Expected<GsymReaderV2> createAndReadV2(GsymCreatorV2 &GC) {
+  OutputAggregator Sink(nullptr);
+  if (auto FinalizeErr = GC.finalize(Sink))
+    return std::move(FinalizeErr);
+
+  SmallString<512> Encoded;
+  raw_svector_ostream Stream(Encoded);
+  FileWriter Writer(Stream, llvm::endianness::native);
+  if (auto EncodeErr = GC.encode(Writer))
+    return std::move(EncodeErr);
+
+  return GsymReaderV2::copyBuffer(Stream.str());
+}
+
+TEST(GSYMV2Test, TestRoundTripSingleFunction) {
+  GsymCreatorV2 Creator;
+  const uint32_t HelloName = Creator.insertString("hello");
+  Creator.addFunctionInfo(FunctionInfo(0x2000, 0x200, HelloName));
+
+  auto Reader = createAndReadV2(Creator);
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  EXPECT_EQ(Reader->getNumAddresses(), 1u);
+  EXPECT_EQ(Reader->getHeader().BaseAddress, 0x2000u);
+
+  auto Func = Reader->getFunctionInfo(0x2000);
+  ASSERT_THAT_EXPECTED(Func, Succeeded());
+  EXPECT_EQ(Func->Range, AddressRange(0x2000, 0x2200));
+  EXPECT_EQ(Reader->getString(Func->Name), "hello");
+}
+
+TEST(GSYMV2Test, TestRoundTripMultipleFunctions) {
+  GsymCreatorV2 Creator;
+  const uint32_t AlphaName = Creator.insertString("alpha");
+  const uint32_t BetaName = Creator.insertString("beta");
+  const uint32_t GammaName = Creator.insertString("gamma");
+  Creator.addFunctionInfo(FunctionInfo(0x1000, 0x100, AlphaName));
+  Creator.addFunctionInfo(FunctionInfo(0x1100, 0x100, BetaName));
+  Creator.addFunctionInfo(FunctionInfo(0x1200, 0x100, GammaName));
+
+  auto Reader = createAndReadV2(Creator);
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  EXPECT_EQ(Reader->getNumAddresses(), 3u);
+  EXPECT_EQ(Reader->getHeader().BaseAddress, 0x1000u);
+
+  // Each start address must map back to its own name and range.
+  auto Alpha = Reader->getFunctionInfo(0x1000);
+  ASSERT_THAT_EXPECTED(Alpha, Succeeded());
+  EXPECT_EQ(Reader->getString(Alpha->Name), "alpha");
+  EXPECT_EQ(Alpha->Range, AddressRange(0x1000, 0x1100));
+
+  auto Beta = Reader->getFunctionInfo(0x1100);
+  ASSERT_THAT_EXPECTED(Beta, Succeeded());
+  EXPECT_EQ(Reader->getString(Beta->Name), "beta");
+  EXPECT_EQ(Beta->Range, AddressRange(0x1100, 0x1200));
+
+  auto Gamma = Reader->getFunctionInfo(0x1200);
+  ASSERT_THAT_EXPECTED(Gamma, Succeeded());
+  EXPECT_EQ(Reader->getString(Gamma->Name), "gamma");
+  EXPECT_EQ(Gamma->Range, AddressRange(0x1200, 0x1300));
+
+  // An address in the middle of "beta" still resolves to "beta".
+  auto BetaInterior = Reader->getFunctionInfo(0x1150);
+  ASSERT_THAT_EXPECTED(BetaInterior, Succeeded());
+  EXPECT_EQ(Reader->getString(BetaInterior->Name), "beta");
+}
+
+TEST(GSYMV2Test, TestRoundTripLookup) {
+  GsymCreatorV2 Creator;
+  const uint32_t StartName = Creator.insertString("start");
+  const uint32_t EndName = Creator.insertString("end");
+  Creator.addFunctionInfo(FunctionInfo(0x5000, 0x500, StartName));
+  Creator.addFunctionInfo(FunctionInfo(0x5500, 0x500, EndName));
+
+  auto Reader = createAndReadV2(Creator);
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  // Start address of the first function.
+  auto FirstAtStart = Reader->lookup(0x5000);
+  ASSERT_THAT_EXPECTED(FirstAtStart, Succeeded());
+  EXPECT_EQ(FirstAtStart->FuncName, "start");
+  EXPECT_EQ(FirstAtStart->FuncRange, AddressRange(0x5000, 0x5500));
+
+  // Start address of the second function.
+  auto SecondAtStart = Reader->lookup(0x5500);
+  ASSERT_THAT_EXPECTED(SecondAtStart, Succeeded());
+  EXPECT_EQ(SecondAtStart->FuncName, "end");
+
+  // Interior address of the first function.
+  auto FirstInterior = Reader->lookup(0x5100);
+  ASSERT_THAT_EXPECTED(FirstInterior, Succeeded());
+  EXPECT_EQ(FirstInterior->FuncName, "start");
+
+  // An address not covered by either function is an error.
+  auto Uncovered = Reader->lookup(0x6000);
+  EXPECT_THAT_EXPECTED(Uncovered, Failed());
+}
+
+TEST(GSYMV2Test, TestRoundTripGetFunctionInfoAtIndex) {
+  GsymCreatorV2 Creator;
+  const uint32_t NameX = Creator.insertString("func_x");
+  const uint32_t NameY = Creator.insertString("func_y");
+  Creator.addFunctionInfo(FunctionInfo(0x3000, 0x100, NameX));
+  Creator.addFunctionInfo(FunctionInfo(0x3100, 0x100, NameY));
+
+  auto Reader = createAndReadV2(Creator);
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  // Indexes 0 and 1 return the two functions in address order.
+  auto AtIndex0 = Reader->getFunctionInfoAtIndex(0);
+  ASSERT_THAT_EXPECTED(AtIndex0, Succeeded());
+  EXPECT_EQ(Reader->getString(AtIndex0->Name), "func_x");
+
+  auto AtIndex1 = Reader->getFunctionInfoAtIndex(1);
+  ASSERT_THAT_EXPECTED(AtIndex1, Succeeded());
+  EXPECT_EQ(Reader->getString(AtIndex1->Name), "func_y");
+
+  // Index 2 is one past the last entry and must be rejected.
+  auto PastEnd = Reader->getFunctionInfoAtIndex(2);
+  EXPECT_THAT_EXPECTED(PastEnd, Failed());
+}
+
+TEST(GSYMV2Test, TestRoundTripAddressTable) {
+  GsymCreatorV2 Creator;
+  const uint32_t NameA = Creator.insertString("a");
+  const uint32_t NameB = Creator.insertString("b");
+  const uint32_t NameC = Creator.insertString("c");
+  Creator.addFunctionInfo(FunctionInfo(0x8000, 0x10, NameA));
+  Creator.addFunctionInfo(FunctionInfo(0x8020, 0x10, NameB));
+  Creator.addFunctionInfo(FunctionInfo(0x8040, 0x10, NameC));
+
+  auto Reader = createAndReadV2(Creator);
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  // getAddress returns each start address by index, then nullopt past the end.
+  EXPECT_EQ(Reader->getAddress(0), std::optional<uint64_t>(0x8000u));
+  EXPECT_EQ(Reader->getAddress(1), std::optional<uint64_t>(0x8020u));
+  EXPECT_EQ(Reader->getAddress(2), std::optional<uint64_t>(0x8040u));
+  EXPECT_EQ(Reader->getAddress(3), std::nullopt); // Out of bounds.
+}
+
+TEST(GSYMV2Test, TestRoundTripLargeAddressOffsets) {
+  // Two functions 0x20000 bytes apart; the header must report AddrOffSize 4.
+  GsymCreatorV2 Creator;
+  const uint32_t NearName = Creator.insertString("near");
+  const uint32_t FarName = Creator.insertString("far");
+  Creator.addFunctionInfo(FunctionInfo(0x1000, 0x10, NearName));
+  Creator.addFunctionInfo(FunctionInfo(0x1000 + 0x20000, 0x10, FarName));
+
+  auto Reader = createAndReadV2(Creator);
+  ASSERT_THAT_EXPECTED(Reader, Succeeded());
+
+  EXPECT_EQ(Reader->getHeader().AddrOffSize, 4u);
+  EXPECT_EQ(Reader->getNumAddresses(), 2u);
+
+  auto Near = Reader->getFunctionInfo(0x1000);
+  ASSERT_THAT_EXPECTED(Near, Succeeded());
+  EXPECT_EQ(Reader->getString(Near->Name), "near");
+
+  auto Far = Reader->getFunctionInfo(0x1000 + 0x20000);
+  ASSERT_THAT_EXPECTED(Far, Succeeded());
+  EXPECT_EQ(Reader->getString(Far->Name), "far");
+}
>From 4c413aa7cedc25c51203c9ea322d67eab07e08ce Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 21:12:47 -0700
Subject: [PATCH 12/45] Rename Creator V2 tests
---
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 50 +++++++++-----------
1 file changed, 23 insertions(+), 27 deletions(-)
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index 1d4a3f216fe32..73558b199ba89 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -23,6 +23,12 @@
using namespace llvm;
using namespace gsym;
+//===----------------------------------------------------------------------===//
+// Creator V2 tests
+//===----------------------------------------------------------------------===//
+
+/// Helper functions
+
static void checkError(std::string ExpectedMsg, Error Err) {
ASSERT_TRUE(bool(Err));
handleAllErrors(std::move(Err), [&](const ErrorInfoBase &Actual) {
@@ -60,17 +66,15 @@ static GlobalData decodeGlobalDataEntry(StringRef Data, uint64_t &Offset,
return GD;
}
-//===----------------------------------------------------------------------===//
-// Encode error tests
-//===----------------------------------------------------------------------===//
+/// Encode error tests
-TEST(GSYMV2Test, TestEncodeErrorNoFunctions) {
+TEST(GSYMV2Test, TestCreatorV2EncodeErrorNoFunctions) {
GsymCreatorV2 GC;
auto Result = encodeV2(GC, llvm::endianness::little);
checkError("no functions to encode", Result.takeError());
}
-TEST(GSYMV2Test, TestEncodeErrorNotFinalized) {
+TEST(GSYMV2Test, TestCreatorV2EncodeErrorNotFinalized) {
GsymCreatorV2 GC;
const uint32_t Name = GC.insertString("foo");
GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name));
@@ -79,7 +83,7 @@ TEST(GSYMV2Test, TestEncodeErrorNotFinalized) {
Result.takeError());
}
-TEST(GSYMV2Test, TestDoubleFinalize) {
+TEST(GSYMV2Test, TestCreatorV2DoubleFinalize) {
GsymCreatorV2 GC;
const uint32_t Name = GC.insertString("foo");
GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name));
@@ -91,9 +95,7 @@ TEST(GSYMV2Test, TestDoubleFinalize) {
checkError("already finalized", std::move(Err));
}
-//===----------------------------------------------------------------------===//
-// Header and GlobalData structure tests
-//===----------------------------------------------------------------------===//
+/// Header and GlobalData structure tests
/// Encode a V2 GSYM and verify the header fields and GlobalData layout.
static void TestV2HeaderAndGlobalData(llvm::endianness ByteOrder,
@@ -209,30 +211,28 @@ static void TestV2HeaderAndGlobalData(llvm::endianness ByteOrder,
}
}
-TEST(GSYMV2Test, TestHeaderAndGlobalDataLittle) {
+TEST(GSYMV2Test, TestCreatorV2HeaderAndGlobalDataLittle) {
TestV2HeaderAndGlobalData(llvm::endianness::little, 0x1000,
/*ExpectedAddrOffSize=*/1,
/*ExpectedNumAddresses=*/2,
/*HasUUID=*/true);
}
-TEST(GSYMV2Test, TestHeaderAndGlobalDataBig) {
+TEST(GSYMV2Test, TestCreatorV2HeaderAndGlobalDataBig) {
TestV2HeaderAndGlobalData(llvm::endianness::big, 0x1000,
/*ExpectedAddrOffSize=*/1,
/*ExpectedNumAddresses=*/2,
/*HasUUID=*/true);
}
-TEST(GSYMV2Test, TestHeaderAndGlobalDataNoUUID) {
+TEST(GSYMV2Test, TestCreatorV2HeaderAndGlobalDataNoUUID) {
TestV2HeaderAndGlobalData(llvm::endianness::little, 0x1000,
/*ExpectedAddrOffSize=*/1,
/*ExpectedNumAddresses=*/2,
/*HasUUID=*/false);
}
-//===----------------------------------------------------------------------===//
-// Address offset size tests
-//===----------------------------------------------------------------------===//
+/// Address offset size tests
static void TestV2AddrOffSize(uint64_t BaseAddr, uint64_t Func2Offset,
uint8_t ExpectedAddrOffSize) {
@@ -254,27 +254,25 @@ static void TestV2AddrOffSize(uint64_t BaseAddr, uint64_t Func2Offset,
EXPECT_EQ(HdrOrErr->AddrOffSize, ExpectedAddrOffSize);
}
-TEST(GSYMV2Test, TestAddrOffSize1Byte) {
+TEST(GSYMV2Test, TestCreatorV2AddrOffSize1Byte) {
TestV2AddrOffSize(0x1000, 0x20, 1);
}
-TEST(GSYMV2Test, TestAddrOffSize2Byte) {
+TEST(GSYMV2Test, TestCreatorV2AddrOffSize2Byte) {
TestV2AddrOffSize(0x1000, 0x200, 2);
}
-TEST(GSYMV2Test, TestAddrOffSize4Byte) {
+TEST(GSYMV2Test, TestCreatorV2AddrOffSize4Byte) {
TestV2AddrOffSize(0x1000, 0x20000, 4);
}
-TEST(GSYMV2Test, TestAddrOffSize8Byte) {
+TEST(GSYMV2Test, TestCreatorV2AddrOffSize8Byte) {
TestV2AddrOffSize(0x1000, 0x100000000ULL, 8);
}
-//===----------------------------------------------------------------------===//
-// AddrInfoOffsets verification
-//===----------------------------------------------------------------------===//
+/// AddrInfoOffsets verification
-TEST(GSYMV2Test, TestAddrInfoOffsetsPointToFunctionInfo) {
+TEST(GSYMV2Test, TestCreatorV2AddrInfoOffsetsPointToFunctionInfo) {
// Verify that each AddrInfoOffset entry points to a valid location within
// the FunctionInfo section.
GsymCreatorV2 GC;
@@ -333,11 +331,9 @@ TEST(GSYMV2Test, TestAddrInfoOffsetsPointToFunctionInfo) {
}
}
-//===----------------------------------------------------------------------===//
-// UUID section verification
-//===----------------------------------------------------------------------===//
+/// UUID section verification
-TEST(GSYMV2Test, TestUUIDSection) {
+TEST(GSYMV2Test, TestCreatorV2UUIDSection) {
GsymCreatorV2 GC;
const uint32_t Name = GC.insertString("main");
GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name));
>From 0e7a8b764fc3fa9b8970ae3f11f4a2029e56f909 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 21:26:28 -0700
Subject: [PATCH 13/45] Add tests for swapped endianness
---
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 105 ++++++++++++++++++-
1 file changed, 103 insertions(+), 2 deletions(-)
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index 73558b199ba89..d7f8337cd9da4 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -588,14 +588,16 @@ TEST(GSYMV2Test, TestReaderV2TooSmall) {
/// Helper to create, finalize, encode with GsymCreatorV2, then decode with
/// GsymReaderV2 and return the reader.
-static Expected<GsymReaderV2> createAndReadV2(GsymCreatorV2 &GC) {
+static Expected<GsymReaderV2>
+createAndReadV2(GsymCreatorV2 &GC,
+ llvm::endianness ByteOrder = llvm::endianness::native) {
OutputAggregator Null(nullptr);
if (auto Err = GC.finalize(Null))
return std::move(Err);
SmallString<512> Str;
raw_svector_ostream OutStrm(Str);
- FileWriter FW(OutStrm, llvm::endianness::native);
+ FileWriter FW(OutStrm, ByteOrder);
if (auto Err = GC.encode(FW))
return std::move(Err);
@@ -752,3 +754,102 @@ TEST(GSYMV2Test, TestRoundTripLargeAddressOffsets) {
ASSERT_THAT_EXPECTED(FI2, Succeeded());
EXPECT_EQ(GR->getString(FI2->Name), "far");
}
+
+/// Swapped-endianness round-trip tests
+
+/// Return the byte order opposite to llvm::endianness::native.
+static llvm::endianness swappedEndianness() {
+ if constexpr (llvm::endianness::native == llvm::endianness::little)
+ return llvm::endianness::big; // Native is little.
+ else
+ return llvm::endianness::little; // Native is not little.
+}
+
+TEST(GSYMV2Test, TestRoundTripSwappedSingleFunction) {
+ GsymCreatorV2 GC;
+ const uint32_t Name = GC.insertString("hello");
+ GC.addFunctionInfo(FunctionInfo(0x2000, 0x200, Name));
+ // Encode in the non-native byte order, then decode with the reader.
+ auto GR = createAndReadV2(GC, swappedEndianness());
+ ASSERT_THAT_EXPECTED(GR, Succeeded());
+ // No base address was set, so the header should use the function's address.
+ EXPECT_EQ(GR->getNumAddresses(), 1u);
+ EXPECT_EQ(GR->getHeader().BaseAddress, 0x2000u);
+ // The function info must round-trip; the range end is 0x2000 + 0x200.
+ auto FI = GR->getFunctionInfo(0x2000);
+ ASSERT_THAT_EXPECTED(FI, Succeeded());
+ EXPECT_EQ(FI->Range, AddressRange(0x2000, 0x2200));
+ EXPECT_EQ(GR->getString(FI->Name), "hello");
+}
+
+TEST(GSYMV2Test, TestRoundTripSwappedMultipleFunctions) {
+ GsymCreatorV2 GC;
+ const uint32_t Name1 = GC.insertString("alpha");
+ const uint32_t Name2 = GC.insertString("beta");
+ const uint32_t Name3 = GC.insertString("gamma");
+ GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name1));
+ GC.addFunctionInfo(FunctionInfo(0x1100, 0x100, Name2));
+ GC.addFunctionInfo(FunctionInfo(0x1200, 0x100, Name3));
+ // Encode in the non-native byte order, then decode with the reader.
+ auto GR = createAndReadV2(GC, swappedEndianness());
+ ASSERT_THAT_EXPECTED(GR, Succeeded());
+ // All three functions must be present in the decoded address table.
+ EXPECT_EQ(GR->getNumAddresses(), 3u);
+ // Each start address must resolve to the name it was added with.
+ auto FI1 = GR->getFunctionInfo(0x1000);
+ ASSERT_THAT_EXPECTED(FI1, Succeeded());
+ EXPECT_EQ(GR->getString(FI1->Name), "alpha");
+
+ auto FI2 = GR->getFunctionInfo(0x1100);
+ ASSERT_THAT_EXPECTED(FI2, Succeeded());
+ EXPECT_EQ(GR->getString(FI2->Name), "beta");
+
+ auto FI3 = GR->getFunctionInfo(0x1200);
+ ASSERT_THAT_EXPECTED(FI3, Succeeded());
+ EXPECT_EQ(GR->getString(FI3->Name), "gamma");
+}
+
+TEST(GSYMV2Test, TestRoundTripSwappedLookup) {
+ GsymCreatorV2 GC;
+ const uint32_t Name1 = GC.insertString("start");
+ const uint32_t Name2 = GC.insertString("end");
+ GC.addFunctionInfo(FunctionInfo(0x5000, 0x500, Name1));
+ GC.addFunctionInfo(FunctionInfo(0x5500, 0x500, Name2));
+ // Encode in the non-native byte order, then decode with the reader.
+ auto GR = createAndReadV2(GC, swappedEndianness());
+ ASSERT_THAT_EXPECTED(GR, Succeeded());
+ // Look up the first address of "start".
+ auto LR1 = GR->lookup(0x5000);
+ ASSERT_THAT_EXPECTED(LR1, Succeeded());
+ EXPECT_EQ(LR1->FuncName, "start");
+ EXPECT_EQ(LR1->FuncRange, AddressRange(0x5000, 0x5500));
+ // 0x5500 is where "end" begins, so it must not resolve to "start".
+ auto LR2 = GR->lookup(0x5500);
+ ASSERT_THAT_EXPECTED(LR2, Succeeded());
+ EXPECT_EQ(LR2->FuncName, "end");
+ // An address inside "start" (not its first address) must resolve to it.
+ auto LR3 = GR->lookup(0x5100);
+ ASSERT_THAT_EXPECTED(LR3, Succeeded());
+ EXPECT_EQ(LR3->FuncName, "start");
+ // 0x6000 is past both functions, so the lookup is expected to fail.
+ auto LR4 = GR->lookup(0x6000);
+ EXPECT_THAT_EXPECTED(LR4, Failed());
+}
+
+TEST(GSYMV2Test, TestRoundTripSwappedAddressTable) {
+ GsymCreatorV2 GC;
+ const uint32_t N1 = GC.insertString("a");
+ const uint32_t N2 = GC.insertString("b");
+ const uint32_t N3 = GC.insertString("c");
+ GC.addFunctionInfo(FunctionInfo(0x8000, 0x10, N1));
+ GC.addFunctionInfo(FunctionInfo(0x8020, 0x10, N2));
+ GC.addFunctionInfo(FunctionInfo(0x8040, 0x10, N3));
+ // Encode in the non-native byte order, then decode with the reader.
+ auto GR = createAndReadV2(GC, swappedEndianness());
+ ASSERT_THAT_EXPECTED(GR, Succeeded());
+ // Each address table index must map back to a function start address.
+ EXPECT_EQ(GR->getAddress(0), std::optional<uint64_t>(0x8000u));
+ EXPECT_EQ(GR->getAddress(1), std::optional<uint64_t>(0x8020u));
+ EXPECT_EQ(GR->getAddress(2), std::optional<uint64_t>(0x8040u));
+ EXPECT_EQ(GR->getAddress(3), std::nullopt); // Out of bounds.
+}
>From fcfcdbd88b64c199258e6765f6a8438a8dbd7ade Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 21:48:22 -0700
Subject: [PATCH 14/45] Add creator interface and rename V1 creator to
GsymCreatorV1
---
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 89 +++++++++++++-----
.../llvm/DebugInfo/GSYM/GsymCreatorV2.h | 41 ++++----
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 66 ++++++-------
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 2 +-
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 94 +++++++++----------
5 files changed, 167 insertions(+), 125 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 679c9cc0dd2ee..a7cfb527bb0c7 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -132,7 +132,44 @@ class OutputAggregator;
/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
/// entry in the Function Info Offsets Table. For details on the exact encoding
/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
+///
+/// Abstract interface for GSYM creators, implemented by GsymCreatorV1 and
+/// GsymCreatorV2. It is the common API that DwarfTransformer,
+/// ObjectFileTransformer, and other consumers use to populate a GSYM file
+/// regardless of the output format version.
class GsymCreator {
+public:
+ virtual ~GsymCreator() = default;
+
+ virtual uint32_t insertString(StringRef S, bool Copy = true) = 0;
+ virtual StringRef getString(uint32_t Offset) = 0;
+ virtual uint32_t
+ insertFile(StringRef Path,
+ sys::path::Style Style = sys::path::Style::native) = 0;
+ virtual void addFunctionInfo(FunctionInfo &&FI) = 0;
+ virtual size_t getNumFunctionInfos() const = 0;
+ virtual void
+ forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback) = 0;
+ virtual void forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const = 0;
+ virtual llvm::Error finalize(OutputAggregator &OS) = 0;
+ virtual llvm::Error
+ save(StringRef Path, llvm::endianness ByteOrder,
+ std::optional<uint64_t> SegmentSize = std::nullopt) const = 0;
+ virtual llvm::Error encode(FileWriter &O) const = 0;
+ virtual llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) = 0;
+ virtual void prepareMergedFunctions(OutputAggregator &Out) = 0;
+
+ virtual void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) = 0;
+ virtual void setBaseAddress(uint64_t Addr) = 0;
+ virtual void SetValidTextRanges(AddressRanges &TextRanges) = 0;
+ virtual const std::optional<AddressRanges> GetValidTextRanges() const = 0;
+ virtual bool IsValidTextAddress(uint64_t Addr) const = 0;
+ virtual bool isQuiet() const = 0;
+};
+
+class GsymCreatorV1 : public GsymCreator {
// Private member variables require Mutex protections
mutable std::mutex Mutex;
std::vector<FunctionInfo> Funcs;
@@ -216,7 +253,7 @@ class GsymCreator {
/// \returns The number of bytes it will take to encode the function info in
/// this GsymCreator. This helps calculate the size of the current GSYM
/// segment file.
- uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
+ uint64_t copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncInfoIdx);
/// Copy a string from \a SrcGC into this object.
///
@@ -228,7 +265,7 @@ class GsymCreator {
/// \param SrcGC The source gsym creator to copy from.
/// \param StrOff The string table offset from \a SrcGC to copy.
/// \returns The new string table offset of the string within this object.
- uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
+ uint32_t copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff);
/// Copy a file from \a SrcGC into this object.
///
@@ -244,7 +281,7 @@ class GsymCreator {
/// file index of zero will always return zero as the zero is a reserved file
/// index that means no file.
/// \returns The new file index of the file within this object.
- uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
+ uint32_t copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx);
/// Inserts a FileEntry into the file table.
///
@@ -264,7 +301,7 @@ class GsymCreator {
/// \param II The inline info that contains file indexes and string offsets
/// that come from \a SrcGC. The entries will be updated by copying any files
/// and strings over into this object.
- void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
+ void fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II);
/// Save this GSYM file into segments that are roughly \a SegmentSize in size.
///
@@ -283,7 +320,7 @@ class GsymCreator {
llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
uint64_t SegmentSize) const;
- /// Let this creator know that this is a segment of another GsymCreator.
+ /// Let this creator know that this is a segment of another GsymCreatorV1.
///
/// When we have a segment, we know that function infos will be added in
/// ascending address range order without having to be finalized. We also
@@ -293,7 +330,7 @@ class GsymCreator {
}
public:
- LLVM_ABI GsymCreator(bool Quiet = false);
+ LLVM_ABI GsymCreatorV1(bool Quiet = false);
/// Save a GSYM file to a stand alone file.
///
@@ -311,13 +348,13 @@ class GsymCreator {
/// \returns An error object that indicates success or failure of the save.
LLVM_ABI llvm::Error
save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const;
+ std::optional<uint64_t> SegmentSize = std::nullopt) const override;
/// Encode a GSYM into the file writer stream at the current position.
///
/// \param O The stream to save the binary data to
/// \returns An error object that indicates success or failure of the save.
- LLVM_ABI llvm::Error encode(FileWriter &O) const;
+ LLVM_ABI llvm::Error encode(FileWriter &O) const override;
/// Insert a string into the GSYM string table.
///
@@ -329,7 +366,7 @@ class GsymCreator {
/// the string is owned by another object that will stay around
/// long enough for the GsymCreator to save the GSYM file.
/// \returns The unique 32 bit offset into the string table.
- LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true);
+ LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true) override;
/// Retrieve a string from the GSYM string table given its offset.
///
@@ -339,7 +376,7 @@ class GsymCreator {
/// \param Offset The offset of the string to retrieve, previously returned by
/// insertString.
/// \returns The string at the given offset in the string table.
- LLVM_ABI StringRef getString(uint32_t Offset);
+ LLVM_ABI StringRef getString(uint32_t Offset) override;
/// Insert a file into this GSYM creator.
///
@@ -353,7 +390,8 @@ class GsymCreator {
/// \param Style The path style for the "Path" parameter.
/// \returns The unique file index for the inserted file.
LLVM_ABI uint32_t
- insertFile(StringRef Path, sys::path::Style Style = sys::path::Style::native);
+ insertFile(StringRef Path,
+ sys::path::Style Style = sys::path::Style::native) override;
/// Add a function info to this GSYM creator.
///
@@ -362,7 +400,7 @@ class GsymCreator {
/// offsets for names and other strings.
///
/// \param FI The function info object to emplace into our functions list.
- LLVM_ABI void addFunctionInfo(FunctionInfo &&FI);
+ LLVM_ABI void addFunctionInfo(FunctionInfo &&FI) override;
/// Load call site information from a YAML file.
///
@@ -371,7 +409,7 @@ class GsymCreator {
///
/// \param YAMLFile The path to the YAML file containing call site
/// information.
- LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile);
+ LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) override;
/// Organize merged FunctionInfo's
///
@@ -380,7 +418,7 @@ class GsymCreator {
///
/// \param Out Output stream to report information about how merged
/// FunctionInfo's were handled.
- LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
+ LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out) override;
/// Finalize the data in the GSYM creator prior to saving the data out.
///
@@ -391,12 +429,12 @@ class GsymCreator {
/// function infos, and function infos that were merged or removed.
/// \returns An error object that indicates success or failure of the
/// finalize.
- LLVM_ABI llvm::Error finalize(OutputAggregator &OS);
+ LLVM_ABI llvm::Error finalize(OutputAggregator &OS) override;
/// Set the UUID value.
///
/// \param UUIDBytes The new UUID bytes.
- void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
+ void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) override {
UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
}
@@ -405,26 +443,27 @@ class GsymCreator {
/// \param Callback A callback function that will get called with each
/// FunctionInfo. If the callback returns false, stop iterating.
LLVM_ABI void
- forEachFunctionInfo(std::function<bool(FunctionInfo &)> const &Callback);
+ forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback) override;
/// Thread safe const iteration over all function infos.
///
/// \param Callback A callback function that will get called with each
/// FunctionInfo. If the callback returns false, stop iterating.
LLVM_ABI void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const;
+ std::function<bool(const FunctionInfo &)> const &Callback) const override;
/// Get the current number of FunctionInfo objects contained in this
/// object.
- LLVM_ABI size_t getNumFunctionInfos() const;
+ LLVM_ABI size_t getNumFunctionInfos() const override;
/// Set valid .text address ranges that all functions must be contained in.
- void SetValidTextRanges(AddressRanges &TextRanges) {
+ void SetValidTextRanges(AddressRanges &TextRanges) override {
ValidTextRanges = TextRanges;
}
/// Get the valid text ranges.
- const std::optional<AddressRanges> GetValidTextRanges() const {
+ const std::optional<AddressRanges> GetValidTextRanges() const override {
return ValidTextRanges;
}
@@ -447,7 +486,7 @@ class GsymCreator {
///
/// \returns True if the address is in the valid text ranges or if no valid
/// text ranges have been set, false otherwise.
- LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const;
+ LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const override;
/// Set the base address to use for the GSYM file.
///
@@ -460,12 +499,12 @@ class GsymCreator {
///
/// \param Addr The address to use as the base address of the GSYM file
/// when it is saved to disk.
- void setBaseAddress(uint64_t Addr) {
+ void setBaseAddress(uint64_t Addr) override {
BaseAddress = Addr;
}
/// Whether the transformation should be quiet, i.e. not output warnings.
- bool isQuiet() const { return Quiet; }
+ bool isQuiet() const override { return Quiet; }
/// Create a segmented GSYM creator starting with function info index
@@ -486,7 +525,7 @@ class GsymCreator {
/// \returns An expected unique pointer to a GsymCreator or an error. The
/// returned unique pointer can be NULL if there are no more functions to
/// encode.
- LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreator>>
+ LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreatorV1>>
createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
index 18c2d90e4bac1..9e2067e0b1c72 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -97,7 +98,7 @@ class OutputAggregator;
///
/// - UUID: Raw UUID bytes of the original executable. Only present if a UUID
/// was set. No alignment requirement.
-class GsymCreatorV2 {
+class GsymCreatorV2 : public GsymCreator {
// Private member variables require Mutex protections
mutable std::mutex Mutex;
std::vector<FunctionInfo> Funcs;
@@ -269,13 +270,13 @@ class GsymCreatorV2 {
/// \returns An error object that indicates success or failure of the save.
LLVM_ABI llvm::Error
save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const;
+ std::optional<uint64_t> SegmentSize = std::nullopt) const override;
/// Encode a GSYM into the file writer stream at the current position.
///
/// \param O The stream to save the binary data to
/// \returns An error object that indicates success or failure of the save.
- LLVM_ABI llvm::Error encode(FileWriter &O) const;
+ LLVM_ABI llvm::Error encode(FileWriter &O) const override;
/// Insert a string into the GSYM string table.
///
@@ -287,7 +288,7 @@ class GsymCreatorV2 {
/// the string is owned by another object that will stay around
/// long enough for the GsymCreatorV2 to save the GSYM file.
/// \returns The unique 32 bit offset into the string table.
- LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true);
+ LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true) override;
/// Retrieve a string from the GSYM string table given its offset.
///
@@ -297,7 +298,7 @@ class GsymCreatorV2 {
/// \param Offset The offset of the string to retrieve, previously returned by
/// insertString.
/// \returns The string at the given offset in the string table.
- LLVM_ABI StringRef getString(uint32_t Offset);
+ LLVM_ABI StringRef getString(uint32_t Offset) override;
/// Insert a file into this GSYM creator.
///
@@ -311,7 +312,8 @@ class GsymCreatorV2 {
/// \param Style The path style for the "Path" parameter.
/// \returns The unique file index for the inserted file.
LLVM_ABI uint32_t
- insertFile(StringRef Path, sys::path::Style Style = sys::path::Style::native);
+ insertFile(StringRef Path,
+ sys::path::Style Style = sys::path::Style::native) override;
/// Add a function info to this GSYM creator.
///
@@ -320,7 +322,7 @@ class GsymCreatorV2 {
/// offsets for names and other strings.
///
/// \param FI The function info object to emplace into our functions list.
- LLVM_ABI void addFunctionInfo(FunctionInfo &&FI);
+ LLVM_ABI void addFunctionInfo(FunctionInfo &&FI) override;
/// Load call site information from a YAML file.
///
@@ -329,7 +331,7 @@ class GsymCreatorV2 {
///
/// \param YAMLFile The path to the YAML file containing call site
/// information.
- LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile);
+ LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) override;
/// Organize merged FunctionInfo's
///
@@ -338,7 +340,7 @@ class GsymCreatorV2 {
///
/// \param Out Output stream to report information about how merged
/// FunctionInfo's were handled.
- LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
+ LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out) override;
/// Finalize the data in the GSYM creator prior to saving the data out.
///
@@ -349,12 +351,12 @@ class GsymCreatorV2 {
/// function infos, and function infos that were merged or removed.
/// \returns An error object that indicates success or failure of the
/// finalize.
- LLVM_ABI llvm::Error finalize(OutputAggregator &OS);
+ LLVM_ABI llvm::Error finalize(OutputAggregator &OS) override;
/// Set the UUID value.
///
/// \param UUIDBytes The new UUID bytes.
- void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
+ void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) override {
UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
}
@@ -363,26 +365,27 @@ class GsymCreatorV2 {
/// \param Callback A callback function that will get called with each
/// FunctionInfo. If the callback returns false, stop iterating.
LLVM_ABI void
- forEachFunctionInfo(std::function<bool(FunctionInfo &)> const &Callback);
+ forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback) override;
/// Thread safe const iteration over all function infos.
///
/// \param Callback A callback function that will get called with each
/// FunctionInfo. If the callback returns false, stop iterating.
LLVM_ABI void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const;
+ std::function<bool(const FunctionInfo &)> const &Callback) const override;
/// Get the current number of FunctionInfo objects contained in this
/// object.
- LLVM_ABI size_t getNumFunctionInfos() const;
+ LLVM_ABI size_t getNumFunctionInfos() const override;
/// Set valid .text address ranges that all functions must be contained in.
- void SetValidTextRanges(AddressRanges &TextRanges) {
+ void SetValidTextRanges(AddressRanges &TextRanges) override {
ValidTextRanges = TextRanges;
}
/// Get the valid text ranges.
- const std::optional<AddressRanges> GetValidTextRanges() const {
+ const std::optional<AddressRanges> GetValidTextRanges() const override {
return ValidTextRanges;
}
@@ -405,7 +408,7 @@ class GsymCreatorV2 {
///
/// \returns True if the address is in the valid text ranges or if no valid
/// text ranges have been set, false otherwise.
- LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const;
+ LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const override;
/// Set the base address to use for the GSYM file.
///
@@ -418,12 +421,12 @@ class GsymCreatorV2 {
///
/// \param Addr The address to use as the base address of the GSYM file
/// when it is saved to disk.
- void setBaseAddress(uint64_t Addr) {
+ void setBaseAddress(uint64_t Addr) override {
BaseAddress = Addr;
}
/// Whether the transformation should be quiet, i.e. not output warnings.
- bool isQuiet() const { return Quiet; }
+ bool isQuiet() const override { return Quiet; }
/// Create a segmented GSYM creator starting with function info index
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index f26e69fda2540..5411a0dce1d1d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -21,12 +21,12 @@
using namespace llvm;
using namespace gsym;
-GsymCreator::GsymCreator(bool Quiet)
+GsymCreatorV1::GsymCreatorV1(bool Quiet)
: StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
insertFile(StringRef());
}
-uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
+uint32_t GsymCreatorV1::insertFile(StringRef Path, llvm::sys::path::Style Style) {
llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
// We must insert the strings first, then call the FileEntry constructor.
@@ -38,7 +38,7 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
return insertFileEntry(FileEntry(Dir, Base));
}
-uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
+uint32_t GsymCreatorV1::insertFileEntry(FileEntry FE) {
std::lock_guard<std::mutex> Guard(Mutex);
const auto NextIndex = Files.size();
// Find FE in hash map and insert if not present.
@@ -48,7 +48,7 @@ uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
return R.first->second;
}
-uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
+uint32_t GsymCreatorV1::copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx) {
// File index zero is reserved for a FileEntry with no directory and no
// filename. Any other file and we need to copy the strings for the directory
// and filename.
@@ -65,7 +65,7 @@ uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
return insertFileEntry(DstFE);
}
-llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
+llvm::Error GsymCreatorV1::save(StringRef Path, llvm::endianness ByteOrder,
std::optional<uint64_t> SegmentSize) const {
if (SegmentSize)
return saveSegments(Path, ByteOrder, *SegmentSize);
@@ -77,7 +77,7 @@ llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
return encode(O);
}
-llvm::Error GsymCreator::encode(FileWriter &O) const {
+llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
std::lock_guard<std::mutex> Guard(Mutex);
if (Funcs.empty())
return createStringError(std::errc::invalid_argument,
@@ -203,13 +203,13 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
return ErrorSuccess();
}
-llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) {
+llvm::Error GsymCreatorV1::loadCallSitesFromYAML(StringRef YAMLFile) {
// Use the loader to load call site information from the YAML file.
CallSiteInfoLoader Loader(*this, Funcs);
return Loader.loadYAML(YAMLFile);
}
-void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
+void GsymCreatorV1::prepareMergedFunctions(OutputAggregator &Out) {
// Nothing to do if we have less than 2 functions.
if (Funcs.size() < 2)
return;
@@ -252,7 +252,7 @@ void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
std::swap(Funcs, TopLevelFuncs);
}
-llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
+llvm::Error GsymCreatorV1::finalize(OutputAggregator &Out) {
std::lock_guard<std::mutex> Guard(Mutex);
if (Finalized)
return createStringError(std::errc::invalid_argument, "already finalized");
@@ -367,14 +367,14 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
return Error::success();
}
-uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
+uint32_t GsymCreatorV1::copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff) {
// String offset at zero is always the empty string, no copying needed.
if (StrOff == 0)
return 0;
return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
}
-uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
+uint32_t GsymCreatorV1::insertString(StringRef S, bool Copy) {
if (S.empty())
return 0;
@@ -400,19 +400,19 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
return StrOff;
}
-StringRef GsymCreator::getString(uint32_t Offset) {
+StringRef GsymCreatorV1::getString(uint32_t Offset) {
auto I = StringOffsetMap.find(Offset);
assert(I != StringOffsetMap.end() &&
- "GsymCreator::getString expects a valid offset as parameter.");
+ "GsymCreatorV1::getString expects a valid offset as parameter.");
return I->second.val();
}
-void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
+void GsymCreatorV1::addFunctionInfo(FunctionInfo &&FI) {
std::lock_guard<std::mutex> Guard(Mutex);
Funcs.emplace_back(std::move(FI));
}
-void GsymCreator::forEachFunctionInfo(
+void GsymCreatorV1::forEachFunctionInfo(
std::function<bool(FunctionInfo &)> const &Callback) {
std::lock_guard<std::mutex> Guard(Mutex);
for (auto &FI : Funcs) {
@@ -421,7 +421,7 @@ void GsymCreator::forEachFunctionInfo(
}
}
-void GsymCreator::forEachFunctionInfo(
+void GsymCreatorV1::forEachFunctionInfo(
std::function<bool(const FunctionInfo &)> const &Callback) const {
std::lock_guard<std::mutex> Guard(Mutex);
for (const auto &FI : Funcs) {
@@ -430,18 +430,18 @@ void GsymCreator::forEachFunctionInfo(
}
}
-size_t GsymCreator::getNumFunctionInfos() const {
+size_t GsymCreatorV1::getNumFunctionInfos() const {
std::lock_guard<std::mutex> Guard(Mutex);
return Funcs.size();
}
-bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
+bool GsymCreatorV1::IsValidTextAddress(uint64_t Addr) const {
if (ValidTextRanges)
return ValidTextRanges->contains(Addr);
return true; // No valid text ranges has been set, so accept all ranges.
}
-std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
+std::optional<uint64_t> GsymCreatorV1::getFirstFunctionAddress() const {
// If we have finalized then Funcs are sorted. If we are a segment then
// Funcs will be sorted as well since function infos get added from an
// already finalized GsymCreator object where its functions were sorted and
@@ -451,7 +451,7 @@ std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
return std::nullopt;
}
-std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
+std::optional<uint64_t> GsymCreatorV1::getLastFunctionAddress() const {
// If we have finalized then Funcs are sorted. If we are a segment then
// Funcs will be sorted as well since function infos get added from an
// already finalized GsymCreator object where its functions were sorted and
@@ -461,13 +461,13 @@ std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
return std::nullopt;
}
-std::optional<uint64_t> GsymCreator::getBaseAddress() const {
+std::optional<uint64_t> GsymCreatorV1::getBaseAddress() const {
if (BaseAddress)
return BaseAddress;
return getFirstFunctionAddress();
}
-uint64_t GsymCreator::getMaxAddressOffset() const {
+uint64_t GsymCreatorV1::getMaxAddressOffset() const {
switch (getAddressOffsetSize()) {
case 1: return UINT8_MAX;
case 2: return UINT16_MAX;
@@ -477,7 +477,7 @@ uint64_t GsymCreator::getMaxAddressOffset() const {
llvm_unreachable("invalid address offset");
}
-uint8_t GsymCreator::getAddressOffsetSize() const {
+uint8_t GsymCreatorV1::getAddressOffsetSize() const {
const std::optional<uint64_t> BaseAddress = getBaseAddress();
const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
if (BaseAddress && LastFuncAddr) {
@@ -493,7 +493,7 @@ uint8_t GsymCreator::getAddressOffsetSize() const {
return 1;
}
-uint64_t GsymCreator::calculateHeaderAndTableSize() const {
+uint64_t GsymCreatorV1::calculateHeaderAndTableSize() const {
uint64_t Size = sizeof(Header);
const size_t NumFuncs = Funcs.size();
// Add size of address offset table
@@ -511,14 +511,14 @@ uint64_t GsymCreator::calculateHeaderAndTableSize() const {
// This function takes a InlineInfo class that was copy constructed from an
// InlineInfo from the \a SrcGC and updates all members that point to strings
// and files to point to strings and files from this GsymCreator.
-void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
+void GsymCreatorV1::fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II) {
II.Name = copyString(SrcGC, II.Name);
II.CallFile = copyFile(SrcGC, II.CallFile);
for (auto &ChildII: II.Children)
fixupInlineInfo(SrcGC, ChildII);
}
-uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
+uint64_t GsymCreatorV1::copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncIdx) {
// To copy a function info we need to copy any files and strings over into
// this GsymCreator and then copy the function info and update the string
// table offsets to match the new offsets.
@@ -552,7 +552,7 @@ uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx)
return Funcs.back().cacheEncoding();
}
-llvm::Error GsymCreator::saveSegments(StringRef Path,
+llvm::Error GsymCreatorV1::saveSegments(StringRef Path,
llvm::endianness ByteOrder,
uint64_t SegmentSize) const {
if (SegmentSize == 0)
@@ -562,10 +562,10 @@ llvm::Error GsymCreator::saveSegments(StringRef Path,
size_t FuncIdx = 0;
const size_t NumFuncs = Funcs.size();
while (FuncIdx < NumFuncs) {
- llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> ExpectedGC =
createSegment(SegmentSize, FuncIdx);
if (ExpectedGC) {
- GsymCreator *GC = ExpectedGC->get();
+ GsymCreatorV1 *GC = ExpectedGC->get();
if (!GC)
break; // We had not more functions to encode.
// Don't collect any messages at all
@@ -589,13 +589,13 @@ llvm::Error GsymCreator::saveSegments(StringRef Path,
return Error::success();
}
-llvm::Expected<std::unique_ptr<GsymCreator>>
-GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+llvm::Expected<std::unique_ptr<GsymCreatorV1>>
+GsymCreatorV1::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
// No function entries, return empty unique pointer
if (FuncIdx >= Funcs.size())
- return std::unique_ptr<GsymCreator>();
+ return std::unique_ptr<GsymCreatorV1>();
- std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
+ std::unique_ptr<GsymCreatorV1> GC(new GsymCreatorV1(/*Quiet=*/true));
// Tell the creator that this is a segment.
GC->setIsSegment();
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index b05c651696bce..021e111aa9e97 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -348,7 +348,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
auto ThreadCount =
NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
- GsymCreator Gsym(Quiet);
+ GsymCreatorV1 Gsym(Quiet);
// See if we can figure out the base address for a given object file, and if
// we can, then set the base address to use to this value. This will ease
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index f6e565cf53462..bfa030dda62d8 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -941,7 +941,7 @@ TEST(GSYMTest, TestHeaderEncodeDecode) {
}
static void TestGsymCreatorEncodeError(llvm::endianness ByteOrder,
- const GsymCreator &GC,
+ const GsymCreatorV1 &GC,
std::string ExpectedErrorMsg) {
SmallString<512> Str;
raw_svector_ostream OutStrm(Str);
@@ -959,7 +959,7 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
// Verify we get an error when trying to encode an GsymCreator with no
// function infos. We shouldn't be saving a GSYM file in this case since
// there is nothing inside of it.
- GsymCreator GC;
+ GsymCreatorV1 GC;
TestGsymCreatorEncodeError(llvm::endianness::little, GC,
"no functions to encode");
const uint64_t FuncAddr = 0x1000;
@@ -1003,7 +1003,7 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
"attempted to encode invalid InlineInfo object");
}
-static void Compare(const GsymCreator &GC, const GsymReaderV1 &GR) {
+static void Compare(const GsymCreatorV1 &GC, const GsymReaderV1 &GR) {
// Verify that all of the data in a GsymCreator is correctly decoded from
// a GsymReaderV1. To do this, we iterator over
GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool {
@@ -1014,7 +1014,7 @@ static void Compare(const GsymCreator &GC, const GsymReaderV1 &GR) {
});
}
-static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
+static void TestEncodeDecode(const GsymCreatorV1 &GC, llvm::endianness ByteOrder,
uint16_t Version, uint8_t AddrOffSize,
uint64_t BaseAddress, uint32_t NumAddresses,
ArrayRef<uint8_t> UUID) {
@@ -1037,7 +1037,7 @@ static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -1060,7 +1060,7 @@ TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 2;
@@ -1083,7 +1083,7 @@ TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 4;
@@ -1106,7 +1106,7 @@ TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 8;
@@ -1143,7 +1143,7 @@ static void VerifyFunctionInfoError(const GsymReaderV1 &GR, uint64_t Addr,
TEST(GSYMTest, TestGsymReaderV1) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint64_t Func1Addr = BaseAddr;
@@ -1188,7 +1188,7 @@ TEST(GSYMTest, TestGsymLookups) {
// FunctionInfo or InlineInfo, they only extract information needed for the
// lookup to happen which avoids allocations which can slow down
// symbolication.
- GsymCreator GC;
+ GsymCreatorV1 GC;
FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
const auto ByteOrder = llvm::endianness::native;
FI.OptLineTable = LineTable();
@@ -1338,7 +1338,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1416,7 +1416,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1524,7 +1524,7 @@ TEST(GSYMTest, TestDWARFStructMethodNoMangled) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1625,7 +1625,7 @@ TEST(GSYMTest, TestDWARFTextRanges) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
// Only allow addresses between [0x1000 - 0x2000) to be linked into the
// GSYM.
@@ -1657,7 +1657,7 @@ TEST(GSYMTest, TestEmptySymbolEndAddressOfTextRanges) {
// Test that if we have valid text ranges and we have a symbol with no size
// as the last FunctionInfo entry that the size of the symbol gets set to the
// end address of the text range.
- GsymCreator GC;
+ GsymCreatorV1 GC;
AddressRanges TextRanges;
TextRanges.insert(AddressRange(0x1000, 0x2000));
GC.SetValidTextRanges(TextRanges);
@@ -1829,7 +1829,7 @@ TEST(GSYMTest, TestDWARFInlineInfo) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2090,7 +2090,7 @@ TEST(GSYMTest, TestDWARFNoLines) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2270,7 +2270,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr4) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2411,7 +2411,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2438,7 +2438,7 @@ TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
// instead of being combined into a single entry. This function tests to make
// sure we only get one symbol.
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -2460,7 +2460,7 @@ TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
}
// Helper function to quickly create a FunctionInfo in a GsymCreator for testing.
-static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
+static void AddFunctionInfo(GsymCreatorV1 &GC, const char *FuncName,
uint64_t FuncAddr, const char *SourcePath,
const char *HeaderPath) {
FunctionInfo FI(FuncAddr, 0x30, GC.insertString(FuncName));
@@ -2501,7 +2501,7 @@ static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
// Finalize a GsymCreator, encode it and decode it and return the error or
// GsymReaderV1 that was successfully decoded.
-static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreator &GC) {
+static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreatorV1 &GC) {
OutputAggregator Null(nullptr);
Error FinalizeErr = GC.finalize(Null);
if (FinalizeErr)
@@ -2522,7 +2522,7 @@ TEST(GSYMTest, TestGsymSegmenting) {
// encoding multiple segments, then we verify that we get the same information
// when doing lookups on the full GSYM that was decoded from encoding the
// entire GSYM and also by decoding information from the segments themselves.
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setBaseAddress(0);
AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
@@ -2548,7 +2548,7 @@ TEST(GSYMTest, TestGsymSegmenting) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
@@ -2557,25 +2557,25 @@ TEST(GSYMTest, TestGsymSegmenting) {
// encode any values into the segmented GsymCreator.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
// and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -2674,7 +2674,7 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
// encoding multiple segments, then we verify that we get the same information
// when doing lookups on the full GSYM that was decoded from encoding the
// entire GSYM and also by decoding information from the segments themselves.
- GsymCreator GC;
+ GsymCreatorV1 GC;
AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
@@ -2699,7 +2699,7 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
@@ -2708,25 +2708,25 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
// encode any values into the segmented GsymCreator.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
// and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -3052,7 +3052,7 @@ TEST(GSYMTest, TestDWARFInlineRangeScopes) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3280,7 +3280,7 @@ TEST(GSYMTest, TestDWARFEmptyInline) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3517,7 +3517,7 @@ TEST(GSYMTest, TestFinalizeForLineTables) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3797,7 +3797,7 @@ TEST(GSYMTest, TestRangeWarnings) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3999,7 +3999,7 @@ TEST(GSYMTest, TestEmptyRangeWarnings) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4151,7 +4151,7 @@ TEST(GSYMTest, TestEmptyLinkageName) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4312,7 +4312,7 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4632,7 +4632,7 @@ TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4847,7 +4847,7 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4947,7 +4947,7 @@ TEST(GSYMTest, TestUnableToLocateDWO) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
// Make a DWARF transformer that is MachO (Apple) to avoid warnings about
// not finding DWO files.
DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true);
@@ -5074,7 +5074,7 @@ TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
>From 481fd7d315cb095b6d019ff4a79a221cbe9154a5 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 22:01:59 -0700
Subject: [PATCH 15/45] Extend reader interface
---
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 87 ++++++++++++++-----
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 30 ++++---
2 files changed, 82 insertions(+), 35 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 8709c033e2d89..615a244a1e721 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -40,17 +40,60 @@ class GsymReader {
virtual ~GsymReader() = default;
/// Get a string from the string table.
- ///
- /// \param Offset The string table offset for the string to retrieve.
- /// \returns The string from the string table.
virtual StringRef getString(uint32_t Offset) const = 0;
/// Get a file entry for the supplied file index.
- ///
- /// \param Index An index into the file table.
- /// \returns An optional FileEntry that will be valid if the file index is
- /// valid, or std::nullopt if the file index is out of bounds.
virtual std::optional<FileEntry> getFile(uint32_t Index) const = 0;
+
+ /// Get the full function info for an address.
+ virtual llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const = 0;
+
+ /// Get the full function info given an address index.
+ virtual llvm::Expected<FunctionInfo>
+ getFunctionInfoAtIndex(uint64_t AddrIdx) const = 0;
+
+ /// Lookup an address in the GSYM.
+ virtual llvm::Expected<LookupResult>
+ lookup(uint64_t Addr,
+ std::optional<DataExtractor> *MergedFuncsData = nullptr) const = 0;
+
+ /// Lookup all merged functions for a given address.
+ virtual llvm::Expected<std::vector<LookupResult>>
+ lookupAll(uint64_t Addr) const = 0;
+
+ /// Get the number of addresses in this GSYM file.
+ virtual uint32_t getNumAddresses() const = 0;
+
+ /// Gets an address from the address table.
+ virtual std::optional<uint64_t> getAddress(size_t Index) const = 0;
+
+ /// Dump the entire GSYM data contained in this object.
+ virtual void dump(raw_ostream &OS) = 0;
+
+ /// Dump a FunctionInfo object.
+ virtual void dump(raw_ostream &OS, const FunctionInfo &FI,
+ uint32_t Indent = 0) = 0;
+
+ /// Dump a MergedFunctionsInfo object.
+ virtual void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) = 0;
+
+ /// Dump a CallSiteInfo object.
+ virtual void dump(raw_ostream &OS, const CallSiteInfo &CSI) = 0;
+
+ /// Dump a CallSiteInfoCollection object.
+ virtual void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent = 0) = 0;
+
+ /// Dump a LineTable object.
+ virtual void dump(raw_ostream &OS, const LineTable &LT,
+ uint32_t Indent = 0) = 0;
+
+ /// Dump a InlineInfo object.
+ virtual void dump(raw_ostream &OS, const InlineInfo &II,
+ uint32_t Indent = 0) = 0;
+
+ /// Dump a FileEntry object.
+ virtual void dump(raw_ostream &OS, std::optional<FileEntry> FE) = 0;
};
/// GsymReaderV1 is used to read GSYM V1 data from a file or buffer.
@@ -128,7 +171,8 @@ class GsymReaderV1 : public GsymReader {
/// \returns An expected FunctionInfo that contains the function info object
/// or an error object that indicates reason for failing to lookup the
/// address.
- LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+ LLVM_ABI llvm::Expected<FunctionInfo>
+ getFunctionInfo(uint64_t Addr) const override;
/// Get the full function info given an address index.
///
@@ -138,7 +182,7 @@ class GsymReaderV1 : public GsymReader {
/// or an error object that indicates reason for failing get the function
/// info object.
LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfoAtIndex(uint64_t AddrIdx) const;
+ getFunctionInfoAtIndex(uint64_t AddrIdx) const override;
/// Lookup an address in the a GSYM.
///
@@ -162,7 +206,7 @@ class GsymReaderV1 : public GsymReader {
/// for failing to lookup the address.
LLVM_ABI llvm::Expected<LookupResult>
lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
+ std::optional<DataExtractor> *MergedFuncsData = nullptr) const override;
/// Lookup all merged functions for a given address.
///
@@ -175,7 +219,7 @@ class GsymReaderV1 : public GsymReader {
/// \returns A vector of LookupResult objects, where the first element is the
/// primary result, followed by results for any merged functions
LLVM_ABI llvm::Expected<std::vector<LookupResult>>
- lookupAll(uint64_t Addr) const;
+ lookupAll(uint64_t Addr) const override;
/// Get a string from the string table.
///
@@ -201,7 +245,7 @@ class GsymReaderV1 : public GsymReader {
/// Dump the entire Gsym data contained in this object.
///
/// \param OS The output stream to dump to.
- LLVM_ABI void dump(raw_ostream &OS);
+ LLVM_ABI void dump(raw_ostream &OS) override;
/// Dump a FunctionInfo object.
///
@@ -215,7 +259,7 @@ class GsymReaderV1 : public GsymReader {
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent = 0);
+ uint32_t Indent = 0) override;
/// Dump a MergedFunctionsInfo object.
///
@@ -225,7 +269,7 @@ class GsymReaderV1 : public GsymReader {
/// \param OS The output stream to dump to.
///
/// \param MFI The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
+ LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) override;
/// Dump a CallSiteInfo object.
///
@@ -235,7 +279,7 @@ class GsymReaderV1 : public GsymReader {
/// \param OS The output stream to dump to.
///
/// \param CSI The CallSiteInfo object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI) override;
/// Dump a CallSiteInfoCollection object.
///
@@ -249,7 +293,7 @@ class GsymReaderV1 : public GsymReader {
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item from within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent = 0);
+ uint32_t Indent = 0) override;
/// Dump a LineTable object.
///
@@ -263,7 +307,8 @@ class GsymReaderV1 : public GsymReader {
///
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
+ uint32_t Indent = 0) override;
/// Dump a InlineInfo object.
///
@@ -277,7 +322,7 @@ class GsymReaderV1 : public GsymReader {
/// \param Indent The indentation as number of spaces. Used for recurive
/// dumping.
LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
- uint32_t Indent = 0);
+ uint32_t Indent = 0) override;
/// Dump a FileEntry object.
///
@@ -287,10 +332,10 @@ class GsymReaderV1 : public GsymReader {
/// \param OS The output stream to dump to.
///
/// \param FE The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
+ LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE) override;
/// Get the number of addresses in this Gsym file.
- uint32_t getNumAddresses() const {
+ uint32_t getNumAddresses() const override {
return Hdr->NumAddresses;
}
@@ -301,7 +346,7 @@ class GsymReaderV1 : public GsymReader {
/// \param Index A index into the address table.
/// \returns A resolved virtual address for adddress in the address table
/// or std::nullopt if Index is out of bounds.
- LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
+ LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const override;
protected:
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
index 192c5fee37b01..5ce96bfe45f59 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -106,7 +106,8 @@ class GsymReaderV2 : public GsymReader {
/// \returns An expected FunctionInfo that contains the function info object
/// or an error object that indicates reason for failing to lookup the
/// address.
- LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+ LLVM_ABI llvm::Expected<FunctionInfo>
+ getFunctionInfo(uint64_t Addr) const override;
/// Get the full function info given an address index.
///
@@ -116,7 +117,7 @@ class GsymReaderV2 : public GsymReader {
/// or an error object that indicates reason for failing get the function
/// info object.
LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfoAtIndex(uint64_t AddrIdx) const;
+ getFunctionInfoAtIndex(uint64_t AddrIdx) const override;
/// Lookup an address in the a GSYM.
///
@@ -140,7 +141,7 @@ class GsymReaderV2 : public GsymReader {
/// for failing to lookup the address.
LLVM_ABI llvm::Expected<LookupResult>
lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
+ std::optional<DataExtractor> *MergedFuncsData = nullptr) const override;
/// Lookup all merged functions for a given address.
///
@@ -153,7 +154,7 @@ class GsymReaderV2 : public GsymReader {
/// \returns A vector of LookupResult objects, where the first element is the
/// primary result, followed by results for any merged functions
LLVM_ABI llvm::Expected<std::vector<LookupResult>>
- lookupAll(uint64_t Addr) const;
+ lookupAll(uint64_t Addr) const override;
/// Get a string from the string table.
///
@@ -179,7 +180,7 @@ class GsymReaderV2 : public GsymReader {
/// Dump the entire Gsym data contained in this object.
///
/// \param OS The output stream to dump to.
- LLVM_ABI void dump(raw_ostream &OS);
+ LLVM_ABI void dump(raw_ostream &OS) override;
/// Dump a FunctionInfo object.
///
@@ -193,7 +194,7 @@ class GsymReaderV2 : public GsymReader {
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent = 0);
+ uint32_t Indent = 0) override;
/// Dump a MergedFunctionsInfo object.
///
@@ -203,7 +204,7 @@ class GsymReaderV2 : public GsymReader {
/// \param OS The output stream to dump to.
///
/// \param MFI The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
+ LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) override;
/// Dump a CallSiteInfo object.
///
@@ -213,7 +214,7 @@ class GsymReaderV2 : public GsymReader {
/// \param OS The output stream to dump to.
///
/// \param CSI The CallSiteInfo object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI) override;
/// Dump a CallSiteInfoCollection object.
///
@@ -227,7 +228,7 @@ class GsymReaderV2 : public GsymReader {
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item from within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent = 0);
+ uint32_t Indent = 0) override;
/// Dump a LineTable object.
///
@@ -241,7 +242,8 @@ class GsymReaderV2 : public GsymReader {
///
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const LineTable <, uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, const LineTable <,
+ uint32_t Indent = 0) override;
/// Dump a InlineInfo object.
///
@@ -255,7 +257,7 @@ class GsymReaderV2 : public GsymReader {
/// \param Indent The indentation as number of spaces. Used for recurive
/// dumping.
LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
- uint32_t Indent = 0);
+ uint32_t Indent = 0) override;
/// Dump a FileEntry object.
///
@@ -265,10 +267,10 @@ class GsymReaderV2 : public GsymReader {
/// \param OS The output stream to dump to.
///
/// \param FE The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
+ LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE) override;
/// Get the number of addresses in this Gsym file.
- uint32_t getNumAddresses() const {
+ uint32_t getNumAddresses() const override {
return Hdr->NumAddresses;
}
@@ -279,7 +281,7 @@ class GsymReaderV2 : public GsymReader {
/// \param Index A index into the address table.
/// \returns A resolved virtual address for adddress in the address table
/// or std::nullopt if Index is out of bounds.
- LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
+ LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const override;
protected:
>From 8736f52510480bfd56ca0ff1ff9b389fdeb58207 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 22:04:26 -0700
Subject: [PATCH 16/45] Use reader interface rather than the class for a
specific version
---
llvm/include/llvm/DebugInfo/GSYM/GsymContext.h | 6 +++---
llvm/lib/DebugInfo/GSYM/GsymContext.cpp | 4 ++--
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
index 72a6e4ce3e65a..f9382fa8d9577 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
@@ -17,7 +17,7 @@ namespace llvm {
namespace gsym {
-class GsymReaderV1;
+class GsymReader;
/// GSYM DI Context
/// This data structure is the top level entity that deals with GSYM
@@ -28,7 +28,7 @@ class GsymReaderV1;
/// the GSYM interfaces directly.
class GsymContext : public DIContext {
public:
- GsymContext(std::unique_ptr<GsymReaderV1> Reader);
+ GsymContext(std::unique_ptr<GsymReader> Reader);
~GsymContext() override;
GsymContext(GsymContext &) = delete;
@@ -56,7 +56,7 @@ class GsymContext : public DIContext {
getLocalsForAddress(object::SectionedAddress Address) override;
private:
- const std::unique_ptr<GsymReaderV1> Reader;
+ const std::unique_ptr<GsymReader> Reader;
};
} // end namespace gsym
diff --git a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
index ac88ca3f94970..62b4caa327d87 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
using namespace llvm::gsym;
GsymContext::~GsymContext() = default;
-GsymContext::GsymContext(std::unique_ptr<GsymReaderV1> Reader)
+GsymContext::GsymContext(std::unique_ptr<GsymReader> Reader)
: DIContext(CK_GSYM), Reader(std::move(Reader)) {}
void GsymContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {}
@@ -119,7 +119,7 @@ GsymContext::getLineInfoForAddressRange(object::SectionedAddress Address,
for (const auto &LineEntry : LT) {
if (StartAddr <= LineEntry.Addr && LineEntry.Addr < EndAddr) {
// Use LineEntry.Addr, LineEntry.File (which is a file index into the
- // files tables from the GsymReaderV1), and LineEntry.Line (source line
+ // files tables from the GsymReader), and LineEntry.Line (source line
// number) to add stuff to the DILineInfoTable
}
}
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 021e111aa9e97..8b7f226eb9023 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -527,7 +527,7 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
return Error::success();
}
-static void doLookup(GsymReaderV1 &Gsym, uint64_t Addr, raw_ostream &OS) {
+static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
// If we have filters, count matching results first
>From 3d3e3b81747f08b0ffc280f8d372f541c30b27e5 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 22:14:53 -0700
Subject: [PATCH 17/45] Add v1/v2 options and auto-detection for reader
---
llvm/tools/llvm-gsymutil/Opts.td | 12 ++-
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 114 +++++++++++++++++++--
2 files changed, 115 insertions(+), 11 deletions(-)
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 15bc064ba6f2c..948cd8b366267 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -46,8 +46,16 @@ def addresses_from_stdin :
defm json_summary_file :
Eq<"json-summary-file",
"Output a categorized summary of errors into the JSON file specified.">;
-defm merged_functions_filter :
- Eq<"merged-functions-filter",
+defm merged_functions_filter :
+ Eq<"merged-functions-filter",
"When used with --address/--addresses-from-stdin and --merged-functions,\n"
"filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
"Can be specified multiple times.">;
+defm reader_version :
+ Eq<"reader-version",
+ "Force the GSYM reader version (auto, v1, v2). Default: auto-detect from file.">,
+ Flags<[HelpHidden]>;
+defm creator_version :
+ Eq<"creator-version",
+ "Force the GSYM creator version (v1, v2). Default: v1.">,
+ Flags<[HelpHidden]>;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 8b7f226eb9023..cf0c2306e291b 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -41,7 +41,11 @@
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
@@ -102,6 +106,12 @@ static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;
static std::vector<std::string> MergedFunctionsFilters;
+enum class ReaderVersion { Auto, V1, V2 };
+static ReaderVersion ForceReaderVersion = ReaderVersion::Auto;
+
+enum class CreatorVersion { V1, V2 };
+static CreatorVersion ForceCreatorVersion = CreatorVersion::V1;
+
static void parseArgs(int argc, char **argv) {
GSYMUtilOptTable Tbl;
llvm::StringRef ToolName = argv[0];
@@ -212,6 +222,36 @@ static void parseArgs(int argc, char **argv) {
std::exit(1);
}
}
+
+ if (const llvm::opt::Arg *A = Args.getLastArg(OPT_reader_version_EQ)) {
+ StringRef Val = A->getValue();
+ if (Val == "auto")
+ ForceReaderVersion = ReaderVersion::Auto;
+ else if (Val == "v1")
+ ForceReaderVersion = ReaderVersion::V1;
+ else if (Val == "v2")
+ ForceReaderVersion = ReaderVersion::V2;
+ else {
+ llvm::errs() << ToolName
+ << ": for the --reader-version option: '" << Val
+ << "' is invalid. Use 'auto', 'v1', or 'v2'.\n";
+ std::exit(1);
+ }
+ }
+
+ if (const llvm::opt::Arg *A = Args.getLastArg(OPT_creator_version_EQ)) {
+ StringRef Val = A->getValue();
+ if (Val == "v1")
+ ForceCreatorVersion = CreatorVersion::V1;
+ else if (Val == "v2")
+ ForceCreatorVersion = CreatorVersion::V2;
+ else {
+ llvm::errs() << ToolName
+ << ": for the --creator-version option: '" << Val
+ << "' is invalid. Use 'v1' or 'v2'.\n";
+ std::exit(1);
+ }
+ }
}
/// @}
@@ -348,7 +388,12 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
auto ThreadCount =
NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
- GsymCreatorV1 Gsym(Quiet);
+ std::unique_ptr<GsymCreator> GsymPtr;
+ if (ForceCreatorVersion == CreatorVersion::V2)
+ GsymPtr = std::make_unique<GsymCreatorV2>(Quiet);
+ else
+ GsymPtr = std::make_unique<GsymCreatorV1>(Quiet);
+ GsymCreator &Gsym = *GsymPtr;
// See if we can figure out the base address for a given object file, and if
// we can, then set the base address to use to this value. This will ease
@@ -527,6 +572,56 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
return Error::success();
}
+/// Detect the GSYM version by reading the version field from a file.
+static Expected<uint16_t> detectGsymVersion(StringRef Path) {
+ auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path);
+ if (!BufOrErr)
+ return createStringError(BufOrErr.getError(), "failed to open '%s'",
+ Path.str().c_str());
+ StringRef Data = (*BufOrErr)->getBuffer();
+ // Need at least 6 bytes: 4 (magic) + 2 (version).
+ if (Data.size() < 6)
+ return createStringError(std::errc::invalid_argument,
+ "file too small to be a GSYM file");
+ uint32_t Magic;
+ memcpy(&Magic, Data.data(), 4);
+ if (Magic != GSYM_MAGIC && Magic != llvm::byteswap(GSYM_MAGIC))
+ return createStringError(std::errc::invalid_argument,
+ "not a GSYM file (bad magic)");
+ uint16_t Version;
+ memcpy(&Version, Data.data() + 4, 2);
+ if (Magic != GSYM_MAGIC)
+ Version = llvm::byteswap(Version);
+ return Version;
+}
+
+/// Open a GSYM file, auto-detecting the version unless forced.
+static Expected<std::unique_ptr<GsymReader>> openGsymFile(StringRef Path) {
+ ReaderVersion RV = ForceReaderVersion;
+ if (RV == ReaderVersion::Auto) {
+ auto VersionOrErr = detectGsymVersion(Path);
+ if (!VersionOrErr)
+ return VersionOrErr.takeError();
+ if (*VersionOrErr == GSYM_VERSION)
+ RV = ReaderVersion::V1;
+ else if (*VersionOrErr == GSYM_VERSION_2)
+ RV = ReaderVersion::V2;
+ else
+ return createStringError(std::errc::invalid_argument,
+ "unsupported GSYM version %u", *VersionOrErr);
+ }
+ if (RV == ReaderVersion::V2) {
+ auto ReaderOrErr = GsymReaderV2::openFile(Path);
+ if (!ReaderOrErr)
+ return ReaderOrErr.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*ReaderOrErr));
+ }
+ auto ReaderOrErr = GsymReaderV1::openFile(Path);
+ if (!ReaderOrErr)
+ return ReaderOrErr.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*ReaderOrErr));
+}
+
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
@@ -661,7 +756,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
std::string InputLine;
std::string CurrentGSYMPath;
- std::optional<Expected<GsymReaderV1>> CurrentGsym;
+ std::unique_ptr<GsymReader> CurrentGsym;
while (std::getline(std::cin, InputLine)) {
// Strip newline characters.
@@ -674,9 +769,10 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
llvm::StringRef{StrippedInputLine}.split(' ');
if (GSYMPath != CurrentGSYMPath) {
- CurrentGsym = GsymReaderV1::openFile(GSYMPath);
- if (!*CurrentGsym)
- error(GSYMPath, CurrentGsym->takeError());
+ auto GsymOrErr = openGsymFile(GSYMPath);
+ if (!GsymOrErr)
+ error(GSYMPath, GsymOrErr.takeError());
+ CurrentGsym = std::move(*GsymOrErr);
CurrentGSYMPath = GSYMPath;
}
@@ -687,7 +783,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
return 1;
}
- doLookup(**CurrentGsym, Addr, OS);
+ doLookup(*CurrentGsym, Addr, OS);
OS << "\n";
OS.flush();
@@ -698,19 +794,19 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
// Dump or access data inside GSYM files
for (const auto &GSYMPath : InputFilenames) {
- auto Gsym = GsymReaderV1::openFile(GSYMPath);
+ auto Gsym = openGsymFile(GSYMPath);
if (!Gsym)
error(GSYMPath, Gsym.takeError());
if (LookupAddresses.empty()) {
- Gsym->dump(outs());
+ (*Gsym)->dump(outs());
continue;
}
// Lookup an address in a GSYM file and print any matches.
OS << "Looking up addresses in \"" << GSYMPath << "\":\n";
for (auto Addr : LookupAddresses) {
- doLookup(*Gsym, Addr, OS);
+ doLookup(**Gsym, Addr, OS);
}
}
return EXIT_SUCCESS;
>From 69a5c15762268f33a7148cbead6b2200d9b4ed35 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 22:27:26 -0700
Subject: [PATCH 18/45] Move auto-detection logic into the reader interface
---
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 15 +++++
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 7 ++-
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 59 +++++++++++++++++++
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 10 +---
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 58 ++++--------------
5 files changed, 92 insertions(+), 57 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 615a244a1e721..5d3c7438c28e7 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -39,6 +39,21 @@ class GsymReader {
public:
virtual ~GsymReader() = default;
+ /// Open a GSYM file, auto-detecting the format version.
+ ///
+ /// \param Path The file path of the GSYM file to read.
+ /// \returns An expected unique_ptr to a GsymReader or an error.
+ LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
+ openFile(StringRef Path);
+
+ /// Construct a GsymReader from a buffer, auto-detecting the format version.
+ ///
+ /// \param Bytes A set of bytes that will be copied and owned by the
+ /// returned object on success.
+ /// \returns An expected unique_ptr to a GsymReader or an error.
+ LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
+ copyBuffer(StringRef Bytes);
+
/// Get a string from the string table.
virtual StringRef getString(uint32_t Offset) const = 0;
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 7fba8abe99bb5..2145f23570b35 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -735,9 +735,10 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath,
OutputAggregator &Out) {
Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
- auto Gsym = GsymReaderV1::openFile(GsymPath);
- if (!Gsym)
- return Gsym.takeError();
+ auto GsymOrErr = GsymReader::openFile(GsymPath);
+ if (!GsymOrErr)
+ return GsymOrErr.takeError();
+ auto &Gsym = *GsymOrErr;
auto NumAddrs = Gsym->getNumAddresses();
DILineInfoSpecifier DLIS(
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index c217a5cac1fd4..151d9f9027211 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -13,6 +13,9 @@
#include <stdio.h>
#include <stdlib.h>
+#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -22,6 +25,62 @@
using namespace llvm;
using namespace gsym;
+/// Detect the GSYM version from raw bytes; returns the 2-byte version field or an error if Data is too short or has a bad magic.
+static Expected<uint16_t> detectVersion(StringRef Data) {
+ // Need at least 6 bytes: 4 (magic) at offset 0 + 2 (version) at offset 4.
+ if (Data.size() < 6)
+ return createStringError(std::errc::invalid_argument,
+ "data too small to be a GSYM file");
+ uint32_t Magic;
+ memcpy(&Magic, Data.data(), 4); // Raw copy, so Magic is interpreted in host byte order.
+ if (Magic != GSYM_MAGIC && Magic != llvm::byteswap(GSYM_MAGIC)) // Accept the magic in either byte order.
+ return createStringError(std::errc::invalid_argument,
+ "not a GSYM file (bad magic)");
+ uint16_t Version;
+ memcpy(&Version, Data.data() + 4, 2);
+ if (Magic != GSYM_MAGIC) // Magic matched only after a byte swap, so swap the version field too.
+ Version = llvm::byteswap(Version);
+ return Version;
+}
+
+llvm::Expected<std::unique_ptr<GsymReader>>
+GsymReader::openFile(StringRef Path) { // Opens Path and returns a V2 or V1 reader chosen by the detected version.
+ auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path); // Used only to detect the version; this buffer is not handed to the reader.
+ if (!BufOrErr)
+ return createStringError(BufOrErr.getError(), "failed to open '%s'",
+ Path.str().c_str());
+ auto VersionOrErr = detectVersion((*BufOrErr)->getBuffer());
+ if (!VersionOrErr)
+ return VersionOrErr.takeError();
+ if (*VersionOrErr == GSYM_VERSION_2) {
+ auto R = GsymReaderV2::openFile(Path); // NOTE(review): Path is opened a second time here; presumably breaks when Path is "-" (stdin already consumed) - confirm.
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*R));
+ }
+ auto R = GsymReaderV1::openFile(Path); // Every version other than GSYM_VERSION_2 is handed to the V1 reader.
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
+}
+
+llvm::Expected<std::unique_ptr<GsymReader>>
+GsymReader::copyBuffer(StringRef Bytes) { // Returns a V2 or V1 reader built from Bytes, chosen by the detected version.
+ auto VersionOrErr = detectVersion(Bytes);
+ if (!VersionOrErr)
+ return VersionOrErr.takeError(); // Too-short data or bad magic is reported before any reader is created.
+ if (*VersionOrErr == GSYM_VERSION_2) {
+ auto R = GsymReaderV2::copyBuffer(Bytes);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*R));
+ }
+ auto R = GsymReaderV1::copyBuffer(Bytes); // Every version other than GSYM_VERSION_2 is handed to the V1 reader.
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
+}
+
GsymReaderV1::GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer)
: MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 1883dcadf7c34..bbfb62de54fa9 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -779,14 +779,10 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
// - Otherwise, create a DWARFContext.
const auto GsymFile = lookUpGsymFile(BinaryName.str());
if (!GsymFile.empty()) {
- auto ReaderOrErr = gsym::GsymReaderV1::openFile(GsymFile);
+ auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
- if (ReaderOrErr) {
- std::unique_ptr<gsym::GsymReaderV1> Reader =
- std::make_unique<gsym::GsymReaderV1>(std::move(*ReaderOrErr));
-
- Context = std::make_unique<gsym::GsymContext>(std::move(Reader));
- }
+ if (ReaderOrErr)
+ Context = std::make_unique<gsym::GsymContext>(std::move(*ReaderOrErr));
}
if (!Context) {
if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index cf0c2306e291b..0ccf493092b74 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -44,8 +44,6 @@
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
-#include "llvm/DebugInfo/GSYM/Header.h"
-#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
@@ -572,54 +570,20 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
return Error::success();
}
-/// Detect the GSYM version by reading the version field from a file.
-static Expected<uint16_t> detectGsymVersion(StringRef Path) {
- auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path);
- if (!BufOrErr)
- return createStringError(BufOrErr.getError(), "failed to open '%s'",
- Path.str().c_str());
- StringRef Data = (*BufOrErr)->getBuffer();
- // Need at least 6 bytes: 4 (magic) + 2 (version).
- if (Data.size() < 6)
- return createStringError(std::errc::invalid_argument,
- "file too small to be a GSYM file");
- uint32_t Magic;
- memcpy(&Magic, Data.data(), 4);
- if (Magic != GSYM_MAGIC && Magic != llvm::byteswap(GSYM_MAGIC))
- return createStringError(std::errc::invalid_argument,
- "not a GSYM file (bad magic)");
- uint16_t Version;
- memcpy(&Version, Data.data() + 4, 2);
- if (Magic != GSYM_MAGIC)
- Version = llvm::byteswap(Version);
- return Version;
-}
-
/// Open a GSYM file, auto-detecting the version unless forced.
static Expected<std::unique_ptr<GsymReader>> openGsymFile(StringRef Path) {
- ReaderVersion RV = ForceReaderVersion;
- if (RV == ReaderVersion::Auto) {
- auto VersionOrErr = detectGsymVersion(Path);
- if (!VersionOrErr)
- return VersionOrErr.takeError();
- if (*VersionOrErr == GSYM_VERSION)
- RV = ReaderVersion::V1;
- else if (*VersionOrErr == GSYM_VERSION_2)
- RV = ReaderVersion::V2;
- else
- return createStringError(std::errc::invalid_argument,
- "unsupported GSYM version %u", *VersionOrErr);
- }
- if (RV == ReaderVersion::V2) {
- auto ReaderOrErr = GsymReaderV2::openFile(Path);
- if (!ReaderOrErr)
- return ReaderOrErr.takeError();
- return std::make_unique<GsymReaderV2>(std::move(*ReaderOrErr));
+ if (ForceReaderVersion == ReaderVersion::Auto)
+ return GsymReader::openFile(Path);
+ if (ForceReaderVersion == ReaderVersion::V2) {
+ auto R = GsymReaderV2::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto ReaderOrErr = GsymReaderV1::openFile(Path);
- if (!ReaderOrErr)
- return ReaderOrErr.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*ReaderOrErr));
+ auto R = GsymReaderV1::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
}
static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
>From a561ecad1a5c73d2788978e583ee7ca99ceb9f42 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 22:42:04 -0700
Subject: [PATCH 19/45] Rename interface and classes, so that v1 classes get
their original names
---
.../llvm/DebugInfo/GSYM/CallSiteInfo.h | 8 +-
.../llvm/DebugInfo/GSYM/DwarfTransformer.h | 14 +-
.../llvm/DebugInfo/GSYM/FunctionInfo.h | 6 +-
.../include/llvm/DebugInfo/GSYM/GsymContext.h | 6 +-
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 34 +--
.../llvm/DebugInfo/GSYM/GsymCreatorV2.h | 2 +-
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 52 ++--
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 2 +-
llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h | 4 +-
.../llvm/DebugInfo/GSYM/MergedFunctionsInfo.h | 2 +-
.../DebugInfo/GSYM/ObjectFileTransformer.h | 6 +-
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 14 +-
llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 2 +-
llvm/lib/DebugInfo/GSYM/GsymContext.cpp | 4 +-
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 66 ++---
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 76 +++---
llvm/lib/DebugInfo/GSYM/InlineInfo.cpp | 4 +-
.../DebugInfo/GSYM/ObjectFileTransformer.cpp | 2 +-
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 2 +-
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 18 +-
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 234 +++++++++---------
21 files changed, 279 insertions(+), 279 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index 1a8219669e5bf..fd94061896439 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -26,7 +26,7 @@ struct FunctionsYAML;
namespace gsym {
class FileWriter;
-class GsymCreator;
+class GsymCreatorBase;
struct FunctionInfo;
struct CallSiteInfo {
enum Flags : uint8_t {
@@ -96,8 +96,8 @@ class CallSiteInfoLoader {
/// Constructor that initializes the CallSiteInfoLoader with necessary data
/// structures.
///
- /// \param GCreator A reference to the GsymCreator.
- CallSiteInfoLoader(GsymCreator &GCreator, std::vector<FunctionInfo> &Funcs)
+ /// \param GCreator A reference to the GsymCreatorBase.
+ CallSiteInfoLoader(GsymCreatorBase &GCreator, std::vector<FunctionInfo> &Funcs)
: GCreator(GCreator), Funcs(Funcs) {}
/// This method reads the specified YAML file, parses its content, and updates
@@ -132,7 +132,7 @@ class CallSiteInfoLoader {
StringMap<FunctionInfo *> &FuncMap);
/// Reference to the parent Gsym Creator object.
- GsymCreator &GCreator;
+ GsymCreatorBase &GCreator;
/// Reference to the vector of FunctionInfo objects to be populated.
std::vector<FunctionInfo> &Funcs;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index 2c59a5219292f..e45947b89297f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -23,11 +23,11 @@ namespace gsym {
struct CUInfo;
struct FunctionInfo;
-class GsymCreator;
+class GsymCreatorBase;
class OutputAggregator;
/// A class that transforms the DWARF in a DWARFContext into GSYM information
-/// by populating the GsymCreator object that it is constructed with. This
+/// by populating the GsymCreatorBase object that it is constructed with. This
/// class supports converting all DW_TAG_subprogram DIEs into
/// gsym::FunctionInfo objects that includes line table information and inline
/// function information. Creating a separate class to transform this data
@@ -48,12 +48,12 @@ class DwarfTransformer {
/// executable format). Apple has some compile unit attributes that look like
/// split DWARF, but they aren't and they can cause warnins to be emitted
/// about missing DWO files.
- DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false,
+ DwarfTransformer(DWARFContext &D, GsymCreatorBase &G, bool LDCS = false,
bool MachO = false)
: DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS), IsMachO(MachO) {}
/// Extract the DWARF from the supplied object file and convert it into the
- /// Gsym format in the GsymCreator object that is passed in. Returns an
+ /// Gsym format in the GsymCreatorBase object that is passed in. Returns an
/// error if something fatal is encountered.
///
/// \param NumThreads The number of threads that the conversion process can
@@ -70,13 +70,13 @@ class DwarfTransformer {
private:
- /// Parse the DWARF in the object file and convert it into the GsymCreator.
+ /// Parse the DWARF in the object file and convert it into the GsymCreatorBase.
Error parse();
/// Handle any DIE (debug info entry) from the DWARF.
///
/// This function will find all DW_TAG_subprogram DIEs that convert them into
- /// GSYM FuntionInfo objects and add them to the GsymCreator supplied during
+ /// GSYM FunctionInfo objects and add them to the GsymCreatorBase supplied during
/// construction. The DIE and all its children will be recursively parsed
/// with calls to this function.
///
@@ -101,7 +101,7 @@ class DwarfTransformer {
void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);
DWARFContext &DICtx;
- GsymCreator &Gsym;
+ GsymCreatorBase &Gsym;
bool LoadDwarfCallSites;
bool IsMachO;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 74cdd48697024..c3fee8af9e1bc 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -25,7 +25,7 @@ class raw_ostream;
namespace gsym {
-class GsymReader;
+class GsymReaderBase;
/// Function information in GSYM files encodes information for one contiguous
/// address range. If a function has discontiguous address ranges, they will
/// need to be encoded using multiple FunctionInfo objects.
@@ -185,7 +185,7 @@ struct FunctionInfo {
/// \param GR The GSYM reader that contains the string and file table that
/// will be used to fill in information in the returned result.
///
- /// \param FuncAddr The function start address decoded from the GsymReader.
+ /// \param FuncAddr The function start address decoded from the GsymReaderBase.
///
/// \param Addr The address to lookup.
///
@@ -197,7 +197,7 @@ struct FunctionInfo {
/// encountered during decoding. An error should only be returned if the
/// address is not contained in the FunctionInfo or if the data is corrupted.
LLVM_ABI static llvm::Expected<LookupResult>
- lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
+ lookup(DataExtractor &Data, const GsymReaderBase &GR, uint64_t FuncAddr,
uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData = nullptr);
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
index f9382fa8d9577..030b4148dd444 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
@@ -17,7 +17,7 @@ namespace llvm {
namespace gsym {
-class GsymReader;
+class GsymReaderBase;
/// GSYM DI Context
/// This data structure is the top level entity that deals with GSYM
@@ -28,7 +28,7 @@ class GsymReader;
/// the GSYM interfaces directly.
class GsymContext : public DIContext {
public:
- GsymContext(std::unique_ptr<GsymReader> Reader);
+ GsymContext(std::unique_ptr<GsymReaderBase> Reader);
~GsymContext() override;
GsymContext(GsymContext &) = delete;
@@ -56,7 +56,7 @@ class GsymContext : public DIContext {
getLocalsForAddress(object::SectionedAddress Address) override;
private:
- const std::unique_ptr<GsymReader> Reader;
+ const std::unique_ptr<GsymReaderBase> Reader;
};
} // end namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index a7cfb527bb0c7..072ad9cffa426 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -31,25 +31,25 @@ namespace gsym {
class FileWriter;
class OutputAggregator;
-/// GsymCreator is used to emit GSYM data to a stand alone file or section
+/// GsymCreatorBase is used to emit GSYM data to a stand alone file or section
/// within a file.
///
-/// The GsymCreator is designed to be used in 3 stages:
+/// The GsymCreatorBase is designed to be used in 3 stages:
/// - Create FunctionInfo objects and add them
-/// - Finalize the GsymCreator object
+/// - Finalize the GsymCreatorBase object
/// - Save to file or section
///
/// The first stage involves creating FunctionInfo objects from another source
/// of information like compiler debug info metadata, DWARF or Breakpad files.
/// Any strings in the FunctionInfo or contained information, like InlineInfo
/// or LineTable objects, should get the string table offsets by calling
-/// GsymCreator::insertString(...). Any file indexes that are needed should be
-/// obtained by calling GsymCreator::insertFile(...). All of the function calls
-/// in GsymCreator are thread safe. This allows multiple threads to create and
+/// GsymCreatorBase::insertString(...). Any file indexes that are needed should be
+/// obtained by calling GsymCreatorBase::insertFile(...). All of the function calls
+/// in GsymCreatorBase are thread safe. This allows multiple threads to create and
/// add FunctionInfo objects while parsing debug information.
///
/// Once all of the FunctionInfo objects have been added, the
-/// GsymCreator::finalize(...) must be called prior to saving. This function
+/// GsymCreatorBase::finalize(...) must be called prior to saving. This function
/// will sort the FunctionInfo objects, finalize the string table, and do any
/// other passes on the information needed to prepare the information to be
/// saved.
@@ -137,9 +137,9 @@ class OutputAggregator;
/// This interface defines the common API used by DwarfTransformer,
/// ObjectFileTransformer, and other consumers that need to populate
/// a GSYM file regardless of the output format version.
-class GsymCreator {
+class GsymCreatorBase {
public:
- virtual ~GsymCreator() = default;
+ virtual ~GsymCreatorBase() = default;
virtual uint32_t insertString(StringRef S, bool Copy = true) = 0;
virtual StringRef getString(uint32_t Offset) = 0;
@@ -169,7 +169,7 @@ class GsymCreator {
virtual bool isQuiet() const = 0;
};
-class GsymCreatorV1 : public GsymCreator {
+class GsymCreator : public GsymCreatorBase {
// Private member variables require Mutex protections
mutable std::mutex Mutex;
std::vector<FunctionInfo> Funcs;
@@ -253,7 +253,7 @@ class GsymCreatorV1 : public GsymCreator {
/// \returns The number of bytes it will take to encode the function info in
/// this GsymCreator. This helps calculate the size of the current GSYM
/// segment file.
- uint64_t copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncInfoIdx);
+ uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
/// Copy a string from \a SrcGC into this object.
///
@@ -265,7 +265,7 @@ class GsymCreatorV1 : public GsymCreator {
/// \param SrcGC The source gsym creator to copy from.
/// \param StrOff The string table offset from \a SrcGC to copy.
/// \returns The new string table offset of the string within this object.
- uint32_t copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff);
+ uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
/// Copy a file from \a SrcGC into this object.
///
@@ -281,7 +281,7 @@ class GsymCreatorV1 : public GsymCreator {
/// file index of zero will always return zero as the zero is a reserved file
/// index that means no file.
/// \returns The new file index of the file within this object.
- uint32_t copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx);
+ uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
/// Inserts a FileEntry into the file table.
///
@@ -301,7 +301,7 @@ class GsymCreatorV1 : public GsymCreator {
/// \param II The inline info that contains file indexes and string offsets
/// that come from \a SrcGC. The entries will be updated by copying any files
/// and strings over into this object.
- void fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II);
+ void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
/// Save this GSYM file into segments that are roughly \a SegmentSize in size.
///
@@ -320,7 +320,7 @@ class GsymCreatorV1 : public GsymCreator {
llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
uint64_t SegmentSize) const;
- /// Let this creator know that this is a segment of another GsymCreatorV1.
+ /// Let this creator know that this is a segment of another GsymCreator.
///
/// When we have a segment, we know that function infos will be added in
/// ascending address range order without having to be finalized. We also
@@ -330,7 +330,7 @@ class GsymCreatorV1 : public GsymCreator {
}
public:
- LLVM_ABI GsymCreatorV1(bool Quiet = false);
+ LLVM_ABI GsymCreator(bool Quiet = false);
/// Save a GSYM file to a stand alone file.
///
@@ -525,7 +525,7 @@ class GsymCreatorV1 : public GsymCreator {
/// \returns An expected unique pointer to a GsymCreator or an error. The
/// returned unique pointer can be NULL if there are no more functions to
/// encode.
- LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreatorV1>>
+ LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreator>>
createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
index 9e2067e0b1c72..771ccef83b3cf 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
@@ -98,7 +98,7 @@ class OutputAggregator;
///
/// - UUID: Raw UUID bytes of the original executable. Only present if a UUID
/// was set. No alignment requirement.
-class GsymCreatorV2 : public GsymCreator {
+class GsymCreatorV2 : public GsymCreatorBase {
// Private member variables require Mutex protections
mutable std::mutex Mutex;
std::vector<FunctionInfo> Funcs;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 5d3c7438c28e7..4ed44d1827825 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -30,28 +30,28 @@ class raw_ostream;
namespace gsym {
-/// GsymReader is an abstract interface for reading GSYM data.
+/// GsymReaderBase is an abstract interface for reading GSYM data.
///
/// This interface provides the methods needed by FunctionInfo::lookup and
/// InlineInfo::lookup to resolve strings and files during symbolication.
-/// Both GsymReaderV1 and GsymReaderV2 implement this interface.
-class GsymReader {
+/// Both GsymReader and GsymReaderV2 implement this interface.
+class GsymReaderBase {
public:
- virtual ~GsymReader() = default;
+ virtual ~GsymReaderBase() = default;
/// Open a GSYM file, auto-detecting the format version.
///
/// \param Path The file path of the GSYM file to read.
- /// \returns An expected unique_ptr to a GsymReader or an error.
- LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
+ /// \returns An expected unique_ptr to a GsymReaderBase or an error.
+ LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReaderBase>>
openFile(StringRef Path);
- /// Construct a GsymReader from a buffer, auto-detecting the format version.
+ /// Construct a GsymReaderBase from a buffer, auto-detecting the format version.
///
/// \param Bytes A set of bytes that will be copied and owned by the
/// returned object on success.
- /// \returns An expected unique_ptr to a GsymReader or an error.
- LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
+ /// \returns An expected unique_ptr to a GsymReaderBase or an error.
+ LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReaderBase>>
copyBuffer(StringRef Bytes);
/// Get a string from the string table.
@@ -111,20 +111,20 @@ class GsymReader {
virtual void dump(raw_ostream &OS, std::optional<FileEntry> FE) = 0;
};
-/// GsymReaderV1 is used to read GSYM V1 data from a file or buffer.
+/// GsymReader is used to read GSYM V1 data from a file or buffer.
///
/// This class is optimized for very quick lookups when the endianness matches
/// the host system. The Header, address table, address info offsets, and file
/// table are designed to be mmap'ed as read only into memory and used without
/// any parsing needed. If the endianness doesn't match, we swap these objects
-/// and tables into GsymReaderV1::SwappedData and then point our header and
+/// and tables into GsymReader::SwappedData and then point our header and
/// ArrayRefs to this swapped internal data.
///
-/// GsymReaderV1 objects must use one of the static functions to create an
-/// instance: GsymReaderV1::openFile(...) and GsymReaderV1::copyBuffer(...).
+/// GsymReader objects must use one of the static functions to create an
+/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
-class GsymReaderV1 : public GsymReader {
- GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer);
+class GsymReader : public GsymReaderBase {
+ GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
std::unique_ptr<MemoryBuffer> MemBuffer;
@@ -148,23 +148,23 @@ class GsymReaderV1 : public GsymReader {
std::unique_ptr<SwappedData> Swap;
public:
- LLVM_ABI GsymReaderV1(GsymReaderV1 &&RHS);
- LLVM_ABI ~GsymReaderV1() override;
+ LLVM_ABI GsymReader(GsymReader &&RHS);
+ LLVM_ABI ~GsymReader() override;
- /// Construct a GsymReaderV1 from a file on disk.
+ /// Construct a GsymReader from a file on disk.
///
/// \param Path The file path of the GSYM file to read.
- /// \returns An expected GsymReaderV1 that contains the object or an error
+ /// \returns An expected GsymReader that contains the object or an error
/// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<GsymReaderV1> openFile(StringRef Path);
+ LLVM_ABI static llvm::Expected<GsymReader> openFile(StringRef Path);
- /// Construct a GsymReaderV1 from a buffer.
+ /// Construct a GsymReader from a buffer.
///
/// \param Bytes A set of bytes that will be copied and owned by the
/// returned object on success.
- /// \returns An expected GsymReaderV1 that contains the object or an error
+ /// \returns An expected GsymReader that contains the object or an error
/// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<GsymReaderV1> copyBuffer(StringRef Bytes);
+ LLVM_ABI static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
/// Access the GSYM header.
/// \returns A native endian version of the GSYM header.
@@ -444,10 +444,10 @@ class GsymReaderV1 : public GsymReader {
/// work of parsing the GSYM file and returning an error.
///
/// \param MemBuffer A memory buffer that will transfer ownership into the
- /// GsymReaderV1.
- /// \returns An expected GsymReaderV1 that contains the object or an error
+ /// GsymReader.
+ /// \returns An expected GsymReader that contains the object or an error
/// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<llvm::gsym::GsymReaderV1>
+ LLVM_ABI static llvm::Expected<llvm::gsym::GsymReader>
create(std::unique_ptr<MemoryBuffer> &MemBuffer);
/// Given an address, find the address index.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
index 5ce96bfe45f59..856f8032270b6 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -43,7 +43,7 @@ namespace gsym {
/// GsymReaderV2 objects must use one of the static functions to create an
/// instance: GsymReaderV2::openFile(...) and GsymReaderV2::copyBuffer(...).
-class GsymReaderV2 : public GsymReader {
+class GsymReaderV2 : public GsymReaderBase {
GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
diff --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
index 03b0a8da35a67..055c8b5d29c0a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -22,7 +22,7 @@ class raw_ostream;
namespace gsym {
-class GsymReader;
+class GsymReaderBase;
/// Inline information stores the name of the inline function along with
/// an array of address ranges. It also stores the call file and call line
/// that called this inline function. This allows us to unwind inline call
@@ -118,7 +118,7 @@ struct InlineInfo {
/// \returns An error if the inline information is corrupt, or
/// Error::success() for all other cases, even when no information
/// is added to \a SrcLocs.
- LLVM_ABI static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data,
+ LLVM_ABI static llvm::Error lookup(const GsymReaderBase &GR, DataExtractor &Data,
uint64_t BaseAddr, uint64_t Addr,
SourceLocations &SrcLocs);
diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
index 048db33229a8f..ecf908ec69183 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -20,7 +20,7 @@ class raw_ostream;
namespace gsym {
-class GsymReader;
+class GsymReaderBase;
struct FunctionInfo;
struct MergedFunctionsInfo {
std::vector<FunctionInfo> MergedFunctions;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h
index 31fdf8da9bea0..f08a29cb8b72f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h
@@ -20,12 +20,12 @@ class ObjectFile;
namespace gsym {
-class GsymCreator;
+class GsymCreatorBase;
class OutputAggregator;
class ObjectFileTransformer {
public:
- /// Extract any object file data that is needed by the GsymCreator.
+ /// Extract any object file data that is needed by the GsymCreatorBase.
///
/// The extracted information includes the UUID of the binary and converting
/// all function symbols from any symbol tables into FunctionInfo objects.
@@ -42,7 +42,7 @@ class ObjectFileTransformer {
/// the DWARF, or Error::success() if all goes well.
LLVM_ABI static llvm::Error convert(const object::ObjectFile &Obj,
OutputAggregator &Output,
- GsymCreator &Gsym);
+ GsymCreatorBase &Gsym);
};
} // namespace gsym
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 2145f23570b35..74a06ae221b8c 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -64,7 +64,7 @@ struct llvm::gsym::CUInfo {
/// the first client that asks for a compile unit file index will end up
/// doing the conversion, and subsequent clients will get the cached GSYM
/// index.
- std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
+ std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreatorBase &Gsym,
uint32_t DwarfFileIdx) {
if (!LineTable || DwarfFileIdx >= FileCache.size())
return std::nullopt;
@@ -121,15 +121,15 @@ static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
return DWARFDie();
}
-/// Get the GsymCreator string table offset for the qualified name for the
+/// Get the GsymCreatorBase string table offset for the qualified name for the
/// DIE passed in. This function will avoid making copies of any strings in
-/// the GsymCreator when possible. We don't need to copy a string when the
+/// the GsymCreatorBase when possible. We don't need to copy a string when the
/// string comes from our .debug_str section or is an inlined string in the
/// .debug_info. If we create a qualified name string in this function by
/// combining multiple strings in the DWARF string table or info, we will make
/// a copy of the string when we add it to the string table.
static std::optional<uint32_t>
-getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
+getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreatorBase &Gsym) {
// If the dwarf has mangled name, use mangled name
if (auto LinkageName = Die.getLinkageName()) {
// We have seen cases where linkage name is actually empty.
@@ -214,7 +214,7 @@ ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
return Ranges;
}
-static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
+static void parseInlineInfo(GsymCreatorBase &Gsym, OutputAggregator &Out,
CUInfo &CUI, DWARFDie Die, uint32_t Depth,
FunctionInfo &FI, InlineInfo &Parent,
const AddressRanges &AllParentRanges,
@@ -308,7 +308,7 @@ static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
}
static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
- DWARFDie Die, GsymCreator &Gsym,
+ DWARFDie Die, GsymCreatorBase &Gsym,
FunctionInfo &FI) {
std::vector<uint32_t> RowVector;
const uint64_t StartAddress = FI.startAddress();
@@ -735,7 +735,7 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath,
OutputAggregator &Out) {
Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
- auto GsymOrErr = GsymReader::openFile(GsymPath);
+ auto GsymOrErr = GsymReaderBase::openFile(GsymPath);
if (!GsymOrErr)
return GsymOrErr.takeError();
auto &Gsym = *GsymOrErr;
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index b6dcaeb323f59..9c8d7ddf5e511 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -236,7 +236,7 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
}
llvm::Expected<LookupResult>
-FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
+FunctionInfo::lookup(DataExtractor &Data, const GsymReaderBase &GR,
uint64_t FuncAddr, uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData) {
LookupResult LR;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
index 62b4caa327d87..1d3fe805719cd 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
using namespace llvm::gsym;
GsymContext::~GsymContext() = default;
-GsymContext::GsymContext(std::unique_ptr<GsymReader> Reader)
+GsymContext::GsymContext(std::unique_ptr<GsymReaderBase> Reader)
: DIContext(CK_GSYM), Reader(std::move(Reader)) {}
void GsymContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {}
@@ -119,7 +119,7 @@ GsymContext::getLineInfoForAddressRange(object::SectionedAddress Address,
for (const auto &LineEntry : LT) {
if (StartAddr <= LineEntry.Addr && LineEntry.Addr < EndAddr) {
// Use LineEntry.Addr, LineEntry.File (which is a file index into the
- // files tables from the GsymReader), and LineEntry.Line (source line
+ // files tables from the GsymReaderBase), and LineEntry.Line (source line
// number) to add stuff to the DILineInfoTable
}
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 5411a0dce1d1d..f26e69fda2540 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -21,12 +21,12 @@
using namespace llvm;
using namespace gsym;
-GsymCreatorV1::GsymCreatorV1(bool Quiet)
+GsymCreator::GsymCreator(bool Quiet)
: StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
insertFile(StringRef());
}
-uint32_t GsymCreatorV1::insertFile(StringRef Path, llvm::sys::path::Style Style) {
+uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
// We must insert the strings first, then call the FileEntry constructor.
@@ -38,7 +38,7 @@ uint32_t GsymCreatorV1::insertFile(StringRef Path, llvm::sys::path::Style Style)
return insertFileEntry(FileEntry(Dir, Base));
}
-uint32_t GsymCreatorV1::insertFileEntry(FileEntry FE) {
+uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
std::lock_guard<std::mutex> Guard(Mutex);
const auto NextIndex = Files.size();
// Find FE in hash map and insert if not present.
@@ -48,7 +48,7 @@ uint32_t GsymCreatorV1::insertFileEntry(FileEntry FE) {
return R.first->second;
}
-uint32_t GsymCreatorV1::copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx) {
+uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
// File index zero is reserved for a FileEntry with no directory and no
// filename. Any other file and we need to copy the strings for the directory
// and filename.
@@ -65,7 +65,7 @@ uint32_t GsymCreatorV1::copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx) {
return insertFileEntry(DstFE);
}
-llvm::Error GsymCreatorV1::save(StringRef Path, llvm::endianness ByteOrder,
+llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
std::optional<uint64_t> SegmentSize) const {
if (SegmentSize)
return saveSegments(Path, ByteOrder, *SegmentSize);
@@ -77,7 +77,7 @@ llvm::Error GsymCreatorV1::save(StringRef Path, llvm::endianness ByteOrder,
return encode(O);
}
-llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
+llvm::Error GsymCreator::encode(FileWriter &O) const {
std::lock_guard<std::mutex> Guard(Mutex);
if (Funcs.empty())
return createStringError(std::errc::invalid_argument,
@@ -203,13 +203,13 @@ llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
return ErrorSuccess();
}
-llvm::Error GsymCreatorV1::loadCallSitesFromYAML(StringRef YAMLFile) {
+llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) {
// Use the loader to load call site information from the YAML file.
CallSiteInfoLoader Loader(*this, Funcs);
return Loader.loadYAML(YAMLFile);
}
-void GsymCreatorV1::prepareMergedFunctions(OutputAggregator &Out) {
+void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
// Nothing to do if we have less than 2 functions.
if (Funcs.size() < 2)
return;
@@ -252,7 +252,7 @@ void GsymCreatorV1::prepareMergedFunctions(OutputAggregator &Out) {
std::swap(Funcs, TopLevelFuncs);
}
-llvm::Error GsymCreatorV1::finalize(OutputAggregator &Out) {
+llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
std::lock_guard<std::mutex> Guard(Mutex);
if (Finalized)
return createStringError(std::errc::invalid_argument, "already finalized");
@@ -367,14 +367,14 @@ llvm::Error GsymCreatorV1::finalize(OutputAggregator &Out) {
return Error::success();
}
-uint32_t GsymCreatorV1::copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff) {
+uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
// String offset at zero is always the empty string, no copying needed.
if (StrOff == 0)
return 0;
return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
}
-uint32_t GsymCreatorV1::insertString(StringRef S, bool Copy) {
+uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
if (S.empty())
return 0;
@@ -400,19 +400,19 @@ uint32_t GsymCreatorV1::insertString(StringRef S, bool Copy) {
return StrOff;
}
-StringRef GsymCreatorV1::getString(uint32_t Offset) {
+StringRef GsymCreator::getString(uint32_t Offset) {
auto I = StringOffsetMap.find(Offset);
assert(I != StringOffsetMap.end() &&
- "GsymCreatorV1::getString expects a valid offset as parameter.");
+ "GsymCreator::getString expects a valid offset as parameter.");
return I->second.val();
}
-void GsymCreatorV1::addFunctionInfo(FunctionInfo &&FI) {
+void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
std::lock_guard<std::mutex> Guard(Mutex);
Funcs.emplace_back(std::move(FI));
}
-void GsymCreatorV1::forEachFunctionInfo(
+void GsymCreator::forEachFunctionInfo(
std::function<bool(FunctionInfo &)> const &Callback) {
std::lock_guard<std::mutex> Guard(Mutex);
for (auto &FI : Funcs) {
@@ -421,7 +421,7 @@ void GsymCreatorV1::forEachFunctionInfo(
}
}
-void GsymCreatorV1::forEachFunctionInfo(
+void GsymCreator::forEachFunctionInfo(
std::function<bool(const FunctionInfo &)> const &Callback) const {
std::lock_guard<std::mutex> Guard(Mutex);
for (const auto &FI : Funcs) {
@@ -430,18 +430,18 @@ void GsymCreatorV1::forEachFunctionInfo(
}
}
-size_t GsymCreatorV1::getNumFunctionInfos() const {
+size_t GsymCreator::getNumFunctionInfos() const {
std::lock_guard<std::mutex> Guard(Mutex);
return Funcs.size();
}
-bool GsymCreatorV1::IsValidTextAddress(uint64_t Addr) const {
+bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
if (ValidTextRanges)
return ValidTextRanges->contains(Addr);
return true; // No valid text ranges has been set, so accept all ranges.
}
-std::optional<uint64_t> GsymCreatorV1::getFirstFunctionAddress() const {
+std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
// If we have finalized then Funcs are sorted. If we are a segment then
// Funcs will be sorted as well since function infos get added from an
// already finalized GsymCreator object where its functions were sorted and
@@ -451,7 +451,7 @@ std::optional<uint64_t> GsymCreatorV1::getFirstFunctionAddress() const {
return std::nullopt;
}
-std::optional<uint64_t> GsymCreatorV1::getLastFunctionAddress() const {
+std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
// If we have finalized then Funcs are sorted. If we are a segment then
// Funcs will be sorted as well since function infos get added from an
// already finalized GsymCreator object where its functions were sorted and
@@ -461,13 +461,13 @@ std::optional<uint64_t> GsymCreatorV1::getLastFunctionAddress() const {
return std::nullopt;
}
-std::optional<uint64_t> GsymCreatorV1::getBaseAddress() const {
+std::optional<uint64_t> GsymCreator::getBaseAddress() const {
if (BaseAddress)
return BaseAddress;
return getFirstFunctionAddress();
}
-uint64_t GsymCreatorV1::getMaxAddressOffset() const {
+uint64_t GsymCreator::getMaxAddressOffset() const {
switch (getAddressOffsetSize()) {
case 1: return UINT8_MAX;
case 2: return UINT16_MAX;
@@ -477,7 +477,7 @@ uint64_t GsymCreatorV1::getMaxAddressOffset() const {
llvm_unreachable("invalid address offset");
}
-uint8_t GsymCreatorV1::getAddressOffsetSize() const {
+uint8_t GsymCreator::getAddressOffsetSize() const {
const std::optional<uint64_t> BaseAddress = getBaseAddress();
const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
if (BaseAddress && LastFuncAddr) {
@@ -493,7 +493,7 @@ uint8_t GsymCreatorV1::getAddressOffsetSize() const {
return 1;
}
-uint64_t GsymCreatorV1::calculateHeaderAndTableSize() const {
+uint64_t GsymCreator::calculateHeaderAndTableSize() const {
uint64_t Size = sizeof(Header);
const size_t NumFuncs = Funcs.size();
// Add size of address offset table
@@ -511,14 +511,14 @@ uint64_t GsymCreatorV1::calculateHeaderAndTableSize() const {
// This function takes a InlineInfo class that was copy constructed from an
// InlineInfo from the \a SrcGC and updates all members that point to strings
// and files to point to strings and files from this GsymCreator.
-void GsymCreatorV1::fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II) {
+void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
II.Name = copyString(SrcGC, II.Name);
II.CallFile = copyFile(SrcGC, II.CallFile);
for (auto &ChildII: II.Children)
fixupInlineInfo(SrcGC, ChildII);
}
-uint64_t GsymCreatorV1::copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncIdx) {
+uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
// To copy a function info we need to copy any files and strings over into
// this GsymCreator and then copy the function info and update the string
// table offsets to match the new offsets.
@@ -552,7 +552,7 @@ uint64_t GsymCreatorV1::copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t Func
return Funcs.back().cacheEncoding();
}
-llvm::Error GsymCreatorV1::saveSegments(StringRef Path,
+llvm::Error GsymCreator::saveSegments(StringRef Path,
llvm::endianness ByteOrder,
uint64_t SegmentSize) const {
if (SegmentSize == 0)
@@ -562,10 +562,10 @@ llvm::Error GsymCreatorV1::saveSegments(StringRef Path,
size_t FuncIdx = 0;
const size_t NumFuncs = Funcs.size();
while (FuncIdx < NumFuncs) {
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> ExpectedGC =
+ llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
createSegment(SegmentSize, FuncIdx);
if (ExpectedGC) {
- GsymCreatorV1 *GC = ExpectedGC->get();
+ GsymCreator *GC = ExpectedGC->get();
if (!GC)
break; // We had not more functions to encode.
// Don't collect any messages at all
@@ -589,13 +589,13 @@ llvm::Error GsymCreatorV1::saveSegments(StringRef Path,
return Error::success();
}
-llvm::Expected<std::unique_ptr<GsymCreatorV1>>
-GsymCreatorV1::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+llvm::Expected<std::unique_ptr<GsymCreator>>
+GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
// No function entries, return empty unique pointer
if (FuncIdx >= Funcs.size())
- return std::unique_ptr<GsymCreatorV1>();
+ return std::unique_ptr<GsymCreator>();
- std::unique_ptr<GsymCreatorV1> GC(new GsymCreatorV1(/*Quiet=*/true));
+ std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
// Tell the creator that this is a segment.
GC->setIsSegment();
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 151d9f9027211..bc29eda1f9f5e 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -1,4 +1,4 @@
-//===- GsymReaderV1.cpp -----------------------------------------------------===//
+//===- GsymReader.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -43,8 +43,8 @@ static Expected<uint16_t> detectVersion(StringRef Data) {
return Version;
}
-llvm::Expected<std::unique_ptr<GsymReader>>
-GsymReader::openFile(StringRef Path) {
+llvm::Expected<std::unique_ptr<GsymReaderBase>>
+GsymReaderBase::openFile(StringRef Path) {
auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path);
if (!BufOrErr)
return createStringError(BufOrErr.getError(), "failed to open '%s'",
@@ -58,14 +58,14 @@ GsymReader::openFile(StringRef Path) {
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReaderV1::openFile(Path);
+ auto R = GsymReader::openFile(Path);
if (!R)
return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ return std::make_unique<GsymReader>(std::move(*R));
}
-llvm::Expected<std::unique_ptr<GsymReader>>
-GsymReader::copyBuffer(StringRef Bytes) {
+llvm::Expected<std::unique_ptr<GsymReaderBase>>
+GsymReaderBase::copyBuffer(StringRef Bytes) {
auto VersionOrErr = detectVersion(Bytes);
if (!VersionOrErr)
return VersionOrErr.takeError();
@@ -75,20 +75,20 @@ GsymReader::copyBuffer(StringRef Bytes) {
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReaderV1::copyBuffer(Bytes);
+ auto R = GsymReader::copyBuffer(Bytes);
if (!R)
return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ return std::make_unique<GsymReader>(std::move(*R));
}
-GsymReaderV1::GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer)
+GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
: MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
-GsymReaderV1::GsymReaderV1(GsymReaderV1 &&RHS) = default;
+GsymReader::GsymReader(GsymReader &&RHS) = default;
-GsymReaderV1::~GsymReaderV1() = default;
+GsymReader::~GsymReader() = default;
-llvm::Expected<GsymReaderV1> GsymReaderV1::openFile(StringRef Filename) {
+llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
// Open the input file and return an appropriate error if needed.
ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
@@ -98,17 +98,17 @@ llvm::Expected<GsymReaderV1> GsymReaderV1::openFile(StringRef Filename) {
return create(BuffOrErr.get());
}
-llvm::Expected<GsymReaderV1> GsymReaderV1::copyBuffer(StringRef Bytes) {
+llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
return create(MemBuffer);
}
-llvm::Expected<llvm::gsym::GsymReaderV1>
-GsymReaderV1::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
+llvm::Expected<llvm::gsym::GsymReader>
+GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
if (!MemBuffer)
return createStringError(std::errc::invalid_argument,
"invalid memory buffer");
- GsymReaderV1 GR(std::move(MemBuffer));
+ GsymReader GR(std::move(MemBuffer));
llvm::Error Err = GR.parse();
if (Err)
return std::move(Err);
@@ -116,7 +116,7 @@ GsymReaderV1::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
}
llvm::Error
-GsymReaderV1::parse() {
+GsymReader::parse() {
BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
// Check for the magic bytes. This file format is designed to be mmap'ed
// into a process and accessed as read only. This is done for performance
@@ -255,15 +255,15 @@ GsymReaderV1::parse() {
}
-const Header &GsymReaderV1::getHeader() const {
- // The only way to get a GsymReaderV1 is from GsymReaderV1::openFile(...) or
- // GsymReaderV1::copyBuffer() and the header must be valid and initialized to
+const Header &GsymReader::getHeader() const {
+ // The only way to get a GsymReader is from GsymReader::openFile(...) or
+ // GsymReader::copyBuffer() and the header must be valid and initialized to
// a valid pointer value, so the assert below should not trigger.
assert(Hdr);
return *Hdr;
}
-std::optional<uint64_t> GsymReaderV1::getAddress(size_t Index) const {
+std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
switch (Hdr->AddrOffSize) {
case 1: return addressForIndex<uint8_t>(Index);
case 2: return addressForIndex<uint16_t>(Index);
@@ -273,7 +273,7 @@ std::optional<uint64_t> GsymReaderV1::getAddress(size_t Index) const {
return std::nullopt;
}
-std::optional<uint64_t> GsymReaderV1::getAddressInfoOffset(size_t Index) const {
+std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
if (Index < NumAddrInfoOffsets)
return AddrInfoOffsets[Index];
@@ -281,7 +281,7 @@ std::optional<uint64_t> GsymReaderV1::getAddressInfoOffset(size_t Index) const {
}
Expected<uint64_t>
-GsymReaderV1::getAddressIndex(const uint64_t Addr) const {
+GsymReader::getAddressIndex(const uint64_t Addr) const {
if (Addr >= Hdr->BaseAddress) {
const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
std::optional<uint64_t> AddrOffsetIndex;
@@ -312,7 +312,7 @@ GsymReaderV1::getAddressIndex(const uint64_t Addr) const {
}
llvm::Expected<DataExtractor>
-GsymReaderV1::getFunctionInfoDataForAddress(uint64_t Addr,
+GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
uint64_t &FuncStartAddr) const {
Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
if (!ExpectedAddrIdx)
@@ -355,7 +355,7 @@ GsymReaderV1::getFunctionInfoDataForAddress(uint64_t Addr,
}
llvm::Expected<DataExtractor>
-GsymReaderV1::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
+GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
uint64_t &FuncStartAddr) const {
if (AddrIdx >= getNumAddresses())
return createStringError(std::errc::invalid_argument,
@@ -376,7 +376,7 @@ GsymReaderV1::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
}
-llvm::Expected<FunctionInfo> GsymReaderV1::getFunctionInfo(uint64_t Addr) const {
+llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
@@ -385,7 +385,7 @@ llvm::Expected<FunctionInfo> GsymReaderV1::getFunctionInfo(uint64_t Addr) const
}
llvm::Expected<FunctionInfo>
-GsymReaderV1::getFunctionInfoAtIndex(uint64_t Idx) const {
+GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
@@ -394,7 +394,7 @@ GsymReaderV1::getFunctionInfoAtIndex(uint64_t Idx) const {
}
llvm::Expected<LookupResult>
-GsymReaderV1::lookup(uint64_t Addr,
+GsymReader::lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFunctionsData) const {
uint64_t FuncStartAddr = 0;
if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
@@ -405,7 +405,7 @@ GsymReaderV1::lookup(uint64_t Addr,
}
llvm::Expected<std::vector<LookupResult>>
-GsymReaderV1::lookupAll(uint64_t Addr) const {
+GsymReader::lookupAll(uint64_t Addr) const {
std::vector<LookupResult> Results;
std::optional<DataExtractor> MergedFunctionsData;
@@ -439,7 +439,7 @@ GsymReaderV1::lookupAll(uint64_t Addr) const {
return Results;
}
-void GsymReaderV1::dump(raw_ostream &OS) {
+void GsymReader::dump(raw_ostream &OS) {
const auto &Header = getHeader();
// Dump the GSYM header.
OS << Header << "\n";
@@ -494,7 +494,7 @@ void GsymReaderV1::dump(raw_ostream &OS) {
}
}
-void GsymReaderV1::dump(raw_ostream &OS, const FunctionInfo &FI,
+void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
uint32_t Indent) {
OS.indent(Indent);
OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
@@ -512,14 +512,14 @@ void GsymReaderV1::dump(raw_ostream &OS, const FunctionInfo &FI,
}
}
-void GsymReaderV1::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
+void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
OS << "++ Merged FunctionInfos[" << inx << "]:\n";
dump(OS, MFI.MergedFunctions[inx], 4);
}
}
-void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
OS << HEX16(CSI.ReturnOffset);
std::string Flags;
@@ -551,7 +551,7 @@ void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
}
}
-void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
uint32_t Indent) {
OS.indent(Indent);
OS << "CallSites (by relative return offset):\n";
@@ -563,7 +563,7 @@ void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
}
}
-void GsymReaderV1::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
+void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
OS.indent(Indent);
OS << "LineTable:\n";
for (auto &LE: LT) {
@@ -575,7 +575,7 @@ void GsymReaderV1::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
}
}
-void GsymReaderV1::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
+void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
if (Indent == 0)
OS << "InlineInfo:\n";
else
@@ -593,7 +593,7 @@ void GsymReaderV1::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent)
dump(OS, ChildII, Indent + 2);
}
-void GsymReaderV1::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
+void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
if (FE) {
// IF we have the file from index 0, then don't print anything
if (FE->Dir == 0 && FE->Base == 0)
diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
index 251e51bb67eab..35091199142b7 100644
--- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -100,7 +100,7 @@ static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
/// \param BaseAddr The address that the relative address range offsets are
/// relative to.
-static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
+static bool lookup(const GsymReaderBase &GR, DataExtractor &Data, uint64_t &Offset,
uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
llvm::Error &Err) {
InlineInfo Inline;
@@ -151,7 +151,7 @@ static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
return true;
}
-llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data,
+llvm::Error InlineInfo::lookup(const GsymReaderBase &GR, DataExtractor &Data,
uint64_t BaseAddr, uint64_t Addr,
SourceLocations &SrcLocs) {
// Call our recursive helper function starting at offset zero.
diff --git a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
index 122de4deea5df..5525325f76072 100644
--- a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
@@ -68,7 +68,7 @@ static std::vector<uint8_t> getUUID(const object::ObjectFile &Obj) {
llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj,
OutputAggregator &Out,
- GsymCreator &Gsym) {
+ GsymCreatorBase &Gsym) {
using namespace llvm::object;
const bool IsMachO = isa<MachOObjectFile>(&Obj);
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index bbfb62de54fa9..a01faf1dfbc33 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -779,7 +779,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
// - Otherwise, create a DWARFContext.
const auto GsymFile = lookUpGsymFile(BinaryName.str());
if (!GsymFile.empty()) {
- auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
+ auto ReaderOrErr = gsym::GsymReaderBase::openFile(GsymFile);
if (ReaderOrErr)
Context = std::make_unique<gsym::GsymContext>(std::move(*ReaderOrErr));
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 0ccf493092b74..ec28c3822c238 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -386,12 +386,12 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
auto ThreadCount =
NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
- std::unique_ptr<GsymCreator> GsymPtr;
+ std::unique_ptr<GsymCreatorBase> GsymPtr;
if (ForceCreatorVersion == CreatorVersion::V2)
GsymPtr = std::make_unique<GsymCreatorV2>(Quiet);
else
- GsymPtr = std::make_unique<GsymCreatorV1>(Quiet);
- GsymCreator &Gsym = *GsymPtr;
+ GsymPtr = std::make_unique<GsymCreator>(Quiet);
+ GsymCreatorBase &Gsym = *GsymPtr;
// See if we can figure out the base address for a given object file, and if
// we can, then set the base address to use to this value. This will ease
@@ -571,22 +571,22 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
}
/// Open a GSYM file, auto-detecting the version unless forced.
-static Expected<std::unique_ptr<GsymReader>> openGsymFile(StringRef Path) {
+static Expected<std::unique_ptr<GsymReaderBase>> openGsymFile(StringRef Path) {
if (ForceReaderVersion == ReaderVersion::Auto)
- return GsymReader::openFile(Path);
+ return GsymReaderBase::openFile(Path);
if (ForceReaderVersion == ReaderVersion::V2) {
auto R = GsymReaderV2::openFile(Path);
if (!R)
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReaderV1::openFile(Path);
+ auto R = GsymReader::openFile(Path);
if (!R)
return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ return std::make_unique<GsymReader>(std::move(*R));
}
-static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
+static void doLookup(GsymReaderBase &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
// If we have filters, count matching results first
@@ -720,7 +720,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
std::string InputLine;
std::string CurrentGSYMPath;
- std::unique_ptr<GsymReader> CurrentGsym;
+ std::unique_ptr<GsymReaderBase> CurrentGsym;
while (std::getline(std::cin, InputLine)) {
// Strip newline characters.
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index bfa030dda62d8..d56007371b2f2 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -941,7 +941,7 @@ TEST(GSYMTest, TestHeaderEncodeDecode) {
}
static void TestGsymCreatorEncodeError(llvm::endianness ByteOrder,
- const GsymCreatorV1 &GC,
+ const GsymCreator &GC,
std::string ExpectedErrorMsg) {
SmallString<512> Str;
raw_svector_ostream OutStrm(Str);
@@ -959,7 +959,7 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
// Verify we get an error when trying to encode an GsymCreator with no
// function infos. We shouldn't be saving a GSYM file in this case since
// there is nothing inside of it.
- GsymCreatorV1 GC;
+ GsymCreator GC;
TestGsymCreatorEncodeError(llvm::endianness::little, GC,
"no functions to encode");
const uint64_t FuncAddr = 0x1000;
@@ -1003,9 +1003,9 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
"attempted to encode invalid InlineInfo object");
}
-static void Compare(const GsymCreatorV1 &GC, const GsymReaderV1 &GR) {
+static void Compare(const GsymCreator &GC, const GsymReader &GR) {
// Verify that all of the data in a GsymCreator is correctly decoded from
- // a GsymReaderV1. To do this, we iterator over
+ // a GsymReader. To do this, we iterate over every FunctionInfo in GC.
GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool {
auto DecodedFI = GR.getFunctionInfo(FI.Range.start());
EXPECT_TRUE(bool(DecodedFI));
@@ -1014,7 +1014,7 @@ static void Compare(const GsymCreatorV1 &GC, const GsymReaderV1 &GR) {
});
}
-static void TestEncodeDecode(const GsymCreatorV1 &GC, llvm::endianness ByteOrder,
+static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
uint16_t Version, uint8_t AddrOffSize,
uint64_t BaseAddress, uint32_t NumAddresses,
ArrayRef<uint8_t> UUID) {
@@ -1023,7 +1023,7 @@ static void TestEncodeDecode(const GsymCreatorV1 &GC, llvm::endianness ByteOrder
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
const Header &Hdr = GR->getHeader();
EXPECT_EQ(Hdr.Version, Version);
@@ -1037,7 +1037,7 @@ static void TestEncodeDecode(const GsymCreatorV1 &GC, llvm::endianness ByteOrder
TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -1060,7 +1060,7 @@ TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 2;
@@ -1083,7 +1083,7 @@ TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 4;
@@ -1106,7 +1106,7 @@ TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 8;
@@ -1127,23 +1127,23 @@ TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
ArrayRef<uint8_t>(UUID));
}
-static void VerifyFunctionInfo(const GsymReaderV1 &GR, uint64_t Addr,
+static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr,
const FunctionInfo &FI) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
ASSERT_EQ(FI, ExpFI.get());
}
-static void VerifyFunctionInfoError(const GsymReaderV1 &GR, uint64_t Addr,
+static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr,
std::string ErrMessage) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_FALSE(bool(ExpFI));
checkError(ErrMessage, ExpFI.takeError());
}
-TEST(GSYMTest, TestGsymReaderV1) {
+TEST(GSYMTest, TestGsymReader) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint64_t Func1Addr = BaseAddr;
@@ -1162,8 +1162,8 @@ TEST(GSYMTest, TestGsymReaderV1) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- if (auto ExpectedGR = GsymReaderV1::copyBuffer(OutStrm.str())) {
- const GsymReaderV1 &GR = ExpectedGR.get();
+ if (auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str())) {
+ const GsymReader &GR = ExpectedGR.get();
VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff is not in GSYM");
FunctionInfo Func1(Func1Addr, FuncSize, Func1Name);
@@ -1188,7 +1188,7 @@ TEST(GSYMTest, TestGsymLookups) {
// FunctionInfo or InlineInfo, they only extract information needed for the
// lookup to happen which avoids allocations which can slow down
// symbolication.
- GsymCreatorV1 GC;
+ GsymCreator GC;
FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
const auto ByteOrder = llvm::endianness::native;
FI.OptLineTable = LineTable();
@@ -1228,7 +1228,7 @@ TEST(GSYMTest, TestGsymLookups) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
// Verify inline info is correct when doing lookups.
@@ -1338,7 +1338,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1348,7 +1348,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1416,7 +1416,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1426,7 +1426,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1524,7 +1524,7 @@ TEST(GSYMTest, TestDWARFStructMethodNoMangled) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1534,7 +1534,7 @@ TEST(GSYMTest, TestDWARFStructMethodNoMangled) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1625,7 +1625,7 @@ TEST(GSYMTest, TestDWARFTextRanges) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
// Only allow addresses between [0x1000 - 0x2000) to be linked into the
// GSYM.
@@ -1640,7 +1640,7 @@ TEST(GSYMTest, TestDWARFTextRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1657,7 +1657,7 @@ TEST(GSYMTest, TestEmptySymbolEndAddressOfTextRanges) {
// Test that if we have valid text ranges and we have a symbol with no size
// as the last FunctionInfo entry that the size of the symbol gets set to the
// end address of the text range.
- GsymCreatorV1 GC;
+ GsymCreator GC;
AddressRanges TextRanges;
TextRanges.insert(AddressRange(0x1000, 0x2000));
GC.SetValidTextRanges(TextRanges);
@@ -1669,7 +1669,7 @@ TEST(GSYMTest, TestEmptySymbolEndAddressOfTextRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1829,7 +1829,7 @@ TEST(GSYMTest, TestDWARFInlineInfo) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1839,7 +1839,7 @@ TEST(GSYMTest, TestDWARFInlineInfo) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -2090,7 +2090,7 @@ TEST(GSYMTest, TestDWARFNoLines) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2100,7 +2100,7 @@ TEST(GSYMTest, TestDWARFNoLines) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
EXPECT_EQ(GR->getNumAddresses(), 4u);
@@ -2270,7 +2270,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr4) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2280,7 +2280,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr4) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// Test that the only function that made it was the "main" function.
@@ -2411,7 +2411,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2421,7 +2421,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// Test that the only function that made it was the "main" function.
@@ -2438,7 +2438,7 @@ TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
// instead of being combined into a single entry. This function tests to make
// sure we only get one symbol.
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -2460,7 +2460,7 @@ TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
}
// Helper function to quickly create a FunctionInfo in a GsymCreator for testing.
-static void AddFunctionInfo(GsymCreatorV1 &GC, const char *FuncName,
+static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
uint64_t FuncAddr, const char *SourcePath,
const char *HeaderPath) {
FunctionInfo FI(FuncAddr, 0x30, GC.insertString(FuncName));
@@ -2500,8 +2500,8 @@ static void AddFunctionInfo(GsymCreatorV1 &GC, const char *FuncName,
}
// Finalize a GsymCreator, encode it and decode it and return the error or
-// GsymReaderV1 that was successfully decoded.
-static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreatorV1 &GC) {
+// GsymReader that was successfully decoded.
+static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
OutputAggregator Null(nullptr);
Error FinalizeErr = GC.finalize(Null);
if (FinalizeErr)
@@ -2513,7 +2513,7 @@ static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreatorV1 &GC) {
llvm::Error Err = GC.encode(FW);
if (Err)
return std::move(Err);
- return GsymReaderV1::copyBuffer(OutStrm.str());
+ return GsymReader::copyBuffer(OutStrm.str());
}
TEST(GSYMTest, TestGsymSegmenting) {
@@ -2522,21 +2522,21 @@ TEST(GSYMTest, TestGsymSegmenting) {
// encoding multiple segments, then we verify that we get the same information
// when doing lookups on the full GSYM that was decoded from encoding the
// entire GSYM and also by decoding information from the segments themselves.
- GsymCreatorV1 GC;
+ GsymCreator GC;
GC.setBaseAddress(0);
AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReaderV1> GR = FinalizeEncodeAndDecode(GC);
+ Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
ASSERT_THAT_EXPECTED(GR, Succeeded());
//GR->dump(outs());
// Create segmented GSYM files where each file contains 1 function. We will
// then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReaderV1 objects for each segment to ensure we get
+ // doing lookups on the GsymReader objects for each segment to ensure we get
// the exact same information. So after all of the code below we will have
- // GsymReaderV1 objects that each contain one function. We name the creators
+ // GsymReader objects that each contain one function. We name the creators
// and readers to match the one and only address they contain.
// GC1000 and GR1000 are for [0x1000-0x1030)
// GC2000 and GR2000 are for [0x2000-0x2030)
@@ -2548,7 +2548,7 @@ TEST(GSYMTest, TestGsymSegmenting) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
@@ -2557,25 +2557,25 @@ TEST(GSYMTest, TestGsymSegmenting) {
// encode any values into the segmented GsymCreator.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
// and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -2583,21 +2583,21 @@ TEST(GSYMTest, TestGsymSegmenting) {
ASSERT_TRUE(GC3000.get() != nullptr);
ASSERT_TRUE(GC4000.get() != nullptr);
ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReaderV1 for each segment and verify they succeed.
- Expected<GsymReaderV1> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+ // Encode and decode the GsymReader for each segment and verify they succeed.
+ Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReaderV1> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+ Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReaderV1> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+ Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReaderV1> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+ Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
ASSERT_THAT_EXPECTED(GR4000, Succeeded());
// Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR1000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR1000.
for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR1000->lookup(Addr);
@@ -2612,10 +2612,10 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR2000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR2000.
for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR2000->lookup(Addr);
@@ -2631,10 +2631,10 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR3000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR3000.
for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR3000->lookup(Addr);
@@ -2649,13 +2649,13 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR4000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR4000.
for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReaderV1 for that contains 0x4000
+ // Lookup in the GsymReader for that contains 0x4000
auto SegmentLR = GR4000->lookup(Addr);
ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
// Make sure the lookup results match.
@@ -2674,20 +2674,20 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
// encoding multiple segments, then we verify that we get the same information
// when doing lookups on the full GSYM that was decoded from encoding the
// entire GSYM and also by decoding information from the segments themselves.
- GsymCreatorV1 GC;
+ GsymCreator GC;
AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReaderV1> GR = FinalizeEncodeAndDecode(GC);
+ Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
ASSERT_THAT_EXPECTED(GR, Succeeded());
//GR->dump(outs());
// Create segmented GSYM files where each file contains 1 function. We will
// then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReaderV1 objects for each segment to ensure we get
+ // doing lookups on the GsymReader objects for each segment to ensure we get
// the exact same information. So after all of the code below we will have
- // GsymReaderV1 objects that each contain one function. We name the creators
+ // GsymReader objects that each contain one function. We name the creators
// and readers to match the one and only address they contain.
// GC1000 and GR1000 are for [0x1000-0x1030)
// GC2000 and GR2000 are for [0x2000-0x2030)
@@ -2699,7 +2699,7 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
@@ -2708,25 +2708,25 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
// encode any values into the segmented GsymCreator.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
// and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -2734,21 +2734,21 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
ASSERT_TRUE(GC3000.get() != nullptr);
ASSERT_TRUE(GC4000.get() != nullptr);
ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReaderV1 for each segment and verify they succeed.
- Expected<GsymReaderV1> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+ // Encode and decode the GsymReader for each segment and verify they succeed.
+ Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReaderV1> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+ Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReaderV1> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+ Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReaderV1> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+ Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
ASSERT_THAT_EXPECTED(GR4000, Succeeded());
// Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR1000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR1000.
for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR1000->lookup(Addr);
@@ -2763,10 +2763,10 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR2000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR2000.
for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR2000->lookup(Addr);
@@ -2782,10 +2782,10 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR3000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR3000.
for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR3000->lookup(Addr);
@@ -2800,13 +2800,13 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReaderV1 that contains all functions and from the segmented
- // GsymReaderV1 in GR4000.
+ // in the GsymReader that contains all functions and from the segmented
+ // GsymReader in GR4000.
for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReaderV1 that contains all function infos
+ // Lookup in the main GsymReader that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReaderV1 for that contains 0x4000
+ // Lookup in the GsymReader for that contains 0x4000
auto SegmentLR = GR4000->lookup(Addr);
ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
// Make sure the lookup results match.
@@ -3052,7 +3052,7 @@ TEST(GSYMTest, TestDWARFInlineRangeScopes) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3062,7 +3062,7 @@ TEST(GSYMTest, TestDWARFInlineRangeScopes) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -3280,7 +3280,7 @@ TEST(GSYMTest, TestDWARFEmptyInline) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3290,7 +3290,7 @@ TEST(GSYMTest, TestDWARFEmptyInline) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -3517,7 +3517,7 @@ TEST(GSYMTest, TestFinalizeForLineTables) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3527,7 +3527,7 @@ TEST(GSYMTest, TestFinalizeForLineTables) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -3797,7 +3797,7 @@ TEST(GSYMTest, TestRangeWarnings) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3807,7 +3807,7 @@ TEST(GSYMTest, TestRangeWarnings) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -3999,7 +3999,7 @@ TEST(GSYMTest, TestEmptyRangeWarnings) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4009,7 +4009,7 @@ TEST(GSYMTest, TestEmptyRangeWarnings) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4151,7 +4151,7 @@ TEST(GSYMTest, TestEmptyLinkageName) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4161,7 +4161,7 @@ TEST(GSYMTest, TestEmptyLinkageName) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4312,7 +4312,7 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4322,7 +4322,7 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4632,7 +4632,7 @@ TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4642,7 +4642,7 @@ TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 3u);
@@ -4847,7 +4847,7 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4857,7 +4857,7 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -4947,7 +4947,7 @@ TEST(GSYMTest, TestUnableToLocateDWO) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
// Make a DWARF transformer that is MachO (Apple) to avoid warnings about
// not finding DWO files.
DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true);
@@ -5074,7 +5074,7 @@ TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreatorV1 GC;
+ GsymCreator GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
>From d117d49f9e5eaf51de77a2ec0a2497379aade831 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 22:50:41 -0700
Subject: [PATCH 20/45] Add version round-trip tests
---
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 257 ++++++++++++++++++-
1 file changed, 256 insertions(+), 1 deletion(-)
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index d7f8337cd9da4..549c1f0980008 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -10,11 +10,15 @@
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GlobalData.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Path.h"
#include "llvm/Testing/Support/Error.h"
#include "gtest/gtest.h"
@@ -583,7 +587,7 @@ TEST(GSYMV2Test, TestReaderV2TooSmall) {
}
//===----------------------------------------------------------------------===//
-// Round-trip tests: Creator V2 -> Reader V2
+// Creator/reader round-trip tests: Creator V2 -> Reader V2
//===----------------------------------------------------------------------===//
/// Helper to create, finalize, encode with GsymCreatorV2, then decode with
@@ -853,3 +857,254 @@ TEST(GSYMV2Test, TestRoundTripSwappedAddressTable) {
EXPECT_EQ(GR->getAddress(2), std::optional<uint64_t>(0x8040u));
EXPECT_EQ(GR->getAddress(3), std::nullopt);
}
+
+//===----------------------------------------------------------------------===//
+// Version round-trip tests: V1 -> V2 -> V1 and V2 -> V1 -> V2
+//===----------------------------------------------------------------------===//
+
+/// Recursively re-insert inline info strings and files from a reader into a
+/// creator.
+static void fixupInlineInfoForTransfer(const GsymReaderBase &Reader,
+ GsymCreatorBase &Creator,
+ InlineInfo &II) {
+ II.Name = Creator.insertString(Reader.getString(II.Name));
+ if (II.CallFile != 0) {
+ if (auto FE = Reader.getFile(II.CallFile)) {
+ StringRef Dir = Reader.getString(FE->Dir);
+ StringRef Base = Reader.getString(FE->Base);
+ SmallString<128> Path;
+ if (!Dir.empty()) {
+ Path = Dir;
+ llvm::sys::path::append(Path, Base);
+ } else {
+ Path = Base;
+ }
+ II.CallFile = Creator.insertFile(Path);
+ }
+ }
+ for (auto &Child : II.Children)
+ fixupInlineInfoForTransfer(Reader, Creator, Child);
+}
+
+/// Transfer all function infos from a reader into a creator, re-inserting
+/// all strings and files so that offsets are valid in the new creator.
+static void transferFunctions(const GsymReaderBase &Reader,
+ GsymCreatorBase &Creator) {
+ for (uint32_t I = 0; I < Reader.getNumAddresses(); ++I) {
+ auto FI = Reader.getFunctionInfoAtIndex(I);
+ ASSERT_THAT_EXPECTED(FI, Succeeded());
+
+ // Re-insert function name.
+ FI->Name = Creator.insertString(Reader.getString(FI->Name));
+
+ // Re-insert line table file entries.
+ if (FI->OptLineTable) {
+ for (size_t J = 0; J < FI->OptLineTable->size(); ++J) {
+ LineEntry &LE = FI->OptLineTable->get(J);
+ if (LE.File != 0) {
+ if (auto FE = Reader.getFile(LE.File)) {
+ StringRef Dir = Reader.getString(FE->Dir);
+ StringRef Base = Reader.getString(FE->Base);
+ SmallString<128> Path;
+ if (!Dir.empty()) {
+ Path = Dir;
+ llvm::sys::path::append(Path, Base);
+ } else {
+ Path = Base;
+ }
+ LE.File = Creator.insertFile(Path);
+ }
+ }
+ }
+ }
+
+ // Re-insert inline info strings and files.
+ if (FI->Inline)
+ fixupInlineInfoForTransfer(Reader, Creator, *FI->Inline);
+
+ Creator.addFunctionInfo(std::move(*FI));
+ }
+}
+
+/// Encode a GsymCreatorBase to bytes.
+static SmallString<1024> encodeCreator(const GsymCreatorBase &GC) {
+ SmallString<1024> Str;
+ raw_svector_ostream OS(Str);
+ FileWriter FW(OS, llvm::endianness::native);
+ llvm::Error Err = GC.encode(FW);
+ EXPECT_FALSE(bool(Err));
+ return Str;
+}
+
+/// Collect lookup results for a set of addresses from a reader.
+static std::vector<LookupResult>
+collectLookups(const GsymReaderBase &Reader,
+ ArrayRef<uint64_t> Addrs) {
+ std::vector<LookupResult> Results;
+ for (auto Addr : Addrs) {
+ auto LR = Reader.lookup(Addr);
+ EXPECT_TRUE(bool(LR));
+ if (LR)
+ Results.push_back(std::move(*LR));
+ }
+ return Results;
+}
+
+TEST(GSYMV2Test, TestVersionRoundTripV1ToV2ToV1) {
+ // Create a V1 GSYM with line tables and inline info.
+ GsymCreator GC1;
+ FunctionInfo FI(0x1000, 0x100, GC1.insertString("main"));
+ FI.OptLineTable = LineTable();
+ const uint32_t MainFile = GC1.insertFile("/tmp/main.c");
+ const uint32_t FooFile = GC1.insertFile("/tmp/foo.h");
+ FI.OptLineTable->push(LineEntry(0x1000, MainFile, 5));
+ FI.OptLineTable->push(LineEntry(0x1010, FooFile, 10));
+ FI.OptLineTable->push(LineEntry(0x1020, MainFile, 8));
+ FI.Inline = InlineInfo();
+ FI.Inline->Name = GC1.insertString("inlined_func");
+ FI.Inline->CallFile = MainFile;
+ FI.Inline->CallLine = 6;
+ FI.Inline->Ranges.insert(AddressRange(0x1010, 0x1020));
+ InlineInfo NestedInline;
+ NestedInline.Name = GC1.insertString("deep_inline");
+ NestedInline.CallFile = FooFile;
+ NestedInline.CallLine = 33;
+ NestedInline.Ranges.insert(AddressRange(0x1012, 0x1018));
+ FI.Inline->Children.emplace_back(NestedInline);
+ GC1.addFunctionInfo(std::move(FI));
+
+ FunctionInfo FI2(0x1100, 0x50, GC1.insertString("helper"));
+ FI2.OptLineTable = LineTable();
+ FI2.OptLineTable->push(LineEntry(0x1100, MainFile, 20));
+ FI2.OptLineTable->push(LineEntry(0x1120, MainFile, 25));
+ GC1.addFunctionInfo(std::move(FI2));
+
+ OutputAggregator Null(nullptr);
+ ASSERT_FALSE(bool(GC1.finalize(Null)));
+ SmallString<1024> OrigV1Bytes = encodeCreator(GC1);
+ ASSERT_GT(OrigV1Bytes.size(), 0u);
+
+ // Read original V1.
+ auto OrigReader = GsymReader::copyBuffer(OrigV1Bytes);
+ ASSERT_THAT_EXPECTED(OrigReader, Succeeded());
+
+ // Collect lookup results from original V1.
+ std::vector<uint64_t> TestAddrs = {0x1000, 0x1008, 0x1010, 0x1012,
+ 0x1015, 0x1020, 0x1100, 0x1120};
+ auto OrigResults = collectLookups(*OrigReader, TestAddrs);
+ ASSERT_EQ(OrigResults.size(), TestAddrs.size());
+
+ // Convert V1 → V2.
+ GsymCreatorV2 GC2;
+ transferFunctions(*OrigReader, GC2);
+ ASSERT_FALSE(bool(GC2.finalize(Null)));
+ SmallString<1024> V2Bytes = encodeCreator(GC2);
+ ASSERT_GT(V2Bytes.size(), 0u);
+
+ auto V2Reader = GsymReaderV2::copyBuffer(V2Bytes);
+ ASSERT_THAT_EXPECTED(V2Reader, Succeeded());
+
+ // Verify V2 lookups match original V1.
+ auto V2Results = collectLookups(*V2Reader, TestAddrs);
+ ASSERT_EQ(V2Results.size(), TestAddrs.size());
+ for (size_t I = 0; I < TestAddrs.size(); ++I)
+ EXPECT_EQ(V2Results[I], OrigResults[I])
+ << "Mismatch at address " << TestAddrs[I] << " after V1->V2";
+
+ // Convert V2 → V1.
+ GsymCreator GC3;
+ transferFunctions(*V2Reader, GC3);
+ ASSERT_FALSE(bool(GC3.finalize(Null)));
+ SmallString<1024> FinalV1Bytes = encodeCreator(GC3);
+ ASSERT_GT(FinalV1Bytes.size(), 0u);
+
+ auto FinalReader = GsymReader::copyBuffer(FinalV1Bytes);
+ ASSERT_THAT_EXPECTED(FinalReader, Succeeded());
+
+ // Verify final V1 lookups match original V1.
+ auto FinalResults = collectLookups(*FinalReader, TestAddrs);
+ ASSERT_EQ(FinalResults.size(), TestAddrs.size());
+ for (size_t I = 0; I < TestAddrs.size(); ++I)
+ EXPECT_EQ(FinalResults[I], OrigResults[I])
+ << "Mismatch at address " << TestAddrs[I] << " after V1->V2->V1";
+}
+
+TEST(GSYMV2Test, TestVersionRoundTripV2ToV1ToV2) {
+ // Create a V2 GSYM with line tables and inline info.
+ GsymCreatorV2 GC1;
+ FunctionInfo FI(0x2000, 0x200, GC1.insertString("entry"));
+ FI.OptLineTable = LineTable();
+ const uint32_t SrcFile = GC1.insertFile("/src/app.cc");
+ const uint32_t HdrFile = GC1.insertFile("/src/util.h");
+ FI.OptLineTable->push(LineEntry(0x2000, SrcFile, 10));
+ FI.OptLineTable->push(LineEntry(0x2040, HdrFile, 50));
+ FI.OptLineTable->push(LineEntry(0x2080, HdrFile, 55));
+ FI.OptLineTable->push(LineEntry(0x20C0, SrcFile, 15));
+ FI.Inline = InlineInfo();
+ FI.Inline->Name = GC1.insertString("util_helper");
+ FI.Inline->CallFile = SrcFile;
+ FI.Inline->CallLine = 11;
+ FI.Inline->Ranges.insert(AddressRange(0x2040, 0x20C0));
+ InlineInfo Child;
+ Child.Name = GC1.insertString("util_detail");
+ Child.CallFile = HdrFile;
+ Child.CallLine = 52;
+ Child.Ranges.insert(AddressRange(0x2080, 0x20A0));
+ FI.Inline->Children.emplace_back(Child);
+ GC1.addFunctionInfo(std::move(FI));
+
+ FunctionInfo FI2(0x2200, 0x100, GC1.insertString("cleanup"));
+ FI2.OptLineTable = LineTable();
+ FI2.OptLineTable->push(LineEntry(0x2200, SrcFile, 30));
+ FI2.OptLineTable->push(LineEntry(0x2250, SrcFile, 35));
+ GC1.addFunctionInfo(std::move(FI2));
+
+ OutputAggregator Null(nullptr);
+ ASSERT_FALSE(bool(GC1.finalize(Null)));
+ SmallString<1024> OrigV2Bytes = encodeCreator(GC1);
+ ASSERT_GT(OrigV2Bytes.size(), 0u);
+
+ // Read original V2.
+ auto OrigReader = GsymReaderV2::copyBuffer(OrigV2Bytes);
+ ASSERT_THAT_EXPECTED(OrigReader, Succeeded());
+
+ // Collect lookup results from original V2.
+ std::vector<uint64_t> TestAddrs = {0x2000, 0x2020, 0x2040, 0x2080,
+ 0x2090, 0x20C0, 0x2200, 0x2250};
+ auto OrigResults = collectLookups(*OrigReader, TestAddrs);
+ ASSERT_EQ(OrigResults.size(), TestAddrs.size());
+
+ // Convert V2 → V1.
+ GsymCreator GC2;
+ transferFunctions(*OrigReader, GC2);
+ ASSERT_FALSE(bool(GC2.finalize(Null)));
+ SmallString<1024> V1Bytes = encodeCreator(GC2);
+ ASSERT_GT(V1Bytes.size(), 0u);
+
+ auto V1Reader = GsymReader::copyBuffer(V1Bytes);
+ ASSERT_THAT_EXPECTED(V1Reader, Succeeded());
+
+ // Verify V1 lookups match original V2.
+ auto V1Results = collectLookups(*V1Reader, TestAddrs);
+ ASSERT_EQ(V1Results.size(), TestAddrs.size());
+ for (size_t I = 0; I < TestAddrs.size(); ++I)
+ EXPECT_EQ(V1Results[I], OrigResults[I])
+ << "Mismatch at address " << TestAddrs[I] << " after V2->V1";
+
+ // Convert V1 → V2.
+ GsymCreatorV2 GC3;
+ transferFunctions(*V1Reader, GC3);
+ ASSERT_FALSE(bool(GC3.finalize(Null)));
+ SmallString<1024> FinalV2Bytes = encodeCreator(GC3);
+ ASSERT_GT(FinalV2Bytes.size(), 0u);
+
+ auto FinalReader = GsymReaderV2::copyBuffer(FinalV2Bytes);
+ ASSERT_THAT_EXPECTED(FinalReader, Succeeded());
+
+ // Verify final V2 lookups match original V2.
+ auto FinalResults = collectLookups(*FinalReader, TestAddrs);
+ ASSERT_EQ(FinalResults.size(), TestAddrs.size());
+ for (size_t I = 0; I < TestAddrs.size(); ++I)
+ EXPECT_EQ(FinalResults[I], OrigResults[I])
+ << "Mismatch at address " << TestAddrs[I] << " after V2->V1->V2";
+}
>From 4d860158fb18bd79b9a03d56b192c4a8143c3163 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Wed, 25 Mar 2026 23:05:15 -0700
Subject: [PATCH 21/45] Add gsym-to-gsym --convert support
---
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 152 ++++++++++++++++++---
1 file changed, 134 insertions(+), 18 deletions(-)
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index ec28c3822c238..2cbac7b4fb236 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -38,14 +38,18 @@
#include <system_error>
#include <vector>
+#include "llvm/DebugInfo/GSYM/CallSiteInfo.h"
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/DebugInfo/GSYM/MergedFunctionsInfo.h"
#include "llvm/DebugInfo/GSYM/ObjectFileTransformer.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
@@ -533,6 +537,129 @@ static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
return Error::success();
}
+/// Open a GSYM file, auto-detecting the version unless forced.
+static Expected<std::unique_ptr<GsymReaderBase>> openGsymFile(StringRef Path) {
+ if (ForceReaderVersion == ReaderVersion::Auto)
+ return GsymReaderBase::openFile(Path);
+ if (ForceReaderVersion == ReaderVersion::V2) {
+ auto R = GsymReaderV2::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*R));
+ }
+ auto R = GsymReader::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReader>(std::move(*R));
+}
+
+/// Check if a file starts with the GSYM magic bytes.
+static bool isGSYMFile(StringRef Filename) {
+ auto BuffOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/false,
+ /*RequiresNullTerminator=*/false);
+ if (!BuffOrErr)
+ return false;
+ StringRef Data = (*BuffOrErr)->getBuffer();
+ if (Data.size() < 4)
+ return false;
+ uint32_t Magic;
+ memcpy(&Magic, Data.data(), sizeof(Magic));
+ return Magic == GSYM_MAGIC || Magic == GSYM_CIGAM;
+}
+
+/// Re-insert a file entry from a reader into a creator, reconstructing the
+/// full path from separate Dir and Base components.
+static uint32_t transferFile(const GsymReaderBase &Reader,
+ GsymCreatorBase &Creator, uint32_t FileIdx) {
+ auto FE = Reader.getFile(FileIdx);
+ if (!FE)
+ return FileIdx;
+ StringRef Dir = Reader.getString(FE->Dir);
+ StringRef Base = Reader.getString(FE->Base);
+ SmallString<128> Path;
+ if (!Dir.empty()) {
+ Path = Dir;
+ llvm::sys::path::append(Path, Base);
+ } else {
+ Path = Base;
+ }
+ return Creator.insertFile(Path);
+}
+
+/// Fix up string and file references in an InlineInfo tree so they refer to
+/// the creator's tables instead of the reader's.
+static void fixupInlineInfo(const GsymReaderBase &Reader,
+ GsymCreatorBase &Creator, InlineInfo &II) {
+ II.Name = Creator.insertString(Reader.getString(II.Name));
+ if (II.CallFile != 0)
+ II.CallFile = transferFile(Reader, Creator, II.CallFile);
+ for (auto &Child : II.Children)
+ fixupInlineInfo(Reader, Creator, Child);
+}
+
+/// Fix up all string and file references in a FunctionInfo so they refer to
+/// the creator's tables instead of the reader's.
+static void fixupFunctionInfo(const GsymReaderBase &Reader,
+ GsymCreatorBase &Creator, FunctionInfo &FI) {
+ FI.Name = Creator.insertString(Reader.getString(FI.Name));
+ if (FI.OptLineTable) {
+ for (size_t J = 0; J < FI.OptLineTable->size(); ++J) {
+ LineEntry &LE = FI.OptLineTable->get(J);
+ if (LE.File != 0)
+ LE.File = transferFile(Reader, Creator, LE.File);
+ }
+ }
+ if (FI.Inline)
+ fixupInlineInfo(Reader, Creator, *FI.Inline);
+ if (FI.CallSites) {
+ for (auto &CS : FI.CallSites->CallSites) {
+ for (auto &Idx : CS.MatchRegex)
+ Idx = Creator.insertString(Reader.getString(Idx));
+ }
+ }
+ if (FI.MergedFunctions) {
+ for (auto &MF : FI.MergedFunctions->MergedFunctions)
+ fixupFunctionInfo(Reader, Creator, MF);
+ }
+}
+
+/// Convert a GSYM file to a (possibly different version) GSYM file.
+static llvm::Error handleGSYMConversion(StringRef Filename,
+ const std::string &OutFile,
+ OutputAggregator &Out) {
+ auto ReaderOrErr = openGsymFile(Filename);
+ if (!ReaderOrErr)
+ return ReaderOrErr.takeError();
+ auto &Reader = **ReaderOrErr;
+
+ std::unique_ptr<GsymCreatorBase> CreatorPtr;
+ if (ForceCreatorVersion == CreatorVersion::V2)
+ CreatorPtr = std::make_unique<GsymCreatorV2>(Quiet);
+ else
+ CreatorPtr = std::make_unique<GsymCreator>(Quiet);
+ GsymCreatorBase &Creator = *CreatorPtr;
+
+ // Transfer all function infos, re-inserting strings and files.
+ for (uint32_t I = 0; I < Reader.getNumAddresses(); ++I) {
+ auto FI = Reader.getFunctionInfoAtIndex(I);
+ if (!FI)
+ return FI.takeError();
+ fixupFunctionInfo(Reader, Creator, *FI);
+ Creator.addFunctionInfo(std::move(*FI));
+ }
+
+ if (auto Err = Creator.finalize(Out))
+ return Err;
+
+ Out << "Output file (" << (ForceCreatorVersion == CreatorVersion::V2 ? "v2" : "v1")
+ << "): " << OutFile << "\n";
+
+ if (auto Err = Creator.save(OutFile, llvm::endianness::native))
+ return Err;
+
+ return Error::success();
+}
+
static llvm::Error handleFileConversionToGSYM(StringRef Filename,
const std::string &OutFile,
OutputAggregator &Out) {
@@ -544,8 +671,6 @@ static llvm::Error handleFileConversionToGSYM(StringRef Filename,
}
static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
- // Expand any .dSYM bundles to the individual object files contained therein.
- std::vector<std::string> Objects;
std::string OutFile = OutputFilename;
if (OutFile.empty()) {
OutFile = ConvertFilename;
@@ -554,6 +679,13 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
Out << "Input file: " << ConvertFilename << "\n";
+ // If the input is a GSYM file, do GSYM-to-GSYM conversion.
+ if (isGSYMFile(ConvertFilename))
+ return handleGSYMConversion(ConvertFilename, OutFile, Out);
+
+ // Otherwise, treat it as a DWARF object file.
+ // Expand any .dSYM bundles to the individual object files contained therein.
+ std::vector<std::string> Objects;
if (auto DsymObjectsOrErr =
MachOObjectFile::findDsymObjectMembers(ConvertFilename)) {
if (DsymObjectsOrErr->empty())
@@ -570,22 +702,6 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
return Error::success();
}
-/// Open a GSYM file, auto-detecting the version unless forced.
-static Expected<std::unique_ptr<GsymReaderBase>> openGsymFile(StringRef Path) {
- if (ForceReaderVersion == ReaderVersion::Auto)
- return GsymReaderBase::openFile(Path);
- if (ForceReaderVersion == ReaderVersion::V2) {
- auto R = GsymReaderV2::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV2>(std::move(*R));
- }
- auto R = GsymReader::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReader>(std::move(*R));
-}
-
static void doLookup(GsymReaderBase &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
>From 35ba56f376cc1dcd9813c5a1be5835840d53d82f Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 13:32:37 -0700
Subject: [PATCH 22/45] Rename GSYM interfaces and V1 classes for consistency
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Rename interfaces: GsymReaderBase → GsymReader, GsymCreatorBase → GsymCreator
- Rename V1 classes: GsymReader → GsymReaderV1, GsymCreator → GsymCreatorV1
- Move V1 classes to new files: GsymReaderV1.h/.cpp, GsymCreatorV1.h/.cpp
- GsymReader.h and GsymCreator.h now contain only the abstract interfaces
- Update all consumers, tests, and forward declarations
User prompt:
"For the gsym v2 (64-bit) project, sorry about the back and forth, but
can you rename the interface/class and file names, so that:
1. The interfaces should be named GsymReader and GsymCreator.
2. The classes should be named with V1/V2 suffixes (the V2 classes
already do, so you only need to change the V1 classes, which
currently don't have the V1 suffix).
3. Put the interfaces and v1/v2 classes into their corresponding file
names (i.e. create GsymReaderV1.h/cpp and GsymCreatorV1.h/cpp)."
---
.../llvm/DebugInfo/GSYM/CallSiteInfo.h | 8 +-
.../llvm/DebugInfo/GSYM/DwarfTransformer.h | 14 +-
.../llvm/DebugInfo/GSYM/FunctionInfo.h | 6 +-
.../include/llvm/DebugInfo/GSYM/GsymContext.h | 6 +-
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 461 +------------
.../llvm/DebugInfo/GSYM/GsymCreatorV1.h | 135 ++++
.../llvm/DebugInfo/GSYM/GsymCreatorV2.h | 2 +-
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 412 +-----------
.../llvm/DebugInfo/GSYM/GsymReaderV1.h | 196 ++++++
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 2 +-
llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h | 4 +-
.../llvm/DebugInfo/GSYM/MergedFunctionsInfo.h | 2 +-
.../DebugInfo/GSYM/ObjectFileTransformer.h | 6 +-
llvm/lib/DebugInfo/GSYM/CMakeLists.txt | 2 +
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 14 +-
llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp | 2 +-
llvm/lib/DebugInfo/GSYM/GsymContext.cpp | 4 +-
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 619 ------------------
llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp | 494 ++++++++++++++
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 561 +---------------
llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp | 534 +++++++++++++++
llvm/lib/DebugInfo/GSYM/InlineInfo.cpp | 4 +-
.../DebugInfo/GSYM/ObjectFileTransformer.cpp | 2 +-
llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 2 +-
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 38 +-
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 282 ++++----
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 28 +-
27 files changed, 1604 insertions(+), 2236 deletions(-)
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h
create mode 100644 llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
create mode 100644 llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
create mode 100644 llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
diff --git a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
index fd94061896439..1a8219669e5bf 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/CallSiteInfo.h
@@ -26,7 +26,7 @@ struct FunctionsYAML;
namespace gsym {
class FileWriter;
-class GsymCreatorBase;
+class GsymCreator;
struct FunctionInfo;
struct CallSiteInfo {
enum Flags : uint8_t {
@@ -96,8 +96,8 @@ class CallSiteInfoLoader {
/// Constructor that initializes the CallSiteInfoLoader with necessary data
/// structures.
///
- /// \param GCreator A reference to the GsymCreatorBase.
- CallSiteInfoLoader(GsymCreatorBase &GCreator, std::vector<FunctionInfo> &Funcs)
+ /// \param GCreator A reference to the GsymCreator.
+ CallSiteInfoLoader(GsymCreator &GCreator, std::vector<FunctionInfo> &Funcs)
: GCreator(GCreator), Funcs(Funcs) {}
/// This method reads the specified YAML file, parses its content, and updates
@@ -132,7 +132,7 @@ class CallSiteInfoLoader {
StringMap<FunctionInfo *> &FuncMap);
/// Reference to the parent Gsym Creator object.
- GsymCreatorBase &GCreator;
+ GsymCreator &GCreator;
/// Reference to the vector of FunctionInfo objects to be populated.
std::vector<FunctionInfo> &Funcs;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
index e45947b89297f..2c59a5219292f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/DwarfTransformer.h
@@ -23,11 +23,11 @@ namespace gsym {
struct CUInfo;
struct FunctionInfo;
-class GsymCreatorBase;
+class GsymCreator;
class OutputAggregator;
/// A class that transforms the DWARF in a DWARFContext into GSYM information
-/// by populating the GsymCreatorBase object that it is constructed with. This
+/// by populating the GsymCreator object that it is constructed with. This
/// class supports converting all DW_TAG_subprogram DIEs into
/// gsym::FunctionInfo objects that includes line table information and inline
/// function information. Creating a separate class to transform this data
@@ -48,12 +48,12 @@ class DwarfTransformer {
/// executable format). Apple has some compile unit attributes that look like
/// split DWARF, but they aren't and they can cause warnins to be emitted
/// about missing DWO files.
- DwarfTransformer(DWARFContext &D, GsymCreatorBase &G, bool LDCS = false,
+ DwarfTransformer(DWARFContext &D, GsymCreator &G, bool LDCS = false,
bool MachO = false)
: DICtx(D), Gsym(G), LoadDwarfCallSites(LDCS), IsMachO(MachO) {}
/// Extract the DWARF from the supplied object file and convert it into the
- /// Gsym format in the GsymCreatorBase object that is passed in. Returns an
+ /// Gsym format in the GsymCreator object that is passed in. Returns an
/// error if something fatal is encountered.
///
/// \param NumThreads The number of threads that the conversion process can
@@ -70,13 +70,13 @@ class DwarfTransformer {
private:
- /// Parse the DWARF in the object file and convert it into the GsymCreatorBase.
+ /// Parse the DWARF in the object file and convert it into the GsymCreator.
Error parse();
/// Handle any DIE (debug info entry) from the DWARF.
///
/// This function will find all DW_TAG_subprogram DIEs that convert them into
- /// GSYM FuntionInfo objects and add them to the GsymCreatorBase supplied during
+ /// GSYM FunctionInfo objects and add them to the GsymCreator supplied during
/// construction. The DIE and all its children will be recursively parsed
/// with calls to this function.
///
@@ -101,7 +101,7 @@ class DwarfTransformer {
void parseCallSiteInfoFromDwarf(CUInfo &CUI, DWARFDie Die, FunctionInfo &FI);
DWARFContext &DICtx;
- GsymCreatorBase &Gsym;
+ GsymCreator &Gsym;
bool LoadDwarfCallSites;
bool IsMachO;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index c3fee8af9e1bc..74cdd48697024 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -25,7 +25,7 @@ class raw_ostream;
namespace gsym {
-class GsymReaderBase;
+class GsymReader;
/// Function information in GSYM files encodes information for one contiguous
/// address range. If a function has discontiguous address ranges, they will
/// need to be encoded using multiple FunctionInfo objects.
@@ -185,7 +185,7 @@ struct FunctionInfo {
/// \param GR The GSYM reader that contains the string and file table that
/// will be used to fill in information in the returned result.
///
- /// \param FuncAddr The function start address decoded from the GsymReaderBase.
+ /// \param FuncAddr The function start address decoded from the GsymReader.
///
/// \param Addr The address to lookup.
///
@@ -197,7 +197,7 @@ struct FunctionInfo {
/// encountered during decoding. An error should only be returned if the
/// address is not contained in the FunctionInfo or if the data is corrupted.
LLVM_ABI static llvm::Expected<LookupResult>
- lookup(DataExtractor &Data, const GsymReaderBase &GR, uint64_t FuncAddr,
+ lookup(DataExtractor &Data, const GsymReader &GR, uint64_t FuncAddr,
uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData = nullptr);
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
index 030b4148dd444..f9382fa8d9577 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymContext.h
@@ -17,7 +17,7 @@ namespace llvm {
namespace gsym {
-class GsymReaderBase;
+class GsymReader;
/// GSYM DI Context
/// This data structure is the top level entity that deals with GSYM
@@ -28,7 +28,7 @@ class GsymReaderBase;
/// the GSYM interfaces directly.
class GsymContext : public DIContext {
public:
- GsymContext(std::unique_ptr<GsymReaderBase> Reader);
+ GsymContext(std::unique_ptr<GsymReader> Reader);
~GsymContext() override;
GsymContext(GsymContext &) = delete;
@@ -56,7 +56,7 @@ class GsymContext : public DIContext {
getLocalsForAddress(object::SectionedAddress Address) override;
private:
- const std::unique_ptr<GsymReaderBase> Reader;
+ const std::unique_ptr<GsymReader> Reader;
};
} // end namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 072ad9cffa426..e3a023a3558b9 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -31,115 +31,34 @@ namespace gsym {
class FileWriter;
class OutputAggregator;
-/// GsymCreatorBase is used to emit GSYM data to a stand alone file or section
-/// within a file.
+/// GsymCreator is an abstract interface for creating GSYM data.
///
-/// The GsymCreatorBase is designed to be used in 3 stages:
+/// The GsymCreator is designed to be used in 3 stages:
/// - Create FunctionInfo objects and add them
-/// - Finalize the GsymCreatorBase object
+/// - Finalize the GsymCreator object
/// - Save to file or section
///
/// The first stage involves creating FunctionInfo objects from another source
/// of information like compiler debug info metadata, DWARF or Breakpad files.
/// Any strings in the FunctionInfo or contained information, like InlineInfo
/// or LineTable objects, should get the string table offsets by calling
-/// GsymCreatorBase::insertString(...). Any file indexes that are needed should be
-/// obtained by calling GsymCreatorBase::insertFile(...). All of the function calls
-/// in GsymCreatorBase are thread safe. This allows multiple threads to create and
+/// GsymCreator::insertString(...). Any file indexes that are needed should be
+/// obtained by calling GsymCreator::insertFile(...). All of the function calls
+/// in GsymCreator are thread safe. This allows multiple threads to create and
/// add FunctionInfo objects while parsing debug information.
///
/// Once all of the FunctionInfo objects have been added, the
-/// GsymCreatorBase::finalize(...) must be called prior to saving. This function
+/// GsymCreator::finalize(...) must be called prior to saving. This function
/// will sort the FunctionInfo objects, finalize the string table, and do any
/// other passes on the information needed to prepare the information to be
/// saved.
///
/// Once the object has been finalized, it can be saved to a file or section.
///
-/// ENCODING
-///
-/// GSYM files are designed to be memory mapped into a process as shared, read
-/// only data, and used as is.
-///
-/// The GSYM file format when in a stand alone file consists of:
-/// - Header
-/// - Address Table
-/// - Function Info Offsets
-/// - File Table
-/// - String Table
-/// - Function Info Data
-///
-/// HEADER
-///
-/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h".
-///
-/// ADDRESS TABLE
-///
-/// The address table immediately follows the header in the file and consists
-/// of Header.NumAddresses address offsets. These offsets are sorted and can be
-/// binary searched for efficient lookups. Addresses in the address table are
-/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
-/// This allows the address table to contain 8, 16, or 32 offsets. This allows
-/// the address table to not require full 64 bit addresses for each address.
-/// The resulting GSYM size is smaller and causes fewer pages to be touched
-/// during address lookups when the address table is smaller. The size of the
-/// address offsets in the address table is specified in the header in
-/// Header.AddrOffSize. The first offset in the address table is aligned to
-/// Header.AddrOffSize alignment to ensure efficient access when loaded into
-/// memory.
-///
-/// FUNCTION INFO OFFSETS TABLE
-///
-/// The function info offsets table immediately follows the address table and
-/// consists of Header.NumAddresses 32 bit file offsets: one for each address
-/// in the address table. This data is aligned to a 4 byte boundary. The
-/// offsets in this table are the relative offsets from the start offset of the
-/// GSYM header and point to the function info data for each address in the
-/// address table. Keeping this data separate from the address table helps to
-/// reduce the number of pages that are touched when address lookups occur on a
-/// GSYM file.
-///
-/// FILE TABLE
-///
-/// The file table immediately follows the function info offsets table. The
-/// encoding of the FileTable is:
-///
-/// struct FileTable {
-/// uint32_t Count;
-/// FileEntry Files[];
-/// };
-///
-/// The file table starts with a 32 bit count of the number of files that are
-/// used in all of the function info, followed by that number of FileEntry
-/// structures. The file table is aligned to a 4 byte boundary, Each file in
-/// the file table is represented with a FileEntry structure.
-/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
-///
-/// STRING TABLE
-///
-/// The string table follows the file table in stand alone GSYM files and
-/// contains all strings for everything contained in the GSYM file. Any string
-/// data should be added to the string table and any references to strings
-/// inside GSYM information must be stored as 32 bit string table offsets into
-/// this string table. The string table always starts with an empty string at
-/// offset zero and is followed by any strings needed by the GSYM information.
-/// The start of the string table is not aligned to any boundary.
-///
-/// FUNCTION INFO DATA
-///
-/// The function info data is the payload that contains information about the
-/// address that is being looked up. It contains all of the encoded
-/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
-/// entry in the Function Info Offsets Table. For details on the exact encoding
-/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
-/// Abstract interface for GSYM creators (V1 and V2).
-///
-/// This interface defines the common API used by DwarfTransformer,
-/// ObjectFileTransformer, and other consumers that need to populate
-/// a GSYM file regardless of the output format version.
-class GsymCreatorBase {
+/// Both GsymCreatorV1 and GsymCreatorV2 implement this interface.
+class GsymCreator {
public:
- virtual ~GsymCreatorBase() = default;
+ virtual ~GsymCreator() = default;
virtual uint32_t insertString(StringRef S, bool Copy = true) = 0;
virtual StringRef getString(uint32_t Offset) = 0;
@@ -169,366 +88,6 @@ class GsymCreatorBase {
virtual bool isQuiet() const = 0;
};
-class GsymCreator : public GsymCreatorBase {
- // Private member variables require Mutex protections
- mutable std::mutex Mutex;
- std::vector<FunctionInfo> Funcs;
- StringTableBuilder StrTab;
- StringSet<> StringStorage;
- DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
- // Needed for mapping string offsets back to the string stored in \a StrTab.
- DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
- std::vector<llvm::gsym::FileEntry> Files;
- std::vector<uint8_t> UUID;
- std::optional<AddressRanges> ValidTextRanges;
- std::optional<uint64_t> BaseAddress;
- bool IsSegment = false;
- bool Finalized = false;
- bool Quiet;
-
-
- /// Get the first function start address.
- ///
- /// \returns The start address of the first FunctionInfo or std::nullopt if
- /// there are no function infos.
- std::optional<uint64_t> getFirstFunctionAddress() const;
-
- /// Get the last function address.
- ///
- /// \returns The start address of the last FunctionInfo or std::nullopt if
- /// there are no function infos.
- std::optional<uint64_t> getLastFunctionAddress() const;
-
- /// Get the base address to use for this GSYM file.
- ///
- /// \returns The base address to put into the header and to use when creating
- /// the address offset table or std::nullpt if there are no valid
- /// function infos or if the base address wasn't specified.
- std::optional<uint64_t> getBaseAddress() const;
-
- /// Get the size of an address offset in the address offset table.
- ///
- /// GSYM files store offsets from the base address in the address offset table
- /// and we store the size of the address offsets in the GSYM header. This
- /// function will calculate the size in bytes of these address offsets based
- /// on the current contents of the GSYM file.
- ///
- /// \returns The size in byets of the address offsets.
- uint8_t getAddressOffsetSize() const;
-
- /// Get the maximum address offset for the current address offset size.
- ///
- /// This is used when creating the address offset table to ensure we have
- /// values that are in range so we don't end up truncating address offsets
- /// when creating GSYM files as the code evolves.
- ///
- /// \returns The maximum address offset value that will be encoded into a GSYM
- /// file.
- uint64_t getMaxAddressOffset() const;
-
- /// Calculate the byte size of the GSYM header and tables sizes.
- ///
- /// This function will calculate the exact size in bytes of the encocded GSYM
- /// for the following items:
- /// - The GSYM header
- /// - The Address offset table
- /// - The Address info offset table
- /// - The file table
- /// - The string table
- ///
- /// This is used to help split GSYM files into segments.
- ///
- /// \returns Size in bytes the GSYM header and tables.
- uint64_t calculateHeaderAndTableSize() const;
-
- /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
- ///
- /// Copy the function info and only the needed files and strings and add a
- /// converted FunctionInfo into this object. This is used to segment GSYM
- /// files into separate files while only transferring the files and strings
- /// that are needed from \a SrcGC.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
- /// \returns The number of bytes it will take to encode the function info in
- /// this GsymCreator. This helps calculate the size of the current GSYM
- /// segment file.
- uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
-
- /// Copy a string from \a SrcGC into this object.
- ///
- /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
- /// If a string has already been copied, the uniqued string table offset will
- /// be returned, otherwise the string will be copied and a unique offset will
- /// be returned.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param StrOff The string table offset from \a SrcGC to copy.
- /// \returns The new string table offset of the string within this object.
- uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
-
- /// Copy a file from \a SrcGC into this object.
- ///
- /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
- /// consist of two string table entries, one for the directory and one for the
- /// filename, this function will copy any needed strings ensure the file is
- /// uniqued within this object. If a file already exists in this GSYM creator
- /// the uniqued index will be returned, else the stirngs will be copied and
- /// the new file index will be returned.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
- /// file index of zero will always return zero as the zero is a reserved file
- /// index that means no file.
- /// \returns The new file index of the file within this object.
- uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
-
- /// Inserts a FileEntry into the file table.
- ///
- /// This is used to insert a file entry in a thread safe way into this object.
- ///
- /// \param FE A file entry object that contains valid string table offsets
- /// from this object already.
- uint32_t insertFileEntry(FileEntry FE);
-
- /// Fixup any string and file references by updating any file indexes and
- /// strings offsets in the InlineInfo parameter.
- ///
- /// When copying InlineInfo entries, we can simply make a copy of the object
- /// and then fixup the files and strings for efficiency.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param II The inline info that contains file indexes and string offsets
- /// that come from \a SrcGC. The entries will be updated by coping any files
- /// and strings over into this object.
- void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
-
- /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
- ///
- /// When segemented GSYM files are saved to disk, they will use \a Path as a
- /// prefix and then have the first function info address appended to the path
- /// when each segment is saved. Each segmented GSYM file has a only the
- /// strings and files that are needed to save the function infos that are in
- /// each segment. These smaller files are easy to compress and download
- /// separately and allow for efficient lookups with very large GSYM files and
- /// segmenting them allows servers to download only the segments that are
- /// needed.
- ///
- /// \param Path The path prefix to use when saving the GSYM files.
- /// \param ByteOrder The endianness to use when saving the file.
- /// \param SegmentSize The size in bytes to segment the GSYM file into.
- llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
- uint64_t SegmentSize) const;
-
- /// Let this creator know that this is a segment of another GsymCreator.
- ///
- /// When we have a segment, we know that function infos will be added in
- /// ascending address range order without having to be finalized. We also
- /// don't need to sort and unique entries during the finalize function call.
- void setIsSegment() {
- IsSegment = true;
- }
-
-public:
- LLVM_ABI GsymCreator(bool Quiet = false);
-
- /// Save a GSYM file to a stand alone file.
- ///
- /// \param Path The file path to save the GSYM file to.
- /// \param ByteOrder The endianness to use when saving the file.
- /// \param SegmentSize The size in bytes to segment the GSYM file into. If
- /// this option is set this function will create N segments
- /// that are all around \a SegmentSize bytes in size. This
- /// allows a very large GSYM file to be broken up into
- /// shards. Each GSYM file will have its own file table,
- /// and string table that only have the files and strings
- /// needed for the shared. If this argument has no value,
- /// a single GSYM file that contains all function
- /// information will be created.
- /// \returns An error object that indicates success or failure of the save.
- LLVM_ABI llvm::Error
- save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const override;
-
- /// Encode a GSYM into the file writer stream at the current position.
- ///
- /// \param O The stream to save the binary data to
- /// \returns An error object that indicates success or failure of the save.
- LLVM_ABI llvm::Error encode(FileWriter &O) const override;
-
- /// Insert a string into the GSYM string table.
- ///
- /// All strings used by GSYM files must be uniqued by adding them to this
- /// string pool and using the returned offset for any string values.
- ///
- /// \param S The string to insert into the string table.
- /// \param Copy If true, then make a backing copy of the string. If false,
- /// the string is owned by another object that will stay around
- /// long enough for the GsymCreator to save the GSYM file.
- /// \returns The unique 32 bit offset into the string table.
- LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true) override;
-
- /// Retrieve a string from the GSYM string table given its offset.
- ///
- /// The offset is assumed to be a valid offset into the string table.
- /// otherwise an assert will be triggered.
- ///
- /// \param Offset The offset of the string to retrieve, previously returned by
- /// insertString.
- /// \returns The string at the given offset in the string table.
- LLVM_ABI StringRef getString(uint32_t Offset) override;
-
- /// Insert a file into this GSYM creator.
- ///
- /// Inserts a file by adding a FileEntry into the "Files" member variable if
- /// the file has not already been added. The file path is split into
- /// directory and filename which are both added to the string table. This
- /// allows paths to be stored efficiently by reusing the directories that are
- /// common between multiple files.
- ///
- /// \param Path The path to the file to insert.
- /// \param Style The path style for the "Path" parameter.
- /// \returns The unique file index for the inserted file.
- LLVM_ABI uint32_t
- insertFile(StringRef Path,
- sys::path::Style Style = sys::path::Style::native) override;
-
- /// Add a function info to this GSYM creator.
- ///
- /// All information in the FunctionInfo object must use the
- /// GsymCreator::insertString(...) function when creating string table
- /// offsets for names and other strings.
- ///
- /// \param FI The function info object to emplace into our functions list.
- LLVM_ABI void addFunctionInfo(FunctionInfo &&FI) override;
-
- /// Load call site information from a YAML file.
- ///
- /// This function reads call site information from a specified YAML file and
- /// adds it to the GSYM data.
- ///
- /// \param YAMLFile The path to the YAML file containing call site
- /// information.
- LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) override;
-
- /// Organize merged FunctionInfo's
- ///
- /// This method processes the list of function infos (Funcs) to identify and
- /// group functions with overlapping address ranges.
- ///
- /// \param Out Output stream to report information about how merged
- /// FunctionInfo's were handled.
- LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out) override;
-
- /// Finalize the data in the GSYM creator prior to saving the data out.
- ///
- /// Finalize must be called after all FunctionInfo objects have been added
- /// and before GsymCreator::save() is called.
- ///
- /// \param OS Output stream to report duplicate function infos, overlapping
- /// function infos, and function infos that were merged or removed.
- /// \returns An error object that indicates success or failure of the
- /// finalize.
- LLVM_ABI llvm::Error finalize(OutputAggregator &OS) override;
-
- /// Set the UUID value.
- ///
- /// \param UUIDBytes The new UUID bytes.
- void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) override {
- UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
- }
-
- /// Thread safe iteration over all function infos.
- ///
- /// \param Callback A callback function that will get called with each
- /// FunctionInfo. If the callback returns false, stop iterating.
- LLVM_ABI void
- forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) override;
-
- /// Thread safe const iteration over all function infos.
- ///
- /// \param Callback A callback function that will get called with each
- /// FunctionInfo. If the callback returns false, stop iterating.
- LLVM_ABI void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const override;
-
- /// Get the current number of FunctionInfo objects contained in this
- /// object.
- LLVM_ABI size_t getNumFunctionInfos() const override;
-
- /// Set valid .text address ranges that all functions must be contained in.
- void SetValidTextRanges(AddressRanges &TextRanges) override {
- ValidTextRanges = TextRanges;
- }
-
- /// Get the valid text ranges.
- const std::optional<AddressRanges> GetValidTextRanges() const override {
- return ValidTextRanges;
- }
-
- /// Check if an address is a valid code address.
- ///
- /// Any functions whose addresses do not exist within these function bounds
- /// will not be converted into the final GSYM. This allows the object file
- /// to figure out the valid file address ranges of all the code sections
- /// and ensure we don't add invalid functions to the final output. Many
- /// linkers have issues when dead stripping functions from DWARF debug info
- /// where they set the DW_AT_low_pc to zero, but newer DWARF has the
- /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size
- /// attributes have no relocations that can be applied. This results in DWARF
- /// where many functions have an DW_AT_low_pc of zero and a valid offset size
- /// for DW_AT_high_pc. If we extract all valid ranges from an object file
- /// that are marked with executable permissions, we can properly ensure that
- /// these functions are removed.
- ///
- /// \param Addr An address to check.
- ///
- /// \returns True if the address is in the valid text ranges or if no valid
- /// text ranges have been set, false otherwise.
- LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const override;
-
- /// Set the base address to use for the GSYM file.
- ///
- /// Setting the base address to use for the GSYM file. Object files typically
- /// get loaded from a base address when the OS loads them into memory. Using
- /// GSYM files for symbolication becomes easier if the base address in the
- /// GSYM header is the same address as it allows addresses to be easily slid
- /// and allows symbolication without needing to find the original base
- /// address in the original object file.
- ///
- /// \param Addr The address to use as the base address of the GSYM file
- /// when it is saved to disk.
- void setBaseAddress(uint64_t Addr) override {
- BaseAddress = Addr;
- }
-
- /// Whether the transformation should be quiet, i.e. not output warnings.
- bool isQuiet() const override { return Quiet; }
-
-
- /// Create a segmented GSYM creator starting with function info index
- /// \a FuncIdx.
- ///
- /// This function will create a GsymCreator object that will encode into
- /// roughly \a SegmentSize bytes and return it. It is used by the private
- /// saveSegments(...) function and also is used by the GSYM unit tests to test
- /// segmenting of GSYM files. The returned GsymCreator can be finalized and
- /// encoded.
- ///
- /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
- /// into.
- /// \param [in,out] FuncIdx The index of the first function info to encode
- /// into the returned GsymCreator. This index will be updated so it can be
- /// used in subsequent calls to this function to allow more segments to be
- /// created.
- /// \returns An expected unique pointer to a GsymCreator or an error. The
- /// returned unique pointer can be NULL if there are no more functions to
- /// encode.
- LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreator>>
- createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
-};
-
} // namespace gsym
} // namespace llvm
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h
new file mode 100644
index 0000000000000..8bf537d14af90
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h
@@ -0,0 +1,135 @@
+//===- GsymCreatorV1.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATORV1_H
+#define LLVM_DEBUGINFO_GSYM_GSYMCREATORV1_H
+
+#include "llvm/Support/Compiler.h"
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <thread>
+
+#include "llvm/ADT/AddressRanges.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+namespace gsym {
+class FileWriter;
+class OutputAggregator;
+
+/// GsymCreatorV1 is used to emit GSYM V1 data to a stand alone file or section
+/// within a file.
+///
+/// See GsymCreator for the 3-stage usage pattern and file format documentation.
+class GsymCreatorV1 : public GsymCreator {
+ // Private member variables require Mutex protections
+ mutable std::mutex Mutex;
+ std::vector<FunctionInfo> Funcs;
+ StringTableBuilder StrTab;
+ StringSet<> StringStorage;
+ DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
+ // Needed for mapping string offsets back to the string stored in \a StrTab.
+ DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
+ std::vector<llvm::gsym::FileEntry> Files;
+ std::vector<uint8_t> UUID;
+ std::optional<AddressRanges> ValidTextRanges;
+ std::optional<uint64_t> BaseAddress;
+ bool IsSegment = false;
+ bool Finalized = false;
+ bool Quiet;
+
+
+ std::optional<uint64_t> getFirstFunctionAddress() const;
+ std::optional<uint64_t> getLastFunctionAddress() const;
+ std::optional<uint64_t> getBaseAddress() const;
+ uint8_t getAddressOffsetSize() const;
+ uint64_t getMaxAddressOffset() const;
+ uint64_t calculateHeaderAndTableSize() const;
+ uint64_t copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncInfoIdx);
+ uint32_t copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff);
+ uint32_t copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx);
+ uint32_t insertFileEntry(FileEntry FE);
+ void fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II);
+ llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
+ uint64_t SegmentSize) const;
+ void setIsSegment() {
+ IsSegment = true;
+ }
+
+public:
+ LLVM_ABI GsymCreatorV1(bool Quiet = false);
+
+ LLVM_ABI llvm::Error
+ save(StringRef Path, llvm::endianness ByteOrder,
+ std::optional<uint64_t> SegmentSize = std::nullopt) const override;
+
+ LLVM_ABI llvm::Error encode(FileWriter &O) const override;
+
+ LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true) override;
+
+ LLVM_ABI StringRef getString(uint32_t Offset) override;
+
+ LLVM_ABI uint32_t
+ insertFile(StringRef Path,
+ sys::path::Style Style = sys::path::Style::native) override;
+
+ LLVM_ABI void addFunctionInfo(FunctionInfo &&FI) override;
+
+ LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) override;
+
+ LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out) override;
+
+ LLVM_ABI llvm::Error finalize(OutputAggregator &OS) override;
+
+ void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) override {
+ UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
+ }
+
+ LLVM_ABI void
+ forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback) override;
+
+ LLVM_ABI void forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const override;
+
+ LLVM_ABI size_t getNumFunctionInfos() const override;
+
+ void SetValidTextRanges(AddressRanges &TextRanges) override {
+ ValidTextRanges = TextRanges;
+ }
+
+ const std::optional<AddressRanges> GetValidTextRanges() const override {
+ return ValidTextRanges;
+ }
+
+ LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const override;
+
+ void setBaseAddress(uint64_t Addr) override {
+ BaseAddress = Addr;
+ }
+
+ bool isQuiet() const override { return Quiet; }
+
+ LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreatorV1>>
+ createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_GSYMCREATORV1_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
index 771ccef83b3cf..9e2067e0b1c72 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
@@ -98,7 +98,7 @@ class OutputAggregator;
///
/// - UUID: Raw UUID bytes of the original executable. Only present if a UUID
/// was set. No alignment requirement.
-class GsymCreatorV2 : public GsymCreatorBase {
+class GsymCreatorV2 : public GsymCreator {
// Private member variables require Mutex protections
mutable std::mutex Mutex;
std::vector<FunctionInfo> Funcs;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 4ed44d1827825..a279984f71e29 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -30,28 +30,28 @@ class raw_ostream;
namespace gsym {
-/// GsymReaderBase is an abstract interface for reading GSYM data.
+/// GsymReader is an abstract interface for reading GSYM data.
///
/// This interface provides the methods needed by FunctionInfo::lookup and
/// InlineInfo::lookup to resolve strings and files during symbolication.
-/// Both GsymReader and GsymReaderV2 implement this interface.
-class GsymReaderBase {
+/// Both GsymReaderV1 and GsymReaderV2 implement this interface.
+class GsymReader {
public:
- virtual ~GsymReaderBase() = default;
+ virtual ~GsymReader() = default;
/// Open a GSYM file, auto-detecting the format version.
///
/// \param Path The file path of the GSYM file to read.
- /// \returns An expected unique_ptr to a GsymReaderBase or an error.
- LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReaderBase>>
+ /// \returns An expected unique_ptr to a GsymReader or an error.
+ LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
openFile(StringRef Path);
- /// Construct a GsymReaderBase from a buffer, auto-detecting the format version.
+ /// Construct a GsymReader from a buffer, auto-detecting the format version.
///
/// \param Bytes A set of bytes that will be copied and owned by the
/// returned object on success.
- /// \returns An expected unique_ptr to a GsymReaderBase or an error.
- LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReaderBase>>
+ /// \returns An expected unique_ptr to a GsymReader or an error.
+ LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
copyBuffer(StringRef Bytes);
/// Get a string from the string table.
@@ -111,400 +111,6 @@ class GsymReaderBase {
virtual void dump(raw_ostream &OS, std::optional<FileEntry> FE) = 0;
};
-/// GsymReader is used to read GSYM V1 data from a file or buffer.
-///
-/// This class is optimized for very quick lookups when the endianness matches
-/// the host system. The Header, address table, address info offsets, and file
-/// table is designed to be mmap'ed as read only into memory and used without
-/// any parsing needed. If the endianness doesn't match, we swap these objects
-/// and tables into GsymReader::SwappedData and then point our header and
-/// ArrayRefs to this swapped internal data.
-///
-/// GsymReader objects must use one of the static functions to create an
-/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
-
-class GsymReader : public GsymReaderBase {
- GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
- llvm::Error parse();
-
- std::unique_ptr<MemoryBuffer> MemBuffer;
- StringRef GsymBytes;
- llvm::endianness Endian;
- const Header *Hdr = nullptr;
- ArrayRef<uint8_t> AddrOffsets;
- ArrayRef<uint32_t> AddrInfoOffsets;
- ArrayRef<FileEntry> Files;
- StringTable StrTab;
- /// When the GSYM file's endianness doesn't match the host system then
- /// we must decode all data structures that need to be swapped into
- /// local storage and set point the ArrayRef objects above to these swapped
- /// copies.
- struct SwappedData {
- Header Hdr;
- std::vector<uint8_t> AddrOffsets;
- std::vector<uint32_t> AddrInfoOffsets;
- std::vector<FileEntry> Files;
- };
- std::unique_ptr<SwappedData> Swap;
-
-public:
- LLVM_ABI GsymReader(GsymReader &&RHS);
- LLVM_ABI ~GsymReader() override;
-
- /// Construct a GsymReader from a file on disk.
- ///
- /// \param Path The file path the GSYM file to read.
- /// \returns An expected GsymReader that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<GsymReader> openFile(StringRef Path);
-
- /// Construct a GsymReader from a buffer.
- ///
- /// \param Bytes A set of bytes that will be copied and owned by the
- /// returned object on success.
- /// \returns An expected GsymReader that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<GsymReader> copyBuffer(StringRef Bytes);
-
- /// Access the GSYM header.
- /// \returns A native endian version of the GSYM header.
- LLVM_ABI const Header &getHeader() const;
-
- /// Get the full function info for an address.
- ///
- /// This should be called when a client will store a copy of the complete
- /// FunctionInfo for a given address. For one off lookups, use the lookup()
- /// function below.
- ///
- /// Symbolication server processes might want to parse the entire function
- /// info for a given address and cache it if the process stays around to
- /// service many symbolication addresses, like for parsing profiling
- /// information.
- ///
- /// \param Addr A virtual address from the orignal object file to lookup.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing to lookup the
- /// address.
- LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfo(uint64_t Addr) const override;
-
- /// Get the full function info given an address index.
- ///
- /// \param AddrIdx A address index for an address in the address table.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing get the function
- /// info object.
- LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfoAtIndex(uint64_t AddrIdx) const override;
-
- /// Lookup an address in the a GSYM.
- ///
- /// Lookup just the information needed for a specific address \a Addr. This
- /// function is faster that calling getFunctionInfo() as it will only return
- /// information that pertains to \a Addr and allows the parsing to skip any
- /// extra information encoded for other addresses. For example the line table
- /// parsing can stop when a matching LineEntry has been fouhnd, and the
- /// InlineInfo can stop parsing early once a match has been found and also
- /// skip information that doesn't match. This avoids memory allocations and
- /// is much faster for lookups.
- ///
- /// \param Addr A virtual address from the orignal object file to lookup.
- ///
- /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
- /// non-null, will be set to the raw data of the MergedFunctionInfo, if
- /// present.
- ///
- /// \returns An expected LookupResult that contains only the information
- /// needed for the current address, or an error object that indicates reason
- /// for failing to lookup the address.
- LLVM_ABI llvm::Expected<LookupResult>
- lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFuncsData = nullptr) const override;
-
- /// Lookup all merged functions for a given address.
- ///
- /// This function performs a lookup for the specified address and then
- /// retrieves additional LookupResults from any merged functions associated
- /// with the primary LookupResult.
- ///
- /// \param Addr The address to lookup.
- ///
- /// \returns A vector of LookupResult objects, where the first element is the
- /// primary result, followed by results for any merged functions
- LLVM_ABI llvm::Expected<std::vector<LookupResult>>
- lookupAll(uint64_t Addr) const override;
-
- /// Get a string from the string table.
- ///
- /// \param Offset The string table offset for the string to retrieve.
- /// \returns The string from the strin table.
- StringRef getString(uint32_t Offset) const override { return StrTab[Offset]; }
-
- /// Get the a file entry for the suppplied file index.
- ///
- /// Used to convert any file indexes in the FunctionInfo data back into
- /// files. This function can be used for iteration, but is more commonly used
- /// for random access when doing lookups.
- ///
- /// \param Index An index into the file table.
- /// \returns An optional FileInfo that will be valid if the file index is
- /// valid, or std::nullopt if the file index is out of bounds,
- std::optional<FileEntry> getFile(uint32_t Index) const override {
- if (Index < Files.size())
- return Files[Index];
- return std::nullopt;
- }
-
- /// Dump the entire Gsym data contained in this object.
- ///
- /// \param OS The output stream to dump to.
- LLVM_ABI void dump(raw_ostream &OS) override;
-
- /// Dump a FunctionInfo object.
- ///
- /// This function will convert any string table indexes and file indexes
- /// into human readable format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param FI The object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used when dumping as an
- /// item within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent = 0) override;
-
- /// Dump a MergedFunctionsInfo object.
- ///
- /// This function will dump a MergedFunctionsInfo object - basically by
- /// dumping the contained FunctionInfo objects with indentation.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param MFI The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) override;
-
- /// Dump a CallSiteInfo object.
- ///
- /// This function will output the details of a CallSiteInfo object in a
- /// human-readable format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param CSI The CallSiteInfo object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI) override;
-
- /// Dump a CallSiteInfoCollection object.
- ///
- /// This function will iterate over a collection of CallSiteInfo objects and
- /// dump each one.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param CSIC The CallSiteInfoCollection object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used when dumping as an
- /// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent = 0) override;
-
- /// Dump a LineTable object.
- ///
- /// This function will convert any string table indexes and file indexes
- /// into human readable format.
- ///
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param LT The object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used when dumping as an
- /// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const LineTable <,
- uint32_t Indent = 0) override;
-
- /// Dump a InlineInfo object.
- ///
- /// This function will convert any string table indexes and file indexes
- /// into human readable format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param II The object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used for recurive
- /// dumping.
- LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
- uint32_t Indent = 0) override;
-
- /// Dump a FileEntry object.
- ///
- /// This function will convert any string table indexes into human readable
- /// format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param FE The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE) override;
-
- /// Get the number of addresses in this Gsym file.
- uint32_t getNumAddresses() const override {
- return Hdr->NumAddresses;
- }
-
- /// Gets an address from the address table.
- ///
- /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
- ///
- /// \param Index A index into the address table.
- /// \returns A resolved virtual address for adddress in the address table
- /// or std::nullopt if Index is out of bounds.
- LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const override;
-
-protected:
-
- /// Get an appropriate address info offsets array.
- ///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte offsets from the The gsym::Header::BaseAddress. The table is stored
- /// internally as a array of bytes that are in the correct endianness. When
- /// we access this table we must get an array that matches those sizes. This
- /// templatized helper function is used when accessing address offsets in the
- /// AddrOffsets member variable.
- ///
- /// \returns An ArrayRef of an appropriate address offset size.
- template <class T> ArrayRef<T>
- getAddrOffsets() const {
- return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
- AddrOffsets.size()/sizeof(T));
- }
-
- /// Get an appropriate address from the address table.
- ///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte address offsets from the The gsym::Header::BaseAddress. The table is
- /// stored internally as a array of bytes that are in the correct endianness.
- /// In order to extract an address from the address table we must access the
- /// address offset using the correct size and then add it to the BaseAddress
- /// in the header.
- ///
- /// \param Index An index into the AddrOffsets array.
- /// \returns An virtual address that matches the original object file for the
- /// address as the specified index, or std::nullopt if Index is out of bounds.
- template <class T>
- std::optional<uint64_t> addressForIndex(size_t Index) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- if (Index < AIO.size())
- return AIO[Index] + Hdr->BaseAddress;
- return std::nullopt;
- }
- /// Lookup an address offset in the AddrOffsets table.
- ///
- /// Given an address offset, look it up using a binary search of the
- /// AddrOffsets table.
- ///
- /// \param AddrOffset An address offset, that has already been computed by
- /// subtracting the gsym::Header::BaseAddress.
- /// \returns The matching address offset index. This index will be used to
- /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
- template <class T>
- std::optional<uint64_t>
- getAddressOffsetIndex(const uint64_t AddrOffset) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- const auto Begin = AIO.begin();
- const auto End = AIO.end();
- auto Iter = std::lower_bound(Begin, End, AddrOffset);
- // Watch for addresses that fall between the gsym::Header::BaseAddress and
- // the first address offset.
- if (Iter == Begin && AddrOffset < *Begin)
- return std::nullopt;
- if (Iter == End || AddrOffset < *Iter)
- --Iter;
-
- // GSYM files have sorted function infos with the most information (line
- // table and/or inline info) first in the array of function infos, so
- // always backup as much as possible as long as the address offset is the
- // same as the previous entry.
- while (Iter != Begin) {
- auto Prev = Iter - 1;
- if (*Prev == *Iter)
- Iter = Prev;
- else
- break;
- }
-
- return std::distance(Begin, Iter);
- }
-
- /// Create a GSYM from a memory buffer.
- ///
- /// Called by both openFile() and copyBuffer(), this function does all of the
- /// work of parsing the GSYM file and returning an error.
- ///
- /// \param MemBuffer A memory buffer that will transfer ownership into the
- /// GsymReader.
- /// \returns An expected GsymReader that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<llvm::gsym::GsymReader>
- create(std::unique_ptr<MemoryBuffer> &MemBuffer);
-
- /// Given an address, find the address index.
- ///
- /// Binary search the address table and find the matching address index.
- ///
- /// \param Addr A virtual address that matches the original object file
- /// to lookup.
- /// \returns An index into the address table. This index can be used to
- /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
- /// Returns an error if the address isn't in the GSYM with details of why.
- LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
-
- /// Given an address index, get the offset for the FunctionInfo.
- ///
- /// Looking up an address is done by finding the corresponding address
- /// index for the address. This index is then used to get the offset of the
- /// FunctionInfo data that we will decode using this function.
- ///
- /// \param Index An index into the address table.
- /// \returns An optional GSYM data offset for the offset of the FunctionInfo
- /// that needs to be decoded.
- LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
-
- /// Given an address, find the correct function info data and function
- /// address.
- ///
- /// Binary search the address table and find the matching address info
- /// and make sure that the function info contains the address. GSYM allows
- /// functions to overlap, and the most debug info is contained in the first
- /// entries due to the sorting when GSYM files are created. We can have
- /// multiple function info that start at the same address only if their
- /// address range doesn't match. So find the first entry that matches \a Addr
- /// and iterate forward until we find one that contains the address.
- ///
- /// \param[in] Addr A virtual address that matches the original object file
- /// to lookup.
- ///
- /// \param[out] FuncStartAddr A virtual address that is the base address of
- /// the function that is used for decoding the FunctionInfo.
- ///
- /// \returns An valid data extractor on success, or an error if we fail to
- /// find the address in a function info or corrrectly decode the data
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
-
- /// Get the function data and address given an address index.
- ///
- /// \param AddrIdx A address index from the address table.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing to lookup the
- /// address.
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
-};
-
} // namespace gsym
} // namespace llvm
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
new file mode 100644
index 0000000000000..a3dac28616548
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
@@ -0,0 +1,196 @@
+//===- GsymReaderV1.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADERV1_H
+#define LLVM_DEBUGINFO_GSYM_GSYMREADERV1_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/StringTable.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorOr.h"
+#include <inttypes.h>
+#include <memory>
+#include <stdint.h>
+#include <vector>
+
+namespace llvm {
+class MemoryBuffer;
+class raw_ostream;
+
+namespace gsym {
+
+/// GsymReaderV1 is used to read GSYM V1 data from a file or buffer.
+///
+/// This class is optimized for very quick lookups when the endianness matches
+/// the host system. The Header, address table, address info offsets, and file
+/// table are designed to be mmap'ed as read only into memory and used without
+/// any parsing needed. If the endianness doesn't match, we swap these objects
+/// and tables into GsymReaderV1::SwappedData and then point our header and
+/// ArrayRefs to this swapped internal data.
+///
+/// GsymReaderV1 objects must use one of the static functions to create an
+/// instance: GsymReaderV1::openFile(...) and GsymReaderV1::copyBuffer(...).
+
+class GsymReaderV1 : public GsymReader {
+ GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer);
+ llvm::Error parse();
+
+ std::unique_ptr<MemoryBuffer> MemBuffer;
+ StringRef GsymBytes;
+ llvm::endianness Endian;
+ const Header *Hdr = nullptr;
+ ArrayRef<uint8_t> AddrOffsets;
+ ArrayRef<uint32_t> AddrInfoOffsets;
+ ArrayRef<FileEntry> Files;
+ StringTable StrTab;
+ /// When the GSYM file's endianness doesn't match the host system then
+ /// we must decode all data structures that need to be swapped into
+ /// local storage and point the ArrayRef objects above to these swapped
+ /// copies.
+ struct SwappedData {
+ Header Hdr;
+ std::vector<uint8_t> AddrOffsets;
+ std::vector<uint32_t> AddrInfoOffsets;
+ std::vector<FileEntry> Files;
+ };
+ std::unique_ptr<SwappedData> Swap;
+
+public:
+ LLVM_ABI GsymReaderV1(GsymReaderV1 &&RHS);
+ LLVM_ABI ~GsymReaderV1() override;
+
+ /// Construct a GsymReaderV1 from a file on disk.
+ ///
+ /// \param Path The file path of the GSYM file to read.
+ /// \returns An expected GsymReaderV1 that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ LLVM_ABI static llvm::Expected<GsymReaderV1> openFile(StringRef Path);
+
+ /// Construct a GsymReaderV1 from a buffer.
+ ///
+ /// \param Bytes A set of bytes that will be copied and owned by the
+ /// returned object on success.
+ /// \returns An expected GsymReaderV1 that contains the object or an error
+ /// object that indicates reason for failing to read the GSYM.
+ LLVM_ABI static llvm::Expected<GsymReaderV1> copyBuffer(StringRef Bytes);
+
+ /// Access the GSYM header.
+ /// \returns A native endian version of the GSYM header.
+ LLVM_ABI const Header &getHeader() const;
+
+ LLVM_ABI llvm::Expected<FunctionInfo>
+ getFunctionInfo(uint64_t Addr) const override;
+
+ LLVM_ABI llvm::Expected<FunctionInfo>
+ getFunctionInfoAtIndex(uint64_t AddrIdx) const override;
+
+ LLVM_ABI llvm::Expected<LookupResult>
+ lookup(uint64_t Addr,
+ std::optional<DataExtractor> *MergedFuncsData = nullptr) const override;
+
+ LLVM_ABI llvm::Expected<std::vector<LookupResult>>
+ lookupAll(uint64_t Addr) const override;
+
+ StringRef getString(uint32_t Offset) const override { return StrTab[Offset]; }
+
+ std::optional<FileEntry> getFile(uint32_t Index) const override {
+ if (Index < Files.size())
+ return Files[Index];
+ return std::nullopt;
+ }
+
+ LLVM_ABI void dump(raw_ostream &OS) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
+ uint32_t Indent = 0) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent = 0) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, const LineTable <,
+ uint32_t Indent = 0) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
+ uint32_t Indent = 0) override;
+
+ LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE) override;
+
+ uint32_t getNumAddresses() const override {
+ return Hdr->NumAddresses;
+ }
+
+ LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const override;
+
+protected:
+
+ /// Get the address offset table viewed as an array of \a T.
+ ///
+ /// AddrOffsets holds the raw bytes of the address offset table. This helper
+ /// reinterprets those bytes in place as elements of type \a T, so \a T must
+ /// match the width of the offsets stored in the table.
+ ///
+ /// \returns An ArrayRef<T> over the AddrOffsets storage that contains
+ /// AddrOffsets.size() / sizeof(T) elements.
+ template <class T> ArrayRef<T>
+ getAddrOffsets() const {
+ return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
+ AddrOffsets.size()/sizeof(T));
+ }
+
+ /// Get a virtual address from the address offset table.
+ ///
+ /// Reads the offset stored at \a Index, using \a T as the offset width, and
+ /// adds it to the BaseAddress from the header.
+ ///
+ /// \param Index An index into the address offset table.
+ /// \returns The resolved address, or std::nullopt if \a Index is out of
+ /// bounds.
+ template <class T>
+ std::optional<uint64_t> addressForIndex(size_t Index) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ if (Index < AIO.size())
+ return AIO[Index] + Hdr->BaseAddress;
+ return std::nullopt;
+ }
+
+ /// Look up an address offset in the address offset table.
+ ///
+ /// Binary searches the table for \a AddrOffset. When there is no exact
+ /// match the closest preceding entry is selected, and when several
+ /// consecutive entries share the same offset the first of them is selected.
+ ///
+ /// \param AddrOffset An address offset, that has already been computed by
+ /// subtracting the header's BaseAddress.
+ /// \returns The index of the selected entry, or std::nullopt if the table
+ /// is empty or \a AddrOffset is smaller than the first entry.
+ template <class T>
+ std::optional<uint64_t>
+ getAddressOffsetIndex(const uint64_t AddrOffset) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ const auto Begin = AIO.begin();
+ const auto End = AIO.end();
+ // An empty table cannot contain a match. Return before *Begin is read
+ // below, because dereferencing Begin when Begin == End is undefined.
+ if (Begin == End)
+ return std::nullopt;
+ auto Iter = std::lower_bound(Begin, End, AddrOffset);
+ // Watch for addresses that fall between the base address and the first
+ // address offset.
+ if (Iter == Begin && AddrOffset < *Begin)
+ return std::nullopt;
+ // lower_bound returns the first entry that is >= AddrOffset; step back to
+ // the preceding entry when that is not an exact match.
+ if (Iter == End || AddrOffset < *Iter)
+ --Iter;
+
+ // Entries with equal offsets are adjacent; always back up to the first of
+ // them so the earliest entry for this offset is returned.
+ while (Iter != Begin) {
+ auto Prev = Iter - 1;
+ if (*Prev == *Iter)
+ Iter = Prev;
+ else
+ break;
+ }
+
+ return std::distance(Begin, Iter);
+ }
+
+ LLVM_ABI static llvm::Expected<llvm::gsym::GsymReaderV1>
+ create(std::unique_ptr<MemoryBuffer> &MemBuffer);
+
+ LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
+
+ LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
+
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
+
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
+};
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_GSYM_GSYMREADERV1_H
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
index 856f8032270b6..5ce96bfe45f59 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -43,7 +43,7 @@ namespace gsym {
/// GsymReaderV2 objects must use one of the static functions to create an
/// instance: GsymReaderV2::openFile(...) and GsymReaderV2::copyBuffer(...).
-class GsymReaderV2 : public GsymReaderBase {
+class GsymReaderV2 : public GsymReader {
GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
diff --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
index 055c8b5d29c0a..03b0a8da35a67 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -22,7 +22,7 @@ class raw_ostream;
namespace gsym {
-class GsymReaderBase;
+class GsymReader;
/// Inline information stores the name of the inline function along with
/// an array of address ranges. It also stores the call file and call line
/// that called this inline function. This allows us to unwind inline call
@@ -118,7 +118,7 @@ struct InlineInfo {
/// \returns An error if the inline information is corrupt, or
/// Error::success() for all other cases, even when no information
/// is added to \a SrcLocs.
- LLVM_ABI static llvm::Error lookup(const GsymReaderBase &GR, DataExtractor &Data,
+ LLVM_ABI static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data,
uint64_t BaseAddr, uint64_t Addr,
SourceLocations &SrcLocs);
diff --git a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
index ecf908ec69183..048db33229a8f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/MergedFunctionsInfo.h
@@ -20,7 +20,7 @@ class raw_ostream;
namespace gsym {
-class GsymReaderBase;
+class GsymReader;
struct FunctionInfo;
struct MergedFunctionsInfo {
std::vector<FunctionInfo> MergedFunctions;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h b/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h
index f08a29cb8b72f..31fdf8da9bea0 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/ObjectFileTransformer.h
@@ -20,12 +20,12 @@ class ObjectFile;
namespace gsym {
-class GsymCreatorBase;
+class GsymCreator;
class OutputAggregator;
class ObjectFileTransformer {
public:
- /// Extract any object file data that is needed by the GsymCreatorBase.
+ /// Extract any object file data that is needed by the GsymCreator.
///
/// The extracted information includes the UUID of the binary and converting
/// all function symbols from any symbol tables into FunctionInfo objects.
@@ -42,7 +42,7 @@ class ObjectFileTransformer {
/// the DWARF, or Error::success() if all goes well.
LLVM_ABI static llvm::Error convert(const object::ObjectFile &Obj,
OutputAggregator &Output,
- GsymCreatorBase &Gsym);
+ GsymCreator &Gsym);
};
} // namespace gsym
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index fd3d6d581123c..fcf0f8641e0aa 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -5,9 +5,11 @@ add_llvm_component_library(LLVMDebugInfoGSYM
FileWriter.cpp
FunctionInfo.cpp
GsymCreator.cpp
+ GsymCreatorV1.cpp
GsymCreatorV2.cpp
GsymContext.cpp
GsymReader.cpp
+ GsymReaderV1.cpp
GsymReaderV2.cpp
InlineInfo.cpp
LineTable.cpp
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 74a06ae221b8c..2145f23570b35 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -64,7 +64,7 @@ struct llvm::gsym::CUInfo {
/// the first client that asks for a compile unit file index will end up
/// doing the conversion, and subsequent clients will get the cached GSYM
/// index.
- std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreatorBase &Gsym,
+ std::optional<uint32_t> DWARFToGSYMFileIndex(GsymCreator &Gsym,
uint32_t DwarfFileIdx) {
if (!LineTable || DwarfFileIdx >= FileCache.size())
return std::nullopt;
@@ -121,15 +121,15 @@ static DWARFDie GetParentDeclContextDIE(DWARFDie &Die) {
return DWARFDie();
}
-/// Get the GsymCreatorBase string table offset for the qualified name for the
+/// Get the GsymCreator string table offset for the qualified name for the
/// DIE passed in. This function will avoid making copies of any strings in
-/// the GsymCreatorBase when possible. We don't need to copy a string when the
+/// the GsymCreator when possible. We don't need to copy a string when the
/// string comes from our .debug_str section or is an inlined string in the
/// .debug_info. If we create a qualified name string in this function by
/// combining multiple strings in the DWARF string table or info, we will make
/// a copy of the string when we add it to the string table.
static std::optional<uint32_t>
-getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreatorBase &Gsym) {
+getQualifiedNameIndex(DWARFDie &Die, uint64_t Language, GsymCreator &Gsym) {
// If the dwarf has mangled name, use mangled name
if (auto LinkageName = Die.getLinkageName()) {
// We have seen cases were linkage name is actually empty.
@@ -214,7 +214,7 @@ ConvertDWARFRanges(const DWARFAddressRangesVector &DwarfRanges) {
return Ranges;
}
-static void parseInlineInfo(GsymCreatorBase &Gsym, OutputAggregator &Out,
+static void parseInlineInfo(GsymCreator &Gsym, OutputAggregator &Out,
CUInfo &CUI, DWARFDie Die, uint32_t Depth,
FunctionInfo &FI, InlineInfo &Parent,
const AddressRanges &AllParentRanges,
@@ -308,7 +308,7 @@ static void parseInlineInfo(GsymCreatorBase &Gsym, OutputAggregator &Out,
}
static void convertFunctionLineTable(OutputAggregator &Out, CUInfo &CUI,
- DWARFDie Die, GsymCreatorBase &Gsym,
+ DWARFDie Die, GsymCreator &Gsym,
FunctionInfo &FI) {
std::vector<uint32_t> RowVector;
const uint64_t StartAddress = FI.startAddress();
@@ -735,7 +735,7 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath,
OutputAggregator &Out) {
Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
- auto GsymOrErr = GsymReaderBase::openFile(GsymPath);
+ auto GsymOrErr = GsymReader::openFile(GsymPath);
if (!GsymOrErr)
return GsymOrErr.takeError();
auto &Gsym = *GsymOrErr;
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 9c8d7ddf5e511..b6dcaeb323f59 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -236,7 +236,7 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out,
}
llvm::Expected<LookupResult>
-FunctionInfo::lookup(DataExtractor &Data, const GsymReaderBase &GR,
+FunctionInfo::lookup(DataExtractor &Data, const GsymReader &GR,
uint64_t FuncAddr, uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData) {
LookupResult LR;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
index 1d3fe805719cd..62b4caa327d87 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymContext.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
using namespace llvm::gsym;
GsymContext::~GsymContext() = default;
-GsymContext::GsymContext(std::unique_ptr<GsymReaderBase> Reader)
+GsymContext::GsymContext(std::unique_ptr<GsymReader> Reader)
: DIContext(CK_GSYM), Reader(std::move(Reader)) {}
void GsymContext::dump(raw_ostream &OS, DIDumpOptions DumpOpts) {}
@@ -119,7 +119,7 @@ GsymContext::getLineInfoForAddressRange(object::SectionedAddress Address,
for (const auto &LineEntry : LT) {
if (StartAddr <= LineEntry.Addr && LineEntry.Addr < EndAddr) {
// Use LineEntry.Addr, LineEntry.File (which is a file index into the
- // files tables from the GsymReaderBase), and LineEntry.Line (source line
+ // files tables from the GsymReader), and LineEntry.Line (source line
// number) to add stuff to the DILineInfoTable
}
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index f26e69fda2540..d6a249ba58b3b 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -6,622 +6,3 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
-#include "llvm/DebugInfo/GSYM/FileWriter.h"
-#include "llvm/DebugInfo/GSYM/Header.h"
-#include "llvm/DebugInfo/GSYM/LineTable.h"
-#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/raw_ostream.h"
-
-#include <algorithm>
-#include <cassert>
-#include <functional>
-#include <vector>
-
-using namespace llvm;
-using namespace gsym;
-
-GsymCreator::GsymCreator(bool Quiet)
- : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
- insertFile(StringRef());
-}
-
-uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
- llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
- llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
- // We must insert the strings first, then call the FileEntry constructor.
- // If we inline the insertString() function call into the constructor, the
- // call order is undefined due to parameter lists not having any ordering
- // requirements.
- const uint32_t Dir = insertString(directory);
- const uint32_t Base = insertString(filename);
- return insertFileEntry(FileEntry(Dir, Base));
-}
-
-uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
- std::lock_guard<std::mutex> Guard(Mutex);
- const auto NextIndex = Files.size();
- // Find FE in hash map and insert if not present.
- auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
- if (R.second)
- Files.emplace_back(FE);
- return R.first->second;
-}
-
-uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
- // File index zero is reserved for a FileEntry with no directory and no
- // filename. Any other file and we need to copy the strings for the directory
- // and filename.
- if (FileIdx == 0)
- return 0;
- const FileEntry SrcFE = SrcGC.Files[FileIdx];
- // Copy the strings for the file and then add the newly converted file entry.
- uint32_t Dir =
- SrcFE.Dir == 0
- ? 0
- : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
- uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
- FileEntry DstFE(Dir, Base);
- return insertFileEntry(DstFE);
-}
-
-llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize) const {
- if (SegmentSize)
- return saveSegments(Path, ByteOrder, *SegmentSize);
- std::error_code EC;
- raw_fd_ostream OutStrm(Path, EC);
- if (EC)
- return llvm::errorCodeToError(EC);
- FileWriter O(OutStrm, ByteOrder);
- return encode(O);
-}
-
-llvm::Error GsymCreator::encode(FileWriter &O) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Funcs.empty())
- return createStringError(std::errc::invalid_argument,
- "no functions to encode");
- if (!Finalized)
- return createStringError(std::errc::invalid_argument,
- "GsymCreator wasn't finalized prior to encoding");
-
- if (Funcs.size() > UINT32_MAX)
- return createStringError(std::errc::invalid_argument,
- "too many FunctionInfos");
-
- std::optional<uint64_t> BaseAddress = getBaseAddress();
- // Base address should be valid if we have any functions.
- if (!BaseAddress)
- return createStringError(std::errc::invalid_argument,
- "invalid base address");
- Header Hdr;
- Hdr.Magic = GSYM_MAGIC;
- Hdr.Version = GSYM_VERSION;
- Hdr.AddrOffSize = getAddressOffsetSize();
- Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
- Hdr.BaseAddress = *BaseAddress;
- Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
- Hdr.StrtabOffset = 0; // We will fix this up later.
- Hdr.StrtabSize = 0; // We will fix this up later.
- memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
- if (UUID.size() > sizeof(Hdr.UUID))
- return createStringError(std::errc::invalid_argument,
- "invalid UUID size %u", (uint32_t)UUID.size());
- // Copy the UUID value if we have one.
- if (UUID.size() > 0)
- memcpy(Hdr.UUID, UUID.data(), UUID.size());
- // Write out the header.
- llvm::Error Err = Hdr.encode(O);
- if (Err)
- return Err;
-
- const uint64_t MaxAddressOffset = getMaxAddressOffset();
- // Write out the address offsets.
- O.alignTo(Hdr.AddrOffSize);
- for (const auto &FuncInfo : Funcs) {
- uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
- // Make sure we calculated the address offsets byte size correctly by
- // verifying the current address offset is within ranges. We have seen bugs
- // introduced when the code changes that can cause problems here so it is
- // good to catch this during testing.
- assert(AddrOffset <= MaxAddressOffset);
- (void)MaxAddressOffset;
- switch (Hdr.AddrOffSize) {
- case 1:
- O.writeU8(static_cast<uint8_t>(AddrOffset));
- break;
- case 2:
- O.writeU16(static_cast<uint16_t>(AddrOffset));
- break;
- case 4:
- O.writeU32(static_cast<uint32_t>(AddrOffset));
- break;
- case 8:
- O.writeU64(AddrOffset);
- break;
- }
- }
-
- // Write out all zeros for the AddrInfoOffsets.
- O.alignTo(4);
- const off_t AddrInfoOffsetsOffset = O.tell();
- for (size_t i = 0, n = Funcs.size(); i < n; ++i)
- O.writeU32(0);
-
- // Write out the file table
- O.alignTo(4);
- assert(!Files.empty());
- assert(Files[0].Dir == 0);
- assert(Files[0].Base == 0);
- size_t NumFiles = Files.size();
- if (NumFiles > UINT32_MAX)
- return createStringError(std::errc::invalid_argument, "too many files");
- O.writeU32(static_cast<uint32_t>(NumFiles));
- for (auto File : Files) {
- O.writeU32(File.Dir);
- O.writeU32(File.Base);
- }
-
- // Write out the string table.
- const off_t StrtabOffset = O.tell();
- StrTab.write(O.get_stream());
- const off_t StrtabSize = O.tell() - StrtabOffset;
- std::vector<uint32_t> AddrInfoOffsets;
-
- // Verify that the size of the string table does not exceed 32-bit max.
- // This means the offsets in the string table will not exceed 32-bit max.
- if (StrtabSize > UINT32_MAX) {
- return createStringError(std::errc::invalid_argument,
- "string table size exceeded 32-bit max");
- }
-
- // Write out the address infos for each function info.
- for (const auto &FuncInfo : Funcs) {
- if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) {
- // Verify that the address info offsets do not exceed 32-bit max.
- uint64_t Offset = OffsetOrErr.get();
- if (Offset > UINT32_MAX) {
- return createStringError(std::errc::invalid_argument,
- "address info offset exceeded 32-bit max");
- }
-
- AddrInfoOffsets.push_back(Offset);
- } else
- return OffsetOrErr.takeError();
- }
- // Fixup the string table offset and size in the header
- O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
- O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
-
- // Fixup all address info offsets
- uint64_t Offset = 0;
- for (auto AddrInfoOffset : AddrInfoOffsets) {
- O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
- Offset += 4;
- }
- return ErrorSuccess();
-}
-
-llvm::Error GsymCreator::loadCallSitesFromYAML(StringRef YAMLFile) {
- // Use the loader to load call site information from the YAML file.
- CallSiteInfoLoader Loader(*this, Funcs);
- return Loader.loadYAML(YAMLFile);
-}
-
-void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
- // Nothing to do if we have less than 2 functions.
- if (Funcs.size() < 2)
- return;
-
- // Sort the function infos by address range first, preserving input order
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> TopLevelFuncs;
-
- // Add the first function info to the top level functions
- TopLevelFuncs.emplace_back(std::move(Funcs.front()));
-
- // Now if the next function info has the same address range as the top level,
- // then merge it into the top level function, otherwise add it to the top
- // level.
- for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
- FunctionInfo &TopFunc = TopLevelFuncs.back();
- FunctionInfo &MatchFunc = Funcs[Idx];
- if (TopFunc.Range == MatchFunc.Range) {
- // Both have the same range - add the 2nd func as a child of the 1st func
- if (!TopFunc.MergedFunctions)
- TopFunc.MergedFunctions = MergedFunctionsInfo();
- // Avoid adding duplicate functions to MergedFunctions. Since functions
- // are already ordered within the Funcs array, we can just check equality
- // against the last function in the merged array.
- else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
- continue;
- TopFunc.MergedFunctions->MergedFunctions.emplace_back(
- std::move(MatchFunc));
- } else
- // No match, add the function as a top-level function
- TopLevelFuncs.emplace_back(std::move(MatchFunc));
- }
-
- uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
- // If any functions were merged, print a message about it.
- if (mergedCount != 0)
- Out << "Have " << mergedCount
- << " merged functions as children of other functions\n";
-
- std::swap(Funcs, TopLevelFuncs);
-}
-
-llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Finalized)
- return createStringError(std::errc::invalid_argument, "already finalized");
- Finalized = true;
-
- // Don't let the string table indexes change by finalizing in order.
- StrTab.finalizeInOrder();
-
- // Remove duplicates function infos that have both entries from debug info
- // (DWARF or Breakpad) and entries from the SymbolTable.
- //
- // Also handle overlapping function. Usually there shouldn't be any, but they
- // can and do happen in some rare cases.
- //
- // (a) (b) (c)
- // ^ ^ ^ ^
- // |X |Y |X ^ |X
- // | | | |Y | ^
- // | | | v v |Y
- // v v v v
- //
- // In (a) and (b), Y is ignored and X will be reported for the full range.
- // In (c), both functions will be included in the result and lookups for an
- // address in the intersection will return Y because of binary search.
- //
- // Note that in case of (b), we cannot include Y in the result because then
- // we wouldn't find any function for range (end of Y, end of X)
- // with binary search
-
- const auto NumBefore = Funcs.size();
- // Only sort and unique if this isn't a segment. If this is a segment we
- // already finalized the main GsymCreator with all of the function infos
- // and then the already sorted and uniqued function infos were added to this
- // object.
- if (!IsSegment) {
- if (NumBefore > 1) {
- // Sort function infos so we can emit sorted functions. Use stable sort to
- // ensure determinism.
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> FinalizedFuncs;
- FinalizedFuncs.reserve(Funcs.size());
- FinalizedFuncs.emplace_back(std::move(Funcs.front()));
- for (size_t Idx=1; Idx < NumBefore; ++Idx) {
- FunctionInfo &Prev = FinalizedFuncs.back();
- FunctionInfo &Curr = Funcs[Idx];
- // Empty ranges won't intersect, but we still need to
- // catch the case where we have multiple symbols at the
- // same address and coalesce them.
- const bool ranges_equal = Prev.Range == Curr.Range;
- if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
- // Overlapping ranges or empty identical ranges.
- if (ranges_equal) {
- // Same address range. Check if one is from debug
- // info and the other is from a symbol table. If
- // so, then keep the one with debug info. Our
- // sorting guarantees that entries with matching
- // address ranges that have debug info are last in
- // the sort.
- if (!(Prev == Curr)) {
- if (Prev.hasRichInfo() && Curr.hasRichInfo())
- Out.Report(
- "Duplicate address ranges with different debug info.",
- [&](raw_ostream &OS) {
- OS << "warning: same address range contains "
- "different debug "
- << "info. Removing:\n"
- << Prev << "\nIn favor of this one:\n"
- << Curr << "\n";
- });
-
- // We want to swap the current entry with the previous since
- // later entries with the same range always have more debug info
- // or different debug info.
- std::swap(Prev, Curr);
- }
- } else {
- Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
- // print warnings about overlaps
- OS << "warning: function ranges overlap:\n"
- << Prev << "\n"
- << Curr << "\n";
- });
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- } else {
- if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
- // Symbols on macOS don't have address ranges, so if the range
- // doesn't match and the size is zero, then we replace the empty
- // symbol function info with the current one.
- std::swap(Prev, Curr);
- } else {
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- }
- }
- std::swap(Funcs, FinalizedFuncs);
- }
- // If our last function info entry doesn't have a size and if we have valid
- // text ranges, we should set the size of the last entry since any search for
- // a high address might match our last entry. By fixing up this size, we can
- // help ensure we don't cause lookups to always return the last symbol that
- // has no size when doing lookups.
- if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
- if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
- Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
- }
- }
- Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
- << Funcs.size() << " total\n";
- }
- return Error::success();
-}
-
-uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
- // String offset at zero is always the empty string, no copying needed.
- if (StrOff == 0)
- return 0;
- return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
-}
-
-uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
- if (S.empty())
- return 0;
-
- // The hash can be calculated outside the lock.
- CachedHashStringRef CHStr(S);
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Copy) {
- // We need to provide backing storage for the string if requested
- // since StringTableBuilder stores references to strings. Any string
- // that comes from a section in an object file doesn't need to be
- // copied, but any string created by code will need to be copied.
- // This allows GsymCreator to be really fast when parsing DWARF and
- // other object files as most strings don't need to be copied.
- if (!StrTab.contains(CHStr))
- CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
- CHStr.hash()};
- }
- const uint32_t StrOff = StrTab.add(CHStr);
- // Save a mapping of string offsets to the cached string reference in case
- // we need to segment the GSYM file and copy string from one string table to
- // another.
- StringOffsetMap.try_emplace(StrOff, CHStr);
- return StrOff;
-}
-
-StringRef GsymCreator::getString(uint32_t Offset) {
- auto I = StringOffsetMap.find(Offset);
- assert(I != StringOffsetMap.end() &&
- "GsymCreator::getString expects a valid offset as parameter.");
- return I->second.val();
-}
-
-void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.emplace_back(std::move(FI));
-}
-
-void GsymCreator::forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
-}
-
-void GsymCreator::forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (const auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
-}
-
-size_t GsymCreator::getNumFunctionInfos() const {
- std::lock_guard<std::mutex> Guard(Mutex);
- return Funcs.size();
-}
-
-bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
- if (ValidTextRanges)
- return ValidTextRanges->contains(Addr);
- return true; // No valid text ranges has been set, so accept all ranges.
-}
-
-std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
- // If we have finalized then Funcs are sorted. If we are a segment then
- // Funcs will be sorted as well since function infos get added from an
- // already finalized GsymCreator object where its functions were sorted and
- // uniqued.
- if ((Finalized || IsSegment) && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.front().startAddress());
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
- // If we have finalized then Funcs are sorted. If we are a segment then
- // Funcs will be sorted as well since function infos get added from an
- // already finalized GsymCreator object where its functions were sorted and
- // uniqued.
- if ((Finalized || IsSegment) && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.back().startAddress());
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymCreator::getBaseAddress() const {
- if (BaseAddress)
- return BaseAddress;
- return getFirstFunctionAddress();
-}
-
-uint64_t GsymCreator::getMaxAddressOffset() const {
- switch (getAddressOffsetSize()) {
- case 1: return UINT8_MAX;
- case 2: return UINT16_MAX;
- case 4: return UINT32_MAX;
- case 8: return UINT64_MAX;
- }
- llvm_unreachable("invalid address offset");
-}
-
-uint8_t GsymCreator::getAddressOffsetSize() const {
- const std::optional<uint64_t> BaseAddress = getBaseAddress();
- const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
- if (BaseAddress && LastFuncAddr) {
- const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
- if (AddrDelta <= UINT8_MAX)
- return 1;
- else if (AddrDelta <= UINT16_MAX)
- return 2;
- else if (AddrDelta <= UINT32_MAX)
- return 4;
- return 8;
- }
- return 1;
-}
-
-uint64_t GsymCreator::calculateHeaderAndTableSize() const {
- uint64_t Size = sizeof(Header);
- const size_t NumFuncs = Funcs.size();
- // Add size of address offset table
- Size += NumFuncs * getAddressOffsetSize();
- // Add size of address info offsets which are 32 bit integers in version 1.
- Size += NumFuncs * sizeof(uint32_t);
- // Add file table size
- Size += Files.size() * sizeof(FileEntry);
- // Add string table size
- Size += StrTab.getSize();
-
- return Size;
-}
-
-// This function takes a InlineInfo class that was copy constructed from an
-// InlineInfo from the \a SrcGC and updates all members that point to strings
-// and files to point to strings and files from this GsymCreator.
-void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
- II.Name = copyString(SrcGC, II.Name);
- II.CallFile = copyFile(SrcGC, II.CallFile);
- for (auto &ChildII: II.Children)
- fixupInlineInfo(SrcGC, ChildII);
-}
-
-uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
- // To copy a function info we need to copy any files and strings over into
- // this GsymCreator and then copy the function info and update the string
- // table offsets to match the new offsets.
- const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
-
- FunctionInfo DstFI;
- DstFI.Range = SrcFI.Range;
- DstFI.Name = copyString(SrcGC, SrcFI.Name);
- // Copy the line table if there is one.
- if (SrcFI.OptLineTable) {
- // Copy the entire line table.
- DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
- // Fixup all LineEntry::File entries which are indexes in the the file table
- // from SrcGC and must be converted to file indexes from this GsymCreator.
- LineTable &DstLT = DstFI.OptLineTable.value();
- const size_t NumLines = DstLT.size();
- for (size_t I=0; I<NumLines; ++I) {
- LineEntry &LE = DstLT.get(I);
- LE.File = copyFile(SrcGC, LE.File);
- }
- }
- // Copy the inline information if needed.
- if (SrcFI.Inline) {
- // Make a copy of the source inline information.
- DstFI.Inline = SrcFI.Inline.value();
- // Fixup all strings and files in the copied inline information.
- fixupInlineInfo(SrcGC, *DstFI.Inline);
- }
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.emplace_back(DstFI);
- return Funcs.back().cacheEncoding();
-}
-
-llvm::Error GsymCreator::saveSegments(StringRef Path,
- llvm::endianness ByteOrder,
- uint64_t SegmentSize) const {
- if (SegmentSize == 0)
- return createStringError(std::errc::invalid_argument,
- "invalid segment size zero");
-
- size_t FuncIdx = 0;
- const size_t NumFuncs = Funcs.size();
- while (FuncIdx < NumFuncs) {
- llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
- createSegment(SegmentSize, FuncIdx);
- if (ExpectedGC) {
- GsymCreator *GC = ExpectedGC->get();
- if (!GC)
- break; // We had not more functions to encode.
- // Don't collect any messages at all
- OutputAggregator Out(nullptr);
- llvm::Error Err = GC->finalize(Out);
- if (Err)
- return Err;
- std::string SegmentedGsymPath;
- raw_string_ostream SGP(SegmentedGsymPath);
- std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
- if (FirstFuncAddr) {
- SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
- Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
- if (Err)
- return Err;
- }
- } else {
- return ExpectedGC.takeError();
- }
- }
- return Error::success();
-}
-
-llvm::Expected<std::unique_ptr<GsymCreator>>
-GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
- // No function entries, return empty unique pointer
- if (FuncIdx >= Funcs.size())
- return std::unique_ptr<GsymCreator>();
-
- std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
-
- // Tell the creator that this is a segment.
- GC->setIsSegment();
-
- // Set the base address if there is one.
- if (BaseAddress)
- GC->setBaseAddress(*BaseAddress);
- // Copy the UUID value from this object into the new creator.
- GC->setUUID(UUID);
- const size_t NumFuncs = Funcs.size();
- // Track how big the function infos are for the current segment so we can
- // emit segments that are close to the requested size. It is quick math to
- // determine the current header and tables sizes, so we can do that each loop.
- uint64_t SegmentFuncInfosSize = 0;
- for (; FuncIdx < NumFuncs; ++FuncIdx) {
- const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
- if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
- if (SegmentFuncInfosSize == 0)
- return createStringError(std::errc::invalid_argument,
- "a segment size of %" PRIu64 " is to small to "
- "fit any function infos, specify a larger value",
- SegmentSize);
-
- break;
- }
- SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
- }
- return std::move(GC);
-}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
new file mode 100644
index 0000000000000..a22d10b17a102
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
@@ -0,0 +1,494 @@
+//===- GsymCreatorV1.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+GsymCreatorV1::GsymCreatorV1(bool Quiet)
+ : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
+ insertFile(StringRef()); // Empty path becomes FileEntry(0, 0) at file index 0.
+}
+
+uint32_t GsymCreatorV1::insertFile(StringRef Path, llvm::sys::path::Style Style) {
+  // Intern the directory first and the basename second, then register the
+  // resulting (directory, basename) pair as a file entry.
+  const uint32_t DirOff = insertString(llvm::sys::path::parent_path(Path, Style));
+  const uint32_t NameOff = insertString(llvm::sys::path::filename(Path, Style));
+  return insertFileEntry(FileEntry(DirOff, NameOff));
+}
+
+uint32_t GsymCreatorV1::insertFileEntry(FileEntry FE) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // A new entry claims the next slot in Files; a known one keeps its index.
+  auto [Pos, Inserted] = FileEntryToIndex.insert(std::make_pair(FE, Files.size()));
+  if (Inserted)
+    Files.emplace_back(FE);
+  return Pos->second;
+}
+
+uint32_t GsymCreatorV1::copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx) {
+  // File index zero needs no translation between creators.
+  if (FileIdx == 0)
+    return 0;
+  const FileEntry SrcFE = SrcGC.Files[FileIdx];
+  // Re-add the source strings here; a zero directory offset stays zero.
+  uint32_t DirOff = 0;
+  if (SrcFE.Dir != 0)
+    DirOff = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
+  uint32_t BaseOff = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
+  return insertFileEntry(FileEntry(DirOff, BaseOff));
+}
+
+llvm::Error GsymCreatorV1::save(StringRef Path, llvm::endianness ByteOrder,
+                                std::optional<uint64_t> SegmentSize) const {
+  if (SegmentSize) // A segment size selects the multi-file segmented output.
+    return saveSegments(Path, ByteOrder, *SegmentSize);
+  std::error_code OpenErr;
+  raw_fd_ostream Stream(Path, OpenErr);
+  if (OpenErr)
+    return llvm::errorCodeToError(OpenErr);
+  FileWriter Writer(Stream, ByteOrder);
+  return encode(Writer);
+}
+
+/// Write the header, address offsets, address info offset placeholders, file
+/// table, string table and function infos to \p O, then patch the placeholders.
+llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  if (Funcs.empty())
+    return createStringError(std::errc::invalid_argument,
+                             "no functions to encode");
+  if (!Finalized)
+    return createStringError(std::errc::invalid_argument,
+                             "GsymCreator wasn't finalized prior to encoding");
+  if (Funcs.size() > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument,
+                             "too many FunctionInfos");
+  std::optional<uint64_t> BaseAddress = getBaseAddress();
+  if (!BaseAddress)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid base address");
+  Header Hdr;
+  Hdr.Magic = GSYM_MAGIC;
+  Hdr.Version = GSYM_VERSION;
+  Hdr.AddrOffSize = getAddressOffsetSize();
+  Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
+  Hdr.BaseAddress = *BaseAddress;
+  Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
+  Hdr.StrtabOffset = 0;
+  Hdr.StrtabSize = 0;
+  memset(Hdr.UUID, 0, sizeof(Hdr.UUID));
+  if (UUID.size() > sizeof(Hdr.UUID))
+    return createStringError(std::errc::invalid_argument,
+                             "invalid UUID size %u", static_cast<uint32_t>(UUID.size()));
+  if (UUID.size() > 0)
+    memcpy(Hdr.UUID, UUID.data(), UUID.size());
+  llvm::Error Err = Hdr.encode(O);
+  if (Err)
+    return Err;
+  // Address offset table: one AddrOffSize-wide offset from the base per function.
+  const uint64_t MaxAddressOffset = getMaxAddressOffset();
+  O.alignTo(Hdr.AddrOffSize);
+  for (const auto &FuncInfo : Funcs) {
+    uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
+    assert(AddrOffset <= MaxAddressOffset);
+    (void)MaxAddressOffset;
+    switch (Hdr.AddrOffSize) {
+    case 1:
+      O.writeU8(static_cast<uint8_t>(AddrOffset));
+      break;
+    case 2:
+      O.writeU16(static_cast<uint16_t>(AddrOffset));
+      break;
+    case 4:
+      O.writeU32(static_cast<uint32_t>(AddrOffset));
+      break;
+    case 8:
+      O.writeU64(AddrOffset);
+      break;
+    }
+  }
+  // Address info offsets: zero placeholders now, real values patched in below.
+  O.alignTo(4);
+  const off_t AddrInfoOffsetsOffset = O.tell();
+  for (size_t i = 0, n = Funcs.size(); i < n; ++i)
+    O.writeU32(0);
+  // File table: entry count, then a (Dir, Base) string offset pair per file.
+  O.alignTo(4);
+  assert(!Files.empty());
+  assert(Files[0].Dir == 0);
+  assert(Files[0].Base == 0);
+  size_t NumFiles = Files.size();
+  if (NumFiles > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument, "too many files");
+  O.writeU32(static_cast<uint32_t>(NumFiles));
+  for (const auto &File : Files) {
+    O.writeU32(File.Dir);
+    O.writeU32(File.Base);
+  }
+  // String table; the header stores its offset and size as 32-bit values.
+  const off_t StrtabOffset = O.tell();
+  StrTab.write(O.get_stream());
+  const off_t StrtabSize = O.tell() - StrtabOffset;
+  if (StrtabOffset > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument,
+                             "string table offset exceeded 32-bit max");
+  if (StrtabSize > UINT32_MAX)
+    return createStringError(std::errc::invalid_argument,
+                             "string table size exceeded 32-bit max");
+  // Function infos; record the offset reported for each one.
+  std::vector<uint32_t> AddrInfoOffsets;
+  AddrInfoOffsets.reserve(Funcs.size());
+  for (const auto &FuncInfo : Funcs) {
+    if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) {
+      uint64_t Offset = OffsetOrErr.get();
+      if (Offset > UINT32_MAX)
+        return createStringError(std::errc::invalid_argument,
+                                 "address info offset exceeded 32-bit max");
+      AddrInfoOffsets.push_back(static_cast<uint32_t>(Offset));
+    } else
+      return OffsetOrErr.takeError();
+  }
+  // Patch the string table location into the already-written header.
+  O.fixup32(static_cast<uint32_t>(StrtabOffset), offsetof(Header, StrtabOffset));
+  O.fixup32(static_cast<uint32_t>(StrtabSize), offsetof(Header, StrtabSize));
+  uint64_t Offset = 0;
+  for (auto AddrInfoOffset : AddrInfoOffsets) {
+    O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
+    Offset += 4;
+  }
+  return Error::success();
+}
+
+llvm::Error GsymCreatorV1::loadCallSitesFromYAML(StringRef YAMLFile) {
+  // Delegate to a loader bound to this creator and its function list.
+  return CallSiteInfoLoader(*this, Funcs).loadYAML(YAMLFile);
+}
+
+void GsymCreatorV1::prepareMergedFunctions(OutputAggregator &Out) {
+ if (Funcs.size() < 2)
+ return;
+ // The loop below only compares neighbouring entries, so sort first.
+ llvm::stable_sort(Funcs);
+ std::vector<FunctionInfo> TopLevelFuncs;
+ // Seed the result with the first function; the loop starts at index 1.
+ TopLevelFuncs.emplace_back(std::move(Funcs.front()));
+ // Attach each function whose range equals the current top-level range to it.
+ for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
+ FunctionInfo &TopFunc = TopLevelFuncs.back();
+ FunctionInfo &MatchFunc = Funcs[Idx];
+ if (TopFunc.Range == MatchFunc.Range) {
+ if (!TopFunc.MergedFunctions)
+ TopFunc.MergedFunctions = MergedFunctionsInfo();
+ else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
+ continue; // Equal to the most recently merged entry: skip it.
+ TopFunc.MergedFunctions->MergedFunctions.emplace_back(
+ std::move(MatchFunc));
+ } else
+ TopLevelFuncs.emplace_back(std::move(MatchFunc));
+ }
+ // Count every function that did not stay at the top level.
+ uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
+ if (mergedCount != 0)
+ Out << "Have " << mergedCount
+ << " merged functions as children of other functions\n";
+ // Replace the function list with the top-level functions.
+ std::swap(Funcs, TopLevelFuncs);
+}
+
+llvm::Error GsymCreatorV1::finalize(OutputAggregator &Out) {
+ std::lock_guard<std::mutex> Guard(Mutex);
+ if (Finalized)
+ return createStringError(std::errc::invalid_argument, "already finalized");
+ Finalized = true;
+ // NOTE(review): presumably keeps offsets already returned by StrTab.add() valid - confirm.
+ StrTab.finalizeInOrder();
+ // Only non-segments are sorted and pruned below; segments are left untouched.
+ const auto NumBefore = Funcs.size();
+ if (!IsSegment) {
+ if (NumBefore > 1) {
+ llvm::stable_sort(Funcs);
+ std::vector<FunctionInfo> FinalizedFuncs;
+ FinalizedFuncs.reserve(Funcs.size());
+ FinalizedFuncs.emplace_back(std::move(Funcs.front()));
+ for (size_t Idx=1; Idx < NumBefore; ++Idx) {
+ FunctionInfo &Prev = FinalizedFuncs.back();
+ FunctionInfo &Curr = Funcs[Idx];
+ const bool ranges_equal = Prev.Range == Curr.Range;
+ if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
+ if (ranges_equal) {
+ if (!(Prev == Curr)) {
+ if (Prev.hasRichInfo() && Curr.hasRichInfo())
+ Out.Report(
+ "Duplicate address ranges with different debug info.",
+ [&](raw_ostream &OS) {
+ OS << "warning: same address range contains "
+ "different debug "
+ << "info. Removing:\n"
+ << Prev << "\nIn favor of this one:\n"
+ << Curr << "\n";
+ });
+ // Prev is the slot kept in FinalizedFuncs: put Curr there instead.
+ std::swap(Prev, Curr);
+ }
+ } else {
+ Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
+ OS << "warning: function ranges overlap:\n"
+ << Prev << "\n"
+ << Curr << "\n";
+ });
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ } else {
+ if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
+ std::swap(Prev, Curr); // Zero-sized Prev lies inside Curr: keep Curr instead.
+ } else {
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ }
+ }
+ std::swap(Funcs, FinalizedFuncs);
+ }
+ if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
+ if (auto Range =
+ ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
+ Funcs.back().Range = {Funcs.back().Range.start(), Range->end()}; // Extend to the end of that text range.
+ }
+ }
+ Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
+ << Funcs.size() << " total\n";
+ }
+ return Error::success();
+}
+
+uint32_t GsymCreatorV1::copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff) {
+  // Offset zero maps to zero; other strings are re-added to our own table.
+  const bool IsZero = StrOff == 0;
+  return IsZero ? 0 : StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
+}
+
+uint32_t GsymCreatorV1::insertString(StringRef S, bool Copy) {
+  if (S.empty())
+    return 0;
+  // Build the hashed key before taking the lock.
+  CachedHashStringRef Key(S);
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // When asked to copy, repoint the key at storage owned by this creator,
+  // unless the string table already contains the string.
+  if (Copy && !StrTab.contains(Key))
+    Key = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
+                              Key.hash()};
+  const uint32_t Offset = StrTab.add(Key);
+  StringOffsetMap.try_emplace(Offset, Key);
+  return Offset;
+}
+
+StringRef GsymCreatorV1::getString(uint32_t Offset) {
+  const auto Found = StringOffsetMap.find(Offset);
+  assert(Found != StringOffsetMap.end() &&
+         "GsymCreatorV1::getString expects a valid offset as parameter.");
+  return Found->second.val();
+}
+
+void GsymCreatorV1::addFunctionInfo(FunctionInfo &&FI) {
+ std::lock_guard<std::mutex> Guard(Mutex);
+ Funcs.emplace_back(std::move(FI)); // Appended in call order; finalize() sorts non-segments.
+}
+
+void GsymCreatorV1::forEachFunctionInfo(
+    std::function<bool(FunctionInfo &)> const &Callback) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // Visit in storage order until the callback asks to stop by returning false.
+  for (auto It = Funcs.begin(), End = Funcs.end(); It != End; ++It)
+    if (!Callback(*It))
+      return;
+}
+
+void GsymCreatorV1::forEachFunctionInfo(
+    std::function<bool(const FunctionInfo &)> const &Callback) const {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // Read-only walk over Funcs that ends early when the callback returns false.
+  for (auto It = Funcs.cbegin(), End = Funcs.cend(); It != End; ++It)
+    if (!Callback(*It))
+      return;
+}
+
+size_t GsymCreatorV1::getNumFunctionInfos() const {
+ std::lock_guard<std::mutex> Guard(Mutex); // Same mutex that addFunctionInfo() holds while appending.
+ return Funcs.size();
+}
+
+bool GsymCreatorV1::IsValidTextAddress(uint64_t Addr) const {
+  // With no text ranges configured every address is accepted; otherwise the
+  // address must fall inside one of the configured ranges.
+  return !ValidTextRanges || ValidTextRanges->contains(Addr);
+}
+
+std::optional<uint64_t> GsymCreatorV1::getFirstFunctionAddress() const {
+  if (Funcs.empty() || !(Finalized || IsSegment))
+    return std::nullopt;
+  return Funcs.front().startAddress();
+}
+
+std::optional<uint64_t> GsymCreatorV1::getLastFunctionAddress() const {
+  if (Funcs.empty() || !(Finalized || IsSegment))
+    return std::nullopt;
+  return Funcs.back().startAddress();
+}
+
+std::optional<uint64_t> GsymCreatorV1::getBaseAddress() const {
+  // An explicitly set base address wins; otherwise fall back to the first
+  // function address, which may itself be unavailable.
+  return BaseAddress ? BaseAddress : getFirstFunctionAddress();
+}
+
+uint64_t GsymCreatorV1::getMaxAddressOffset() const {
+  // Largest value representable in the current address offset width.
+  const uint8_t Width = getAddressOffsetSize();
+  if (Width == 1) return UINT8_MAX;
+  if (Width == 2) return UINT16_MAX;
+  if (Width == 4) return UINT32_MAX;
+  if (Width == 8) return UINT64_MAX;
+  llvm_unreachable("invalid address offset");
+}
+
+uint8_t GsymCreatorV1::getAddressOffsetSize() const {
+  const std::optional<uint64_t> Base = getBaseAddress();
+  const std::optional<uint64_t> Last = getLastFunctionAddress();
+  // Without both a base address and a last function address there is
+  // nothing to measure, so report the smallest width.
+  if (!Base || !Last)
+    return 1;
+  // Narrowest width that holds the last function's offset from the base.
+  const uint64_t Span = *Last - *Base;
+  if (Span <= UINT8_MAX)
+    return 1;
+  if (Span <= UINT16_MAX)
+    return 2;
+  return Span <= UINT32_MAX ? 4 : 8;
+}
+
+uint64_t GsymCreatorV1::calculateHeaderAndTableSize() const {
+  const size_t FuncCount = Funcs.size();
+  // Header + per-function address and info offsets + file entries + strings.
+  uint64_t Total = sizeof(Header);
+  Total += FuncCount * getAddressOffsetSize();
+  Total += FuncCount * sizeof(uint32_t);
+  Total += Files.size() * sizeof(FileEntry);
+  Total += StrTab.getSize();
+  return Total;
+}
+
+void GsymCreatorV1::fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II) {
+  II.Name = copyString(SrcGC, II.Name);
+  II.CallFile = copyFile(SrcGC, II.CallFile);
+  for (auto &Nested : II.Children) // Apply the same translation to the whole subtree.
+    fixupInlineInfo(SrcGC, Nested);
+}
+
+uint64_t GsymCreatorV1::copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncIdx) {
+  const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
+  // Build a copy whose string and file indexes refer to this creator's tables.
+  FunctionInfo DstFI;
+  DstFI.Range = SrcFI.Range;
+  DstFI.Name = copyString(SrcGC, SrcFI.Name);
+  if (SrcFI.OptLineTable) {
+    DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
+    LineTable &DstLT = DstFI.OptLineTable.value();
+    const size_t NumLines = DstLT.size();
+    for (size_t I = 0; I < NumLines; ++I) {
+      LineEntry &LE = DstLT.get(I);
+      LE.File = copyFile(SrcGC, LE.File);
+    }
+  }
+  if (SrcFI.Inline) {
+    DstFI.Inline = SrcFI.Inline.value();
+    fixupInlineInfo(SrcGC, *DstFI.Inline);
+  }
+  std::lock_guard<std::mutex> Guard(Mutex);
+  Funcs.emplace_back(std::move(DstFI)); // Last use of DstFI: move it, don't copy.
+  return Funcs.back().cacheEncoding();
+}
+
+/// Split the functions into size-limited segments and save each to its own file.
+llvm::Error GsymCreatorV1::saveSegments(StringRef Path,
+                                        llvm::endianness ByteOrder,
+                                        uint64_t SegmentSize) const {
+  if (SegmentSize == 0)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid segment size zero");
+
+  const size_t NumFuncs = Funcs.size();
+  // createSegment() advances FuncIdx past the functions it consumed.
+  for (size_t FuncIdx = 0; FuncIdx < NumFuncs;) {
+    llvm::Expected<std::unique_ptr<GsymCreatorV1>> SegmentOrErr =
+        createSegment(SegmentSize, FuncIdx);
+    if (!SegmentOrErr)
+      return SegmentOrErr.takeError();
+    GsymCreatorV1 *Segment = SegmentOrErr->get();
+    if (!Segment)
+      break;
+    // Finalize with an aggregator constructed from a null stream.
+    OutputAggregator Out(nullptr);
+    if (llvm::Error Err = Segment->finalize(Out))
+      return Err;
+    // A segment without a first function address is not written out.
+    std::optional<uint64_t> FirstFuncAddr = Segment->getFirstFunctionAddress();
+    if (!FirstFuncAddr)
+      continue;
+    // Each segment file is named "<Path>-<first function address in hex>".
+    std::string SegmentedGsymPath;
+    raw_string_ostream SGP(SegmentedGsymPath);
+    SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
+    if (llvm::Error Err = Segment->save(SegmentedGsymPath, ByteOrder, std::nullopt))
+      return Err;
+  }
+  return Error::success();
+}
+
+llvm::Expected<std::unique_ptr<GsymCreatorV1>>
+GsymCreatorV1::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+ if (FuncIdx >= Funcs.size())
+ return std::unique_ptr<GsymCreatorV1>(); // No function entries left: return an empty pointer.
+ // New creator that receives the functions of this segment.
+ std::unique_ptr<GsymCreatorV1> GC(new GsymCreatorV1(/*Quiet=*/true));
+ // Tell the creator that this is a segment.
+ GC->setIsSegment();
+ // Set the base address if there is one.
+ if (BaseAddress)
+ GC->setBaseAddress(*BaseAddress);
+ GC->setUUID(UUID); // Copy the UUID value from this object into the new creator.
+ const size_t NumFuncs = Funcs.size();
+ uint64_t SegmentFuncInfosSize = 0; // Size of the function infos copied so far (each rounded up to 4).
+ for (; FuncIdx < NumFuncs; ++FuncIdx) {
+ const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
+ if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
+ if (SegmentFuncInfosSize == 0)
+ return createStringError(std::errc::invalid_argument,
+ "a segment size of %" PRIu64 " is to small to "
+ "fit any function infos, specify a larger value",
+ SegmentSize);
+ // Segment is full: FuncIdx stays on the first function not yet copied.
+ break;
+ }
+ SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
+ }
+ return std::move(GC);
+}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index bc29eda1f9f5e..a94ce824249d4 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -8,18 +8,10 @@
#include "llvm/DebugInfo/GSYM/GsymReader.h"
-#include <assert.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-
+#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
-#include "llvm/DebugInfo/GSYM/InlineInfo.h"
-#include "llvm/DebugInfo/GSYM/LineTable.h"
-#include "llvm/Support/BinaryStreamReader.h"
-#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
@@ -43,8 +35,8 @@ static Expected<uint16_t> detectVersion(StringRef Data) {
return Version;
}
-llvm::Expected<std::unique_ptr<GsymReaderBase>>
-GsymReaderBase::openFile(StringRef Path) {
+llvm::Expected<std::unique_ptr<GsymReader>>
+GsymReader::openFile(StringRef Path) {
auto BufOrErr = MemoryBuffer::getFileOrSTDIN(Path);
if (!BufOrErr)
return createStringError(BufOrErr.getError(), "failed to open '%s'",
@@ -58,14 +50,14 @@ GsymReaderBase::openFile(StringRef Path) {
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReader::openFile(Path);
+ auto R = GsymReaderV1::openFile(Path);
if (!R)
return R.takeError();
- return std::make_unique<GsymReader>(std::move(*R));
+ return std::make_unique<GsymReaderV1>(std::move(*R));
}
-llvm::Expected<std::unique_ptr<GsymReaderBase>>
-GsymReaderBase::copyBuffer(StringRef Bytes) {
+llvm::Expected<std::unique_ptr<GsymReader>>
+GsymReader::copyBuffer(StringRef Bytes) {
auto VersionOrErr = detectVersion(Bytes);
if (!VersionOrErr)
return VersionOrErr.takeError();
@@ -75,543 +67,8 @@ GsymReaderBase::copyBuffer(StringRef Bytes) {
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReader::copyBuffer(Bytes);
+ auto R = GsymReaderV1::copyBuffer(Bytes);
if (!R)
return R.takeError();
- return std::make_unique<GsymReader>(std::move(*R));
-}
-
-GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
- : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
-
-GsymReader::GsymReader(GsymReader &&RHS) = default;
-
-GsymReader::~GsymReader() = default;
-
-llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {
- // Open the input file and return an appropriate error if needed.
- ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
- MemoryBuffer::getFileOrSTDIN(Filename);
- auto Err = BuffOrErr.getError();
- if (Err)
- return llvm::errorCodeToError(Err);
- return create(BuffOrErr.get());
-}
-
-llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {
- auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
- return create(MemBuffer);
-}
-
-llvm::Expected<llvm::gsym::GsymReader>
-GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
- if (!MemBuffer)
- return createStringError(std::errc::invalid_argument,
- "invalid memory buffer");
- GsymReader GR(std::move(MemBuffer));
- llvm::Error Err = GR.parse();
- if (Err)
- return std::move(Err);
- return std::move(GR);
-}
-
-llvm::Error
-GsymReader::parse() {
- BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
- // Check for the magic bytes. This file format is designed to be mmap'ed
- // into a process and accessed as read only. This is done for performance
- // and efficiency for symbolicating and parsing GSYM data.
- if (FileData.readObject(Hdr))
- return createStringError(std::errc::invalid_argument,
- "not enough data for a GSYM header");
-
- const auto HostByteOrder = llvm::endianness::native;
- switch (Hdr->Magic) {
- case GSYM_MAGIC:
- Endian = HostByteOrder;
- break;
- case GSYM_CIGAM:
- // This is a GSYM file, but not native endianness.
- Endian = sys::IsBigEndianHost ? llvm::endianness::little
- : llvm::endianness::big;
- Swap.reset(new SwappedData);
- break;
- default:
- return createStringError(std::errc::invalid_argument,
- "not a GSYM file");
- }
-
- bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
- // Read a correctly byte swapped header if we need to.
- if (Swap) {
- DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
- if (auto ExpectedHdr = Header::decode(Data))
- Swap->Hdr = ExpectedHdr.get();
- else
- return ExpectedHdr.takeError();
- Hdr = &Swap->Hdr;
- }
-
- // Detect errors in the header and report any that are found. If we make it
- // past this without errors, we know we have a good magic value, a supported
- // version number, verified address offset size and a valid UUID size.
- if (Error Err = Hdr->checkForError())
- return Err;
-
- if (!Swap) {
- // This is the native endianness case that is most common and optimized for
- // efficient lookups. Here we just grab pointers to the native data and
- // use ArrayRef objects to allow efficient read only access.
-
- // Read the address offsets.
- if (FileData.padToAlignment(Hdr->AddrOffSize) ||
- FileData.readArray(AddrOffsets,
- Hdr->NumAddresses * Hdr->AddrOffSize))
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
-
- // Read the address info offsets.
- if (FileData.padToAlignment(4) ||
- FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
- return createStringError(std::errc::invalid_argument,
- "failed to read address info offsets table");
-
- // Read the file table.
- uint32_t NumFiles = 0;
- if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
- return createStringError(std::errc::invalid_argument,
- "failed to read file table");
-
- // Get the string table.
- FileData.setOffset(Hdr->StrtabOffset);
- if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
- return createStringError(std::errc::invalid_argument,
- "failed to read string table");
-} else {
- // This is the non native endianness case that is not common and not
- // optimized for lookups. Here we decode the important tables into local
- // storage and then set the ArrayRef objects to point to these swapped
- // copies of the read only data so lookups can be as efficient as possible.
- DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
-
- // Read the address offsets.
- uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
- Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
- switch (Hdr->AddrOffSize) {
- case 1:
- if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
- break;
- case 2:
- if (!Data.getU16(&Offset,
- reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
- Hdr->NumAddresses))
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
- break;
- case 4:
- if (!Data.getU32(&Offset,
- reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
- Hdr->NumAddresses))
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
- break;
- case 8:
- if (!Data.getU64(&Offset,
- reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
- Hdr->NumAddresses))
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
- }
- AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
-
- // Read the address info offsets.
- Offset = alignTo(Offset, 4);
- Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
- if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
- AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
- else
- return createStringError(std::errc::invalid_argument,
- "failed to read address table");
- // Read the file table.
- const uint32_t NumFiles = Data.getU32(&Offset);
- if (NumFiles > 0) {
- Swap->Files.resize(NumFiles);
- if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
- Files = ArrayRef<FileEntry>(Swap->Files);
- else
- return createStringError(std::errc::invalid_argument,
- "failed to read file table");
- }
- // Get the string table.
- StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
- Hdr->StrtabSize);
- if (StrTab.Data.empty())
- return createStringError(std::errc::invalid_argument,
- "failed to read string table");
- }
- return Error::success();
-
-}
-
-const Header &GsymReader::getHeader() const {
- // The only way to get a GsymReader is from GsymReader::openFile(...) or
- // GsymReader::copyBuffer() and the header must be valid and initialized to
- // a valid pointer value, so the assert below should not trigger.
- assert(Hdr);
- return *Hdr;
-}
-
-std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
- switch (Hdr->AddrOffSize) {
- case 1: return addressForIndex<uint8_t>(Index);
- case 2: return addressForIndex<uint16_t>(Index);
- case 4: return addressForIndex<uint32_t>(Index);
- case 8: return addressForIndex<uint64_t>(Index);
- }
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
- const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
- if (Index < NumAddrInfoOffsets)
- return AddrInfoOffsets[Index];
- return std::nullopt;
-}
-
-Expected<uint64_t>
-GsymReader::getAddressIndex(const uint64_t Addr) const {
- if (Addr >= Hdr->BaseAddress) {
- const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
- std::optional<uint64_t> AddrOffsetIndex;
- switch (Hdr->AddrOffSize) {
- case 1:
- AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
- break;
- case 2:
- AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
- break;
- case 4:
- AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
- break;
- case 8:
- AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
- break;
- default:
- return createStringError(std::errc::invalid_argument,
- "unsupported address offset size %u",
- Hdr->AddrOffSize);
- }
- if (AddrOffsetIndex)
- return *AddrOffsetIndex;
- }
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
-
-}
-
-llvm::Expected<DataExtractor>
-GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
- uint64_t &FuncStartAddr) const {
- Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
- if (!ExpectedAddrIdx)
- return ExpectedAddrIdx.takeError();
- const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
- // The AddrIdx is the first index of the function info entries that match
- // \a Addr. We need to iterate over all function info objects that start with
- // the same address until we find a range that contains \a Addr.
- std::optional<uint64_t> FirstFuncStartAddr;
- const size_t NumAddresses = getNumAddresses();
- for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
- auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
- // If there was an error, return the error.
- if (!ExpextedData)
- return ExpextedData;
-
- // Remember the first function start address if it hasn't already been set.
- // If it is already valid, check to see if it matches the first function
- // start address and only continue if it matches.
- if (FirstFuncStartAddr.has_value()) {
- if (*FirstFuncStartAddr != FuncStartAddr)
- break; // Done with consecutive function entries with same address.
- } else {
- FirstFuncStartAddr = FuncStartAddr;
- }
- // Make sure the current function address ranges contains \a Addr.
- // Some symbols on Darwin don't have valid sizes, so if we run into a
- // symbol with zero size, then we have found a match for our address.
-
- // The first thing the encoding of a FunctionInfo object is the function
- // size.
- uint64_t Offset = 0;
- uint32_t FuncSize = ExpextedData->getU32(&Offset);
- if (FuncSize == 0 ||
- AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
- return ExpextedData;
- }
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
-}
-
-llvm::Expected<DataExtractor>
-GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
- uint64_t &FuncStartAddr) const {
- if (AddrIdx >= getNumAddresses())
- return createStringError(std::errc::invalid_argument,
- "invalid address index %" PRIu64, AddrIdx);
- const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
- assert((Endian == endianness::big || Endian == endianness::little) &&
- "Endian must be either big or little");
- StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
- if (Bytes.empty())
- return createStringError(std::errc::invalid_argument,
- "invalid address info offset 0x%" PRIx32,
- AddrInfoOffset);
- std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
- if (!OptFuncStartAddr)
- return createStringError(std::errc::invalid_argument,
- "failed to extract address[%" PRIu64 "]", AddrIdx);
- FuncStartAddr = *OptFuncStartAddr;
- return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
-}
-
-llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<FunctionInfo>
-GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
- return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<LookupResult>
-GsymReader::lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFunctionsData) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
- MergedFunctionsData);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<std::vector<LookupResult>>
-GsymReader::lookupAll(uint64_t Addr) const {
- std::vector<LookupResult> Results;
- std::optional<DataExtractor> MergedFunctionsData;
-
- // First perform a lookup to get the primary function info result.
- auto MainResult = lookup(Addr, &MergedFunctionsData);
- if (!MainResult)
- return MainResult.takeError();
-
- // Add the main result as the first entry.
- Results.push_back(std::move(*MainResult));
-
- // Now process any merged functions data that was found during the lookup.
- if (MergedFunctionsData) {
- // Get data extractors for each merged function.
- auto ExpectedMergedFuncExtractors =
- MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
- if (!ExpectedMergedFuncExtractors)
- return ExpectedMergedFuncExtractors.takeError();
-
- // Process each merged function data.
- for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
- if (auto FI = FunctionInfo::lookup(MergedData, *this,
- MainResult->FuncRange.start(), Addr)) {
- Results.push_back(std::move(*FI));
- } else {
- return FI.takeError();
- }
- }
- }
-
- return Results;
-}
-
-void GsymReader::dump(raw_ostream &OS) {
- const auto &Header = getHeader();
- // Dump the GSYM header.
- OS << Header << "\n";
- // Dump the address table.
- OS << "Address Table:\n";
- OS << "INDEX OFFSET";
-
- switch (Hdr->AddrOffSize) {
- case 1: OS << "8 "; break;
- case 2: OS << "16"; break;
- case 4: OS << "32"; break;
- case 8: OS << "64"; break;
- default: OS << "??"; break;
- }
- OS << " (ADDRESS)\n";
- OS << "====== =============================== \n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
- OS << format("[%4u] ", I);
- switch (Hdr->AddrOffSize) {
- case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
- case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
- case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
- case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
- default: break;
- }
- OS << " (" << HEX64(*getAddress(I)) << ")\n";
- }
- // Dump the address info offsets table.
- OS << "\nAddress Info Offsets:\n";
- OS << "INDEX Offset\n";
- OS << "====== ==========\n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I)
- OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
- // Dump the file table.
- OS << "\nFiles:\n";
- OS << "INDEX DIRECTORY BASENAME PATH\n";
- OS << "====== ========== ========== ==============================\n";
- for (uint32_t I = 0; I < Files.size(); ++I) {
- OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
- << HEX32(Files[I].Base) << ' ';
- dump(OS, getFile(I));
- OS << "\n";
- }
- OS << "\n" << StrTab << "\n";
-
- for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
- OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
- if (auto FI = getFunctionInfoAtIndex(I))
- dump(OS, *FI);
- else
- logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
- }
-}
-
-void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent) {
- OS.indent(Indent);
- OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
- if (FI.OptLineTable)
- dump(OS, *FI.OptLineTable, Indent);
- if (FI.Inline)
- dump(OS, *FI.Inline, Indent);
-
- if (FI.CallSites)
- dump(OS, *FI.CallSites, Indent);
-
- if (FI.MergedFunctions) {
- assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
- dump(OS, *FI.MergedFunctions);
- }
-}
-
-void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
- for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
- OS << "++ Merged FunctionInfos[" << inx << "]:\n";
- dump(OS, MFI.MergedFunctions[inx], 4);
- }
-}
-
-void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
- OS << HEX16(CSI.ReturnOffset);
-
- std::string Flags;
- auto addFlag = [&](const char *Flag) {
- if (!Flags.empty())
- Flags += " | ";
- Flags += Flag;
- };
-
- if (CSI.Flags == CallSiteInfo::Flags::None)
- Flags = "None";
- else {
- if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
- addFlag("InternalCall");
-
- if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
- addFlag("ExternalCall");
- }
- OS << " Flags[" << Flags << "]";
-
- if (!CSI.MatchRegex.empty()) {
- OS << " MatchRegex[";
- for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
- if (i > 0)
- OS << ";";
- OS << getString(CSI.MatchRegex[i]);
- }
- OS << "]";
- }
-}
-
-void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent) {
- OS.indent(Indent);
- OS << "CallSites (by relative return offset):\n";
- for (const auto &CS : CSIC.CallSites) {
- OS.indent(Indent);
- OS << " ";
- dump(OS, CS);
- OS << "\n";
- }
-}
-
-void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
- OS.indent(Indent);
- OS << "LineTable:\n";
- for (auto &LE: LT) {
- OS.indent(Indent);
- OS << " " << HEX64(LE.Addr) << ' ';
- if (LE.File)
- dump(OS, getFile(LE.File));
- OS << ':' << LE.Line << '\n';
- }
-}
-
-void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
- if (Indent == 0)
- OS << "InlineInfo:\n";
- else
- OS.indent(Indent);
- OS << II.Ranges << ' ' << getString(II.Name);
- if (II.CallFile != 0) {
- if (auto File = getFile(II.CallFile)) {
- OS << " called from ";
- dump(OS, File);
- OS << ':' << II.CallLine;
- }
- }
- OS << '\n';
- for (const auto &ChildII: II.Children)
- dump(OS, ChildII, Indent + 2);
-}
-
-void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
- if (FE) {
- // IF we have the file from index 0, then don't print anything
- if (FE->Dir == 0 && FE->Base == 0)
- return;
- StringRef Dir = getString(FE->Dir);
- StringRef Base = getString(FE->Base);
- if (!Dir.empty()) {
- OS << Dir;
- if (Dir.contains('\\') && !Dir.contains('/'))
- OS << '\\';
- else
- OS << '/';
- }
- if (!Base.empty()) {
- OS << Base;
- }
- if (!Dir.empty() || !Base.empty())
- return;
- }
- OS << "<invalid-file>";
+ return std::make_unique<GsymReaderV1>(std::move(*R));
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
new file mode 100644
index 0000000000000..4e6264f352a18
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
@@ -0,0 +1,534 @@
+//===- GsymReaderV1.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+using namespace llvm;
+using namespace gsym;
+
+/// Construct a reader that takes ownership of \p Buffer. The byte order is
+/// initialized to the host's; parse() replaces it once the magic is read.
+GsymReaderV1::GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer)
+ : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
+
+/// Move construction is member-wise (compiler generated).
+GsymReaderV1::GsymReaderV1(GsymReaderV1 &&RHS) = default;
+
+/// Destruction is member-wise (compiler generated); defined out of line here.
+GsymReaderV1::~GsymReaderV1() = default;
+
+/// Load \p Filename through MemoryBuffer::getFileOrSTDIN and build a reader
+/// from the resulting buffer.
+///
+/// \returns the parsed reader, the I/O error reported while loading, or the
+/// error produced by create() for the loaded buffer.
+llvm::Expected<GsymReaderV1> GsymReaderV1::openFile(StringRef Filename) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename);
+  // Surface a load failure as an llvm::Error before touching the buffer.
+  if (auto EC = FileOrErr.getError())
+    return llvm::errorCodeToError(EC);
+  return create(FileOrErr.get());
+}
+
+/// Build a reader from a private copy of \p Bytes, so the reader does not
+/// refer to the caller's storage.
+llvm::Expected<GsymReaderV1> GsymReaderV1::copyBuffer(StringRef Bytes) {
+  std::unique_ptr<MemoryBuffer> OwnedCopy =
+      MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
+  return create(OwnedCopy);
+}
+
+/// Build a reader around \p MemBuffer and parse it.
+///
+/// \param MemBuffer buffer holding the GSYM bytes; it is moved from when it
+/// is non-null.
+/// \returns the parsed reader, an invalid_argument error for a null buffer,
+/// or the error reported by parse().
+llvm::Expected<llvm::gsym::GsymReaderV1>
+GsymReaderV1::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
+  if (MemBuffer == nullptr)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid memory buffer");
+  GsymReaderV1 Reader(std::move(MemBuffer));
+  if (llvm::Error ParseErr = Reader.parse())
+    return std::move(ParseErr);
+  return std::move(Reader);
+}
+
+/// Parse the GSYM header and lookup tables out of the owned memory buffer.
+///
+/// The magic value selects between two paths. When the file matches the host
+/// byte order the tables are referenced in place. Otherwise byte swapped
+/// copies are decoded into \c Swap and the table references point at those
+/// copies.
+///
+/// \returns Error::success() when the header and every table were read;
+/// otherwise an error naming the piece that could not be read, or the error
+/// produced while decoding or validating the header.
+llvm::Error
+GsymReaderV1::parse() {
+  BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
+  // Check for the magic bytes. This file format is designed to be mmap'ed
+  // into a process and accessed as read only. This is done for performance
+  // and efficiency for symbolicating and parsing GSYM data.
+  if (FileData.readObject(Hdr))
+    return createStringError(std::errc::invalid_argument,
+                             "not enough data for a GSYM header");
+
+  const auto HostByteOrder = llvm::endianness::native;
+  switch (Hdr->Magic) {
+  case GSYM_MAGIC:
+    Endian = HostByteOrder;
+    break;
+  case GSYM_CIGAM:
+    // This is a GSYM file, but not native endianness.
+    Endian = sys::IsBigEndianHost ? llvm::endianness::little
+                                  : llvm::endianness::big;
+    Swap.reset(new SwappedData);
+    break;
+  default:
+    return createStringError(std::errc::invalid_argument,
+                             "not a GSYM file");
+  }
+
+  // Only consulted on the swapped path, where the data has the opposite byte
+  // order from the host: it is little endian exactly when the host is not.
+  bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
+  // Read a correctly byte swapped header if we need to.
+  if (Swap) {
+    DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+    if (auto ExpectedHdr = Header::decode(Data))
+      Swap->Hdr = ExpectedHdr.get();
+    else
+      return ExpectedHdr.takeError();
+    Hdr = &Swap->Hdr;
+  }
+
+  // Detect errors in the header and report any that are found. If we make it
+  // past this without errors, we know we have a good magic value, a supported
+  // version number, verified address offset size and a valid UUID size.
+  if (Error Err = Hdr->checkForError())
+    return Err;
+
+  if (!Swap) {
+    // This is the native endianness case that is most common and optimized for
+    // efficient lookups. Here we just grab pointers to the native data and
+    // use ArrayRef objects to allow efficient read only access.
+
+    // Read the address offsets.
+    if (FileData.padToAlignment(Hdr->AddrOffSize) ||
+        FileData.readArray(AddrOffsets,
+                           Hdr->NumAddresses * Hdr->AddrOffSize))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address table");
+
+    // Read the address info offsets.
+    if (FileData.padToAlignment(4) ||
+        FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address info offsets table");
+
+    // Read the file table.
+    uint32_t NumFiles = 0;
+    if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read file table");
+
+    // Get the string table.
+    FileData.setOffset(Hdr->StrtabOffset);
+    if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read string table");
+  } else {
+    // This is the non native endianness case that is not common and not
+    // optimized for lookups. Here we decode the important tables into local
+    // storage and then set the ArrayRef objects to point to these swapped
+    // copies of the read only data so lookups can be as efficient as possible.
+    DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+
+    // Read the address offsets. The byte count is computed in size_t so a
+    // large address count cannot wrap in 32-bit arithmetic.
+    uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
+    Swap->AddrOffsets.resize(static_cast<size_t>(Hdr->NumAddresses) *
+                             Hdr->AddrOffSize);
+    switch (Hdr->AddrOffSize) {
+    case 1:
+      if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    case 2:
+      if (!Data.getU16(&Offset,
+                       reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),
+                       Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    case 4:
+      if (!Data.getU32(&Offset,
+                       reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),
+                       Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    case 8:
+      if (!Data.getU64(&Offset,
+                       reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),
+                       Hdr->NumAddresses))
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read address table");
+      break;
+    }
+    AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
+
+    // Read the address info offsets.
+    Offset = alignTo(Offset, 4);
+    Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
+    if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
+      AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);
+    else
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read address info offsets table");
+
+    // Read the file table. A failed read leaves Offset untouched and yields
+    // zero, which would otherwise be indistinguishable from an empty table,
+    // so detect it by checking that the offset advanced.
+    const uint64_t NumFilesOffset = Offset;
+    const uint32_t NumFiles = Data.getU32(&Offset);
+    if (Offset == NumFilesOffset)
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read file table");
+    if (NumFiles > 0) {
+      Swap->Files.resize(NumFiles);
+      // Each file entry is decoded as two consecutive 32-bit values starting
+      // at the first entry's Dir member.
+      if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))
+        Files = ArrayRef<FileEntry>(Swap->Files);
+      else
+        return createStringError(std::errc::invalid_argument,
+                                 "failed to read file table");
+    }
+    // Get the string table.
+    StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
+                                                Hdr->StrtabSize);
+    if (StrTab.Data.empty())
+      return createStringError(std::errc::invalid_argument,
+                               "failed to read string table");
+  }
+  return Error::success();
+}
+
+/// Return the header this reader refers to. Asserts that the header pointer
+/// has been set.
+const Header &GsymReaderV1::getHeader() const {
+ assert(Hdr);
+ return *Hdr;
+}
+
+/// Return the address stored at \p Index in the address table, dispatching on
+/// the header's address offset size.
+///
+/// \returns std::nullopt when the offset size is not 1, 2, 4 or 8, otherwise
+/// whatever addressForIndex() yields for the matching integer width.
+std::optional<uint64_t> GsymReaderV1::getAddress(size_t Index) const {
+  const auto OffsetSize = Hdr->AddrOffSize;
+  if (OffsetSize == 1)
+    return addressForIndex<uint8_t>(Index);
+  if (OffsetSize == 2)
+    return addressForIndex<uint16_t>(Index);
+  if (OffsetSize == 4)
+    return addressForIndex<uint32_t>(Index);
+  if (OffsetSize == 8)
+    return addressForIndex<uint64_t>(Index);
+  return std::nullopt;
+}
+
+/// Return the address info offset stored at \p Index, or std::nullopt when
+/// \p Index is past the end of the address info offsets table.
+std::optional<uint64_t> GsymReaderV1::getAddressInfoOffset(size_t Index) const {
+  if (Index >= AddrInfoOffsets.size())
+    return std::nullopt;
+  return AddrInfoOffsets[Index];
+}
+
+/// Find the address table index for \p Addr.
+///
+/// \returns the index reported by getAddressOffsetIndex() for the header's
+/// offset width, an "unsupported address offset size" error for any other
+/// width, or an "is not in GSYM" error when \p Addr is below the base address
+/// or no index was found.
+Expected<uint64_t>
+GsymReaderV1::getAddressIndex(const uint64_t Addr) const {
+  const auto NotInGsym = [Addr]() {
+    return createStringError(std::errc::invalid_argument,
+                             "address 0x%" PRIx64 " is not in GSYM", Addr);
+  };
+
+  // Addresses below the base address cannot be expressed as an offset.
+  if (Addr < Hdr->BaseAddress)
+    return NotInGsym();
+
+  const uint64_t RelativeAddr = Addr - Hdr->BaseAddress;
+  std::optional<uint64_t> FoundIndex;
+  switch (Hdr->AddrOffSize) {
+  case 1:
+    FoundIndex = getAddressOffsetIndex<uint8_t>(RelativeAddr);
+    break;
+  case 2:
+    FoundIndex = getAddressOffsetIndex<uint16_t>(RelativeAddr);
+    break;
+  case 4:
+    FoundIndex = getAddressOffsetIndex<uint32_t>(RelativeAddr);
+    break;
+  case 8:
+    FoundIndex = getAddressOffsetIndex<uint64_t>(RelativeAddr);
+    break;
+  default:
+    return createStringError(std::errc::invalid_argument,
+                             "unsupported address offset size %u",
+                             Hdr->AddrOffSize);
+  }
+  if (!FoundIndex)
+    return NotInGsym();
+  return *FoundIndex;
+}
+
+/// Locate the encoded function info whose range contains \p Addr.
+///
+/// Starting at the index found for \p Addr, consecutive entries that share
+/// the first entry's start address are examined. The first entry whose size
+/// is zero or whose range contains \p Addr is returned.
+///
+/// \param Addr address to look up.
+/// \param [out] FuncStartAddr start address of the entry examined last.
+/// \returns the data for the matching entry, the error from looking up the
+/// index or the entry data, or an "is not in GSYM" error when nothing matches.
+llvm::Expected<DataExtractor>
+GsymReaderV1::getFunctionInfoDataForAddress(uint64_t Addr,
+                                            uint64_t &FuncStartAddr) const {
+  Expected<uint64_t> StartIdxOrErr = getAddressIndex(Addr);
+  if (!StartIdxOrErr)
+    return StartIdxOrErr.takeError();
+
+  std::optional<uint64_t> GroupStartAddr;
+  const size_t AddrCount = getNumAddresses();
+  uint64_t Idx = *StartIdxOrErr;
+  while (Idx < AddrCount) {
+    auto DataOrErr = getFunctionInfoDataAtIndex(Idx, FuncStartAddr);
+    if (!DataOrErr)
+      return DataOrErr;
+
+    // Stop as soon as an entry starts at a different address than the first.
+    if (!GroupStartAddr.has_value())
+      GroupStartAddr = FuncStartAddr;
+    else if (*GroupStartAddr != FuncStartAddr)
+      break;
+
+    // The entry begins with its 32-bit function size.
+    uint64_t SizeOffset = 0;
+    const uint32_t FuncSize = DataOrErr->getU32(&SizeOffset);
+    const bool Matches =
+        FuncSize == 0 ||
+        AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr);
+    if (Matches)
+      return DataOrErr;
+    ++Idx;
+  }
+  return createStringError(std::errc::invalid_argument,
+                           "address 0x%" PRIx64 " is not in GSYM", Addr);
+}
+
+/// Return an extractor over the encoded function info at \p AddrIdx.
+///
+/// \param AddrIdx index into the address tables.
+/// \param [out] FuncStartAddr set to the address stored at \p AddrIdx when
+/// the call succeeds.
+/// \returns an extractor over the buffer bytes starting at the entry's
+/// address info offset, or an invalid_argument error when the index is out of
+/// range, the offset yields no bytes, or the address cannot be extracted.
+llvm::Expected<DataExtractor>
+GsymReaderV1::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
+                                         uint64_t &FuncStartAddr) const {
+  if (AddrIdx >= getNumAddresses())
+    return createStringError(std::errc::invalid_argument,
+                             "invalid address index %" PRIu64, AddrIdx);
+  assert((Endian == endianness::big || Endian == endianness::little) &&
+         "Endian must be either big or little");
+
+  const uint32_t InfoOffset = AddrInfoOffsets[AddrIdx];
+  const StringRef InfoBytes = MemBuffer->getBuffer().substr(InfoOffset);
+  if (InfoBytes.empty())
+    return createStringError(std::errc::invalid_argument,
+                             "invalid address info offset 0x%" PRIx32,
+                             InfoOffset);
+
+  const std::optional<uint64_t> StartAddr = getAddress(AddrIdx);
+  if (!StartAddr.has_value())
+    return createStringError(std::errc::invalid_argument,
+                             "failed to extract address[%" PRIu64 "]", AddrIdx);
+  FuncStartAddr = *StartAddr;
+
+  const bool IsLittleEndian = Endian == llvm::endianness::little;
+  return DataExtractor(InfoBytes, IsLittleEndian, 4);
+}
+
+/// Decode the full FunctionInfo for the function containing \p Addr.
+///
+/// \returns the decoded function info, or the error from locating its data.
+llvm::Expected<FunctionInfo> GsymReaderV1::getFunctionInfo(uint64_t Addr) const {
+  uint64_t FuncStartAddr = 0;
+  auto DataOrErr = getFunctionInfoDataForAddress(Addr, FuncStartAddr);
+  if (!DataOrErr)
+    return DataOrErr.takeError();
+  return FunctionInfo::decode(*DataOrErr, FuncStartAddr);
+}
+
+/// Decode the full FunctionInfo stored at address table index \p Idx.
+///
+/// \returns the decoded function info, or the error from locating its data.
+llvm::Expected<FunctionInfo>
+GsymReaderV1::getFunctionInfoAtIndex(uint64_t Idx) const {
+  uint64_t FuncStartAddr = 0;
+  auto DataOrErr = getFunctionInfoDataAtIndex(Idx, FuncStartAddr);
+  if (!DataOrErr)
+    return DataOrErr.takeError();
+  return FunctionInfo::decode(*DataOrErr, FuncStartAddr);
+}
+
+/// Look up \p Addr through FunctionInfo::lookup() on the encoded data of the
+/// function that contains it.
+///
+/// \param Addr address to look up.
+/// \param MergedFunctionsData forwarded unchanged to FunctionInfo::lookup().
+/// \returns the lookup result, or the error from locating the function data.
+llvm::Expected<LookupResult>
+GsymReaderV1::lookup(uint64_t Addr,
+                     std::optional<DataExtractor> *MergedFunctionsData) const {
+  uint64_t FuncStartAddr = 0;
+  auto DataOrErr = getFunctionInfoDataForAddress(Addr, FuncStartAddr);
+  if (!DataOrErr)
+    return DataOrErr.takeError();
+  return FunctionInfo::lookup(*DataOrErr, *this, FuncStartAddr, Addr,
+                              MergedFunctionsData);
+}
+
+/// Look up \p Addr in the containing function and in every function merged
+/// with it.
+///
+/// \returns the main lookup result followed by one result per merged
+/// function, or the first error encountered.
+llvm::Expected<std::vector<LookupResult>>
+GsymReaderV1::lookupAll(uint64_t Addr) const {
+  std::optional<DataExtractor> MergedFunctionsData;
+
+  auto MainResult = lookup(Addr, &MergedFunctionsData);
+  if (!MainResult)
+    return MainResult.takeError();
+
+  // Capture the function start before the result is moved into the vector;
+  // the moved-from result must not be read again afterwards.
+  const uint64_t MainFuncStart = MainResult->FuncRange.start();
+
+  std::vector<LookupResult> Results;
+  Results.push_back(std::move(*MainResult));
+
+  if (MergedFunctionsData) {
+    auto ExpectedMergedFuncExtractors =
+        MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
+    if (!ExpectedMergedFuncExtractors)
+      return ExpectedMergedFuncExtractors.takeError();
+
+    // Every merged function is looked up at the main function's start.
+    for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
+      if (auto FI = FunctionInfo::lookup(MergedData, *this, MainFuncStart,
+                                         Addr)) {
+        Results.push_back(std::move(*FI));
+      } else {
+        return FI.takeError();
+      }
+    }
+  }
+
+  return Results;
+}
+
+/// Write the whole file to \p OS: the header, the address table, the address
+/// info offsets, the file table, the string table and finally every function
+/// info (or the error hit while decoding it).
+void GsymReaderV1::dump(raw_ostream &OS) {
+ const auto &Header = getHeader();
+ OS << Header << "\n";
+ OS << "Address Table:\n";
+ OS << "INDEX OFFSET";
+
+ // The column title is suffixed with the offset width in bits.
+ switch (Hdr->AddrOffSize) {
+ case 1: OS << "8 "; break;
+ case 2: OS << "16"; break;
+ case 4: OS << "32"; break;
+ case 8: OS << "64"; break;
+ default: OS << "??"; break;
+ }
+ OS << " (ADDRESS)\n";
+ OS << "====== =============================== \n";
+ for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+ OS << format("[%4u] ", I);
+ // NOTE(review): the 8-byte case is printed with HEX32 while the other
+ // cases use the macro matching their width - confirm whether HEX64 was
+ // intended here.
+ switch (Hdr->AddrOffSize) {
+ case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
+ case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
+ case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
+ case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;
+ default: break;
+ }
+ // The optional is dereferenced unchecked; getAddress() only yields
+ // std::nullopt for an offset size other than 1, 2, 4 or 8 or when
+ // addressForIndex() does.
+ OS << " (" << HEX64(*getAddress(I)) << ")\n";
+ }
+ OS << "\nAddress Info Offsets:\n";
+ OS << "INDEX Offset\n";
+ OS << "====== ==========\n";
+ for (uint32_t I = 0; I < Header.NumAddresses; ++I)
+ OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
+ OS << "\nFiles:\n";
+ OS << "INDEX DIRECTORY BASENAME PATH\n";
+ OS << "====== ========== ========== ==============================\n";
+ for (uint32_t I = 0; I < Files.size(); ++I) {
+ OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '
+ << HEX32(Files[I].Base) << ' ';
+ dump(OS, getFile(I));
+ OS << "\n";
+ }
+ OS << "\n" << StrTab << "\n";
+
+ // Decode and print each function info; a decode failure is logged to the
+ // same stream instead of aborting the dump.
+ for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+ OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
+ if (auto FI = getFunctionInfoAtIndex(I))
+ dump(OS, *FI);
+ else
+ logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
+ }
+}
+
+/// Print \p FI to \p OS: its range and name, then its line table, inline
+/// info, call sites and merged functions when present.
+///
+/// \param Indent number of columns to indent the output by.
+void GsymReaderV1::dump(raw_ostream &OS, const FunctionInfo &FI,
+                        uint32_t Indent) {
+  OS.indent(Indent) << FI.Range << " \"" << getString(FI.Name) << "\"\n";
+
+  if (FI.OptLineTable)
+    dump(OS, *FI.OptLineTable, Indent);
+  if (FI.Inline)
+    dump(OS, *FI.Inline, Indent);
+  if (FI.CallSites)
+    dump(OS, *FI.CallSites, Indent);
+
+  if (!FI.MergedFunctions)
+    return;
+  assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
+  dump(OS, *FI.MergedFunctions);
+}
+
+/// Print every function merged into \p MFI, each under a numbered heading and
+/// indented by four columns.
+void GsymReaderV1::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
+  uint32_t Position = 0;
+  for (const auto &MergedFI : MFI.MergedFunctions) {
+    OS << "++ Merged FunctionInfos[" << Position << "]:\n";
+    dump(OS, MergedFI, 4);
+    ++Position;
+  }
+}
+
+/// Print one call site: its return offset, its flags joined by " | " (or
+/// "None"), and, when there are any, its match regexes separated by ';'.
+void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
+  OS << HEX16(CSI.ReturnOffset);
+
+  std::string FlagText;
+  const auto AppendFlag = [&FlagText](const char *Name) {
+    if (!FlagText.empty())
+      FlagText += " | ";
+    FlagText += Name;
+  };
+
+  if (CSI.Flags == CallSiteInfo::Flags::None) {
+    FlagText = "None";
+  } else {
+    if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
+      AppendFlag("InternalCall");
+    if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
+      AppendFlag("ExternalCall");
+  }
+  OS << " Flags[" << FlagText << "]";
+
+  if (CSI.MatchRegex.empty())
+    return;
+
+  OS << " MatchRegex[";
+  bool IsFirst = true;
+  for (const auto RegexStrOffset : CSI.MatchRegex) {
+    if (!IsFirst)
+      OS << ";";
+    IsFirst = false;
+    OS << getString(RegexStrOffset);
+  }
+  OS << "]";
+}
+
+/// Print a heading followed by one line per call site in \p CSIC.
+///
+/// \param Indent number of columns to indent each line by.
+void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+                        uint32_t Indent) {
+  OS.indent(Indent) << "CallSites (by relative return offset):\n";
+  for (const auto &Site : CSIC.CallSites) {
+    OS.indent(Indent) << " ";
+    dump(OS, Site);
+    OS << "\n";
+  }
+}
+
+/// Print a heading followed by one line per entry in \p LT, showing the
+/// entry's address, its file (when the file index is non-zero) and its line.
+///
+/// \param Indent number of columns to indent each line by.
+void GsymReaderV1::dump(raw_ostream &OS, const LineTable &LT,
+                        uint32_t Indent) {
+  OS.indent(Indent);
+  OS << "LineTable:\n";
+  for (auto &LE: LT) {
+    OS.indent(Indent);
+    OS << " " << HEX64(LE.Addr) << ' ';
+    // A file index of zero prints no path at all.
+    if (LE.File)
+      dump(OS, getFile(LE.File));
+    OS << ':' << LE.Line << '\n';
+  }
+}
+
+/// Print \p II and, recursively, its children, each nesting level indented
+/// two more columns than its parent.
+///
+/// \param Indent number of columns to indent by; zero marks the top level,
+/// which prints the "InlineInfo:" heading instead of indenting.
+void GsymReaderV1::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
+  if (Indent != 0)
+    OS.indent(Indent);
+  else
+    OS << "InlineInfo:\n";
+
+  OS << II.Ranges << ' ' << getString(II.Name);
+
+  // The call location is printed only for a non-zero file index that
+  // resolves to a file entry.
+  if (II.CallFile != 0) {
+    if (auto CallerFile = getFile(II.CallFile)) {
+      OS << " called from ";
+      dump(OS, CallerFile);
+      OS << ':' << II.CallLine;
+    }
+  }
+  OS << '\n';
+
+  const uint32_t ChildIndent = Indent + 2;
+  for (const auto &Child : II.Children)
+    dump(OS, Child, ChildIndent);
+}
+
+/// Print the path described by \p FE as directory, separator, basename.
+///
+/// Nothing is printed when both the Dir and Base members are zero.
+/// "<invalid-file>" is printed when \p FE is empty or both of its strings
+/// are empty.
+void GsymReaderV1::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
+  if (!FE) {
+    OS << "<invalid-file>";
+    return;
+  }
+  // An entry with both members zero prints nothing at all.
+  if (FE->Dir == 0 && FE->Base == 0)
+    return;
+
+  const StringRef Dir = getString(FE->Dir);
+  const StringRef Base = getString(FE->Base);
+  if (Dir.empty() && Base.empty()) {
+    OS << "<invalid-file>";
+    return;
+  }
+
+  if (!Dir.empty()) {
+    // Use a backslash separator only when the directory contains backslashes
+    // and no forward slashes.
+    const bool UseBackslash = Dir.contains('\\') && !Dir.contains('/');
+    OS << Dir << (UseBackslash ? '\\' : '/');
+  }
+  if (!Base.empty())
+    OS << Base;
+}
diff --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
index 35091199142b7..251e51bb67eab 100644
--- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -100,7 +100,7 @@ static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
/// \param BaseAddr The address that the relative address range offsets are
/// relative to.
-static bool lookup(const GsymReaderBase &GR, DataExtractor &Data, uint64_t &Offset,
+static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
llvm::Error &Err) {
InlineInfo Inline;
@@ -151,7 +151,7 @@ static bool lookup(const GsymReaderBase &GR, DataExtractor &Data, uint64_t &Offs
return true;
}
-llvm::Error InlineInfo::lookup(const GsymReaderBase &GR, DataExtractor &Data,
+llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data,
uint64_t BaseAddr, uint64_t Addr,
SourceLocations &SrcLocs) {
// Call our recursive helper function starting at offset zero.
diff --git a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
index 5525325f76072..122de4deea5df 100644
--- a/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/ObjectFileTransformer.cpp
@@ -68,7 +68,7 @@ static std::vector<uint8_t> getUUID(const object::ObjectFile &Obj) {
llvm::Error ObjectFileTransformer::convert(const object::ObjectFile &Obj,
OutputAggregator &Out,
- GsymCreatorBase &Gsym) {
+ GsymCreator &Gsym) {
using namespace llvm::object;
const bool IsMachO = isa<MachOObjectFile>(&Obj);
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index a01faf1dfbc33..bbfb62de54fa9 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -779,7 +779,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) {
// - Otherwise, create a DWARFContext.
const auto GsymFile = lookUpGsymFile(BinaryName.str());
if (!GsymFile.empty()) {
- auto ReaderOrErr = gsym::GsymReaderBase::openFile(GsymFile);
+ auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile);
if (ReaderOrErr)
Context = std::make_unique<gsym::GsymContext>(std::move(*ReaderOrErr));
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index 2cbac7b4fb236..a89f19dd5eb61 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -42,8 +42,10 @@
#include "llvm/DebugInfo/GSYM/DwarfTransformer.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
@@ -390,12 +392,12 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
auto ThreadCount =
NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
- std::unique_ptr<GsymCreatorBase> GsymPtr;
+ std::unique_ptr<GsymCreator> GsymPtr;
if (ForceCreatorVersion == CreatorVersion::V2)
GsymPtr = std::make_unique<GsymCreatorV2>(Quiet);
else
- GsymPtr = std::make_unique<GsymCreator>(Quiet);
- GsymCreatorBase &Gsym = *GsymPtr;
+ GsymPtr = std::make_unique<GsymCreatorV1>(Quiet);
+ GsymCreator &Gsym = *GsymPtr;
// See if we can figure out the base address for a given object file, and if
// we can, then set the base address to use to this value. This will ease
@@ -538,19 +540,19 @@ static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
}
/// Open a GSYM file, auto-detecting the version unless forced.
-static Expected<std::unique_ptr<GsymReaderBase>> openGsymFile(StringRef Path) {
+static Expected<std::unique_ptr<GsymReader>> openGsymFile(StringRef Path) {
if (ForceReaderVersion == ReaderVersion::Auto)
- return GsymReaderBase::openFile(Path);
+ return GsymReader::openFile(Path);
if (ForceReaderVersion == ReaderVersion::V2) {
auto R = GsymReaderV2::openFile(Path);
if (!R)
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReader::openFile(Path);
+ auto R = GsymReaderV1::openFile(Path);
if (!R)
return R.takeError();
- return std::make_unique<GsymReader>(std::move(*R));
+ return std::make_unique<GsymReaderV1>(std::move(*R));
}
/// Check if a file starts with the GSYM magic bytes.
@@ -569,8 +571,8 @@ static bool isGSYMFile(StringRef Filename) {
/// Re-insert a file entry from a reader into a creator, reconstructing the
/// full path from separate Dir and Base components.
-static uint32_t transferFile(const GsymReaderBase &Reader,
- GsymCreatorBase &Creator, uint32_t FileIdx) {
+static uint32_t transferFile(const GsymReader &Reader,
+ GsymCreator &Creator, uint32_t FileIdx) {
auto FE = Reader.getFile(FileIdx);
if (!FE)
return FileIdx;
@@ -588,8 +590,8 @@ static uint32_t transferFile(const GsymReaderBase &Reader,
/// Fix up string and file references in an InlineInfo tree so they refer to
/// the creator's tables instead of the reader's.
-static void fixupInlineInfo(const GsymReaderBase &Reader,
- GsymCreatorBase &Creator, InlineInfo &II) {
+static void fixupInlineInfo(const GsymReader &Reader,
+ GsymCreator &Creator, InlineInfo &II) {
II.Name = Creator.insertString(Reader.getString(II.Name));
if (II.CallFile != 0)
II.CallFile = transferFile(Reader, Creator, II.CallFile);
@@ -599,8 +601,8 @@ static void fixupInlineInfo(const GsymReaderBase &Reader,
/// Fix up all string and file references in a FunctionInfo so they refer to
/// the creator's tables instead of the reader's.
-static void fixupFunctionInfo(const GsymReaderBase &Reader,
- GsymCreatorBase &Creator, FunctionInfo &FI) {
+static void fixupFunctionInfo(const GsymReader &Reader,
+ GsymCreator &Creator, FunctionInfo &FI) {
FI.Name = Creator.insertString(Reader.getString(FI.Name));
if (FI.OptLineTable) {
for (size_t J = 0; J < FI.OptLineTable->size(); ++J) {
@@ -632,12 +634,12 @@ static llvm::Error handleGSYMConversion(StringRef Filename,
return ReaderOrErr.takeError();
auto &Reader = **ReaderOrErr;
- std::unique_ptr<GsymCreatorBase> CreatorPtr;
+ std::unique_ptr<GsymCreator> CreatorPtr;
if (ForceCreatorVersion == CreatorVersion::V2)
CreatorPtr = std::make_unique<GsymCreatorV2>(Quiet);
else
- CreatorPtr = std::make_unique<GsymCreator>(Quiet);
- GsymCreatorBase &Creator = *CreatorPtr;
+ CreatorPtr = std::make_unique<GsymCreatorV1>(Quiet);
+ GsymCreator &Creator = *CreatorPtr;
// Transfer all function infos, re-inserting strings and files.
for (uint32_t I = 0; I < Reader.getNumAddresses(); ++I) {
@@ -702,7 +704,7 @@ static llvm::Error convertFileToGSYM(OutputAggregator &Out) {
return Error::success();
}
-static void doLookup(GsymReaderBase &Gsym, uint64_t Addr, raw_ostream &OS) {
+static void doLookup(GsymReader &Gsym, uint64_t Addr, raw_ostream &OS) {
if (UseMergedFunctions) {
if (auto Results = Gsym.lookupAll(Addr)) {
// If we have filters, count matching results first
@@ -836,7 +838,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
std::string InputLine;
std::string CurrentGSYMPath;
- std::unique_ptr<GsymReaderBase> CurrentGsym;
+ std::unique_ptr<GsymReader> CurrentGsym;
while (std::getline(std::cin, InputLine)) {
// Strip newline characters.
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index d56007371b2f2..5066d1491c12e 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -14,8 +14,8 @@
#include "llvm/DebugInfo/GSYM/FileEntry.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
-#include "llvm/DebugInfo/GSYM/GsymCreator.h"
-#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
+#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
@@ -940,8 +940,8 @@ TEST(GSYMTest, TestHeaderEncodeDecode) {
TestHeaderEncodeDecode(H, llvm::endianness::big);
}
-static void TestGsymCreatorEncodeError(llvm::endianness ByteOrder,
- const GsymCreator &GC,
+static void TestGsymCreatorV1EncodeError(llvm::endianness ByteOrder,
+ const GsymCreatorV1 &GC,
std::string ExpectedErrorMsg) {
SmallString<512> Str;
raw_svector_ostream OutStrm(Str);
@@ -951,24 +951,24 @@ static void TestGsymCreatorEncodeError(llvm::endianness ByteOrder,
checkError(ExpectedErrorMsg, std::move(Err));
}
-TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
+TEST(GSYMTest, TestGsymCreatorV1EncodeErrors) {
const uint8_t ValidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16};
const uint8_t InvalidUUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
14, 15, 16, 17, 18, 19, 20, 21};
- // Verify we get an error when trying to encode an GsymCreator with no
+ // Verify we get an error when trying to encode an GsymCreatorV1 with no
// function infos. We shouldn't be saving a GSYM file in this case since
// there is nothing inside of it.
- GsymCreator GC;
- TestGsymCreatorEncodeError(llvm::endianness::little, GC,
+ GsymCreatorV1 GC;
+ TestGsymCreatorV1EncodeError(llvm::endianness::little, GC,
"no functions to encode");
const uint64_t FuncAddr = 0x1000;
const uint64_t FuncSize = 0x100;
const uint32_t FuncName = GC.insertString("foo");
- // Verify we get an error trying to encode a GsymCreator that isn't
+ // Verify we get an error trying to encode a GsymCreatorV1 that isn't
// finalized.
GC.addFunctionInfo(FunctionInfo(FuncAddr, FuncSize, FuncName));
- TestGsymCreatorEncodeError(llvm::endianness::little, GC,
+ TestGsymCreatorV1EncodeError(llvm::endianness::little, GC,
"GsymCreator wasn't finalized prior to encoding");
std::string finalizeIssues;
raw_string_ostream OS(finalizeIssues);
@@ -978,10 +978,10 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
finalizeErr = GC.finalize(Agg);
ASSERT_TRUE(bool(finalizeErr));
checkError("already finalized", std::move(finalizeErr));
- // Verify we get an error trying to encode a GsymCreator with a UUID that is
+ // Verify we get an error trying to encode a GsymCreatorV1 with a UUID that is
// too long.
GC.setUUID(InvalidUUID);
- TestGsymCreatorEncodeError(llvm::endianness::little, GC,
+ TestGsymCreatorV1EncodeError(llvm::endianness::little, GC,
"invalid UUID size 21");
GC.setUUID(ValidUUID);
// Verify errors are propagated when we try to encoding an invalid line
@@ -990,7 +990,7 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
FI.OptLineTable = LineTable(); // Invalid line table.
return false; // Stop iterating
});
- TestGsymCreatorEncodeError(llvm::endianness::little, GC,
+ TestGsymCreatorV1EncodeError(llvm::endianness::little, GC,
"attempted to encode invalid LineTable object");
// Verify errors are propagated when we try to encoding an invalid inline
// info.
@@ -999,13 +999,13 @@ TEST(GSYMTest, TestGsymCreatorEncodeErrors) {
FI.Inline = InlineInfo(); // Invalid InlineInfo.
return false; // Stop iterating
});
- TestGsymCreatorEncodeError(llvm::endianness::little, GC,
+ TestGsymCreatorV1EncodeError(llvm::endianness::little, GC,
"attempted to encode invalid InlineInfo object");
}
-static void Compare(const GsymCreator &GC, const GsymReader &GR) {
- // Verify that all of the data in a GsymCreator is correctly decoded from
- // a GsymReader. To do this, we iterator over
+static void Compare(const GsymCreatorV1 &GC, const GsymReaderV1 &GR) {
+ // Verify that all of the data in a GsymCreatorV1 is correctly decoded from
+ // a GsymReaderV1. To do this, we iterator over
GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool {
auto DecodedFI = GR.getFunctionInfo(FI.Range.start());
EXPECT_TRUE(bool(DecodedFI));
@@ -1014,7 +1014,7 @@ static void Compare(const GsymCreator &GC, const GsymReader &GR) {
});
}
-static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
+static void TestEncodeDecode(const GsymCreatorV1 &GC, llvm::endianness ByteOrder,
uint16_t Version, uint8_t AddrOffSize,
uint64_t BaseAddress, uint32_t NumAddresses,
ArrayRef<uint8_t> UUID) {
@@ -1023,7 +1023,7 @@ static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
const Header &Hdr = GR->getHeader();
EXPECT_EQ(Hdr.Version, Version);
@@ -1035,9 +1035,9 @@ static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
Compare(GC, GR.get());
}
-TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV11ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -1058,9 +1058,9 @@ TEST(GSYMTest, TestGsymCreator1ByteAddrOffsets) {
ArrayRef<uint8_t>(UUID));
}
-TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV12ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 2;
@@ -1081,9 +1081,9 @@ TEST(GSYMTest, TestGsymCreator2ByteAddrOffsets) {
ArrayRef<uint8_t>(UUID));
}
-TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV14ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 4;
@@ -1104,9 +1104,9 @@ TEST(GSYMTest, TestGsymCreator4ByteAddrOffsets) {
ArrayRef<uint8_t>(UUID));
}
-TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV18ByteAddrOffsets) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 8;
@@ -1127,23 +1127,23 @@ TEST(GSYMTest, TestGsymCreator8ByteAddrOffsets) {
ArrayRef<uint8_t>(UUID));
}
-static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr,
+static void VerifyFunctionInfo(const GsymReaderV1 &GR, uint64_t Addr,
const FunctionInfo &FI) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
ASSERT_EQ(FI, ExpFI.get());
}
-static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr,
+static void VerifyFunctionInfoError(const GsymReaderV1 &GR, uint64_t Addr,
std::string ErrMessage) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_FALSE(bool(ExpFI));
checkError(ErrMessage, ExpFI.takeError());
}
-TEST(GSYMTest, TestGsymReader) {
+TEST(GSYMTest, TestGsymReaderV1) {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint64_t Func1Addr = BaseAddr;
@@ -1162,8 +1162,8 @@ TEST(GSYMTest, TestGsymReader) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- if (auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str())) {
- const GsymReader &GR = ExpectedGR.get();
+ if (auto ExpectedGR = GsymReaderV1::copyBuffer(OutStrm.str())) {
+ const GsymReaderV1 &GR = ExpectedGR.get();
VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff is not in GSYM");
FunctionInfo Func1(Func1Addr, FuncSize, Func1Name);
@@ -1188,7 +1188,7 @@ TEST(GSYMTest, TestGsymLookups) {
// FunctionInfo or InlineInfo, they only extract information needed for the
// lookup to happen which avoids allocations which can slow down
// symbolication.
- GsymCreator GC;
+ GsymCreatorV1 GC;
FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
const auto ByteOrder = llvm::endianness::native;
FI.OptLineTable = LineTable();
@@ -1228,7 +1228,7 @@ TEST(GSYMTest, TestGsymLookups) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
// Verify inline info is correct when doing lookups.
@@ -1338,7 +1338,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1348,7 +1348,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1416,7 +1416,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1426,7 +1426,7 @@ TEST(GSYMTest, TestDWARFFunctionWithAddressAndOffset) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1524,7 +1524,7 @@ TEST(GSYMTest, TestDWARFStructMethodNoMangled) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1534,7 +1534,7 @@ TEST(GSYMTest, TestDWARFStructMethodNoMangled) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1625,7 +1625,7 @@ TEST(GSYMTest, TestDWARFTextRanges) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
// Only allow addresses between [0x1000 - 0x2000) to be linked into the
// GSYM.
@@ -1640,7 +1640,7 @@ TEST(GSYMTest, TestDWARFTextRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1657,7 +1657,7 @@ TEST(GSYMTest, TestEmptySymbolEndAddressOfTextRanges) {
// Test that if we have valid text ranges and we have a symbol with no size
// as the last FunctionInfo entry that the size of the symbol gets set to the
// end address of the text range.
- GsymCreator GC;
+ GsymCreatorV1 GC;
AddressRanges TextRanges;
TextRanges.insert(AddressRange(0x1000, 0x2000));
GC.SetValidTextRanges(TextRanges);
@@ -1669,7 +1669,7 @@ TEST(GSYMTest, TestEmptySymbolEndAddressOfTextRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -1829,7 +1829,7 @@ TEST(GSYMTest, TestDWARFInlineInfo) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -1839,7 +1839,7 @@ TEST(GSYMTest, TestDWARFInlineInfo) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -2090,7 +2090,7 @@ TEST(GSYMTest, TestDWARFNoLines) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2100,7 +2100,7 @@ TEST(GSYMTest, TestDWARFNoLines) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
EXPECT_EQ(GR->getNumAddresses(), 4u);
@@ -2270,7 +2270,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr4) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2280,7 +2280,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr4) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// Test that the only function that made it was the "main" function.
@@ -2411,7 +2411,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
ASSERT_TRUE(DwarfContext.get() != nullptr);
auto &OS = llvm::nulls();
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -2421,7 +2421,7 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// Test that the only function that made it was the "main" function.
@@ -2433,12 +2433,12 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
EXPECT_EQ(MethodName, "main");
}
-TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
+TEST(GSYMTest, TestGsymCreatorV1MultipleSymbolsWithNoSize) {
// Multiple symbols at the same address with zero size were being emitted
// instead of being combined into a single entry. This function tests to make
// sure we only get one symbol.
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -2459,8 +2459,8 @@ TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
ArrayRef<uint8_t>(UUID));
}
-// Helper function to quickly create a FunctionInfo in a GsymCreator for testing.
-static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
+// Helper function to quickly create a FunctionInfo in a GsymCreatorV1 for testing.
+static void AddFunctionInfo(GsymCreatorV1 &GC, const char *FuncName,
uint64_t FuncAddr, const char *SourcePath,
const char *HeaderPath) {
FunctionInfo FI(FuncAddr, 0x30, GC.insertString(FuncName));
@@ -2499,9 +2499,9 @@ static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
GC.addFunctionInfo(std::move(FI));
}
-// Finalize a GsymCreator, encode it and decode it and return the error or
-// GsymReader that was successfully decoded.
-static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
+// Finalize a GsymCreatorV1, encode it and decode it and return the error or
+// GsymReaderV1 that was successfully decoded.
+static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreatorV1 &GC) {
OutputAggregator Null(nullptr);
Error FinalizeErr = GC.finalize(Null);
if (FinalizeErr)
@@ -2513,7 +2513,7 @@ static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
llvm::Error Err = GC.encode(FW);
if (Err)
return std::move(Err);
- return GsymReader::copyBuffer(OutStrm.str());
+ return GsymReaderV1::copyBuffer(OutStrm.str());
}
TEST(GSYMTest, TestGsymSegmenting) {
@@ -2522,21 +2522,21 @@ TEST(GSYMTest, TestGsymSegmenting) {
// encoding multiple segments, then we verify that we get the same information
// when doing lookups on the full GSYM that was decoded from encoding the
// entire GSYM and also by decoding information from the segments themselves.
- GsymCreator GC;
+ GsymCreatorV1 GC;
GC.setBaseAddress(0);
AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
+ Expected<GsymReaderV1> GR = FinalizeEncodeAndDecode(GC);
ASSERT_THAT_EXPECTED(GR, Succeeded());
//GR->dump(outs());
// Create segmented GSYM files where each file contains 1 function. We will
// then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReader objects for each segment to ensure we get
+ // doing lookups on the GsymReaderV1 objects for each segment to ensure we get
// the exact same information. So after all of the code below we will have
- // GsymReader objects that each contain one function. We name the creators
+ // GsymReaderV1 objects that each contain one function. We name the creators
// and readers to match the one and only address they contain.
// GC1000 and GR1000 are for [0x1000-0x1030)
// GC2000 and GR2000 are for [0x2000-0x2030)
@@ -2548,34 +2548,34 @@ TEST(GSYMTest, TestGsymSegmenting) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
"specify a larger value", GCError.takeError());
// Make sure that the function index didn't get incremented when we didn't
- // encode any values into the segmented GsymCreator.
+ // encode any values into the segmented GsymCreatorV1.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
- // and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
+ // and get a NULL GsymCreatorV1 in the return value from createSegment.
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -2583,21 +2583,21 @@ TEST(GSYMTest, TestGsymSegmenting) {
ASSERT_TRUE(GC3000.get() != nullptr);
ASSERT_TRUE(GC4000.get() != nullptr);
ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReader for each segment and verify they succeed.
- Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+ // Encode and decode the GsymReaderV1 for each segment and verify they succeed.
+ Expected<GsymReaderV1> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+ Expected<GsymReaderV1> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+ Expected<GsymReaderV1> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+ Expected<GsymReaderV1> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
ASSERT_THAT_EXPECTED(GR4000, Succeeded());
// Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR1000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR1000.
for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR1000->lookup(Addr);
@@ -2612,10 +2612,10 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR2000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR2000.
for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR2000->lookup(Addr);
@@ -2631,10 +2631,10 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR3000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR3000.
for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR3000->lookup(Addr);
@@ -2649,13 +2649,13 @@ TEST(GSYMTest, TestGsymSegmenting) {
}
// Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR4000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR4000.
for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReader for that contains 0x4000
+ // Lookup in the segment GsymReaderV1 that contains 0x4000
auto SegmentLR = GR4000->lookup(Addr);
ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
// Make sure the lookup results match.
@@ -2674,20 +2674,20 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
// encoding multiple segments, then we verify that we get the same information
// when doing lookups on the full GSYM that was decoded from encoding the
// entire GSYM and also by decoding information from the segments themselves.
- GsymCreator GC;
+ GsymCreatorV1 GC;
AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
+ Expected<GsymReaderV1> GR = FinalizeEncodeAndDecode(GC);
ASSERT_THAT_EXPECTED(GR, Succeeded());
//GR->dump(outs());
// Create segmented GSYM files where each file contains 1 function. We will
// then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReader objects for each segment to ensure we get
+ // doing lookups on the GsymReaderV1 objects for each segment to ensure we get
// the exact same information. So after all of the code below we will have
- // GsymReader objects that each contain one function. We name the creators
+ // GsymReaderV1 objects that each contain one function. We name the creators
// and readers to match the one and only address they contain.
// GC1000 and GR1000 are for [0x1000-0x1030)
// GC2000 and GR2000 are for [0x2000-0x2030)
@@ -2699,34 +2699,34 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
"specify a larger value", GCError.takeError());
// Make sure that the function index didn't get incremented when we didn't
- // encode any values into the segmented GsymCreator.
+ // encode any values into the segmented GsymCreatorV1.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
- // and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
+ // and get a NULL GsymCreatorV1 in the return value from createSegment.
+ llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -2734,21 +2734,21 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
ASSERT_TRUE(GC3000.get() != nullptr);
ASSERT_TRUE(GC4000.get() != nullptr);
ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReader for each segment and verify they succeed.
- Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
+ // Encode and decode the GsymReaderV1 for each segment and verify they succeed.
+ Expected<GsymReaderV1> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
+ Expected<GsymReaderV1> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
+ Expected<GsymReaderV1> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
+ Expected<GsymReaderV1> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
ASSERT_THAT_EXPECTED(GR4000, Succeeded());
// Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR1000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR1000.
for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR1000->lookup(Addr);
@@ -2763,10 +2763,10 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR2000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR2000.
for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR2000->lookup(Addr);
@@ -2782,10 +2782,10 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR3000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR3000.
for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
auto SegmentLR = GR3000->lookup(Addr);
@@ -2800,13 +2800,13 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
}
// Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR4000.
+ // in the GsymReaderV1 that contains all functions and from the segmented
+ // GsymReaderV1 in GR4000.
for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
+ // Lookup in the main GsymReaderV1 that contains all function infos
auto MainLR = GR->lookup(Addr);
ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReader for that contains 0x4000
+ // Lookup in the segment GsymReaderV1 that contains 0x4000
auto SegmentLR = GR4000->lookup(Addr);
ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
// Make sure the lookup results match.
@@ -3052,7 +3052,7 @@ TEST(GSYMTest, TestDWARFInlineRangeScopes) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3062,7 +3062,7 @@ TEST(GSYMTest, TestDWARFInlineRangeScopes) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -3280,7 +3280,7 @@ TEST(GSYMTest, TestDWARFEmptyInline) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3290,7 +3290,7 @@ TEST(GSYMTest, TestDWARFEmptyInline) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -3517,7 +3517,7 @@ TEST(GSYMTest, TestFinalizeForLineTables) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3527,7 +3527,7 @@ TEST(GSYMTest, TestFinalizeForLineTables) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should only be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -3797,7 +3797,7 @@ TEST(GSYMTest, TestRangeWarnings) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -3807,7 +3807,7 @@ TEST(GSYMTest, TestRangeWarnings) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -3999,7 +3999,7 @@ TEST(GSYMTest, TestEmptyRangeWarnings) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4009,7 +4009,7 @@ TEST(GSYMTest, TestEmptyRangeWarnings) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4151,7 +4151,7 @@ TEST(GSYMTest, TestEmptyLinkageName) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4161,7 +4161,7 @@ TEST(GSYMTest, TestEmptyLinkageName) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4312,7 +4312,7 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4322,7 +4322,7 @@ TEST(GSYMTest, TestLineTablesWithEmptyRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 1u);
@@ -4632,7 +4632,7 @@ TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4642,7 +4642,7 @@ TEST(GSYMTest, TestHandlingOfInvalidFileIndexes) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be one function in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 3u);
@@ -4847,7 +4847,7 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
@@ -4857,7 +4857,7 @@ TEST(GSYMTest, TestLookupsOfOverlappingAndUnequalRanges) {
const auto ByteOrder = llvm::endianness::native;
FileWriter FW(OutStrm, ByteOrder);
ASSERT_THAT_ERROR(GC.encode(FW), Succeeded());
- Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+ Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
ASSERT_THAT_EXPECTED(GR, Succeeded());
// There should be two functions in our GSYM.
EXPECT_EQ(GR->getNumAddresses(), 2u);
@@ -4947,7 +4947,7 @@ TEST(GSYMTest, TestUnableToLocateDWO) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
// Make a DWARF transformer that is MachO (Apple) to avoid warnings about
// not finding DWO files.
DwarfTransformer DT(*DwarfContext, GC, /*LDCS=*/false, /*MachO*/ true);
@@ -5074,7 +5074,7 @@ TEST(GSYMTest, TestDWARFTransformNoErrorForMissingFileDecl) {
std::string errors;
raw_string_ostream OS(errors);
OutputAggregator OSAgg(&OS);
- GsymCreator GC;
+ GsymCreatorV1 GC;
DwarfTransformer DT(*DwarfContext, GC);
const uint32_t ThreadCount = 1;
ASSERT_THAT_ERROR(DT.convert(ThreadCount, OSAgg), Succeeded());
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index 549c1f0980008..d91dfefc2d769 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -11,8 +11,10 @@
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GlobalData.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
@@ -864,8 +866,8 @@ TEST(GSYMV2Test, TestRoundTripSwappedAddressTable) {
/// Recursively re-insert inline info strings and files from a reader into a
/// creator.
-static void fixupInlineInfoForTransfer(const GsymReaderBase &Reader,
- GsymCreatorBase &Creator,
+static void fixupInlineInfoForTransfer(const GsymReader &Reader,
+ GsymCreator &Creator,
InlineInfo &II) {
II.Name = Creator.insertString(Reader.getString(II.Name));
if (II.CallFile != 0) {
@@ -888,8 +890,8 @@ static void fixupInlineInfoForTransfer(const GsymReaderBase &Reader,
/// Transfer all function infos from a reader into a creator, re-inserting
/// all strings and files so that offsets are valid in the new creator.
-static void transferFunctions(const GsymReaderBase &Reader,
- GsymCreatorBase &Creator) {
+static void transferFunctions(const GsymReader &Reader,
+ GsymCreator &Creator) {
for (uint32_t I = 0; I < Reader.getNumAddresses(); ++I) {
auto FI = Reader.getFunctionInfoAtIndex(I);
ASSERT_THAT_EXPECTED(FI, Succeeded());
@@ -926,8 +928,8 @@ static void transferFunctions(const GsymReaderBase &Reader,
}
}
-/// Encode a GsymCreatorBase to bytes.
-static SmallString<1024> encodeCreator(const GsymCreatorBase &GC) {
+/// Encode a GsymCreator to bytes.
+static SmallString<1024> encodeCreator(const GsymCreator &GC) {
SmallString<1024> Str;
raw_svector_ostream OS(Str);
FileWriter FW(OS, llvm::endianness::native);
@@ -938,7 +940,7 @@ static SmallString<1024> encodeCreator(const GsymCreatorBase &GC) {
/// Collect lookup results for a set of addresses from a reader.
static std::vector<LookupResult>
-collectLookups(const GsymReaderBase &Reader,
+collectLookups(const GsymReader &Reader,
ArrayRef<uint64_t> Addrs) {
std::vector<LookupResult> Results;
for (auto Addr : Addrs) {
@@ -952,7 +954,7 @@ collectLookups(const GsymReaderBase &Reader,
TEST(GSYMV2Test, TestVersionRoundTripV1ToV2ToV1) {
// Create a V1 GSYM with line tables and inline info.
- GsymCreator GC1;
+ GsymCreatorV1 GC1;
FunctionInfo FI(0x1000, 0x100, GC1.insertString("main"));
FI.OptLineTable = LineTable();
const uint32_t MainFile = GC1.insertFile("/tmp/main.c");
@@ -985,7 +987,7 @@ TEST(GSYMV2Test, TestVersionRoundTripV1ToV2ToV1) {
ASSERT_GT(OrigV1Bytes.size(), 0u);
// Read original V1.
- auto OrigReader = GsymReader::copyBuffer(OrigV1Bytes);
+ auto OrigReader = GsymReaderV1::copyBuffer(OrigV1Bytes);
ASSERT_THAT_EXPECTED(OrigReader, Succeeded());
// Collect lookup results from original V1.
@@ -1012,13 +1014,13 @@ TEST(GSYMV2Test, TestVersionRoundTripV1ToV2ToV1) {
<< "Mismatch at address " << TestAddrs[I] << " after V1->V2";
// Convert V2 → V1.
- GsymCreator GC3;
+ GsymCreatorV1 GC3;
transferFunctions(*V2Reader, GC3);
ASSERT_FALSE(bool(GC3.finalize(Null)));
SmallString<1024> FinalV1Bytes = encodeCreator(GC3);
ASSERT_GT(FinalV1Bytes.size(), 0u);
- auto FinalReader = GsymReader::copyBuffer(FinalV1Bytes);
+ auto FinalReader = GsymReaderV1::copyBuffer(FinalV1Bytes);
ASSERT_THAT_EXPECTED(FinalReader, Succeeded());
// Verify final V1 lookups match original V1.
@@ -1075,13 +1077,13 @@ TEST(GSYMV2Test, TestVersionRoundTripV2ToV1ToV2) {
ASSERT_EQ(OrigResults.size(), TestAddrs.size());
// Convert V2 → V1.
- GsymCreator GC2;
+ GsymCreatorV1 GC2;
transferFunctions(*OrigReader, GC2);
ASSERT_FALSE(bool(GC2.finalize(Null)));
SmallString<1024> V1Bytes = encodeCreator(GC2);
ASSERT_GT(V1Bytes.size(), 0u);
- auto V1Reader = GsymReader::copyBuffer(V1Bytes);
+ auto V1Reader = GsymReaderV1::copyBuffer(V1Bytes);
ASSERT_THAT_EXPECTED(V1Reader, Succeeded());
// Verify V1 lookups match original V2.
>From e9b68f79be60dd3d0bc1d2fe022da516d7bddb83 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 14:11:30 -0700
Subject: [PATCH 23/45] Move shared fields and methods from V1/V2 into
GsymCreator/GsymReader base classes
Creator: moved all fields and ~25 identical methods to GsymCreator base.
Only encode(), calculateHeaderAndTableSize(), and loadCallSitesFromYAML()
remain as virtual overrides. Added createNew() virtual factory for
createSegment() support.
Reader: moved shared fields (MemBuffer, Endian, AddrOffsets, AddrInfoOffsets,
Files, StrTab) and cached header values (BaseAddress, NumAddresses, AddrOffSize)
to GsymReader base. Moved ~20 identical methods including getAddress(),
getFunctionInfoDataForAddress(), lookup(), lookupAll(), and all dump overloads
(except dump(OS) which prints the version-specific header). Only parse() and
getHeader() remain in subclasses.
User prompt:
"Can you see if the v1/v2 classes share the same members (fields and methods),
and move them into the base classes?"
---
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 133 +++--
.../llvm/DebugInfo/GSYM/GsymCreatorV1.h | 110 +---
.../llvm/DebugInfo/GSYM/GsymCreatorV2.h | 433 +-------------
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 146 +++--
.../llvm/DebugInfo/GSYM/GsymReaderV1.h | 153 +----
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 388 +------------
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 360 ++++++++++++
llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp | 378 +------------
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 531 +-----------------
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 288 +++++++++-
llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp | 358 +-----------
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 342 +----------
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 26 +-
13 files changed, 942 insertions(+), 2704 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index e3a023a3558b9..eabe4089ef206 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -31,61 +31,108 @@ namespace gsym {
class FileWriter;
class OutputAggregator;
-/// GsymCreator is an abstract interface for creating GSYM data.
+/// GsymCreator is the base class for creating GSYM data.
///
/// The GsymCreator is designed to be used in 3 stages:
/// - Create FunctionInfo objects and add them
/// - Finalize the GsymCreator object
/// - Save to file or section
///
-/// The first stage involves creating FunctionInfo objects from another source
-/// of information like compiler debug info metadata, DWARF or Breakpad files.
-/// Any strings in the FunctionInfo or contained information, like InlineInfo
-/// or LineTable objects, should get the string table offsets by calling
-/// GsymCreator::insertString(...). Any file indexes that are needed should be
-/// obtained by calling GsymCreator::insertFile(...). All of the function calls
-/// in GsymCreator are thread safe. This allows multiple threads to create and
-/// add FunctionInfo objects while parsing debug information.
-///
-/// Once all of the FunctionInfo objects have been added, the
-/// GsymCreator::finalize(...) must be called prior to saving. This function
-/// will sort the FunctionInfo objects, finalize the string table, and do any
-/// other passes on the information needed to prepare the information to be
-/// saved.
-///
-/// Once the object has been finalized, it can be saved to a file or section.
-///
-/// Both GsymCreatorV1 and GsymCreatorV2 implement this interface.
+/// This base class contains all shared state and logic. Subclasses
+/// (GsymCreatorV1, GsymCreatorV2) implement version-specific encoding.
class GsymCreator {
+protected:
+ mutable std::mutex Mutex;
+ std::vector<FunctionInfo> Funcs;
+ StringTableBuilder StrTab;
+ StringSet<> StringStorage;
+ DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
+ DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
+ std::vector<llvm::gsym::FileEntry> Files;
+ std::vector<uint8_t> UUID;
+ std::optional<AddressRanges> ValidTextRanges;
+ std::optional<uint64_t> BaseAddress;
+ bool IsSegment = false;
+ bool Finalized = false;
+ bool Quiet;
+
+ LLVM_ABI std::optional<uint64_t> getFirstFunctionAddress() const;
+ LLVM_ABI std::optional<uint64_t> getLastFunctionAddress() const;
+ LLVM_ABI std::optional<uint64_t> getBaseAddress() const;
+ LLVM_ABI uint8_t getAddressOffsetSize() const;
+ LLVM_ABI uint64_t getMaxAddressOffset() const;
+
+ LLVM_ABI uint32_t insertFileEntry(FileEntry FE);
+ LLVM_ABI uint64_t copyFunctionInfo(const GsymCreator &SrcGC,
+ size_t FuncInfoIdx);
+ LLVM_ABI uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
+ LLVM_ABI uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
+ LLVM_ABI void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
+
+ LLVM_ABI llvm::Error saveSegments(StringRef Path,
+ llvm::endianness ByteOrder,
+ uint64_t SegmentSize) const;
+
+ void setIsSegment() { IsSegment = true; }
+
+ /// Version-specific: calculate header and table sizes.
+ virtual uint64_t calculateHeaderAndTableSize() const = 0;
+
+ /// Version-specific: create a new empty creator of the same version.
+ virtual std::unique_ptr<GsymCreator> createNew(bool Quiet) const = 0;
+
public:
+ LLVM_ABI GsymCreator(bool Quiet = false);
virtual ~GsymCreator() = default;
- virtual uint32_t insertString(StringRef S, bool Copy = true) = 0;
- virtual StringRef getString(uint32_t Offset) = 0;
- virtual uint32_t
- insertFile(StringRef Path,
- sys::path::Style Style = sys::path::Style::native) = 0;
- virtual void addFunctionInfo(FunctionInfo &&FI) = 0;
- virtual size_t getNumFunctionInfos() const = 0;
- virtual void
- forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) = 0;
- virtual void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const = 0;
- virtual llvm::Error finalize(OutputAggregator &OS) = 0;
- virtual llvm::Error
- save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const = 0;
+ /// Version-specific: encode to a FileWriter.
virtual llvm::Error encode(FileWriter &O) const = 0;
+
+ /// Version-specific: load call site info from YAML.
virtual llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) = 0;
- virtual void prepareMergedFunctions(OutputAggregator &Out) = 0;
-
- virtual void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) = 0;
- virtual void setBaseAddress(uint64_t Addr) = 0;
- virtual void SetValidTextRanges(AddressRanges &TextRanges) = 0;
- virtual const std::optional<AddressRanges> GetValidTextRanges() const = 0;
- virtual bool IsValidTextAddress(uint64_t Addr) const = 0;
- virtual bool isQuiet() const = 0;
+
+ LLVM_ABI llvm::Error
+ save(StringRef Path, llvm::endianness ByteOrder,
+ std::optional<uint64_t> SegmentSize = std::nullopt) const;
+
+ LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true);
+ LLVM_ABI StringRef getString(uint32_t Offset);
+
+ LLVM_ABI uint32_t
+ insertFile(StringRef Path,
+ sys::path::Style Style = sys::path::Style::native);
+
+ LLVM_ABI void addFunctionInfo(FunctionInfo &&FI);
+ LLVM_ABI size_t getNumFunctionInfos() const;
+
+ LLVM_ABI void
+ forEachFunctionInfo(
+ std::function<bool(FunctionInfo &)> const &Callback);
+ LLVM_ABI void forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const;
+
+ LLVM_ABI llvm::Error finalize(OutputAggregator &OS);
+ LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
+
+ void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
+ UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
+ }
+
+ void setBaseAddress(uint64_t Addr) { BaseAddress = Addr; }
+
+ void SetValidTextRanges(AddressRanges &TextRanges) {
+ ValidTextRanges = TextRanges;
+ }
+
+ const std::optional<AddressRanges> GetValidTextRanges() const {
+ return ValidTextRanges;
+ }
+
+ LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const;
+ bool isQuiet() const { return Quiet; }
+
+ LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreator>>
+ createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h
index 8bf537d14af90..5e683c52ff212 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV1.h
@@ -9,124 +9,20 @@
#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATORV1_H
#define LLVM_DEBUGINFO_GSYM_GSYMCREATORV1_H
-#include "llvm/Support/Compiler.h"
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <thread>
-
-#include "llvm/ADT/AddressRanges.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/DebugInfo/GSYM/FileEntry.h"
-#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/Path.h"
namespace llvm {
-
namespace gsym {
-class FileWriter;
-class OutputAggregator;
-/// GsymCreatorV1 is used to emit GSYM V1 data to a stand alone file or section
-/// within a file.
-///
-/// See GsymCreator for the 3-stage usage pattern and file format documentation.
class GsymCreatorV1 : public GsymCreator {
- // Private member variables require Mutex protections
- mutable std::mutex Mutex;
- std::vector<FunctionInfo> Funcs;
- StringTableBuilder StrTab;
- StringSet<> StringStorage;
- DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
- // Needed for mapping string offsets back to the string stored in \a StrTab.
- DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
- std::vector<llvm::gsym::FileEntry> Files;
- std::vector<uint8_t> UUID;
- std::optional<AddressRanges> ValidTextRanges;
- std::optional<uint64_t> BaseAddress;
- bool IsSegment = false;
- bool Finalized = false;
- bool Quiet;
-
-
- std::optional<uint64_t> getFirstFunctionAddress() const;
- std::optional<uint64_t> getLastFunctionAddress() const;
- std::optional<uint64_t> getBaseAddress() const;
- uint8_t getAddressOffsetSize() const;
- uint64_t getMaxAddressOffset() const;
- uint64_t calculateHeaderAndTableSize() const;
- uint64_t copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncInfoIdx);
- uint32_t copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff);
- uint32_t copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx);
- uint32_t insertFileEntry(FileEntry FE);
- void fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II);
- llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
- uint64_t SegmentSize) const;
- void setIsSegment() {
- IsSegment = true;
- }
+ uint64_t calculateHeaderAndTableSize() const override;
+ std::unique_ptr<GsymCreator> createNew(bool Quiet) const override;
public:
- LLVM_ABI GsymCreatorV1(bool Quiet = false);
-
- LLVM_ABI llvm::Error
- save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const override;
+ GsymCreatorV1(bool Quiet = false) : GsymCreator(Quiet) {}
LLVM_ABI llvm::Error encode(FileWriter &O) const override;
-
- LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true) override;
-
- LLVM_ABI StringRef getString(uint32_t Offset) override;
-
- LLVM_ABI uint32_t
- insertFile(StringRef Path,
- sys::path::Style Style = sys::path::Style::native) override;
-
- LLVM_ABI void addFunctionInfo(FunctionInfo &&FI) override;
-
LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) override;
-
- LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out) override;
-
- LLVM_ABI llvm::Error finalize(OutputAggregator &OS) override;
-
- void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) override {
- UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
- }
-
- LLVM_ABI void
- forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) override;
-
- LLVM_ABI void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const override;
-
- LLVM_ABI size_t getNumFunctionInfos() const override;
-
- void SetValidTextRanges(AddressRanges &TextRanges) override {
- ValidTextRanges = TextRanges;
- }
-
- const std::optional<AddressRanges> GetValidTextRanges() const override {
- return ValidTextRanges;
- }
-
- LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const override;
-
- void setBaseAddress(uint64_t Addr) override {
- BaseAddress = Addr;
- }
-
- bool isQuiet() const override { return Quiet; }
-
- LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreatorV1>>
- createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
index 9e2067e0b1c72..9d222ce2262ae 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreatorV2.h
@@ -9,446 +9,21 @@
#ifndef LLVM_DEBUGINFO_GSYM_GSYMCREATORV2_H
#define LLVM_DEBUGINFO_GSYM_GSYMCREATORV2_H
-#include "llvm/Support/Compiler.h"
-#include <functional>
-#include <memory>
-#include <mutex>
-#include <thread>
-
-#include "llvm/ADT/AddressRanges.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/DebugInfo/GSYM/FileEntry.h"
-#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/Path.h"
namespace llvm {
-
namespace gsym {
-class FileWriter;
-class OutputAggregator;
-/// GsymCreatorV2 is used to emit GSYM V2 data to a stand alone file or section
-/// within a file.
-///
-/// The GsymCreatorV2 is designed to be used in 3 stages:
-/// - Create FunctionInfo objects and add them
-/// - Finalize the GsymCreatorV2 object
-/// - Save to file or section
-///
-/// The first stage involves creating FunctionInfo objects from another source
-/// of information like compiler debug info metadata, DWARF or Breakpad files.
-/// Any strings in the FunctionInfo or contained information, like InlineInfo
-/// or LineTable objects, should get the string table offsets by calling
-/// GsymCreatorV2::insertString(...). Any file indexes that are needed should be
-/// obtained by calling GsymCreatorV2::insertFile(...). All of the function calls
-/// in GsymCreatorV2 are thread safe. This allows multiple threads to create and
-/// add FunctionInfo objects while parsing debug information.
-///
-/// Once all of the FunctionInfo objects have been added, the
-/// GsymCreatorV2::finalize(...) must be called prior to saving. This function
-/// will sort the FunctionInfo objects, finalize the string table, and do any
-/// other passes on the information needed to prepare the information to be
-/// saved.
-///
-/// Once the object has been finalized, it can be saved to a file or section.
-///
-/// ENCODING
-///
-/// GSYM V2 files are designed to be memory mapped into a process as shared,
-/// read only data, and used as is.
-///
-/// The V2 file layout is:
-///
-/// [HeaderV2 - 24 bytes fixed]
-/// [GlobalData entries - array of 24-byte entries, terminated by EndOfList]
-/// [... data sections at file offsets specified by GlobalData entries ...]
-///
-/// The header is fully described in "llvm/DebugInfo/GSYM/HeaderV2.h".
-/// Each GlobalData entry (see "llvm/DebugInfo/GSYM/GlobalData.h") describes
-/// a data section by its type, file offset, and size. Sections can appear in
-/// any order since each entry contains an absolute file offset (relative to
-/// the start of the GSYM data). The GlobalData array is terminated by an
-/// entry with type EndOfList and all other fields set to zero.
-///
-/// The data sections are:
-///
-/// - AddrOffsets: Sorted address offset table with Header.NumAddresses
-/// entries, each Header.AddrOffSize bytes. Addresses are stored as offsets
-/// from Header.BaseAddress. Aligned to Header.AddrOffSize.
-///
-/// - AddrInfoOffsets: File offset table with Header.NumAddresses entries,
-/// each Header.AddrInfoOffSize bytes. Each entry is the file offset (from
-/// the start of the GSYM data) to the corresponding FunctionInfo. Aligned
-/// to Header.AddrInfoOffSize.
-///
-/// - FileTable: A uint32_t count followed by that many FileEntry structs.
-/// See "llvm/DebugInfo/GSYM/FileEntry.h". Aligned to 4 bytes.
-///
-/// - StringTable: NULL-terminated strings referenced by offset. Starts with
-/// an empty string at offset zero. No alignment requirement.
-///
-/// - FunctionInfo: Encoded FunctionInfo objects. Each entry is pointed to by
-/// the AddrInfoOffsets table. See "llvm/DebugInfo/GSYM/FunctionInfo.h".
-/// Aligned to 4 bytes.
-///
-/// - UUID: Raw UUID bytes of the original executable. Only present if a UUID
-/// was set. No alignment requirement.
+/// GsymCreatorV2 emits GSYM V2 data with a GlobalData-based section layout.
class GsymCreatorV2 : public GsymCreator {
- // Private member variables require Mutex protections
- mutable std::mutex Mutex;
- std::vector<FunctionInfo> Funcs;
- StringTableBuilder StrTab;
- StringSet<> StringStorage;
- DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
- // Needed for mapping string offsets back to the string stored in \a StrTab.
- DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
- std::vector<llvm::gsym::FileEntry> Files;
- std::vector<uint8_t> UUID;
- std::optional<AddressRanges> ValidTextRanges;
- std::optional<uint64_t> BaseAddress;
- bool IsSegment = false;
- bool Finalized = false;
- bool Quiet;
-
-
- /// Get the first function start address.
- ///
- /// \returns The start address of the first FunctionInfo or std::nullopt if
- /// there are no function infos.
- std::optional<uint64_t> getFirstFunctionAddress() const;
-
- /// Get the last function address.
- ///
- /// \returns The start address of the last FunctionInfo or std::nullopt if
- /// there are no function infos.
- std::optional<uint64_t> getLastFunctionAddress() const;
-
- /// Get the base address to use for this GSYM file.
- ///
- /// \returns The base address to put into the header and to use when creating
- /// the address offset table or std::nullopt if there are no valid
- /// function infos or if the base address wasn't specified.
- std::optional<uint64_t> getBaseAddress() const;
-
- /// Get the size of an address offset in the address offset table.
- ///
- /// GSYM files store offsets from the base address in the address offset table
- /// and we store the size of the address offsets in the GSYM header. This
- /// function will calculate the size in bytes of these address offsets based
- /// on the current contents of the GSYM file.
- ///
- /// \returns The size in bytes of the address offsets.
- uint8_t getAddressOffsetSize() const;
-
- /// Get the maximum address offset for the current address offset size.
- ///
- /// This is used when creating the address offset table to ensure we have
- /// values that are in range so we don't end up truncating address offsets
- /// when creating GSYM files as the code evolves.
- ///
- /// \returns The maximum address offset value that will be encoded into a GSYM
- /// file.
- uint64_t getMaxAddressOffset() const;
-
- /// Calculate the byte size of the GSYM V2 header, GlobalData entries, and
- /// table sections (everything except FunctionInfo data).
- ///
- /// This is used to help split GSYM files into segments.
- ///
- /// \returns Size in bytes of the header and tables.
- uint64_t calculateHeaderAndTableSize() const;
-
- /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
- ///
- /// Copy the function info and only the needed files and strings and add a
- /// converted FunctionInfo into this object. This is used to segment GSYM
- /// files into separate files while only transferring the files and strings
- /// that are needed from \a SrcGC.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
- /// \returns The number of bytes it will take to encode the function info in
- /// this GsymCreatorV2. This helps calculate the size of the current GSYM
- /// segment file.
- uint64_t copyFunctionInfo(const GsymCreatorV2 &SrcGC, size_t FuncInfoIdx);
-
- /// Copy a string from \a SrcGC into this object.
- ///
- /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
- /// If a string has already been copied, the uniqued string table offset will
- /// be returned, otherwise the string will be copied and a unique offset will
- /// be returned.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param StrOff The string table offset from \a SrcGC to copy.
- /// \returns The new string table offset of the string within this object.
- uint32_t copyString(const GsymCreatorV2 &SrcGC, uint32_t StrOff);
-
- /// Copy a file from \a SrcGC into this object.
- ///
- /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
- /// consist of two string table entries, one for the directory and one for the
- /// filename. This function will copy any needed strings and ensure the file
- /// is uniqued within this object. If a file already exists in this GSYM
- /// creator the uniqued index will be returned, else the strings will be copied and
- /// the new file index will be returned.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
- /// file index of zero will always return zero as the zero is a reserved file
- /// index that means no file.
- /// \returns The new file index of the file within this object.
- uint32_t copyFile(const GsymCreatorV2 &SrcGC, uint32_t FileIdx);
-
- /// Inserts a FileEntry into the file table.
- ///
- /// This is used to insert a file entry in a thread safe way into this object.
- ///
- /// \param FE A file entry object that contains valid string table offsets
- /// from this object already.
- uint32_t insertFileEntry(FileEntry FE);
-
- /// Fixup any string and file references by updating any file indexes and
- /// strings offsets in the InlineInfo parameter.
- ///
- /// When copying InlineInfo entries, we can simply make a copy of the object
- /// and then fixup the files and strings for efficiency.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param II The inline info that contains file indexes and string offsets
- /// that come from \a SrcGC. The entries will be updated by copying any files
- /// and strings over into this object.
- void fixupInlineInfo(const GsymCreatorV2 &SrcGC, InlineInfo &II);
-
- /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
- ///
- /// When segmented GSYM files are saved to disk, they will use \a Path as a
- /// prefix and then have the first function info address appended to the path
- /// when each segment is saved. Each segmented GSYM file has only the
- /// strings and files that are needed to save the function infos that are in
- /// each segment. These smaller files are easy to compress and download
- /// separately and allow for efficient lookups with very large GSYM files and
- /// segmenting them allows servers to download only the segments that are
- /// needed.
- ///
- /// \param Path The path prefix to use when saving the GSYM files.
- /// \param ByteOrder The endianness to use when saving the file.
- /// \param SegmentSize The size in bytes to segment the GSYM file into.
- llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
- uint64_t SegmentSize) const;
-
- /// Let this creator know that this is a segment of another GsymCreatorV2.
- ///
- /// When we have a segment, we know that function infos will be added in
- /// ascending address range order without having to be finalized. We also
- /// don't need to sort and unique entries during the finalize function call.
- void setIsSegment() {
- IsSegment = true;
- }
+ uint64_t calculateHeaderAndTableSize() const override;
+ std::unique_ptr<GsymCreator> createNew(bool Quiet) const override;
public:
- LLVM_ABI GsymCreatorV2(bool Quiet = false);
+ GsymCreatorV2(bool Quiet = false) : GsymCreator(Quiet) {}
- /// Save a GSYM file to a stand alone file.
- ///
- /// \param Path The file path to save the GSYM file to.
- /// \param ByteOrder The endianness to use when saving the file.
- /// \param SegmentSize The size in bytes to segment the GSYM file into. If
- /// this option is set this function will create N segments
- /// that are all around \a SegmentSize bytes in size. This
- /// allows a very large GSYM file to be broken up into
- /// shards. Each GSYM file will have its own file table,
- /// and string table that only have the files and strings
- /// needed for the shard. If this argument has no value,
- /// a single GSYM file that contains all function
- /// information will be created.
- /// \returns An error object that indicates success or failure of the save.
- LLVM_ABI llvm::Error
- save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const override;
-
- /// Encode a GSYM into the file writer stream at the current position.
- ///
- /// \param O The stream to save the binary data to
- /// \returns An error object that indicates success or failure of the save.
LLVM_ABI llvm::Error encode(FileWriter &O) const override;
-
- /// Insert a string into the GSYM string table.
- ///
- /// All strings used by GSYM files must be uniqued by adding them to this
- /// string pool and using the returned offset for any string values.
- ///
- /// \param S The string to insert into the string table.
- /// \param Copy If true, then make a backing copy of the string. If false,
- /// the string is owned by another object that will stay around
- /// long enough for the GsymCreatorV2 to save the GSYM file.
- /// \returns The unique 32 bit offset into the string table.
- LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true) override;
-
- /// Retrieve a string from the GSYM string table given its offset.
- ///
- /// The offset is assumed to be a valid offset into the string table.
- /// otherwise an assert will be triggered.
- ///
- /// \param Offset The offset of the string to retrieve, previously returned by
- /// insertString.
- /// \returns The string at the given offset in the string table.
- LLVM_ABI StringRef getString(uint32_t Offset) override;
-
- /// Insert a file into this GSYM creator.
- ///
- /// Inserts a file by adding a FileEntry into the "Files" member variable if
- /// the file has not already been added. The file path is split into
- /// directory and filename which are both added to the string table. This
- /// allows paths to be stored efficiently by reusing the directories that are
- /// common between multiple files.
- ///
- /// \param Path The path to the file to insert.
- /// \param Style The path style for the "Path" parameter.
- /// \returns The unique file index for the inserted file.
- LLVM_ABI uint32_t
- insertFile(StringRef Path,
- sys::path::Style Style = sys::path::Style::native) override;
-
- /// Add a function info to this GSYM creator.
- ///
- /// All information in the FunctionInfo object must use the
- /// GsymCreatorV2::insertString(...) function when creating string table
- /// offsets for names and other strings.
- ///
- /// \param FI The function info object to emplace into our functions list.
- LLVM_ABI void addFunctionInfo(FunctionInfo &&FI) override;
-
- /// Load call site information from a YAML file.
- ///
- /// This function reads call site information from a specified YAML file and
- /// adds it to the GSYM data.
- ///
- /// \param YAMLFile The path to the YAML file containing call site
- /// information.
LLVM_ABI llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) override;
-
- /// Organize merged FunctionInfo's
- ///
- /// This method processes the list of function infos (Funcs) to identify and
- /// group functions with overlapping address ranges.
- ///
- /// \param Out Output stream to report information about how merged
- /// FunctionInfo's were handled.
- LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out) override;
-
- /// Finalize the data in the GSYM creator prior to saving the data out.
- ///
- /// Finalize must be called after all FunctionInfo objects have been added
- /// and before GsymCreatorV2::save() is called.
- ///
- /// \param OS Output stream to report duplicate function infos, overlapping
- /// function infos, and function infos that were merged or removed.
- /// \returns An error object that indicates success or failure of the
- /// finalize.
- LLVM_ABI llvm::Error finalize(OutputAggregator &OS) override;
-
- /// Set the UUID value.
- ///
- /// \param UUIDBytes The new UUID bytes.
- void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) override {
- UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
- }
-
- /// Thread safe iteration over all function infos.
- ///
- /// \param Callback A callback function that will get called with each
- /// FunctionInfo. If the callback returns false, stop iterating.
- LLVM_ABI void
- forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) override;
-
- /// Thread safe const iteration over all function infos.
- ///
- /// \param Callback A callback function that will get called with each
- /// FunctionInfo. If the callback returns false, stop iterating.
- LLVM_ABI void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const override;
-
- /// Get the current number of FunctionInfo objects contained in this
- /// object.
- LLVM_ABI size_t getNumFunctionInfos() const override;
-
- /// Set valid .text address ranges that all functions must be contained in.
- void SetValidTextRanges(AddressRanges &TextRanges) override {
- ValidTextRanges = TextRanges;
- }
-
- /// Get the valid text ranges.
- const std::optional<AddressRanges> GetValidTextRanges() const override {
- return ValidTextRanges;
- }
-
- /// Check if an address is a valid code address.
- ///
- /// Any functions whose addresses do not exist within these function bounds
- /// will not be converted into the final GSYM. This allows the object file
- /// to figure out the valid file address ranges of all the code sections
- /// and ensure we don't add invalid functions to the final output. Many
- /// linkers have issues when dead stripping functions from DWARF debug info
- /// where they set the DW_AT_low_pc to zero, but newer DWARF has the
- /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size
- /// attributes have no relocations that can be applied. This results in DWARF
- /// where many functions have an DW_AT_low_pc of zero and a valid offset size
- /// for DW_AT_high_pc. If we extract all valid ranges from an object file
- /// that are marked with executable permissions, we can properly ensure that
- /// these functions are removed.
- ///
- /// \param Addr An address to check.
- ///
- /// \returns True if the address is in the valid text ranges or if no valid
- /// text ranges have been set, false otherwise.
- LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const override;
-
- /// Set the base address to use for the GSYM file.
- ///
- /// Setting the base address to use for the GSYM file. Object files typically
- /// get loaded from a base address when the OS loads them into memory. Using
- /// GSYM files for symbolication becomes easier if the base address in the
- /// GSYM header is the same address as it allows addresses to be easily slid
- /// and allows symbolication without needing to find the original base
- /// address in the original object file.
- ///
- /// \param Addr The address to use as the base address of the GSYM file
- /// when it is saved to disk.
- void setBaseAddress(uint64_t Addr) override {
- BaseAddress = Addr;
- }
-
- /// Whether the transformation should be quiet, i.e. not output warnings.
- bool isQuiet() const override { return Quiet; }
-
-
- /// Create a segmented GSYM creator starting with function info index
- /// \a FuncIdx.
- ///
- /// This function will create a GsymCreatorV2 object that will encode into
- /// roughly \a SegmentSize bytes and return it. It is used by the private
- /// saveSegments(...) function and also is used by the GSYM unit tests to test
- /// segmenting of GSYM files. The returned GsymCreatorV2 can be finalized and
- /// encoded.
- ///
- /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
- /// into.
- /// \param [in,out] FuncIdx The index of the first function info to encode
- /// into the returned GsymCreatorV2. This index will be updated so it can be
- /// used in subsequent calls to this function to allow more segments to be
- /// created.
- /// \returns An expected unique pointer to a GsymCreatorV2 or an error. The
- /// returned unique pointer can be NULL if there are no more functions to
- /// encode.
- LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreatorV2>>
- createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index a279984f71e29..35a1a58be9bfa 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -30,85 +30,119 @@ class raw_ostream;
namespace gsym {
-/// GsymReader is an abstract interface for reading GSYM data.
+/// GsymReader is the base class for reading GSYM data.
///
-/// This interface provides the methods needed by FunctionInfo::lookup and
-/// InlineInfo::lookup to resolve strings and files during symbolication.
-/// Both GsymReaderV1 and GsymReaderV2 implement this interface.
+/// This class contains all shared state and logic for V1 and V2 readers.
+/// Subclasses implement version-specific parsing (parse()) and header access.
class GsymReader {
+protected:
+ std::unique_ptr<MemoryBuffer> MemBuffer;
+ llvm::endianness Endian;
+ ArrayRef<uint8_t> AddrOffsets;
+ ArrayRef<uint32_t> AddrInfoOffsets;
+ ArrayRef<FileEntry> Files;
+ StringTable StrTab;
+
+ // Cached header values, populated by subclass parse().
+ uint64_t CachedBaseAddress = 0;
+ uint32_t CachedNumAddresses = 0;
+ uint8_t CachedAddrOffSize = 0;
+
+ LLVM_ABI GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
+
+ template <class T> ArrayRef<T>
+ getAddrOffsets() const {
+ return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
+ AddrOffsets.size()/sizeof(T));
+ }
+
+ template <class T>
+ std::optional<uint64_t> addressForIndex(size_t Index) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ if (Index < AIO.size())
+ return AIO[Index] + CachedBaseAddress;
+ return std::nullopt;
+ }
+
+ template <class T>
+ std::optional<uint64_t>
+ getAddressOffsetIndex(const uint64_t AddrOffset) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ const auto Begin = AIO.begin();
+ const auto End = AIO.end();
+ auto Iter = std::lower_bound(Begin, End, AddrOffset);
+ if (Iter == Begin && AddrOffset < *Begin)
+ return std::nullopt;
+ if (Iter == End || AddrOffset < *Iter)
+ --Iter;
+
+ while (Iter != Begin) {
+ auto Prev = Iter - 1;
+ if (*Prev == *Iter)
+ Iter = Prev;
+ else
+ break;
+ }
+
+ return std::distance(Begin, Iter);
+ }
+
+ LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
+ LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
+
public:
+ LLVM_ABI GsymReader(GsymReader &&RHS);
virtual ~GsymReader() = default;
/// Open a GSYM file, auto-detecting the format version.
- ///
- /// \param Path The file path of the GSYM file to read.
- /// \returns An expected unique_ptr to a GsymReader or an error.
LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
openFile(StringRef Path);
/// Construct a GsymReader from a buffer, auto-detecting the format version.
- ///
- /// \param Bytes A set of bytes that will be copied and owned by the
- /// returned object on success.
- /// \returns An expected unique_ptr to a GsymReader or an error.
LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
copyBuffer(StringRef Bytes);
- /// Get a string from the string table.
- virtual StringRef getString(uint32_t Offset) const = 0;
+ StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
- /// Get a file entry for the supplied file index.
- virtual std::optional<FileEntry> getFile(uint32_t Index) const = 0;
+ std::optional<FileEntry> getFile(uint32_t Index) const {
+ if (Index < Files.size())
+ return Files[Index];
+ return std::nullopt;
+ }
- /// Get the full function info for an address.
- virtual llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const = 0;
+ uint32_t getNumAddresses() const { return CachedNumAddresses; }
- /// Get the full function info given an address index.
- virtual llvm::Expected<FunctionInfo>
- getFunctionInfoAtIndex(uint64_t AddrIdx) const = 0;
+ LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+ LLVM_ABI llvm::Expected<FunctionInfo>
+ getFunctionInfoAtIndex(uint64_t AddrIdx) const;
- /// Lookup an address in the GSYM.
- virtual llvm::Expected<LookupResult>
+ LLVM_ABI llvm::Expected<LookupResult>
lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFuncsData = nullptr) const = 0;
-
- /// Lookup all merged functions for a given address.
- virtual llvm::Expected<std::vector<LookupResult>>
- lookupAll(uint64_t Addr) const = 0;
+ std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
- /// Get the number of addresses in this GSYM file.
- virtual uint32_t getNumAddresses() const = 0;
+ LLVM_ABI llvm::Expected<std::vector<LookupResult>>
+ lookupAll(uint64_t Addr) const;
- /// Gets an address from the address table.
- virtual std::optional<uint64_t> getAddress(size_t Index) const = 0;
+ LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
- /// Dump the entire GSYM data contained in this object.
+ /// Dump the entire GSYM data. Version-specific (header format differs).
virtual void dump(raw_ostream &OS) = 0;
- /// Dump a FunctionInfo object.
- virtual void dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent = 0) = 0;
-
- /// Dump a MergedFunctionsInfo object.
- virtual void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) = 0;
-
- /// Dump a CallSiteInfo object.
- virtual void dump(raw_ostream &OS, const CallSiteInfo &CSI) = 0;
-
- /// Dump a CallSiteInfoCollection object.
- virtual void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent = 0) = 0;
-
- /// Dump a LineTable object.
- virtual void dump(raw_ostream &OS, const LineTable &LT,
- uint32_t Indent = 0) = 0;
-
- /// Dump a InlineInfo object.
- virtual void dump(raw_ostream &OS, const InlineInfo &II,
- uint32_t Indent = 0) = 0;
-
- /// Dump a FileEntry object.
- virtual void dump(raw_ostream &OS, std::optional<FileEntry> FE) = 0;
+ LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
+ uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
+ LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
+ uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
+ uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
};
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
index a3dac28616548..f489beecdf336 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
@@ -9,56 +9,20 @@
#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADERV1_H
#define LLVM_DEBUGINFO_GSYM_GSYMREADERV1_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/GSYM/FileEntry.h"
-#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/Header.h"
-#include "llvm/DebugInfo/GSYM/LineEntry.h"
-#include "llvm/DebugInfo/GSYM/StringTable.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorOr.h"
-#include <inttypes.h>
-#include <memory>
-#include <stdint.h>
-#include <vector>
namespace llvm {
class MemoryBuffer;
-class raw_ostream;
namespace gsym {
-/// GsymReaderV1 is used to read GSYM V1 data from a file or buffer.
-///
-/// This class is optimized for very quick lookups when the endianness matches
-/// the host system. The Header, address table, address info offsets, and file
-/// table is designed to be mmap'ed as read only into memory and used without
-/// any parsing needed. If the endianness doesn't match, we swap these objects
-/// and tables into GsymReaderV1::SwappedData and then point our header and
-/// ArrayRefs to this swapped internal data.
-///
-/// GsymReaderV1 objects must use one of the static functions to create an
-/// instance: GsymReaderV1::openFile(...) and GsymReaderV1::copyBuffer(...).
-
+/// GsymReaderV1 reads GSYM V1 data from a file or buffer.
class GsymReaderV1 : public GsymReader {
GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
- std::unique_ptr<MemoryBuffer> MemBuffer;
- StringRef GsymBytes;
- llvm::endianness Endian;
const Header *Hdr = nullptr;
- ArrayRef<uint8_t> AddrOffsets;
- ArrayRef<uint32_t> AddrInfoOffsets;
- ArrayRef<FileEntry> Files;
- StringTable StrTab;
- /// When the GSYM file's endianness doesn't match the host system then
- /// we must decode all data structures that need to be swapped into
- /// local storage and set point the ArrayRef objects above to these swapped
- /// copies.
struct SwappedData {
Header Hdr;
std::vector<uint8_t> AddrOffsets;
@@ -67,127 +31,20 @@ class GsymReaderV1 : public GsymReader {
};
std::unique_ptr<SwappedData> Swap;
+ LLVM_ABI static llvm::Expected<GsymReaderV1>
+ create(std::unique_ptr<MemoryBuffer> &MemBuffer);
+
public:
LLVM_ABI GsymReaderV1(GsymReaderV1 &&RHS);
LLVM_ABI ~GsymReaderV1() override;
- /// Construct a GsymReaderV1 from a file on disk.
- ///
- /// \param Path The file path the GSYM file to read.
- /// \returns An expected GsymReaderV1 that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
LLVM_ABI static llvm::Expected<GsymReaderV1> openFile(StringRef Path);
-
- /// Construct a GsymReaderV1 from a buffer.
- ///
- /// \param Bytes A set of bytes that will be copied and owned by the
- /// returned object on success.
- /// \returns An expected GsymReaderV1 that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
LLVM_ABI static llvm::Expected<GsymReaderV1> copyBuffer(StringRef Bytes);
- /// Access the GSYM header.
- /// \returns A native endian version of the GSYM header.
LLVM_ABI const Header &getHeader() const;
- LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfo(uint64_t Addr) const override;
-
- LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfoAtIndex(uint64_t AddrIdx) const override;
-
- LLVM_ABI llvm::Expected<LookupResult>
- lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFuncsData = nullptr) const override;
-
- LLVM_ABI llvm::Expected<std::vector<LookupResult>>
- lookupAll(uint64_t Addr) const override;
-
- StringRef getString(uint32_t Offset) const override { return StrTab[Offset]; }
-
- std::optional<FileEntry> getFile(uint32_t Index) const override {
- if (Index < Files.size())
- return Files[Index];
- return std::nullopt;
- }
-
+ using GsymReader::dump;
LLVM_ABI void dump(raw_ostream &OS) override;
-
- LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent = 0) override;
-
- LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) override;
-
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI) override;
-
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent = 0) override;
-
- LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
- uint32_t Indent = 0) override;
-
- LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
- uint32_t Indent = 0) override;
-
- LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE) override;
-
- uint32_t getNumAddresses() const override {
- return Hdr->NumAddresses;
- }
-
- LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const override;
-
-protected:
-
- template <class T> ArrayRef<T>
- getAddrOffsets() const {
- return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
- AddrOffsets.size()/sizeof(T));
- }
-
- template <class T>
- std::optional<uint64_t> addressForIndex(size_t Index) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- if (Index < AIO.size())
- return AIO[Index] + Hdr->BaseAddress;
- return std::nullopt;
- }
-
- template <class T>
- std::optional<uint64_t>
- getAddressOffsetIndex(const uint64_t AddrOffset) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- const auto Begin = AIO.begin();
- const auto End = AIO.end();
- auto Iter = std::lower_bound(Begin, End, AddrOffset);
- if (Iter == Begin && AddrOffset < *Begin)
- return std::nullopt;
- if (Iter == End || AddrOffset < *Iter)
- --Iter;
-
- while (Iter != Begin) {
- auto Prev = Iter - 1;
- if (*Prev == *Iter)
- Iter = Prev;
- else
- break;
- }
-
- return std::distance(Begin, Iter);
- }
-
- LLVM_ABI static llvm::Expected<llvm::gsym::GsymReaderV1>
- create(std::unique_ptr<MemoryBuffer> &MemBuffer);
-
- LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
-
- LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
-
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
-
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
};
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
index 5ce96bfe45f59..d4851140dfc2a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -9,56 +9,20 @@
#ifndef LLVM_DEBUGINFO_GSYM_GSYMREADERV2_H
#define LLVM_DEBUGINFO_GSYM_GSYMREADERV2_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/GSYM/FileEntry.h"
-#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
-#include "llvm/DebugInfo/GSYM/LineEntry.h"
-#include "llvm/DebugInfo/GSYM/StringTable.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/ErrorOr.h"
-#include <inttypes.h>
-#include <memory>
-#include <stdint.h>
-#include <vector>
namespace llvm {
class MemoryBuffer;
-class raw_ostream;
namespace gsym {
-/// GsymReaderV2 is used to read GSYM V2 data from a file or buffer.
-///
-/// This class is optimized for very quick lookups when the endianness matches
-/// the host system. The HeaderV2, address table, address info offsets, and file
-/// table is designed to be mmap'ed as read only into memory and used without
-/// any parsing needed. If the endianness doesn't match, we swap these objects
-/// and tables into GsymReaderV2::SwappedData and then point our header and
-/// ArrayRefs to this swapped internal data.
-///
-/// GsymReaderV2 objects must use one of the static functions to create an
-/// instance: GsymReaderV2::openFile(...) and GsymReaderV2::copyBuffer(...).
-
+/// GsymReaderV2 reads GSYM V2 data from a file or buffer.
class GsymReaderV2 : public GsymReader {
GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer);
llvm::Error parse();
- std::unique_ptr<MemoryBuffer> MemBuffer;
- StringRef GsymBytes;
- llvm::endianness Endian;
const HeaderV2 *Hdr = nullptr;
- ArrayRef<uint8_t> AddrOffsets;
- ArrayRef<uint32_t> AddrInfoOffsets;
- ArrayRef<FileEntry> Files;
- StringTable StrTab;
- /// When the GSYM file's endianness doesn't match the host system then
- /// we must decode all data structures that need to be swapped into
- /// local storage and set point the ArrayRef objects above to these swapped
- /// copies.
struct SwappedData {
HeaderV2 Hdr;
std::vector<uint8_t> AddrOffsets;
@@ -67,362 +31,20 @@ class GsymReaderV2 : public GsymReader {
};
std::unique_ptr<SwappedData> Swap;
+ LLVM_ABI static llvm::Expected<GsymReaderV2>
+ create(std::unique_ptr<MemoryBuffer> &MemBuffer);
+
public:
LLVM_ABI GsymReaderV2(GsymReaderV2 &&RHS);
LLVM_ABI ~GsymReaderV2() override;
- /// Construct a GsymReaderV2 from a file on disk.
- ///
- /// \param Path The file path the GSYM file to read.
- /// \returns An expected GsymReaderV2 that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
LLVM_ABI static llvm::Expected<GsymReaderV2> openFile(StringRef Path);
-
- /// Construct a GsymReaderV2 from a buffer.
- ///
- /// \param Bytes A set of bytes that will be copied and owned by the
- /// returned object on success.
- /// \returns An expected GsymReaderV2 that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
LLVM_ABI static llvm::Expected<GsymReaderV2> copyBuffer(StringRef Bytes);
- /// Access the GSYM header.
- /// \returns A native endian version of the GSYM header.
LLVM_ABI const HeaderV2 &getHeader() const;
- /// Get the full function info for an address.
- ///
- /// This should be called when a client will store a copy of the complete
- /// FunctionInfo for a given address. For one off lookups, use the lookup()
- /// function below.
- ///
- /// Symbolication server processes might want to parse the entire function
- /// info for a given address and cache it if the process stays around to
- /// service many symbolication addresses, like for parsing profiling
- /// information.
- ///
- /// \param Addr A virtual address from the orignal object file to lookup.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing to lookup the
- /// address.
- LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfo(uint64_t Addr) const override;
-
- /// Get the full function info given an address index.
- ///
- /// \param AddrIdx A address index for an address in the address table.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing get the function
- /// info object.
- LLVM_ABI llvm::Expected<FunctionInfo>
- getFunctionInfoAtIndex(uint64_t AddrIdx) const override;
-
- /// Lookup an address in the a GSYM.
- ///
- /// Lookup just the information needed for a specific address \a Addr. This
- /// function is faster that calling getFunctionInfo() as it will only return
- /// information that pertains to \a Addr and allows the parsing to skip any
- /// extra information encoded for other addresses. For example the line table
- /// parsing can stop when a matching LineEntry has been fouhnd, and the
- /// InlineInfo can stop parsing early once a match has been found and also
- /// skip information that doesn't match. This avoids memory allocations and
- /// is much faster for lookups.
- ///
- /// \param Addr A virtual address from the orignal object file to lookup.
- ///
- /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
- /// non-null, will be set to the raw data of the MergedFunctionInfo, if
- /// present.
- ///
- /// \returns An expected LookupResult that contains only the information
- /// needed for the current address, or an error object that indicates reason
- /// for failing to lookup the address.
- LLVM_ABI llvm::Expected<LookupResult>
- lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFuncsData = nullptr) const override;
-
- /// Lookup all merged functions for a given address.
- ///
- /// This function performs a lookup for the specified address and then
- /// retrieves additional LookupResults from any merged functions associated
- /// with the primary LookupResult.
- ///
- /// \param Addr The address to lookup.
- ///
- /// \returns A vector of LookupResult objects, where the first element is the
- /// primary result, followed by results for any merged functions
- LLVM_ABI llvm::Expected<std::vector<LookupResult>>
- lookupAll(uint64_t Addr) const override;
-
- /// Get a string from the string table.
- ///
- /// \param Offset The string table offset for the string to retrieve.
- /// \returns The string from the strin table.
- StringRef getString(uint32_t Offset) const override { return StrTab[Offset]; }
-
- /// Get the a file entry for the suppplied file index.
- ///
- /// Used to convert any file indexes in the FunctionInfo data back into
- /// files. This function can be used for iteration, but is more commonly used
- /// for random access when doing lookups.
- ///
- /// \param Index An index into the file table.
- /// \returns An optional FileInfo that will be valid if the file index is
- /// valid, or std::nullopt if the file index is out of bounds,
- std::optional<FileEntry> getFile(uint32_t Index) const override {
- if (Index < Files.size())
- return Files[Index];
- return std::nullopt;
- }
-
- /// Dump the entire Gsym data contained in this object.
- ///
- /// \param OS The output stream to dump to.
+ using GsymReader::dump;
LLVM_ABI void dump(raw_ostream &OS) override;
-
- /// Dump a FunctionInfo object.
- ///
- /// This function will convert any string table indexes and file indexes
- /// into human readable format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param FI The object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used when dumping as an
- /// item within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent = 0) override;
-
- /// Dump a MergedFunctionsInfo object.
- ///
- /// This function will dump a MergedFunctionsInfo object - basically by
- /// dumping the contained FunctionInfo objects with indentation.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param MFI The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) override;
-
- /// Dump a CallSiteInfo object.
- ///
- /// This function will output the details of a CallSiteInfo object in a
- /// human-readable format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param CSI The CallSiteInfo object to dump.
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI) override;
-
- /// Dump a CallSiteInfoCollection object.
- ///
- /// This function will iterate over a collection of CallSiteInfo objects and
- /// dump each one.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param CSIC The CallSiteInfoCollection object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used when dumping as an
- /// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent = 0) override;
-
- /// Dump a LineTable object.
- ///
- /// This function will convert any string table indexes and file indexes
- /// into human readable format.
- ///
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param LT The object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used when dumping as an
- /// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
- uint32_t Indent = 0) override;
-
- /// Dump a InlineInfo object.
- ///
- /// This function will convert any string table indexes and file indexes
- /// into human readable format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param II The object to dump.
- ///
- /// \param Indent The indentation as number of spaces. Used for recurive
- /// dumping.
- LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
- uint32_t Indent = 0) override;
-
- /// Dump a FileEntry object.
- ///
- /// This function will convert any string table indexes into human readable
- /// format.
- ///
- /// \param OS The output stream to dump to.
- ///
- /// \param FE The object to dump.
- LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE) override;
-
- /// Get the number of addresses in this Gsym file.
- uint32_t getNumAddresses() const override {
- return Hdr->NumAddresses;
- }
-
- /// Gets an address from the address table.
- ///
- /// Addresses are stored as offsets frrom the gsym::HeaderV2::BaseAddress.
- ///
- /// \param Index A index into the address table.
- /// \returns A resolved virtual address for adddress in the address table
- /// or std::nullopt if Index is out of bounds.
- LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const override;
-
-protected:
-
- /// Get an appropriate address info offsets array.
- ///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte offsets from the The gsym::HeaderV2::BaseAddress. The table is stored
- /// internally as a array of bytes that are in the correct endianness. When
- /// we access this table we must get an array that matches those sizes. This
- /// templatized helper function is used when accessing address offsets in the
- /// AddrOffsets member variable.
- ///
- /// \returns An ArrayRef of an appropriate address offset size.
- template <class T> ArrayRef<T>
- getAddrOffsets() const {
- return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
- AddrOffsets.size()/sizeof(T));
- }
-
- /// Get an appropriate address from the address table.
- ///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte address offsets from the The gsym::HeaderV2::BaseAddress. The table is
- /// stored internally as a array of bytes that are in the correct endianness.
- /// In order to extract an address from the address table we must access the
- /// address offset using the correct size and then add it to the BaseAddress
- /// in the header.
- ///
- /// \param Index An index into the AddrOffsets array.
- /// \returns An virtual address that matches the original object file for the
- /// address as the specified index, or std::nullopt if Index is out of bounds.
- template <class T>
- std::optional<uint64_t> addressForIndex(size_t Index) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- if (Index < AIO.size())
- return AIO[Index] + Hdr->BaseAddress;
- return std::nullopt;
- }
- /// Lookup an address offset in the AddrOffsets table.
- ///
- /// Given an address offset, look it up using a binary search of the
- /// AddrOffsets table.
- ///
- /// \param AddrOffset An address offset, that has already been computed by
- /// subtracting the gsym::HeaderV2::BaseAddress.
- /// \returns The matching address offset index. This index will be used to
- /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
- template <class T>
- std::optional<uint64_t>
- getAddressOffsetIndex(const uint64_t AddrOffset) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- const auto Begin = AIO.begin();
- const auto End = AIO.end();
- auto Iter = std::lower_bound(Begin, End, AddrOffset);
- // Watch for addresses that fall between the gsym::HeaderV2::BaseAddress and
- // the first address offset.
- if (Iter == Begin && AddrOffset < *Begin)
- return std::nullopt;
- if (Iter == End || AddrOffset < *Iter)
- --Iter;
-
- // GSYM files have sorted function infos with the most information (line
- // table and/or inline info) first in the array of function infos, so
- // always backup as much as possible as long as the address offset is the
- // same as the previous entry.
- while (Iter != Begin) {
- auto Prev = Iter - 1;
- if (*Prev == *Iter)
- Iter = Prev;
- else
- break;
- }
-
- return std::distance(Begin, Iter);
- }
-
- /// Create a GSYM from a memory buffer.
- ///
- /// Called by both openFile() and copyBuffer(), this function does all of the
- /// work of parsing the GSYM file and returning an error.
- ///
- /// \param MemBuffer A memory buffer that will transfer ownership into the
- /// GsymReaderV2.
- /// \returns An expected GsymReaderV2 that contains the object or an error
- /// object that indicates reason for failing to read the GSYM.
- LLVM_ABI static llvm::Expected<llvm::gsym::GsymReaderV2>
- create(std::unique_ptr<MemoryBuffer> &MemBuffer);
-
- /// Given an address, find the address index.
- ///
- /// Binary search the address table and find the matching address index.
- ///
- /// \param Addr A virtual address that matches the original object file
- /// to lookup.
- /// \returns An index into the address table. This index can be used to
- /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
- /// Returns an error if the address isn't in the GSYM with details of why.
- LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
-
- /// Given an address index, get the offset for the FunctionInfo.
- ///
- /// Looking up an address is done by finding the corresponding address
- /// index for the address. This index is then used to get the offset of the
- /// FunctionInfo data that we will decode using this function.
- ///
- /// \param Index An index into the address table.
- /// \returns An optional GSYM data offset for the offset of the FunctionInfo
- /// that needs to be decoded.
- LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
-
- /// Given an address, find the correct function info data and function
- /// address.
- ///
- /// Binary search the address table and find the matching address info
- /// and make sure that the function info contains the address. GSYM allows
- /// functions to overlap, and the most debug info is contained in the first
- /// entries due to the sorting when GSYM files are created. We can have
- /// multiple function info that start at the same address only if their
- /// address range doesn't match. So find the first entry that matches \a Addr
- /// and iterate forward until we find one that contains the address.
- ///
- /// \param[in] Addr A virtual address that matches the original object file
- /// to lookup.
- ///
- /// \param[out] FuncStartAddr A virtual address that is the base address of
- /// the function that is used for decoding the FunctionInfo.
- ///
- /// \returns An valid data extractor on success, or an error if we fail to
- /// find the address in a function info or corrrectly decode the data
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
-
- /// Get the function data and address given an address index.
- ///
- /// \param AddrIdx A address index from the address table.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing to lookup the
- /// address.
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
};
} // namespace gsym
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index d6a249ba58b3b..961ff212cf892 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -6,3 +6,363 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/Header.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
+#include "llvm/MC/StringTableBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <vector>
+
+using namespace llvm;
+using namespace gsym;
+
+/// Construct an empty creator whose string table uses the ELF layout.
+///
+/// \param Quiet Stored in the Quiet member.
+GsymCreator::GsymCreator(bool Quiet)
+    : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
+  // Register the empty path right away. copyFile() treats file index 0 as
+  // "no file"; presumably this first insertion is what claims that index.
+  const StringRef EmptyPath;
+  insertFile(EmptyPath);
+}
+
+/// Split \p Path into its directory and file name, intern both strings and
+/// return the index of the corresponding file table entry.
+///
+/// \param Path The path to register.
+/// \param Style The path style used to split \p Path.
+/// \returns The value returned by insertFileEntry() for the new FileEntry.
+uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
+  // The directory is interned before the file name. The two calls are kept
+  // as separate statements so that this order is well defined.
+  const uint32_t DirStrOff =
+      insertString(llvm::sys::path::parent_path(Path, Style));
+  const uint32_t BaseStrOff =
+      insertString(llvm::sys::path::filename(Path, Style));
+  const FileEntry Entry(DirStrOff, BaseStrOff);
+  return insertFileEntry(Entry);
+}
+
+/// Return the file index for \p FE, appending it to Files if it has not been
+/// seen before. Holds Mutex for the duration of the call.
+uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // Index the entry would get if it turns out to be new.
+  const auto CandidateIdx = Files.size();
+  auto [Pos, Inserted] =
+      FileEntryToIndex.insert(std::make_pair(FE, CandidateIdx));
+  if (Inserted)
+    Files.emplace_back(FE);
+  // Either the index just assigned or the one recorded earlier.
+  return Pos->second;
+}
+
+/// Copy the file entry at index \p FileIdx of \p SrcGC into this creator.
+///
+/// The directory and base name strings are copied into this creator's string
+/// table and a file entry that refers to the new offsets is registered.
+///
+/// \param SrcGC The creator that owns the file entry being copied.
+/// \param FileIdx An index into \p SrcGC's file table.
+/// \returns The index of the equivalent file entry in this creator.
+uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
+  // Index zero is passed through unchanged without touching either table.
+  if (FileIdx == 0)
+    return 0;
+  assert(FileIdx < SrcGC.Files.size() &&
+         "GsymCreator::copyFile expects a valid file index as parameter.");
+  const FileEntry SrcFE = SrcGC.Files[FileIdx];
+  // copyString() maps offset zero to zero, so an empty directory or an empty
+  // base name is never looked up in the source's StringOffsetMap.
+  const uint32_t Dir = copyString(SrcGC, SrcFE.Dir);
+  const uint32_t Base = copyString(SrcGC, SrcFE.Base);
+  return insertFileEntry(FileEntry(Dir, Base));
+}
+
+/// Write this creator to disk.
+///
+/// \param Path Output file path, or the path prefix when saving segments.
+/// \param ByteOrder Byte order handed to the FileWriter.
+/// \param SegmentSize When set, the work is delegated to saveSegments().
+/// \returns The error from opening the file, from encode() or from
+/// saveSegments().
+llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
+                              std::optional<uint64_t> SegmentSize) const {
+  if (SegmentSize.has_value())
+    return saveSegments(Path, ByteOrder, SegmentSize.value());
+
+  std::error_code OpenEC;
+  raw_fd_ostream FileStream(Path, OpenEC);
+  if (OpenEC)
+    return llvm::errorCodeToError(OpenEC);
+
+  FileWriter Writer(FileStream, ByteOrder);
+  return encode(Writer);
+}
+
+/// Copy the string at offset \p StrOff of \p SrcGC into this creator's
+/// string table.
+///
+/// \param SrcGC The creator whose StringOffsetMap resolves \p StrOff.
+/// \param StrOff A string offset previously handed out by \p SrcGC.
+/// \returns The offset StrTab.add() reports for the string, or 0 when
+/// \p StrOff is 0.
+uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
+  // Offset zero is passed through without consulting the source map.
+  if (StrOff == 0)
+    return 0;
+  const auto SrcStr = SrcGC.StringOffsetMap.find(StrOff);
+  // Dereferencing end() would be undefined behavior; fail loudly in
+  // assert-enabled builds instead, the same way getString() does.
+  assert(SrcStr != SrcGC.StringOffsetMap.end() &&
+         "GsymCreator::copyString expects a valid offset as parameter.");
+  return StrTab.add(SrcStr->second);
+}
+
+/// Intern \p S in the string table and return its offset.
+///
+/// \param S The string to add. An empty string yields offset 0 and touches
+/// no state.
+/// \param Copy When true and the string table does not already contain \p S,
+/// the characters are first copied into StringStorage and the stored copy is
+/// what gets added.
+/// \returns The offset reported by StrTab.add().
+uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
+  if (S.empty())
+    return 0;
+
+  // Hash before taking the lock; the hash is reused if the key is rebuilt.
+  CachedHashStringRef Key(S);
+  std::lock_guard<std::mutex> Guard(Mutex);
+  const bool NeedsOwnedCopy = Copy && !StrTab.contains(Key);
+  if (NeedsOwnedCopy) {
+    const StringRef Owned = StringStorage.insert(S).first->getKey();
+    Key = CachedHashStringRef{Owned, Key.hash()};
+  }
+  const uint32_t StrOff = StrTab.add(Key);
+  // Remember which string lives at this offset; an existing mapping is kept.
+  StringOffsetMap.try_emplace(StrOff, Key);
+  return StrOff;
+}
+
+/// Look up the string recorded for \p Offset in StringOffsetMap.
+///
+/// \param Offset A string offset; it must be present in StringOffsetMap
+/// (checked by assert only).
+/// \returns The string stored for that offset.
+StringRef GsymCreator::getString(uint32_t Offset) {
+  const auto Found = StringOffsetMap.find(Offset);
+  assert(Found != StringOffsetMap.end() &&
+         "GsymCreator::getString expects a valid offset as parameter.");
+  return Found->second.val();
+}
+
+/// Append \p FI to Funcs while holding Mutex. \p FI is moved from.
+void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  Funcs.push_back(std::move(FI));
+}
+
+/// Invoke \p Callback on every function info, in storage order, while holding
+/// Mutex. Iteration stops as soon as \p Callback returns false.
+void GsymCreator::forEachFunctionInfo(
+    std::function<bool(FunctionInfo &)> const &Callback) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  for (auto It = Funcs.begin(), End = Funcs.end(); It != End; ++It)
+    if (!Callback(*It))
+      return;
+}
+
+/// Read-only variant: invoke \p Callback on every function info, in storage
+/// order, while holding Mutex. Iteration stops as soon as \p Callback returns
+/// false.
+void GsymCreator::forEachFunctionInfo(
+    std::function<bool(const FunctionInfo &)> const &Callback) const {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  for (auto It = Funcs.begin(), End = Funcs.end(); It != End; ++It)
+    if (!Callback(*It))
+      return;
+}
+
+/// \returns The number of entries currently in Funcs, read under Mutex.
+size_t GsymCreator::getNumFunctionInfos() const {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  const size_t Count = Funcs.size();
+  return Count;
+}
+
+/// \returns True when no ValidTextRanges have been set, otherwise whether
+/// ValidTextRanges contains \p Addr.
+bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
+  return !ValidTextRanges || ValidTextRanges->contains(Addr);
+}
+
+/// \returns The start address of the first entry in Funcs, or std::nullopt
+/// when Funcs is empty or the creator is neither finalized nor a segment.
+std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
+  const bool Ready = Finalized || IsSegment;
+  if (!Ready || Funcs.empty())
+    return std::nullopt;
+  return Funcs.front().startAddress();
+}
+
+/// \returns The start address of the last entry in Funcs, or std::nullopt
+/// when Funcs is empty or the creator is neither finalized nor a segment.
+std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
+  const bool Ready = Finalized || IsSegment;
+  if (!Ready || Funcs.empty())
+    return std::nullopt;
+  return Funcs.back().startAddress();
+}
+
+/// \returns The explicitly set BaseAddress when there is one, otherwise
+/// whatever getFirstFunctionAddress() reports.
+std::optional<uint64_t> GsymCreator::getBaseAddress() const {
+  return BaseAddress ? BaseAddress : getFirstFunctionAddress();
+}
+
+/// \returns The largest value representable in an address offset of the
+/// width reported by getAddressOffsetSize().
+uint64_t GsymCreator::getMaxAddressOffset() const {
+  const uint8_t OffsetSize = getAddressOffsetSize();
+  if (OffsetSize == 1)
+    return UINT8_MAX;
+  if (OffsetSize == 2)
+    return UINT16_MAX;
+  if (OffsetSize == 4)
+    return UINT32_MAX;
+  if (OffsetSize == 8)
+    return UINT64_MAX;
+  llvm_unreachable("invalid address offset");
+}
+
+/// Pick the narrowest width (1, 2, 4 or 8 bytes) that can hold the distance
+/// from the base address to the last function's start address.
+///
+/// \returns The width in bytes, or 1 when either address is unavailable.
+uint8_t GsymCreator::getAddressOffsetSize() const {
+  const std::optional<uint64_t> Base = getBaseAddress();
+  const std::optional<uint64_t> Last = getLastFunctionAddress();
+  if (!Base || !Last)
+    return 1;
+
+  const uint64_t MaxOffset = *Last - *Base;
+  if (MaxOffset <= UINT8_MAX)
+    return 1;
+  if (MaxOffset <= UINT16_MAX)
+    return 2;
+  if (MaxOffset <= UINT32_MAX)
+    return 4;
+  return 8;
+}
+
+/// Fold function infos that share an address range into the first function
+/// with that range, recording the others in its MergedFunctions list.
+///
+/// Funcs is sorted first. A function whose range differs from the current
+/// top-level function starts a new top-level entry. A function that compares
+/// equal to the most recently merged one is skipped.
+///
+/// \param Out Receives a one-line summary when anything was merged.
+void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
+  if (Funcs.size() < 2)
+    return;
+
+  llvm::stable_sort(Funcs);
+  std::vector<FunctionInfo> Roots;
+  Roots.emplace_back(std::move(Funcs.front()));
+
+  for (size_t Idx = 1, NumFuncs = Funcs.size(); Idx != NumFuncs; ++Idx) {
+    FunctionInfo &Candidate = Funcs[Idx];
+    // Re-read the back element each round: Roots may have reallocated.
+    FunctionInfo &Parent = Roots.back();
+
+    if (!(Parent.Range == Candidate.Range)) {
+      Roots.emplace_back(std::move(Candidate));
+      continue;
+    }
+
+    if (Parent.MergedFunctions) {
+      // Drop a candidate identical to the one merged just before it.
+      if (Parent.MergedFunctions->MergedFunctions.back() == Candidate)
+        continue;
+    } else {
+      Parent.MergedFunctions = MergedFunctionsInfo();
+    }
+    Parent.MergedFunctions->MergedFunctions.emplace_back(std::move(Candidate));
+  }
+
+  const uint32_t NumMerged = Funcs.size() - Roots.size();
+  if (NumMerged != 0)
+    Out << "Have " << NumMerged
+        << " merged functions as children of other functions\n";
+
+  Funcs.swap(Roots);
+}
+
+/// Finalize the creator: lay out the string table and, unless this creator is
+/// a segment, sort the function infos and prune redundant entries.
+///
+/// \param Out Receives warnings about duplicate or overlapping ranges and
+/// the final pruning summary.
+/// \returns An error if finalize() has already been called, success
+/// otherwise.
+llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
+  std::lock_guard<std::mutex> Guard(Mutex);
+  if (Finalized)
+    return createStringError(std::errc::invalid_argument, "already finalized");
+  Finalized = true;
+
+  StrTab.finalizeInOrder();
+
+  // Segments skip sorting and pruning entirely.
+  if (IsSegment)
+    return Error::success();
+
+  const auto NumBefore = Funcs.size();
+  if (NumBefore > 1) {
+    llvm::stable_sort(Funcs);
+    std::vector<FunctionInfo> Kept;
+    Kept.reserve(Funcs.size());
+    Kept.emplace_back(std::move(Funcs.front()));
+    for (size_t Idx = 1; Idx != NumBefore; ++Idx) {
+      FunctionInfo &Prev = Kept.back();
+      FunctionInfo &Curr = Funcs[Idx];
+
+      // Same range: keep exactly one entry, preferring the later one.
+      if (Prev.Range == Curr.Range) {
+        if (Prev == Curr)
+          continue;
+        if (Prev.hasRichInfo() && Curr.hasRichInfo())
+          Out.Report(
+              "Duplicate address ranges with different debug info.",
+              [&](raw_ostream &OS) {
+                OS << "warning: same address range contains "
+                      "different debug "
+                   << "info. Removing:\n"
+                   << Prev << "\nIn favor of this one:\n"
+                   << Curr << "\n";
+              });
+        std::swap(Prev, Curr);
+        continue;
+      }
+
+      // Different but intersecting ranges: warn and keep both.
+      if (Prev.Range.intersects(Curr.Range)) {
+        Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
+          OS << "warning: function ranges overlap:\n"
+             << Prev << "\n"
+             << Curr << "\n";
+        });
+        Kept.emplace_back(std::move(Curr));
+        continue;
+      }
+
+      // A zero-sized previous entry whose address lies inside the current
+      // range is replaced by the current entry; otherwise both are kept.
+      const bool PrevIsEmptyInsideCurr =
+          Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start());
+      if (PrevIsEmptyInsideCurr)
+        std::swap(Prev, Curr);
+      else
+        Kept.emplace_back(std::move(Curr));
+    }
+    std::swap(Funcs, Kept);
+  }
+
+  // Give a zero-sized last function the end of the valid text range that
+  // contains its start address, when such a range is known.
+  if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
+    if (auto Range =
+            ValidTextRanges->getRangeThatContains(Funcs.back().Range.start()))
+      Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
+  }
+  Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
+      << Funcs.size() << " total\n";
+  return Error::success();
+}
+
+/// Rewrite the name and call-file references of \p II and of all of its
+/// descendants so that they refer to this creator's tables instead of
+/// \p SrcGC's.
+///
+/// Nodes are visited parent first, children in their stored order.
+void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
+  // Explicit stack in place of recursion. Children are pushed in reverse so
+  // that they are popped, and therefore processed, in their stored order.
+  std::vector<InlineInfo *> Pending{&II};
+  while (!Pending.empty()) {
+    InlineInfo *Node = Pending.back();
+    Pending.pop_back();
+    Node->Name = copyString(SrcGC, Node->Name);
+    Node->CallFile = copyFile(SrcGC, Node->CallFile);
+    for (auto It = Node->Children.rbegin(), End = Node->Children.rend();
+         It != End; ++It)
+      Pending.push_back(&*It);
+  }
+}
+
+/// Copy the function info at index \p FuncIdx of \p SrcGC into this creator.
+///
+/// The name, the line table's file indexes and the inline info's strings and
+/// files are re-registered in this creator so that the copy refers to this
+/// creator's tables.
+///
+/// NOTE(review): only Range, Name, OptLineTable and Inline are transferred.
+/// FunctionInfo::MergedFunctions is not; confirm that is intended.
+///
+/// \param SrcGC The creator that owns the function info being copied.
+/// \param FuncIdx An index into \p SrcGC's Funcs.
+/// \returns The value FunctionInfo::cacheEncoding() reports for the stored
+/// copy; createSegment() accumulates it as a byte size.
+uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC,
+                                       size_t FuncIdx) {
+  assert(FuncIdx < SrcGC.Funcs.size() &&
+         "GsymCreator::copyFunctionInfo expects a valid function index.");
+  const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
+
+  FunctionInfo DstFI;
+  DstFI.Range = SrcFI.Range;
+  DstFI.Name = copyString(SrcGC, SrcFI.Name);
+  if (SrcFI.OptLineTable) {
+    // Copy the whole table, then translate each entry's file index.
+    DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
+    LineTable &DstLT = DstFI.OptLineTable.value();
+    const size_t NumLines = DstLT.size();
+    for (size_t I = 0; I < NumLines; ++I) {
+      LineEntry &LE = DstLT.get(I);
+      LE.File = copyFile(SrcGC, LE.File);
+    }
+  }
+  if (SrcFI.Inline) {
+    DstFI.Inline = SrcFI.Inline.value();
+    fixupInlineInfo(SrcGC, *DstFI.Inline);
+  }
+  std::lock_guard<std::mutex> Guard(Mutex);
+  // DstFI is not used again, so move it rather than deep-copying the line
+  // table and inline info a second time.
+  Funcs.emplace_back(std::move(DstFI));
+  return Funcs.back().cacheEncoding();
+}
+
+/// Split this creator into segments and save each one to its own file.
+///
+/// Each segment is written to "<Path>-<hex address>", where the address is
+/// the segment's first function address.
+///
+/// \param Path Prefix for the segment file names.
+/// \param ByteOrder Byte order handed to each segment's save().
+/// \param SegmentSize Size budget passed to createSegment(); must not be 0.
+/// \returns The first error encountered, or success.
+llvm::Error GsymCreator::saveSegments(StringRef Path,
+                                      llvm::endianness ByteOrder,
+                                      uint64_t SegmentSize) const {
+  if (SegmentSize == 0)
+    return createStringError(std::errc::invalid_argument,
+                             "invalid segment size zero");
+
+  const size_t NumFuncs = Funcs.size();
+  // createSegment() advances FuncIdx past the functions it consumed.
+  for (size_t FuncIdx = 0; FuncIdx < NumFuncs;) {
+    llvm::Expected<std::unique_ptr<GsymCreator>> SegmentOrErr =
+        createSegment(SegmentSize, FuncIdx);
+    if (!SegmentOrErr)
+      return SegmentOrErr.takeError();
+
+    GsymCreator *Segment = SegmentOrErr->get();
+    if (!Segment)
+      break;
+
+    OutputAggregator Out(nullptr);
+    if (llvm::Error Err = Segment->finalize(Out))
+      return Err;
+
+    // A segment without a first function address is not written.
+    const std::optional<uint64_t> FirstFuncAddr =
+        Segment->getFirstFunctionAddress();
+    if (!FirstFuncAddr)
+      continue;
+
+    std::string SegmentPath;
+    raw_string_ostream PathStream(SegmentPath);
+    PathStream << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
+    if (llvm::Error Err = Segment->save(SegmentPath, ByteOrder, std::nullopt))
+      return Err;
+  }
+  return Error::success();
+}
+
+/// Build one segment starting at function index \p FuncIdx.
+///
+/// Functions are copied into a new creator until the header and table size
+/// plus the accumulated function info size reaches \p SegmentSize.
+///
+/// \param SegmentSize The size budget for the segment.
+/// \param[in,out] FuncIdx Index of the first function to copy; on return it
+/// is the index of the first function that was not copied.
+/// \returns A null pointer when \p FuncIdx is already past the end, an error
+/// when not even one function fits, otherwise the new segment creator.
+llvm::Expected<std::unique_ptr<GsymCreator>>
+GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+  const size_t NumFuncs = Funcs.size();
+  if (FuncIdx >= NumFuncs)
+    return std::unique_ptr<GsymCreator>();
+
+  std::unique_ptr<GsymCreator> Segment = createNew(/*Quiet=*/true);
+  Segment->setIsSegment();
+  if (BaseAddress)
+    Segment->setBaseAddress(*BaseAddress);
+  Segment->setUUID(UUID);
+
+  uint64_t FuncInfosSize = 0;
+  while (FuncIdx < NumFuncs) {
+    // Recomputed every round because each copied function grows the tables.
+    const uint64_t Overhead = Segment->calculateHeaderAndTableSize();
+    if (Overhead + FuncInfosSize >= SegmentSize) {
+      if (FuncInfosSize != 0)
+        break;
+      return createStringError(std::errc::invalid_argument,
+                               "a segment size of %" PRIu64 " is to small to "
+                               "fit any function infos, specify a larger value",
+                               SegmentSize);
+    }
+    FuncInfosSize += alignTo(Segment->copyFunctionInfo(*this, FuncIdx), 4);
+    ++FuncIdx;
+  }
+  return std::move(Segment);
+}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
index a22d10b17a102..71e22d14401ca 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
@@ -8,64 +8,29 @@
#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/Header.h"
-#include "llvm/DebugInfo/GSYM/LineTable.h"
-#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
-#include "llvm/MC/StringTableBuilder.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
-#include <functional>
-#include <vector>
using namespace llvm;
using namespace gsym;
-GsymCreatorV1::GsymCreatorV1(bool Quiet)
- : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
- insertFile(StringRef());
+/// Create a new, empty GsymCreatorV1.
+///
+/// \param Quiet Forwarded to the GsymCreatorV1 constructor.
+std::unique_ptr<GsymCreator> GsymCreatorV1::createNew(bool Quiet) const {
+  std::unique_ptr<GsymCreator> Fresh = std::make_unique<GsymCreatorV1>(Quiet);
+  return Fresh;
}
-uint32_t GsymCreatorV1::insertFile(StringRef Path, llvm::sys::path::Style Style) {
- llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
- llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
- const uint32_t Dir = insertString(directory);
- const uint32_t Base = insertString(filename);
- return insertFileEntry(FileEntry(Dir, Base));
-}
-
-uint32_t GsymCreatorV1::insertFileEntry(FileEntry FE) {
- std::lock_guard<std::mutex> Guard(Mutex);
- const auto NextIndex = Files.size();
- auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
- if (R.second)
- Files.emplace_back(FE);
- return R.first->second;
-}
-
-uint32_t GsymCreatorV1::copyFile(const GsymCreatorV1 &SrcGC, uint32_t FileIdx) {
- if (FileIdx == 0)
- return 0;
- const FileEntry SrcFE = SrcGC.Files[FileIdx];
- uint32_t Dir =
- SrcFE.Dir == 0
- ? 0
- : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
- uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
- FileEntry DstFE(Dir, Base);
- return insertFileEntry(DstFE);
+/// Sum the sizes of the header, the address offsets table, the address info
+/// offsets table (one uint32_t per function), the file table and the string
+/// table.
+uint64_t GsymCreatorV1::calculateHeaderAndTableSize() const {
+  const size_t NumFuncs = Funcs.size();
+  const uint64_t AddrOffsetsSize = NumFuncs * getAddressOffsetSize();
+  const uint64_t AddrInfoOffsetsSize = NumFuncs * sizeof(uint32_t);
+  const uint64_t FileTableSize = Files.size() * sizeof(FileEntry);
+  const uint64_t StringTableSize = StrTab.getSize();
+  return sizeof(Header) + AddrOffsetsSize + AddrInfoOffsetsSize +
+         FileTableSize + StringTableSize;
}
-llvm::Error GsymCreatorV1::save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize) const {
- if (SegmentSize)
- return saveSegments(Path, ByteOrder, *SegmentSize);
- std::error_code EC;
- raw_fd_ostream OutStrm(Path, EC);
- if (EC)
- return llvm::errorCodeToError(EC);
- FileWriter O(OutStrm, ByteOrder);
- return encode(O);
+/// Load call site information from \p YAMLFile through a CallSiteInfoLoader
+/// bound to this creator and its Funcs.
+///
+/// \returns Whatever CallSiteInfoLoader::loadYAML() returns.
+llvm::Error GsymCreatorV1::loadCallSitesFromYAML(StringRef YAMLFile) {
+  return CallSiteInfoLoader(*this, Funcs).loadYAML(YAMLFile);
}
llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
@@ -161,7 +126,6 @@ llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
return createStringError(std::errc::invalid_argument,
"address info offset exceeded 32-bit max");
}
-
AddrInfoOffsets.push_back(Offset);
} else
return OffsetOrErr.takeError();
@@ -176,319 +140,3 @@ llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
}
return ErrorSuccess();
}
-
-llvm::Error GsymCreatorV1::loadCallSitesFromYAML(StringRef YAMLFile) {
- CallSiteInfoLoader Loader(*this, Funcs);
- return Loader.loadYAML(YAMLFile);
-}
-
-void GsymCreatorV1::prepareMergedFunctions(OutputAggregator &Out) {
- if (Funcs.size() < 2)
- return;
-
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> TopLevelFuncs;
-
- TopLevelFuncs.emplace_back(std::move(Funcs.front()));
-
- for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
- FunctionInfo &TopFunc = TopLevelFuncs.back();
- FunctionInfo &MatchFunc = Funcs[Idx];
- if (TopFunc.Range == MatchFunc.Range) {
- if (!TopFunc.MergedFunctions)
- TopFunc.MergedFunctions = MergedFunctionsInfo();
- else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
- continue;
- TopFunc.MergedFunctions->MergedFunctions.emplace_back(
- std::move(MatchFunc));
- } else
- TopLevelFuncs.emplace_back(std::move(MatchFunc));
- }
-
- uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
- if (mergedCount != 0)
- Out << "Have " << mergedCount
- << " merged functions as children of other functions\n";
-
- std::swap(Funcs, TopLevelFuncs);
-}
-
-llvm::Error GsymCreatorV1::finalize(OutputAggregator &Out) {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Finalized)
- return createStringError(std::errc::invalid_argument, "already finalized");
- Finalized = true;
-
- StrTab.finalizeInOrder();
-
- const auto NumBefore = Funcs.size();
- if (!IsSegment) {
- if (NumBefore > 1) {
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> FinalizedFuncs;
- FinalizedFuncs.reserve(Funcs.size());
- FinalizedFuncs.emplace_back(std::move(Funcs.front()));
- for (size_t Idx=1; Idx < NumBefore; ++Idx) {
- FunctionInfo &Prev = FinalizedFuncs.back();
- FunctionInfo &Curr = Funcs[Idx];
- const bool ranges_equal = Prev.Range == Curr.Range;
- if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
- if (ranges_equal) {
- if (!(Prev == Curr)) {
- if (Prev.hasRichInfo() && Curr.hasRichInfo())
- Out.Report(
- "Duplicate address ranges with different debug info.",
- [&](raw_ostream &OS) {
- OS << "warning: same address range contains "
- "different debug "
- << "info. Removing:\n"
- << Prev << "\nIn favor of this one:\n"
- << Curr << "\n";
- });
-
- std::swap(Prev, Curr);
- }
- } else {
- Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
- OS << "warning: function ranges overlap:\n"
- << Prev << "\n"
- << Curr << "\n";
- });
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- } else {
- if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
- std::swap(Prev, Curr);
- } else {
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- }
- }
- std::swap(Funcs, FinalizedFuncs);
- }
- if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
- if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
- Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
- }
- }
- Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
- << Funcs.size() << " total\n";
- }
- return Error::success();
-}
-
-uint32_t GsymCreatorV1::copyString(const GsymCreatorV1 &SrcGC, uint32_t StrOff) {
- if (StrOff == 0)
- return 0;
- return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
-}
-
-uint32_t GsymCreatorV1::insertString(StringRef S, bool Copy) {
- if (S.empty())
- return 0;
-
- CachedHashStringRef CHStr(S);
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Copy) {
- if (!StrTab.contains(CHStr))
- CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
- CHStr.hash()};
- }
- const uint32_t StrOff = StrTab.add(CHStr);
- StringOffsetMap.try_emplace(StrOff, CHStr);
- return StrOff;
-}
-
-StringRef GsymCreatorV1::getString(uint32_t Offset) {
- auto I = StringOffsetMap.find(Offset);
- assert(I != StringOffsetMap.end() &&
- "GsymCreatorV1::getString expects a valid offset as parameter.");
- return I->second.val();
-}
-
-void GsymCreatorV1::addFunctionInfo(FunctionInfo &&FI) {
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.emplace_back(std::move(FI));
-}
-
-void GsymCreatorV1::forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
-}
-
-void GsymCreatorV1::forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (const auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
-}
-
-size_t GsymCreatorV1::getNumFunctionInfos() const {
- std::lock_guard<std::mutex> Guard(Mutex);
- return Funcs.size();
-}
-
-bool GsymCreatorV1::IsValidTextAddress(uint64_t Addr) const {
- if (ValidTextRanges)
- return ValidTextRanges->contains(Addr);
- return true;
-}
-
-std::optional<uint64_t> GsymCreatorV1::getFirstFunctionAddress() const {
- if ((Finalized || IsSegment) && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.front().startAddress());
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymCreatorV1::getLastFunctionAddress() const {
- if ((Finalized || IsSegment) && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.back().startAddress());
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymCreatorV1::getBaseAddress() const {
- if (BaseAddress)
- return BaseAddress;
- return getFirstFunctionAddress();
-}
-
-uint64_t GsymCreatorV1::getMaxAddressOffset() const {
- switch (getAddressOffsetSize()) {
- case 1: return UINT8_MAX;
- case 2: return UINT16_MAX;
- case 4: return UINT32_MAX;
- case 8: return UINT64_MAX;
- }
- llvm_unreachable("invalid address offset");
-}
-
-uint8_t GsymCreatorV1::getAddressOffsetSize() const {
- const std::optional<uint64_t> BaseAddress = getBaseAddress();
- const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
- if (BaseAddress && LastFuncAddr) {
- const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
- if (AddrDelta <= UINT8_MAX)
- return 1;
- else if (AddrDelta <= UINT16_MAX)
- return 2;
- else if (AddrDelta <= UINT32_MAX)
- return 4;
- return 8;
- }
- return 1;
-}
-
-uint64_t GsymCreatorV1::calculateHeaderAndTableSize() const {
- uint64_t Size = sizeof(Header);
- const size_t NumFuncs = Funcs.size();
- Size += NumFuncs * getAddressOffsetSize();
- Size += NumFuncs * sizeof(uint32_t);
- Size += Files.size() * sizeof(FileEntry);
- Size += StrTab.getSize();
-
- return Size;
-}
-
-void GsymCreatorV1::fixupInlineInfo(const GsymCreatorV1 &SrcGC, InlineInfo &II) {
- II.Name = copyString(SrcGC, II.Name);
- II.CallFile = copyFile(SrcGC, II.CallFile);
- for (auto &ChildII: II.Children)
- fixupInlineInfo(SrcGC, ChildII);
-}
-
-uint64_t GsymCreatorV1::copyFunctionInfo(const GsymCreatorV1 &SrcGC, size_t FuncIdx) {
- const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
-
- FunctionInfo DstFI;
- DstFI.Range = SrcFI.Range;
- DstFI.Name = copyString(SrcGC, SrcFI.Name);
- if (SrcFI.OptLineTable) {
- DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
- LineTable &DstLT = DstFI.OptLineTable.value();
- const size_t NumLines = DstLT.size();
- for (size_t I=0; I<NumLines; ++I) {
- LineEntry &LE = DstLT.get(I);
- LE.File = copyFile(SrcGC, LE.File);
- }
- }
- if (SrcFI.Inline) {
- DstFI.Inline = SrcFI.Inline.value();
- fixupInlineInfo(SrcGC, *DstFI.Inline);
- }
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.emplace_back(DstFI);
- return Funcs.back().cacheEncoding();
-}
-
-llvm::Error GsymCreatorV1::saveSegments(StringRef Path,
- llvm::endianness ByteOrder,
- uint64_t SegmentSize) const {
- if (SegmentSize == 0)
- return createStringError(std::errc::invalid_argument,
- "invalid segment size zero");
-
- size_t FuncIdx = 0;
- const size_t NumFuncs = Funcs.size();
- while (FuncIdx < NumFuncs) {
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> ExpectedGC =
- createSegment(SegmentSize, FuncIdx);
- if (ExpectedGC) {
- GsymCreatorV1 *GC = ExpectedGC->get();
- if (!GC)
- break;
- OutputAggregator Out(nullptr);
- llvm::Error Err = GC->finalize(Out);
- if (Err)
- return Err;
- std::string SegmentedGsymPath;
- raw_string_ostream SGP(SegmentedGsymPath);
- std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
- if (FirstFuncAddr) {
- SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
- Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
- if (Err)
- return Err;
- }
- } else {
- return ExpectedGC.takeError();
- }
- }
- return Error::success();
-}
-
-llvm::Expected<std::unique_ptr<GsymCreatorV1>>
-GsymCreatorV1::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
- if (FuncIdx >= Funcs.size())
- return std::unique_ptr<GsymCreatorV1>();
-
- std::unique_ptr<GsymCreatorV1> GC(new GsymCreatorV1(/*Quiet=*/true));
-
- GC->setIsSegment();
-
- if (BaseAddress)
- GC->setBaseAddress(*BaseAddress);
- GC->setUUID(UUID);
- const size_t NumFuncs = Funcs.size();
- uint64_t SegmentFuncInfosSize = 0;
- for (; FuncIdx < NumFuncs; ++FuncIdx) {
- const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
- if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
- if (SegmentFuncInfosSize == 0)
- return createStringError(std::errc::invalid_argument,
- "a segment size of %" PRIu64 " is to small to "
- "fit any function infos, specify a larger value",
- SegmentSize);
-
- break;
- }
- SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
- }
- return std::move(GC);
-}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 7804a96f1616e..f4c512b2db5e5 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -9,74 +9,36 @@
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/GlobalData.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
-#include "llvm/DebugInfo/GSYM/LineTable.h"
-#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
-#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
#include <cassert>
-#include <functional>
-#include <vector>
using namespace llvm;
using namespace gsym;
-GsymCreatorV2::GsymCreatorV2(bool Quiet)
- : StrTab(StringTableBuilder::ELF), Quiet(Quiet) {
- insertFile(StringRef());
+/// Create a new, empty GsymCreatorV2.
+///
+/// \param Quiet Forwarded to the GsymCreatorV2 constructor.
+std::unique_ptr<GsymCreator> GsymCreatorV2::createNew(bool Quiet) const {
+  std::unique_ptr<GsymCreator> Fresh = std::make_unique<GsymCreatorV2>(Quiet);
+  return Fresh;
}
-uint32_t GsymCreatorV2::insertFile(StringRef Path, llvm::sys::path::Style Style) {
- llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
- llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
- // We must insert the strings first, then call the FileEntry constructor.
- // If we inline the insertString() function call into the constructor, the
- // call order is undefined due to parameter lists not having any ordering
- // requirements.
- const uint32_t Dir = insertString(directory);
- const uint32_t Base = insertString(filename);
- return insertFileEntry(FileEntry(Dir, Base));
-}
-
-uint32_t GsymCreatorV2::insertFileEntry(FileEntry FE) {
- std::lock_guard<std::mutex> Guard(Mutex);
- const auto NextIndex = Files.size();
- // Find FE in hash map and insert if not present.
- auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
- if (R.second)
- Files.emplace_back(FE);
- return R.first->second;
-}
-
-uint32_t GsymCreatorV2::copyFile(const GsymCreatorV2 &SrcGC, uint32_t FileIdx) {
- // File index zero is reserved for a FileEntry with no directory and no
- // filename. Any other file and we need to copy the strings for the directory
- // and filename.
- if (FileIdx == 0)
- return 0;
- const FileEntry SrcFE = SrcGC.Files[FileIdx];
- // Copy the strings for the file and then add the newly converted file entry.
- uint32_t Dir =
- SrcFE.Dir == 0
- ? 0
- : StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
- uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
- FileEntry DstFE(Dir, Base);
- return insertFileEntry(DstFE);
+/// Compute the size of everything except the function info data: the header,
+/// the GlobalData entries, the address offsets, the address info offsets
+/// (counted at 4 bytes each), the file table, the string table and the UUID.
+uint64_t GsymCreatorV2::calculateHeaderAndTableSize() const {
+  constexpr uint64_t HeaderSize = 24;
+  constexpr uint64_t GlobalDataEntrySize = 24;
+  // Same entry count as NumGlobalDataEntries in encode(): five entries, one
+  // more when a UUID is present, and one more on top of that.
+  constexpr uint32_t NumFixedEntries = 5;
+  constexpr uint32_t NumTrailingEntries = 1;
+  const uint32_t NumEntries =
+      NumFixedEntries + (UUID.empty() ? 0 : 1) + NumTrailingEntries;
+  const size_t NumFuncs = Funcs.size();
+  const uint8_t AddrOffSize = getAddressOffsetSize();
+
+  uint64_t Size = HeaderSize + NumEntries * GlobalDataEntrySize;
+  // Address offsets, aligned to their own width.
+  Size = llvm::alignTo(Size, AddrOffSize);
+  Size += NumFuncs * AddrOffSize;
+  // Address info offsets.
+  Size = llvm::alignTo(Size, 4);
+  Size += NumFuncs * 4;
+  // File table: a 4-byte field followed by the entries.
+  Size = llvm::alignTo(Size, 4);
+  Size += 4 + Files.size() * sizeof(FileEntry);
+  Size += StrTab.getSize();
+  Size += UUID.size();
+  return Size;
}
-llvm::Error GsymCreatorV2::save(StringRef Path, llvm::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize) const {
- if (SegmentSize)
- return saveSegments(Path, ByteOrder, *SegmentSize);
- std::error_code EC;
- raw_fd_ostream OutStrm(Path, EC);
- if (EC)
- return llvm::errorCodeToError(EC);
- FileWriter O(OutStrm, ByteOrder);
- return encode(O);
+/// Call site loading is not implemented for version 2.
+///
+/// \param YAMLFile Ignored.
+/// \returns Always a std::errc::not_supported error.
+llvm::Error GsymCreatorV2::loadCallSitesFromYAML(StringRef YAMLFile) {
+  const std::error_code NotSupported =
+      std::make_error_code(std::errc::not_supported);
+  return createStringError(NotSupported,
+                           "call site loading not yet supported in V2");
}
/// Write a single GlobalData entry to the output stream.
@@ -122,20 +84,13 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t FISectionSize = FIBuf.size();
const uint64_t StringTableSize = StrTab.getSize();
- // Determine StrpSize based on string table size.
const uint8_t StrpSize = (StringTableSize > UINT32_MAX) ? 8 : 4;
- // Compute number of GlobalData entries.
const bool HasUUID = !UUID.empty();
- // Sections: AddrOffsets, AddrInfoOffsets, StringTable, FileTable, FunctionInfo
- // Plus UUID if present, plus EndOfList terminator.
const uint32_t NumGlobalDataEntries = 5 + (HasUUID ? 1 : 0) + 1;
const uint64_t GlobalDataArraySize =
static_cast<uint64_t>(NumGlobalDataEntries) * 24;
- // Plan the file layout. All offsets are relative to the start of the GSYM
- // data (i.e., the start of the header). We place sections sequentially after
- // the header and GlobalData entries in a convenient order.
constexpr uint64_t HeaderSize = 24;
uint64_t CurOffset = HeaderSize + GlobalDataArraySize;
@@ -145,16 +100,14 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t AddrOffsetsSize = Funcs.size() * AddrOffSize;
CurOffset += AddrOffsetsSize;
- // Determine AddrInfoOffSize: if the estimated end of the FunctionInfo section
- // would exceed UINT32_MAX, use 8-byte offsets.
+ // Determine AddrInfoOffSize.
uint8_t AddrInfoOffSize = 4;
{
- // Estimate conservatively with 4-byte AddrInfoOffsets.
uint64_t Est = CurOffset;
Est = llvm::alignTo(Est, 4);
- Est += Funcs.size() * 4; // AddrInfoOffsets
+ Est += Funcs.size() * 4;
Est = llvm::alignTo(Est, 4);
- Est += 4 + Files.size() * sizeof(FileEntry); // FileTable
+ Est += 4 + Files.size() * sizeof(FileEntry);
Est += StringTableSize;
Est = llvm::alignTo(Est, 4);
Est += FISectionSize;
@@ -174,7 +127,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t FileTableSize = 4 + Files.size() * sizeof(FileEntry);
CurOffset += FileTableSize;
- // StringTable section (no alignment requirement).
+ // StringTable section.
const uint64_t StringTableOffset = CurOffset;
CurOffset += StringTableSize;
@@ -183,7 +136,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t FISectionOffset = CurOffset;
CurOffset += FISectionSize;
- // UUID section (no alignment requirement).
+ // UUID section.
const uint64_t UUIDOffset = CurOffset;
const uint64_t UUIDSectionSize = UUID.size();
@@ -215,7 +168,6 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
if (HasUUID)
writeGlobalDataEntry(O, GlobalInfoType::UUID,
UUIDOffset, UUIDSectionSize);
- // EndOfList terminator.
writeGlobalDataEntry(O, GlobalInfoType::EndOfList, 0, 0);
// Write AddrOffsets section.
@@ -234,8 +186,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
}
}
- // Write AddrInfoOffsets section. Each entry is the absolute file offset
- // (from the start of the GSYM data) to the corresponding FunctionInfo.
+ // Write AddrInfoOffsets section.
O.alignTo(AddrInfoOffSize);
assert(O.tell() == AddrInfoOffsetsOffset);
for (uint64_t RelOff : FIRelativeOffsets) {
@@ -268,7 +219,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
assert(O.tell() == StringTableOffset);
StrTab.write(O.get_stream());
- // Write FunctionInfo section (pre-encoded data).
+ // Write FunctionInfo section.
O.alignTo(4);
assert(O.tell() == FISectionOffset);
O.writeData(ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(FIBuf.data()),
@@ -282,433 +233,3 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
return Error::success();
}
-
-llvm::Error GsymCreatorV2::loadCallSitesFromYAML(StringRef YAMLFile) {
- // TODO: Implement V2-specific call site loading.
- return createStringError(std::errc::not_supported,
- "call site loading not yet supported in V2");
-}
-
-void GsymCreatorV2::prepareMergedFunctions(OutputAggregator &Out) {
- // Nothing to do if we have less than 2 functions.
- if (Funcs.size() < 2)
- return;
-
- // Sort the function infos by address range first, preserving input order
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> TopLevelFuncs;
-
- // Add the first function info to the top level functions
- TopLevelFuncs.emplace_back(std::move(Funcs.front()));
-
- // Now if the next function info has the same address range as the top level,
- // then merge it into the top level function, otherwise add it to the top
- // level.
- for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
- FunctionInfo &TopFunc = TopLevelFuncs.back();
- FunctionInfo &MatchFunc = Funcs[Idx];
- if (TopFunc.Range == MatchFunc.Range) {
- // Both have the same range - add the 2nd func as a child of the 1st func
- if (!TopFunc.MergedFunctions)
- TopFunc.MergedFunctions = MergedFunctionsInfo();
- // Avoid adding duplicate functions to MergedFunctions. Since functions
- // are already ordered within the Funcs array, we can just check equality
- // against the last function in the merged array.
- else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
- continue;
- TopFunc.MergedFunctions->MergedFunctions.emplace_back(
- std::move(MatchFunc));
- } else
- // No match, add the function as a top-level function
- TopLevelFuncs.emplace_back(std::move(MatchFunc));
- }
-
- uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
- // If any functions were merged, print a message about it.
- if (mergedCount != 0)
- Out << "Have " << mergedCount
- << " merged functions as children of other functions\n";
-
- std::swap(Funcs, TopLevelFuncs);
-}
-
-llvm::Error GsymCreatorV2::finalize(OutputAggregator &Out) {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Finalized)
- return createStringError(std::errc::invalid_argument, "already finalized");
- Finalized = true;
-
- // Don't let the string table indexes change by finalizing in order.
- StrTab.finalizeInOrder();
-
- // Remove duplicates function infos that have both entries from debug info
- // (DWARF or Breakpad) and entries from the SymbolTable.
- //
- // Also handle overlapping function. Usually there shouldn't be any, but they
- // can and do happen in some rare cases.
- //
- // (a) (b) (c)
- // ^ ^ ^ ^
- // |X |Y |X ^ |X
- // | | | |Y | ^
- // | | | v v |Y
- // v v v v
- //
- // In (a) and (b), Y is ignored and X will be reported for the full range.
- // In (c), both functions will be included in the result and lookups for an
- // address in the intersection will return Y because of binary search.
- //
- // Note that in case of (b), we cannot include Y in the result because then
- // we wouldn't find any function for range (end of Y, end of X)
- // with binary search
-
- const auto NumBefore = Funcs.size();
- // Only sort and unique if this isn't a segment. If this is a segment we
- // already finalized the main GsymCreatorV2 with all of the function infos
- // and then the already sorted and uniqued function infos were added to this
- // object.
- if (!IsSegment) {
- if (NumBefore > 1) {
- // Sort function infos so we can emit sorted functions. Use stable sort to
- // ensure determinism.
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> FinalizedFuncs;
- FinalizedFuncs.reserve(Funcs.size());
- FinalizedFuncs.emplace_back(std::move(Funcs.front()));
- for (size_t Idx=1; Idx < NumBefore; ++Idx) {
- FunctionInfo &Prev = FinalizedFuncs.back();
- FunctionInfo &Curr = Funcs[Idx];
- // Empty ranges won't intersect, but we still need to
- // catch the case where we have multiple symbols at the
- // same address and coalesce them.
- const bool ranges_equal = Prev.Range == Curr.Range;
- if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
- // Overlapping ranges or empty identical ranges.
- if (ranges_equal) {
- // Same address range. Check if one is from debug
- // info and the other is from a symbol table. If
- // so, then keep the one with debug info. Our
- // sorting guarantees that entries with matching
- // address ranges that have debug info are last in
- // the sort.
- if (!(Prev == Curr)) {
- if (Prev.hasRichInfo() && Curr.hasRichInfo())
- Out.Report(
- "Duplicate address ranges with different debug info.",
- [&](raw_ostream &OS) {
- OS << "warning: same address range contains "
- "different debug "
- << "info. Removing:\n"
- << Prev << "\nIn favor of this one:\n"
- << Curr << "\n";
- });
-
- // We want to swap the current entry with the previous since
- // later entries with the same range always have more debug info
- // or different debug info.
- std::swap(Prev, Curr);
- }
- } else {
- Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
- // print warnings about overlaps
- OS << "warning: function ranges overlap:\n"
- << Prev << "\n"
- << Curr << "\n";
- });
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- } else {
- if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
- // Symbols on macOS don't have address ranges, so if the range
- // doesn't match and the size is zero, then we replace the empty
- // symbol function info with the current one.
- std::swap(Prev, Curr);
- } else {
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- }
- }
- std::swap(Funcs, FinalizedFuncs);
- }
- // If our last function info entry doesn't have a size and if we have valid
- // text ranges, we should set the size of the last entry since any search for
- // a high address might match our last entry. By fixing up this size, we can
- // help ensure we don't cause lookups to always return the last symbol that
- // has no size when doing lookups.
- if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
- if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
- Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
- }
- }
- Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
- << Funcs.size() << " total\n";
- }
- return Error::success();
-}
-
-uint32_t GsymCreatorV2::copyString(const GsymCreatorV2 &SrcGC, uint32_t StrOff) {
- // String offset at zero is always the empty string, no copying needed.
- if (StrOff == 0)
- return 0;
- return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
-}
-
-uint32_t GsymCreatorV2::insertString(StringRef S, bool Copy) {
- if (S.empty())
- return 0;
-
- // The hash can be calculated outside the lock.
- CachedHashStringRef CHStr(S);
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Copy) {
- // We need to provide backing storage for the string if requested
- // since StringTableBuilder stores references to strings. Any string
- // that comes from a section in an object file doesn't need to be
- // copied, but any string created by code will need to be copied.
- // This allows GsymCreatorV2 to be really fast when parsing DWARF and
- // other object files as most strings don't need to be copied.
- if (!StrTab.contains(CHStr))
- CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
- CHStr.hash()};
- }
- const uint32_t StrOff = StrTab.add(CHStr);
- // Save a mapping of string offsets to the cached string reference in case
- // we need to segment the GSYM file and copy string from one string table to
- // another.
- StringOffsetMap.try_emplace(StrOff, CHStr);
- return StrOff;
-}
-
-StringRef GsymCreatorV2::getString(uint32_t Offset) {
- auto I = StringOffsetMap.find(Offset);
- assert(I != StringOffsetMap.end() &&
- "GsymCreatorV2::getString expects a valid offset as parameter.");
- return I->second.val();
-}
-
-void GsymCreatorV2::addFunctionInfo(FunctionInfo &&FI) {
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.emplace_back(std::move(FI));
-}
-
-void GsymCreatorV2::forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback) {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
-}
-
-void GsymCreatorV2::forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const {
- std::lock_guard<std::mutex> Guard(Mutex);
- for (const auto &FI : Funcs) {
- if (!Callback(FI))
- break;
- }
-}
-
-size_t GsymCreatorV2::getNumFunctionInfos() const {
- std::lock_guard<std::mutex> Guard(Mutex);
- return Funcs.size();
-}
-
-bool GsymCreatorV2::IsValidTextAddress(uint64_t Addr) const {
- if (ValidTextRanges)
- return ValidTextRanges->contains(Addr);
- return true; // No valid text ranges has been set, so accept all ranges.
-}
-
-std::optional<uint64_t> GsymCreatorV2::getFirstFunctionAddress() const {
- // If we have finalized then Funcs are sorted. If we are a segment then
- // Funcs will be sorted as well since function infos get added from an
- // already finalized GsymCreatorV2 object where its functions were sorted and
- // uniqued.
- if ((Finalized || IsSegment) && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.front().startAddress());
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymCreatorV2::getLastFunctionAddress() const {
- // If we have finalized then Funcs are sorted. If we are a segment then
- // Funcs will be sorted as well since function infos get added from an
- // already finalized GsymCreatorV2 object where its functions were sorted and
- // uniqued.
- if ((Finalized || IsSegment) && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.back().startAddress());
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymCreatorV2::getBaseAddress() const {
- if (BaseAddress)
- return BaseAddress;
- return getFirstFunctionAddress();
-}
-
-uint64_t GsymCreatorV2::getMaxAddressOffset() const {
- switch (getAddressOffsetSize()) {
- case 1: return UINT8_MAX;
- case 2: return UINT16_MAX;
- case 4: return UINT32_MAX;
- case 8: return UINT64_MAX;
- }
- llvm_unreachable("invalid address offset");
-}
-
-uint8_t GsymCreatorV2::getAddressOffsetSize() const {
- const std::optional<uint64_t> BaseAddress = getBaseAddress();
- const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
- if (BaseAddress && LastFuncAddr) {
- const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
- if (AddrDelta <= UINT8_MAX)
- return 1;
- else if (AddrDelta <= UINT16_MAX)
- return 2;
- else if (AddrDelta <= UINT32_MAX)
- return 4;
- return 8;
- }
- return 1;
-}
-
-uint64_t GsymCreatorV2::calculateHeaderAndTableSize() const {
- constexpr uint64_t HeaderSize = 24;
- const size_t NumFuncs = Funcs.size();
- // GlobalData entries: 5 sections + UUID (if any) + EndOfList terminator.
- const uint32_t NumEntries = 5 + (UUID.empty() ? 0 : 1) + 1;
- uint64_t Size = HeaderSize + NumEntries * 24;
- // AddrOffsets
- Size = llvm::alignTo(Size, getAddressOffsetSize());
- Size += NumFuncs * getAddressOffsetSize();
- // AddrInfoOffsets (assume 4-byte entries for estimation)
- Size = llvm::alignTo(Size, 4);
- Size += NumFuncs * 4;
- // FileTable
- Size = llvm::alignTo(Size, 4);
- Size += 4 + Files.size() * sizeof(FileEntry);
- // StringTable
- Size += StrTab.getSize();
- // UUID
- Size += UUID.size();
- return Size;
-}
-
-// This function takes a InlineInfo class that was copy constructed from an
-// InlineInfo from the \a SrcGC and updates all members that point to strings
-// and files to point to strings and files from this GsymCreatorV2.
-void GsymCreatorV2::fixupInlineInfo(const GsymCreatorV2 &SrcGC, InlineInfo &II) {
- II.Name = copyString(SrcGC, II.Name);
- II.CallFile = copyFile(SrcGC, II.CallFile);
- for (auto &ChildII: II.Children)
- fixupInlineInfo(SrcGC, ChildII);
-}
-
-uint64_t GsymCreatorV2::copyFunctionInfo(const GsymCreatorV2 &SrcGC, size_t FuncIdx) {
- // To copy a function info we need to copy any files and strings over into
- // this GsymCreatorV2 and then copy the function info and update the string
- // table offsets to match the new offsets.
- const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
-
- FunctionInfo DstFI;
- DstFI.Range = SrcFI.Range;
- DstFI.Name = copyString(SrcGC, SrcFI.Name);
- // Copy the line table if there is one.
- if (SrcFI.OptLineTable) {
- // Copy the entire line table.
- DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
- // Fixup all LineEntry::File entries which are indexes in the the file table
- // from SrcGC and must be converted to file indexes from this GsymCreatorV2.
- LineTable &DstLT = DstFI.OptLineTable.value();
- const size_t NumLines = DstLT.size();
- for (size_t I=0; I<NumLines; ++I) {
- LineEntry &LE = DstLT.get(I);
- LE.File = copyFile(SrcGC, LE.File);
- }
- }
- // Copy the inline information if needed.
- if (SrcFI.Inline) {
- // Make a copy of the source inline information.
- DstFI.Inline = SrcFI.Inline.value();
- // Fixup all strings and files in the copied inline information.
- fixupInlineInfo(SrcGC, *DstFI.Inline);
- }
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.emplace_back(DstFI);
- return Funcs.back().cacheEncoding();
-}
-
-llvm::Error GsymCreatorV2::saveSegments(StringRef Path,
- llvm::endianness ByteOrder,
- uint64_t SegmentSize) const {
- if (SegmentSize == 0)
- return createStringError(std::errc::invalid_argument,
- "invalid segment size zero");
-
- size_t FuncIdx = 0;
- const size_t NumFuncs = Funcs.size();
- while (FuncIdx < NumFuncs) {
- llvm::Expected<std::unique_ptr<GsymCreatorV2>> ExpectedGC =
- createSegment(SegmentSize, FuncIdx);
- if (ExpectedGC) {
- GsymCreatorV2 *GC = ExpectedGC->get();
- if (!GC)
- break; // We had not more functions to encode.
- // Don't collect any messages at all
- OutputAggregator Out(nullptr);
- llvm::Error Err = GC->finalize(Out);
- if (Err)
- return Err;
- std::string SegmentedGsymPath;
- raw_string_ostream SGP(SegmentedGsymPath);
- std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
- if (FirstFuncAddr) {
- SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
- Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
- if (Err)
- return Err;
- }
- } else {
- return ExpectedGC.takeError();
- }
- }
- return Error::success();
-}
-
-llvm::Expected<std::unique_ptr<GsymCreatorV2>>
-GsymCreatorV2::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
- // No function entries, return empty unique pointer
- if (FuncIdx >= Funcs.size())
- return std::unique_ptr<GsymCreatorV2>();
-
- std::unique_ptr<GsymCreatorV2> GC(new GsymCreatorV2(/*Quiet=*/true));
-
- // Tell the creator that this is a segment.
- GC->setIsSegment();
-
- // Set the base address if there is one.
- if (BaseAddress)
- GC->setBaseAddress(*BaseAddress);
- // Copy the UUID value from this object into the new creator.
- GC->setUUID(UUID);
- const size_t NumFuncs = Funcs.size();
- // Track how big the function infos are for the current segment so we can
- // emit segments that are close to the requested size. It is quick math to
- // determine the current header and tables sizes, so we can do that each loop.
- uint64_t SegmentFuncInfosSize = 0;
- for (; FuncIdx < NumFuncs; ++FuncIdx) {
- const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
- if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
- if (SegmentFuncInfosSize == 0)
- return createStringError(std::errc::invalid_argument,
- "a segment size of %" PRIu64 " is to small to "
- "fit any function infos, specify a larger value",
- SegmentSize);
-
- break;
- }
- SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
- }
- return std::move(GC);
-}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index a94ce824249d4..104059debd2bf 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -8,18 +8,29 @@
#include "llvm/DebugInfo/GSYM/GsymReader.h"
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+
#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/HeaderV2.h"
+#include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/Support/MemoryBuffer.h"
using namespace llvm;
using namespace gsym;
+GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
+ : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
+
+GsymReader::GsymReader(GsymReader &&RHS) = default;
+
/// Detect the GSYM version from raw bytes.
static Expected<uint16_t> detectVersion(StringRef Data) {
- // Need at least 6 bytes: 4 (magic) + 2 (version).
if (Data.size() < 6)
return createStringError(std::errc::invalid_argument,
"data too small to be a GSYM file");
@@ -72,3 +83,278 @@ GsymReader::copyBuffer(StringRef Bytes) {
return R.takeError();
return std::make_unique<GsymReaderV1>(std::move(*R));
}
+
+std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
+ switch (CachedAddrOffSize) {
+ case 1: return addressForIndex<uint8_t>(Index);
+ case 2: return addressForIndex<uint16_t>(Index);
+ case 4: return addressForIndex<uint32_t>(Index);
+ case 8: return addressForIndex<uint64_t>(Index);
+ }
+ return std::nullopt;
+}
+
+std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
+ if (Index < AddrInfoOffsets.size())
+ return AddrInfoOffsets[Index];
+ return std::nullopt;
+}
+
+Expected<uint64_t>
+GsymReader::getAddressIndex(const uint64_t Addr) const {
+ if (Addr >= CachedBaseAddress) {
+ const uint64_t AddrOffset = Addr - CachedBaseAddress;
+ std::optional<uint64_t> AddrOffsetIndex;
+ switch (CachedAddrOffSize) {
+ case 1:
+ AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
+ break;
+ case 2:
+ AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
+ break;
+ case 4:
+ AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
+ break;
+ case 8:
+ AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
+ break;
+ default:
+ return createStringError(std::errc::invalid_argument,
+ "unsupported address offset size %u",
+ CachedAddrOffSize);
+ }
+ if (AddrOffsetIndex)
+ return *AddrOffsetIndex;
+ }
+ return createStringError(std::errc::invalid_argument,
+ "address 0x%" PRIx64 " is not in GSYM", Addr);
+}
+
+llvm::Expected<DataExtractor>
+GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
+ uint64_t &FuncStartAddr) const {
+ Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
+ if (!ExpectedAddrIdx)
+ return ExpectedAddrIdx.takeError();
+ const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
+ std::optional<uint64_t> FirstFuncStartAddr;
+ const size_t NumAddresses = getNumAddresses();
+ for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
+ auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
+ if (!ExpextedData)
+ return ExpextedData;
+
+ if (FirstFuncStartAddr.has_value()) {
+ if (*FirstFuncStartAddr != FuncStartAddr)
+ break;
+ } else {
+ FirstFuncStartAddr = FuncStartAddr;
+ }
+
+ uint64_t Offset = 0;
+ uint32_t FuncSize = ExpextedData->getU32(&Offset);
+ if (FuncSize == 0 ||
+ AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
+ return ExpextedData;
+ }
+ return createStringError(std::errc::invalid_argument,
+ "address 0x%" PRIx64 " is not in GSYM", Addr);
+}
+
+llvm::Expected<DataExtractor>
+GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
+ uint64_t &FuncStartAddr) const {
+ if (AddrIdx >= getNumAddresses())
+ return createStringError(std::errc::invalid_argument,
+ "invalid address index %" PRIu64, AddrIdx);
+ const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
+ assert((Endian == endianness::big || Endian == endianness::little) &&
+ "Endian must be either big or little");
+ StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
+ if (Bytes.empty())
+ return createStringError(std::errc::invalid_argument,
+ "invalid address info offset 0x%" PRIx32,
+ AddrInfoOffset);
+ std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
+ if (!OptFuncStartAddr)
+ return createStringError(std::errc::invalid_argument,
+ "failed to extract address[%" PRIu64 "]", AddrIdx);
+ FuncStartAddr = *OptFuncStartAddr;
+ return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
+}
+
+llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
+ return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
+ else
+ return ExpectedData.takeError();
+}
+
+llvm::Expected<FunctionInfo>
+GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
+ return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
+ else
+ return ExpectedData.takeError();
+}
+
+llvm::Expected<LookupResult>
+GsymReader::lookup(uint64_t Addr,
+ std::optional<DataExtractor> *MergedFunctionsData) const {
+ uint64_t FuncStartAddr = 0;
+ if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
+ return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
+ MergedFunctionsData);
+ else
+ return ExpectedData.takeError();
+}
+
+llvm::Expected<std::vector<LookupResult>>
+GsymReader::lookupAll(uint64_t Addr) const {
+ std::vector<LookupResult> Results;
+ std::optional<DataExtractor> MergedFunctionsData;
+
+ auto MainResult = lookup(Addr, &MergedFunctionsData);
+ if (!MainResult)
+ return MainResult.takeError();
+
+ Results.push_back(std::move(*MainResult));
+
+ if (MergedFunctionsData) {
+ auto ExpectedMergedFuncExtractors =
+ MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
+ if (!ExpectedMergedFuncExtractors)
+ return ExpectedMergedFuncExtractors.takeError();
+
+ for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
+ if (auto FI = FunctionInfo::lookup(MergedData, *this,
+ MainResult->FuncRange.start(), Addr)) {
+ Results.push_back(std::move(*FI));
+ } else {
+ return FI.takeError();
+ }
+ }
+ }
+
+ return Results;
+}
+
+void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
+ uint32_t Indent) {
+ OS.indent(Indent);
+ OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
+ if (FI.OptLineTable)
+ dump(OS, *FI.OptLineTable, Indent);
+ if (FI.Inline)
+ dump(OS, *FI.Inline, Indent);
+ if (FI.CallSites)
+ dump(OS, *FI.CallSites, Indent);
+ if (FI.MergedFunctions) {
+ assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
+ dump(OS, *FI.MergedFunctions);
+ }
+}
+
+void GsymReader::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
+ for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
+ OS << "++ Merged FunctionInfos[" << inx << "]:\n";
+ dump(OS, MFI.MergedFunctions[inx], 4);
+ }
+}
+
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
+ OS << HEX16(CSI.ReturnOffset);
+
+ std::string Flags;
+ auto addFlag = [&](const char *Flag) {
+ if (!Flags.empty())
+ Flags += " | ";
+ Flags += Flag;
+ };
+
+ if (CSI.Flags == CallSiteInfo::Flags::None)
+ Flags = "None";
+ else {
+ if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
+ addFlag("InternalCall");
+ if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
+ addFlag("ExternalCall");
+ }
+ OS << " Flags[" << Flags << "]";
+
+ if (!CSI.MatchRegex.empty()) {
+ OS << " MatchRegex[";
+ for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
+ if (i > 0)
+ OS << ";";
+ OS << getString(CSI.MatchRegex[i]);
+ }
+ OS << "]";
+ }
+}
+
+void GsymReader::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
+ uint32_t Indent) {
+ OS.indent(Indent);
+ OS << "CallSites (by relative return offset):\n";
+ for (const auto &CS : CSIC.CallSites) {
+ OS.indent(Indent);
+ OS << " ";
+ dump(OS, CS);
+ OS << "\n";
+ }
+}
+
+void GsymReader::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
+ OS.indent(Indent);
+ OS << "LineTable:\n";
+ for (auto &LE: LT) {
+ OS.indent(Indent);
+ OS << " " << HEX64(LE.Addr) << ' ';
+ if (LE.File)
+ dump(OS, getFile(LE.File));
+ OS << ':' << LE.Line << '\n';
+ }
+}
+
+void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
+ if (Indent == 0)
+ OS << "InlineInfo:\n";
+ else
+ OS.indent(Indent);
+ OS << II.Ranges << ' ' << getString(II.Name);
+ if (II.CallFile != 0) {
+ if (auto File = getFile(II.CallFile)) {
+ OS << " called from ";
+ dump(OS, File);
+ OS << ':' << II.CallLine;
+ }
+ }
+ OS << '\n';
+ for (const auto &ChildII: II.Children)
+ dump(OS, ChildII, Indent + 2);
+}
+
+void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
+ if (FE) {
+ if (FE->Dir == 0 && FE->Base == 0)
+ return;
+ StringRef Dir = getString(FE->Dir);
+ StringRef Base = getString(FE->Base);
+ if (!Dir.empty()) {
+ OS << Dir;
+ if (Dir.contains('\\') && !Dir.contains('/'))
+ OS << '\\';
+ else
+ OS << '/';
+ }
+ if (!Base.empty()) {
+ OS << Base;
+ }
+ if (!Dir.empty() || !Base.empty())
+ return;
+ }
+ OS << "<invalid-file>";
+}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
index 4e6264f352a18..5d7c371f9cf81 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
@@ -10,12 +10,8 @@
#include <assert.h>
#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
#include "llvm/DebugInfo/GSYM/Header.h"
-#include "llvm/DebugInfo/GSYM/InlineInfo.h"
-#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -24,14 +20,12 @@ using namespace llvm;
using namespace gsym;
GsymReaderV1::GsymReaderV1(std::unique_ptr<MemoryBuffer> Buffer)
- : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
+ : GsymReader(std::move(Buffer)) {}
GsymReaderV1::GsymReaderV1(GsymReaderV1 &&RHS) = default;
-
GsymReaderV1::~GsymReaderV1() = default;
llvm::Expected<GsymReaderV1> GsymReaderV1::openFile(StringRef Filename) {
- // Open the input file and return an appropriate error if needed.
ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
auto Err = BuffOrErr.getError();
@@ -41,28 +35,23 @@ llvm::Expected<GsymReaderV1> GsymReaderV1::openFile(StringRef Filename) {
}
llvm::Expected<GsymReaderV1> GsymReaderV1::copyBuffer(StringRef Bytes) {
- auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
- return create(MemBuffer);
+ auto MB = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
+ return create(MB);
}
-llvm::Expected<llvm::gsym::GsymReaderV1>
-GsymReaderV1::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
- if (!MemBuffer)
+llvm::Expected<GsymReaderV1>
+GsymReaderV1::create(std::unique_ptr<MemoryBuffer> &MB) {
+ if (!MB)
return createStringError(std::errc::invalid_argument,
"invalid memory buffer");
- GsymReaderV1 GR(std::move(MemBuffer));
- llvm::Error Err = GR.parse();
- if (Err)
+ GsymReaderV1 GR(std::move(MB));
+ if (auto Err = GR.parse())
return std::move(Err);
return std::move(GR);
}
-llvm::Error
-GsymReaderV1::parse() {
+llvm::Error GsymReaderV1::parse() {
BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);
- // Check for the magic bytes. This file format is designed to be mmap'ed
- // into a process and accessed as read only. This is done for performance
- // and efficiency for symbolicating and parsing GSYM data.
if (FileData.readObject(Hdr))
return createStringError(std::errc::invalid_argument,
"not enough data for a GSYM header");
@@ -73,7 +62,6 @@ GsymReaderV1::parse() {
Endian = HostByteOrder;
break;
case GSYM_CIGAM:
- // This is a GSYM file, but not native endianness.
Endian = sys::IsBigEndianHost ? llvm::endianness::little
: llvm::endianness::big;
Swap.reset(new SwappedData);
@@ -84,7 +72,6 @@ GsymReaderV1::parse() {
}
bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;
- // Read a correctly byte swapped header if we need to.
if (Swap) {
DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
if (auto ExpectedHdr = Header::decode(Data))
@@ -94,52 +81,41 @@ GsymReaderV1::parse() {
Hdr = &Swap->Hdr;
}
- // Detect errors in the header and report any that are found. If we make it
- // past this without errors, we know we have a good magic value, a supported
- // version number, verified address offset size and a valid UUID size.
if (Error Err = Hdr->checkForError())
return Err;
- if (!Swap) {
- // This is the native endianness case that is most common and optimized for
- // efficient lookups. Here we just grab pointers to the native data and
- // use ArrayRef objects to allow efficient read only access.
+ // Populate cached header values in the base class.
+ CachedBaseAddress = Hdr->BaseAddress;
+ CachedNumAddresses = Hdr->NumAddresses;
+ CachedAddrOffSize = Hdr->AddrOffSize;
- // Read the address offsets.
+ if (!Swap) {
if (FileData.padToAlignment(Hdr->AddrOffSize) ||
FileData.readArray(AddrOffsets,
Hdr->NumAddresses * Hdr->AddrOffSize))
return createStringError(std::errc::invalid_argument,
"failed to read address table");
- // Read the address info offsets.
if (FileData.padToAlignment(4) ||
FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
"failed to read address info offsets table");
- // Read the file table.
uint32_t NumFiles = 0;
if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))
return createStringError(std::errc::invalid_argument,
"failed to read file table");
- // Get the string table.
FileData.setOffset(Hdr->StrtabOffset);
if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))
return createStringError(std::errc::invalid_argument,
"failed to read string table");
-} else {
- // This is the non native endianness case that is not common and not
- // optimized for lookups. Here we decode the important tables into local
- // storage and then set the ArrayRef objects to point to these swapped
- // copies of the read only data so lookups can be as efficient as possible.
- DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
+ } else {
+ DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);
- // Read the address offsets.
- uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
- Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
- switch (Hdr->AddrOffSize) {
+ uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);
+ Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);
+ switch (Hdr->AddrOffSize) {
case 1:
if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
@@ -168,7 +144,6 @@ GsymReaderV1::parse() {
}
AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
- // Read the address info offsets.
Offset = alignTo(Offset, 4);
Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
@@ -176,7 +151,7 @@ GsymReaderV1::parse() {
else
return createStringError(std::errc::invalid_argument,
"failed to read address table");
- // Read the file table.
+
const uint32_t NumFiles = Data.getU32(&Offset);
if (NumFiles > 0) {
Swap->Files.resize(NumFiles);
@@ -186,7 +161,7 @@ GsymReaderV1::parse() {
return createStringError(std::errc::invalid_argument,
"failed to read file table");
}
- // Get the string table.
+
StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,
Hdr->StrtabSize);
if (StrTab.Data.empty())
@@ -194,7 +169,6 @@ GsymReaderV1::parse() {
"failed to read string table");
}
return Error::success();
-
}
const Header &GsymReaderV1::getHeader() const {
@@ -202,172 +176,13 @@ const Header &GsymReaderV1::getHeader() const {
return *Hdr;
}
-std::optional<uint64_t> GsymReaderV1::getAddress(size_t Index) const {
- switch (Hdr->AddrOffSize) {
- case 1: return addressForIndex<uint8_t>(Index);
- case 2: return addressForIndex<uint16_t>(Index);
- case 4: return addressForIndex<uint32_t>(Index);
- case 8: return addressForIndex<uint64_t>(Index);
- }
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymReaderV1::getAddressInfoOffset(size_t Index) const {
- const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
- if (Index < NumAddrInfoOffsets)
- return AddrInfoOffsets[Index];
- return std::nullopt;
-}
-
-Expected<uint64_t>
-GsymReaderV1::getAddressIndex(const uint64_t Addr) const {
- if (Addr >= Hdr->BaseAddress) {
- const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
- std::optional<uint64_t> AddrOffsetIndex;
- switch (Hdr->AddrOffSize) {
- case 1:
- AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
- break;
- case 2:
- AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
- break;
- case 4:
- AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
- break;
- case 8:
- AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
- break;
- default:
- return createStringError(std::errc::invalid_argument,
- "unsupported address offset size %u",
- Hdr->AddrOffSize);
- }
- if (AddrOffsetIndex)
- return *AddrOffsetIndex;
- }
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
-
-}
-
-llvm::Expected<DataExtractor>
-GsymReaderV1::getFunctionInfoDataForAddress(uint64_t Addr,
- uint64_t &FuncStartAddr) const {
- Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
- if (!ExpectedAddrIdx)
- return ExpectedAddrIdx.takeError();
- const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
- std::optional<uint64_t> FirstFuncStartAddr;
- const size_t NumAddresses = getNumAddresses();
- for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
- auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
- if (!ExpextedData)
- return ExpextedData;
-
- if (FirstFuncStartAddr.has_value()) {
- if (*FirstFuncStartAddr != FuncStartAddr)
- break;
- } else {
- FirstFuncStartAddr = FuncStartAddr;
- }
-
- uint64_t Offset = 0;
- uint32_t FuncSize = ExpextedData->getU32(&Offset);
- if (FuncSize == 0 ||
- AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
- return ExpextedData;
- }
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
-}
-
-llvm::Expected<DataExtractor>
-GsymReaderV1::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
- uint64_t &FuncStartAddr) const {
- if (AddrIdx >= getNumAddresses())
- return createStringError(std::errc::invalid_argument,
- "invalid address index %" PRIu64, AddrIdx);
- const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
- assert((Endian == endianness::big || Endian == endianness::little) &&
- "Endian must be either big or little");
- StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
- if (Bytes.empty())
- return createStringError(std::errc::invalid_argument,
- "invalid address info offset 0x%" PRIx32,
- AddrInfoOffset);
- std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
- if (!OptFuncStartAddr)
- return createStringError(std::errc::invalid_argument,
- "failed to extract address[%" PRIu64 "]", AddrIdx);
- FuncStartAddr = *OptFuncStartAddr;
- return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
-}
-
-llvm::Expected<FunctionInfo> GsymReaderV1::getFunctionInfo(uint64_t Addr) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<FunctionInfo>
-GsymReaderV1::getFunctionInfoAtIndex(uint64_t Idx) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
- return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<LookupResult>
-GsymReaderV1::lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFunctionsData) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
- MergedFunctionsData);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<std::vector<LookupResult>>
-GsymReaderV1::lookupAll(uint64_t Addr) const {
- std::vector<LookupResult> Results;
- std::optional<DataExtractor> MergedFunctionsData;
-
- auto MainResult = lookup(Addr, &MergedFunctionsData);
- if (!MainResult)
- return MainResult.takeError();
-
- Results.push_back(std::move(*MainResult));
-
- if (MergedFunctionsData) {
- auto ExpectedMergedFuncExtractors =
- MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
- if (!ExpectedMergedFuncExtractors)
- return ExpectedMergedFuncExtractors.takeError();
-
- for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
- if (auto FI = FunctionInfo::lookup(MergedData, *this,
- MainResult->FuncRange.start(), Addr)) {
- Results.push_back(std::move(*FI));
- } else {
- return FI.takeError();
- }
- }
- }
-
- return Results;
-}
-
void GsymReaderV1::dump(raw_ostream &OS) {
const auto &Header = getHeader();
OS << Header << "\n";
OS << "Address Table:\n";
OS << "INDEX OFFSET";
- switch (Hdr->AddrOffSize) {
+ switch (CachedAddrOffSize) {
case 1: OS << "8 "; break;
case 2: OS << "16"; break;
case 4: OS << "32"; break;
@@ -376,9 +191,9 @@ void GsymReaderV1::dump(raw_ostream &OS) {
}
OS << " (ADDRESS)\n";
OS << "====== =============================== \n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+ for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
OS << format("[%4u] ", I);
- switch (Hdr->AddrOffSize) {
+ switch (CachedAddrOffSize) {
case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
@@ -390,7 +205,7 @@ void GsymReaderV1::dump(raw_ostream &OS) {
OS << "\nAddress Info Offsets:\n";
OS << "INDEX Offset\n";
OS << "====== ==========\n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I)
+ for (uint32_t I = 0; I < CachedNumAddresses; ++I)
OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
OS << "\nFiles:\n";
OS << "INDEX DIRECTORY BASENAME PATH\n";
@@ -403,7 +218,7 @@ void GsymReaderV1::dump(raw_ostream &OS) {
}
OS << "\n" << StrTab << "\n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+ for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
if (auto FI = getFunctionInfoAtIndex(I))
dump(OS, *FI);
@@ -411,124 +226,3 @@ void GsymReaderV1::dump(raw_ostream &OS) {
logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
}
}
-
-void GsymReaderV1::dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent) {
- OS.indent(Indent);
- OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
- if (FI.OptLineTable)
- dump(OS, *FI.OptLineTable, Indent);
- if (FI.Inline)
- dump(OS, *FI.Inline, Indent);
-
- if (FI.CallSites)
- dump(OS, *FI.CallSites, Indent);
-
- if (FI.MergedFunctions) {
- assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
- dump(OS, *FI.MergedFunctions);
- }
-}
-
-void GsymReaderV1::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
- for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
- OS << "++ Merged FunctionInfos[" << inx << "]:\n";
- dump(OS, MFI.MergedFunctions[inx], 4);
- }
-}
-
-void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
- OS << HEX16(CSI.ReturnOffset);
-
- std::string Flags;
- auto addFlag = [&](const char *Flag) {
- if (!Flags.empty())
- Flags += " | ";
- Flags += Flag;
- };
-
- if (CSI.Flags == CallSiteInfo::Flags::None)
- Flags = "None";
- else {
- if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
- addFlag("InternalCall");
-
- if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
- addFlag("ExternalCall");
- }
- OS << " Flags[" << Flags << "]";
-
- if (!CSI.MatchRegex.empty()) {
- OS << " MatchRegex[";
- for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
- if (i > 0)
- OS << ";";
- OS << getString(CSI.MatchRegex[i]);
- }
- OS << "]";
- }
-}
-
-void GsymReaderV1::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent) {
- OS.indent(Indent);
- OS << "CallSites (by relative return offset):\n";
- for (const auto &CS : CSIC.CallSites) {
- OS.indent(Indent);
- OS << " ";
- dump(OS, CS);
- OS << "\n";
- }
-}
-
-void GsymReaderV1::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
- OS.indent(Indent);
- OS << "LineTable:\n";
- for (auto &LE: LT) {
- OS.indent(Indent);
- OS << " " << HEX64(LE.Addr) << ' ';
- if (LE.File)
- dump(OS, getFile(LE.File));
- OS << ':' << LE.Line << '\n';
- }
-}
-
-void GsymReaderV1::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
- if (Indent == 0)
- OS << "InlineInfo:\n";
- else
- OS.indent(Indent);
- OS << II.Ranges << ' ' << getString(II.Name);
- if (II.CallFile != 0) {
- if (auto File = getFile(II.CallFile)) {
- OS << " called from ";
- dump(OS, File);
- OS << ':' << II.CallLine;
- }
- }
- OS << '\n';
- for (const auto &ChildII: II.Children)
- dump(OS, ChildII, Indent + 2);
-}
-
-void GsymReaderV1::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
- if (FE) {
- if (FE->Dir == 0 && FE->Base == 0)
- return;
- StringRef Dir = getString(FE->Dir);
- StringRef Base = getString(FE->Base);
- if (!Dir.empty()) {
- OS << Dir;
- if (Dir.contains('\\') && !Dir.contains('/'))
- OS << '\\';
- else
- OS << '/';
- }
- if (!Base.empty()) {
- OS << Base;
- }
- if (!Dir.empty() || !Base.empty())
- return;
- }
- OS << "<invalid-file>";
-}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index a0e54f066db37..318de3411a036 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -10,13 +10,8 @@
#include <assert.h>
#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
#include "llvm/DebugInfo/GSYM/GlobalData.h"
-#include "llvm/DebugInfo/GSYM/InlineInfo.h"
-#include "llvm/DebugInfo/GSYM/LineTable.h"
-#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -24,14 +19,12 @@ using namespace llvm;
using namespace gsym;
GsymReaderV2::GsymReaderV2(std::unique_ptr<MemoryBuffer> Buffer)
- : MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}
+ : GsymReader(std::move(Buffer)) {}
GsymReaderV2::GsymReaderV2(GsymReaderV2 &&RHS) = default;
-
GsymReaderV2::~GsymReaderV2() = default;
llvm::Expected<GsymReaderV2> GsymReaderV2::openFile(StringRef Filename) {
- // Open the input file and return an appropriate error if needed.
ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
MemoryBuffer::getFileOrSTDIN(Filename);
auto Err = BuffOrErr.getError();
@@ -41,24 +34,22 @@ llvm::Expected<GsymReaderV2> GsymReaderV2::openFile(StringRef Filename) {
}
llvm::Expected<GsymReaderV2> GsymReaderV2::copyBuffer(StringRef Bytes) {
- auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
- return create(MemBuffer);
+ auto MB = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");
+ return create(MB);
}
-llvm::Expected<llvm::gsym::GsymReaderV2>
-GsymReaderV2::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {
- if (!MemBuffer)
+llvm::Expected<GsymReaderV2>
+GsymReaderV2::create(std::unique_ptr<MemoryBuffer> &MB) {
+ if (!MB)
return createStringError(std::errc::invalid_argument,
"invalid memory buffer");
- GsymReaderV2 GR(std::move(MemBuffer));
- llvm::Error Err = GR.parse();
- if (Err)
+ GsymReaderV2 GR(std::move(MB));
+ if (auto Err = GR.parse())
return std::move(Err);
return std::move(GR);
}
/// Helper to parse GlobalData entries and populate section offsets/sizes.
-/// Works for both native and swapped endianness paths.
static llvm::Error
parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
uint64_t BufSize,
@@ -77,7 +68,6 @@ parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
if (Type == GlobalInfoType::EndOfList)
return Error::success();
- // Validate that the section fits within the buffer.
if (FileOffset + FileSize > BufSize)
return createStringError(std::errc::invalid_argument,
"GlobalData section type %u extends beyond "
@@ -108,7 +98,6 @@ parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
FuncInfoSize = FileSize;
break;
case GlobalInfoType::UUID:
- // UUID is noted but not needed for lookups.
break;
case GlobalInfoType::EndOfList:
llvm_unreachable("handled above");
@@ -118,8 +107,7 @@ parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
"GlobalData array not terminated by EndOfList");
}
-llvm::Error
-GsymReaderV2::parse() {
+llvm::Error GsymReaderV2::parse() {
const StringRef Buf = MemBuffer->getBuffer();
const uint64_t BufSize = Buf.size();
@@ -127,7 +115,6 @@ GsymReaderV2::parse() {
return createStringError(std::errc::invalid_argument,
"not enough data for a GSYM V2 header");
- // Check magic to determine endianness.
const auto HostByteOrder = llvm::endianness::native;
uint32_t Magic;
memcpy(&Magic, Buf.data(), 4);
@@ -147,7 +134,6 @@ GsymReaderV2::parse() {
const bool IsLittleEndian = (Endian == llvm::endianness::little);
- // Decode the header.
DataExtractor DE(Buf, IsLittleEndian, 8);
if (Swap) {
auto ExpectedHdr = HeaderV2::decode(DE);
@@ -156,15 +142,19 @@ GsymReaderV2::parse() {
Swap->Hdr = *ExpectedHdr;
Hdr = &Swap->Hdr;
} else {
- // Native endianness — cast directly from the mmap'd buffer.
Hdr = reinterpret_cast<const HeaderV2 *>(Buf.data());
}
if (Error Err = Hdr->checkForError())
return Err;
+ // Populate cached header values in the base class.
+ CachedBaseAddress = Hdr->BaseAddress;
+ CachedNumAddresses = Hdr->NumAddresses;
+ CachedAddrOffSize = Hdr->AddrOffSize;
+
// Parse GlobalData entries to find section locations.
- uint64_t Offset = 24; // Fixed header size.
+ uint64_t Offset = 24;
uint64_t AddrOffsetsOff = 0, AddrOffsetsSize = 0;
uint64_t AddrInfoOffsetsOff = 0, AddrInfoOffsetsSize = 0;
uint64_t StringTableOff = 0, StringTableSize = 0;
@@ -178,7 +168,6 @@ GsymReaderV2::parse() {
FuncInfoSize))
return Err;
- // Validate required sections are present.
if (!AddrOffsetsSize)
return createStringError(std::errc::invalid_argument,
"missing AddrOffsets section");
@@ -192,20 +181,17 @@ GsymReaderV2::parse() {
return createStringError(std::errc::invalid_argument,
"missing FileTable section");
- // Validate AddrOffsets size matches header.
if (AddrOffsetsSize !=
static_cast<uint64_t>(Hdr->NumAddresses) * Hdr->AddrOffSize)
return createStringError(std::errc::invalid_argument,
"AddrOffsets section size mismatch");
- // Validate AddrInfoOffsets size matches header.
if (AddrInfoOffsetsSize !=
static_cast<uint64_t>(Hdr->NumAddresses) * Hdr->AddrInfoOffSize)
return createStringError(std::errc::invalid_argument,
"AddrInfoOffsets section size mismatch");
if (!Swap) {
- // Native endianness — point ArrayRefs directly into the buffer.
AddrOffsets = ArrayRef<uint8_t>(
reinterpret_cast<const uint8_t *>(Buf.data() + AddrOffsetsOff),
AddrOffsetsSize);
@@ -219,7 +205,6 @@ GsymReaderV2::parse() {
"8-byte AddrInfoOffsets not yet supported");
}
- // FileTable: first 4 bytes is NumFiles, then FileEntry array.
if (FileTableSize < 4)
return createStringError(std::errc::invalid_argument,
"FileTable section too small");
@@ -233,12 +218,8 @@ GsymReaderV2::parse() {
reinterpret_cast<const FileEntry *>(Buf.data() + FileTableOff + 4),
NumFiles);
- // String table.
StrTab.Data = Buf.substr(StringTableOff, StringTableSize);
} else {
- // Swapped endianness — decode into local storage.
-
- // AddrOffsets.
uint64_t AOff = AddrOffsetsOff;
Swap->AddrOffsets.resize(AddrOffsetsSize);
switch (Hdr->AddrOffSize) {
@@ -271,7 +252,6 @@ GsymReaderV2::parse() {
}
AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
- // AddrInfoOffsets.
if (Hdr->AddrInfoOffSize == 4) {
uint64_t AIOff = AddrInfoOffsetsOff;
Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
@@ -284,7 +264,6 @@ GsymReaderV2::parse() {
"8-byte AddrInfoOffsets not yet supported");
}
- // FileTable.
uint64_t FTOff = FileTableOff;
uint32_t NumFiles = DE.getU32(&FTOff);
if (NumFiles > 0) {
@@ -295,7 +274,6 @@ GsymReaderV2::parse() {
Files = ArrayRef<FileEntry>(Swap->Files);
}
- // String table — raw bytes, no swapping needed.
StrTab.Data = Buf.substr(StringTableOff, StringTableSize);
}
return Error::success();
@@ -306,172 +284,13 @@ const HeaderV2 &GsymReaderV2::getHeader() const {
return *Hdr;
}
-std::optional<uint64_t> GsymReaderV2::getAddress(size_t Index) const {
- switch (Hdr->AddrOffSize) {
- case 1: return addressForIndex<uint8_t>(Index);
- case 2: return addressForIndex<uint16_t>(Index);
- case 4: return addressForIndex<uint32_t>(Index);
- case 8: return addressForIndex<uint64_t>(Index);
- }
- return std::nullopt;
-}
-
-std::optional<uint64_t> GsymReaderV2::getAddressInfoOffset(size_t Index) const {
- const auto NumAddrInfoOffsets = AddrInfoOffsets.size();
- if (Index < NumAddrInfoOffsets)
- return AddrInfoOffsets[Index];
- return std::nullopt;
-}
-
-Expected<uint64_t>
-GsymReaderV2::getAddressIndex(const uint64_t Addr) const {
- if (Addr >= Hdr->BaseAddress) {
- const uint64_t AddrOffset = Addr - Hdr->BaseAddress;
- std::optional<uint64_t> AddrOffsetIndex;
- switch (Hdr->AddrOffSize) {
- case 1:
- AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
- break;
- case 2:
- AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);
- break;
- case 4:
- AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);
- break;
- case 8:
- AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);
- break;
- default:
- return createStringError(std::errc::invalid_argument,
- "unsupported address offset size %u",
- Hdr->AddrOffSize);
- }
- if (AddrOffsetIndex)
- return *AddrOffsetIndex;
- }
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
-
-}
-
-llvm::Expected<DataExtractor>
-GsymReaderV2::getFunctionInfoDataForAddress(uint64_t Addr,
- uint64_t &FuncStartAddr) const {
- Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);
- if (!ExpectedAddrIdx)
- return ExpectedAddrIdx.takeError();
- const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
- std::optional<uint64_t> FirstFuncStartAddr;
- const size_t NumAddresses = getNumAddresses();
- for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
- auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
- if (!ExpextedData)
- return ExpextedData;
-
- if (FirstFuncStartAddr.has_value()) {
- if (*FirstFuncStartAddr != FuncStartAddr)
- break;
- } else {
- FirstFuncStartAddr = FuncStartAddr;
- }
-
- uint64_t Offset = 0;
- uint32_t FuncSize = ExpextedData->getU32(&Offset);
- if (FuncSize == 0 ||
- AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))
- return ExpextedData;
- }
- return createStringError(std::errc::invalid_argument,
- "address 0x%" PRIx64 " is not in GSYM", Addr);
-}
-
-llvm::Expected<DataExtractor>
-GsymReaderV2::getFunctionInfoDataAtIndex(uint64_t AddrIdx,
- uint64_t &FuncStartAddr) const {
- if (AddrIdx >= getNumAddresses())
- return createStringError(std::errc::invalid_argument,
- "invalid address index %" PRIu64, AddrIdx);
- const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];
- assert((Endian == endianness::big || Endian == endianness::little) &&
- "Endian must be either big or little");
- StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);
- if (Bytes.empty())
- return createStringError(std::errc::invalid_argument,
- "invalid address info offset 0x%" PRIx32,
- AddrInfoOffset);
- std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);
- if (!OptFuncStartAddr)
- return createStringError(std::errc::invalid_argument,
- "failed to extract address[%" PRIu64 "]", AddrIdx);
- FuncStartAddr = *OptFuncStartAddr;
- return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);
-}
-
-llvm::Expected<FunctionInfo> GsymReaderV2::getFunctionInfo(uint64_t Addr) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<FunctionInfo>
-GsymReaderV2::getFunctionInfoAtIndex(uint64_t Idx) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))
- return FunctionInfo::decode(*ExpectedData, FuncStartAddr);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<LookupResult>
-GsymReaderV2::lookup(uint64_t Addr,
- std::optional<DataExtractor> *MergedFunctionsData) const {
- uint64_t FuncStartAddr = 0;
- if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))
- return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr,
- MergedFunctionsData);
- else
- return ExpectedData.takeError();
-}
-
-llvm::Expected<std::vector<LookupResult>>
-GsymReaderV2::lookupAll(uint64_t Addr) const {
- std::vector<LookupResult> Results;
- std::optional<DataExtractor> MergedFunctionsData;
-
- auto MainResult = lookup(Addr, &MergedFunctionsData);
- if (!MainResult)
- return MainResult.takeError();
-
- Results.push_back(std::move(*MainResult));
-
- if (MergedFunctionsData) {
- auto ExpectedMergedFuncExtractors =
- MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
- if (!ExpectedMergedFuncExtractors)
- return ExpectedMergedFuncExtractors.takeError();
-
- for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
- if (auto FI = FunctionInfo::lookup(MergedData, *this,
- MainResult->FuncRange.start(), Addr)) {
- Results.push_back(std::move(*FI));
- } else {
- return FI.takeError();
- }
- }
- }
-
- return Results;
-}
-
void GsymReaderV2::dump(raw_ostream &OS) {
const auto &Header = getHeader();
OS << Header << "\n";
OS << "Address Table:\n";
OS << "INDEX OFFSET";
- switch (Hdr->AddrOffSize) {
+ switch (CachedAddrOffSize) {
case 1: OS << "8 "; break;
case 2: OS << "16"; break;
case 4: OS << "32"; break;
@@ -480,9 +299,9 @@ void GsymReaderV2::dump(raw_ostream &OS) {
}
OS << " (ADDRESS)\n";
OS << "====== =============================== \n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+ for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
OS << format("[%4u] ", I);
- switch (Hdr->AddrOffSize) {
+ switch (CachedAddrOffSize) {
case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
@@ -494,7 +313,7 @@ void GsymReaderV2::dump(raw_ostream &OS) {
OS << "\nAddress Info Offsets:\n";
OS << "INDEX Offset\n";
OS << "====== ==========\n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I)
+ for (uint32_t I = 0; I < CachedNumAddresses; ++I)
OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
OS << "\nFiles:\n";
OS << "INDEX DIRECTORY BASENAME PATH\n";
@@ -507,7 +326,7 @@ void GsymReaderV2::dump(raw_ostream &OS) {
}
OS << "\n" << StrTab << "\n";
- for (uint32_t I = 0; I < Header.NumAddresses; ++I) {
+ for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
if (auto FI = getFunctionInfoAtIndex(I))
dump(OS, *FI);
@@ -515,124 +334,3 @@ void GsymReaderV2::dump(raw_ostream &OS) {
logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");
}
}
-
-void GsymReaderV2::dump(raw_ostream &OS, const FunctionInfo &FI,
- uint32_t Indent) {
- OS.indent(Indent);
- OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";
- if (FI.OptLineTable)
- dump(OS, *FI.OptLineTable, Indent);
- if (FI.Inline)
- dump(OS, *FI.Inline, Indent);
-
- if (FI.CallSites)
- dump(OS, *FI.CallSites, Indent);
-
- if (FI.MergedFunctions) {
- assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
- dump(OS, *FI.MergedFunctions);
- }
-}
-
-void GsymReaderV2::dump(raw_ostream &OS, const MergedFunctionsInfo &MFI) {
- for (uint32_t inx = 0; inx < MFI.MergedFunctions.size(); inx++) {
- OS << "++ Merged FunctionInfos[" << inx << "]:\n";
- dump(OS, MFI.MergedFunctions[inx], 4);
- }
-}
-
-void GsymReaderV2::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
- OS << HEX16(CSI.ReturnOffset);
-
- std::string Flags;
- auto addFlag = [&](const char *Flag) {
- if (!Flags.empty())
- Flags += " | ";
- Flags += Flag;
- };
-
- if (CSI.Flags == CallSiteInfo::Flags::None)
- Flags = "None";
- else {
- if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
- addFlag("InternalCall");
-
- if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
- addFlag("ExternalCall");
- }
- OS << " Flags[" << Flags << "]";
-
- if (!CSI.MatchRegex.empty()) {
- OS << " MatchRegex[";
- for (uint32_t i = 0; i < CSI.MatchRegex.size(); ++i) {
- if (i > 0)
- OS << ";";
- OS << getString(CSI.MatchRegex[i]);
- }
- OS << "]";
- }
-}
-
-void GsymReaderV2::dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
- uint32_t Indent) {
- OS.indent(Indent);
- OS << "CallSites (by relative return offset):\n";
- for (const auto &CS : CSIC.CallSites) {
- OS.indent(Indent);
- OS << " ";
- dump(OS, CS);
- OS << "\n";
- }
-}
-
-void GsymReaderV2::dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent) {
- OS.indent(Indent);
- OS << "LineTable:\n";
- for (auto &LE: LT) {
- OS.indent(Indent);
- OS << " " << HEX64(LE.Addr) << ' ';
- if (LE.File)
- dump(OS, getFile(LE.File));
- OS << ':' << LE.Line << '\n';
- }
-}
-
-void GsymReaderV2::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
- if (Indent == 0)
- OS << "InlineInfo:\n";
- else
- OS.indent(Indent);
- OS << II.Ranges << ' ' << getString(II.Name);
- if (II.CallFile != 0) {
- if (auto File = getFile(II.CallFile)) {
- OS << " called from ";
- dump(OS, File);
- OS << ':' << II.CallLine;
- }
- }
- OS << '\n';
- for (const auto &ChildII: II.Children)
- dump(OS, ChildII, Indent + 2);
-}
-
-void GsymReaderV2::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
- if (FE) {
- if (FE->Dir == 0 && FE->Base == 0)
- return;
- StringRef Dir = getString(FE->Dir);
- StringRef Base = getString(FE->Base);
- if (!Dir.empty()) {
- OS << Dir;
- if (Dir.contains('\\') && !Dir.contains('/'))
- OS << '\\';
- else
- OS << '/';
- }
- if (!Base.empty()) {
- OS << Base;
- }
- if (!Dir.empty() || !Base.empty())
- return;
- }
- OS << "<invalid-file>";
-}
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 5066d1491c12e..2d95ad51802f5 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -2501,7 +2501,7 @@ static void AddFunctionInfo(GsymCreatorV1 &GC, const char *FuncName,
// Finalize a GsymCreatorV1, encode it and decode it and return the error or
// GsymReaderV1 that was successfully decoded.
-static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreatorV1 &GC) {
+static Expected<GsymReaderV1> FinalizeEncodeAndDecode(GsymCreator &GC) {
OutputAggregator Null(nullptr);
Error FinalizeErr = GC.finalize(Null);
if (FinalizeErr)
@@ -2548,7 +2548,7 @@ TEST(GSYMTest, TestGsymSegmenting) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
@@ -2557,25 +2557,25 @@ TEST(GSYMTest, TestGsymSegmenting) {
// encode any values into the segmented GsymCreatorV1.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
// and get a NULL GsymCreatorV1 in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
@@ -2699,7 +2699,7 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
size_t FuncIdx = 0;
// Make sure we get an error if the segment size is too small to encode a
// single function info.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCError =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
GC.createSegment(57, FuncIdx);
ASSERT_FALSE((bool)GCError);
checkError("a segment size of 57 is to small to fit any function infos, "
@@ -2708,25 +2708,25 @@ TEST(GSYMTest, TestGsymSegmentingNoBase) {
// encode any values into the segmented GsymCreatorV1.
ASSERT_EQ(FuncIdx, (size_t)0);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC1000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC1000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC2000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC2000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC3000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC3000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GC4000 =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GC4000, Succeeded());
ASSERT_EQ(FuncIdx, (size_t)4);
// When there are no function infos left to encode we expect to get no error
// and get a NULL GsymCreatorV1 in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreatorV1>> GCNull =
+ llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
GC.createSegment(128, FuncIdx);
ASSERT_THAT_EXPECTED(GCNull, Succeeded());
ASSERT_TRUE(GC1000.get() != nullptr);
>From 41d4a43e1b0baad6b7154ee6262fc1a17d4f85f3 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 14:39:23 -0700
Subject: [PATCH 24/45] Restore original documentation to GsymReader.h and
GsymCreator.h
Move shared V1/V2 code to base classes and restore all original
doc comments from the base commit (2632ffeab1b4). Only adjusted
wording where the original text no longer applied (e.g., class doc
now mentions subclasses and version-specific parsing/encoding).
User prompts since last commit:
1. "Comparing to the base commit (2632ffeab1b4), it seems we are
losing a lot of the documentation from the original GsymReader.h
and GsymCreator.h. Can you add back those documentation
(copy/paste), and only make changes when the original wording is
no longer true?"
2. "memorize and commit"
---
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 375 +++++++++++++++++-
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 273 ++++++++++++-
2 files changed, 636 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index eabe4089ef206..541703f80a4e9 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -31,22 +31,120 @@ namespace gsym {
class FileWriter;
class OutputAggregator;
-/// GsymCreator is the base class for creating GSYM data.
+/// GsymCreator is used to emit GSYM data to a stand alone file or section
+/// within a file.
///
/// The GsymCreator is designed to be used in 3 stages:
/// - Create FunctionInfo objects and add them
/// - Finalize the GsymCreator object
/// - Save to file or section
///
+/// The first stage involves creating FunctionInfo objects from another source
+/// of information like compiler debug info metadata, DWARF or Breakpad files.
+/// Any strings in the FunctionInfo or contained information, like InlineInfo
+/// or LineTable objects, should get the string table offsets by calling
+/// GsymCreator::insertString(...). Any file indexes that are needed should be
+/// obtained by calling GsymCreator::insertFile(...). All of the function calls
+/// in GsymCreator are thread safe. This allows multiple threads to create and
+/// add FunctionInfo objects while parsing debug information.
+///
+/// Once all of the FunctionInfo objects have been added, the
+/// GsymCreator::finalize(...) must be called prior to saving. This function
+/// will sort the FunctionInfo objects, finalize the string table, and do any
+/// other passes on the information needed to prepare the information to be
+/// saved.
+///
+/// Once the object has been finalized, it can be saved to a file or section.
+///
/// This base class contains all shared state and logic. Subclasses
-/// (GsymCreatorV1, GsymCreatorV2) implement version-specific encoding.
+/// (GsymCreatorV1, GsymCreatorV2) implement version-specific encoding via
+/// encode() and calculateHeaderAndTableSize().
+///
+/// ENCODING
+///
+/// GSYM files are designed to be memory mapped into a process as shared, read
+/// only data, and used as is.
+///
+/// The GSYM file format when in a stand alone file consists of:
+/// - Header
+/// - Address Table
+/// - Function Info Offsets
+/// - File Table
+/// - String Table
+/// - Function Info Data
+///
+/// HEADER
+///
+/// The header is fully described in "llvm/DebugInfo/GSYM/Header.h".
+///
+/// ADDRESS TABLE
+///
+/// The address table immediately follows the header in the file and consists
+/// of Header.NumAddresses address offsets. These offsets are sorted and can be
+/// binary searched for efficient lookups. Addresses in the address table are
+/// stored as offsets from a 64 bit base address found in Header.BaseAddress.
+/// This allows the address table to contain 8, 16, or 32 bit offsets, so
+/// the address table does not require full 64 bit addresses for each address.
+/// The resulting GSYM size is smaller and causes fewer pages to be touched
+/// during address lookups when the address table is smaller. The size of the
+/// address offsets in the address table is specified in the header in
+/// Header.AddrOffSize. The first offset in the address table is aligned to
+/// Header.AddrOffSize alignment to ensure efficient access when loaded into
+/// memory.
+///
+/// FUNCTION INFO OFFSETS TABLE
+///
+/// The function info offsets table immediately follows the address table and
+/// consists of Header.NumAddresses 32 bit file offsets: one for each address
+/// in the address table. This data is aligned to a 4 byte boundary. The
+/// offsets in this table are the relative offsets from the start offset of the
+/// GSYM header and point to the function info data for each address in the
+/// address table. Keeping this data separate from the address table helps to
+/// reduce the number of pages that are touched when address lookups occur on a
+/// GSYM file.
+///
+/// FILE TABLE
+///
+/// The file table immediately follows the function info offsets table. The
+/// encoding of the FileTable is:
+///
+/// struct FileTable {
+/// uint32_t Count;
+/// FileEntry Files[];
+/// };
+///
+/// The file table starts with a 32 bit count of the number of files that are
+/// used in all of the function info, followed by that number of FileEntry
+/// structures. The file table is aligned to a 4 byte boundary. Each file in
+/// the file table is represented with a FileEntry structure.
+/// See "llvm/DebugInfo/GSYM/FileEntry.h" for details.
+///
+/// STRING TABLE
+///
+/// The string table follows the file table in stand alone GSYM files and
+/// contains all strings for everything contained in the GSYM file. Any string
+/// data should be added to the string table and any references to strings
+/// inside GSYM information must be stored as 32 bit string table offsets into
+/// this string table. The string table always starts with an empty string at
+/// offset zero and is followed by any strings needed by the GSYM information.
+/// The start of the string table is not aligned to any boundary.
+///
+/// FUNCTION INFO DATA
+///
+/// The function info data is the payload that contains information about the
+/// address that is being looked up. It contains all of the encoded
+/// FunctionInfo objects. Each encoded FunctionInfo's data is pointed to by an
+/// entry in the Function Info Offsets Table. For details on the exact encoding
+/// of FunctionInfo objects, see "llvm/DebugInfo/GSYM/FunctionInfo.h".
class GsymCreator {
protected:
+ // Member variables require Mutex protection
mutable std::mutex Mutex;
std::vector<FunctionInfo> Funcs;
StringTableBuilder StrTab;
StringSet<> StringStorage;
DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
+ // Needed for mapping string offsets back to the string stored in \a StrTab.
DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
std::vector<llvm::gsym::FileEntry> Files;
std::vector<uint8_t> UUID;
@@ -56,81 +154,346 @@ class GsymCreator {
bool Finalized = false;
bool Quiet;
+ /// Get the first function start address.
+ ///
+ /// \returns The start address of the first FunctionInfo or std::nullopt if
+ /// there are no function infos.
LLVM_ABI std::optional<uint64_t> getFirstFunctionAddress() const;
+
+ /// Get the last function address.
+ ///
+ /// \returns The start address of the last FunctionInfo or std::nullopt if
+ /// there are no function infos.
LLVM_ABI std::optional<uint64_t> getLastFunctionAddress() const;
+
+ /// Get the base address to use for this GSYM file.
+ ///
+ /// \returns The base address to put into the header and to use when creating
+ /// the address offset table or std::nullopt if there are no valid
+ /// function infos or if the base address wasn't specified.
LLVM_ABI std::optional<uint64_t> getBaseAddress() const;
+
+ /// Get the size of an address offset in the address offset table.
+ ///
+ /// GSYM files store offsets from the base address in the address offset table
+ /// and we store the size of the address offsets in the GSYM header. This
+ /// function will calculate the size in bytes of these address offsets based
+ /// on the current contents of the GSYM file.
+ ///
+ /// \returns The size in bytes of the address offsets.
LLVM_ABI uint8_t getAddressOffsetSize() const;
+
+ /// Get the maximum address offset for the current address offset size.
+ ///
+ /// This is used when creating the address offset table to ensure we have
+ /// values that are in range so we don't end up truncating address offsets
+ /// when creating GSYM files as the code evolves.
+ ///
+ /// \returns The maximum address offset value that will be encoded into a GSYM
+ /// file.
LLVM_ABI uint64_t getMaxAddressOffset() const;
+ /// Inserts a FileEntry into the file table.
+ ///
+ /// This is used to insert a file entry in a thread safe way into this object.
+ ///
+ /// \param FE A file entry object that contains valid string table offsets
+ /// from this object already.
LLVM_ABI uint32_t insertFileEntry(FileEntry FE);
+
+ /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
+ ///
+ /// Copy the function info and only the needed files and strings and add a
+ /// converted FunctionInfo into this object. This is used to segment GSYM
+ /// files into separate files while only transferring the files and strings
+ /// that are needed from \a SrcGC.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
+ /// \returns The number of bytes it will take to encode the function info in
+ /// this GsymCreator. This helps calculate the size of the current GSYM
+ /// segment file.
LLVM_ABI uint64_t copyFunctionInfo(const GsymCreator &SrcGC,
size_t FuncInfoIdx);
+
+ /// Copy a string from \a SrcGC into this object.
+ ///
+ /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
+ /// If a string has already been copied, the uniqued string table offset will
+ /// be returned, otherwise the string will be copied and a unique offset will
+ /// be returned.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param StrOff The string table offset from \a SrcGC to copy.
+ /// \returns The new string table offset of the string within this object.
LLVM_ABI uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
+
+ /// Copy a file from \a SrcGC into this object.
+ ///
+ /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
+ /// consist of two string table entries, one for the directory and one for the
+ /// filename. This function will copy any needed strings and ensure the file
+ /// is uniqued within this object. If a file already exists in this GSYM
+ /// creator the uniqued index will be returned, else the strings will be
+ /// copied and the new file index will be returned.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
+ /// file index of zero will always return zero as the zero is a reserved file
+ /// index that means no file.
+ /// \returns The new file index of the file within this object.
LLVM_ABI uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
+
+ /// Fixup any string and file references by updating any file indexes and
+ /// strings offsets in the InlineInfo parameter.
+ ///
+ /// When copying InlineInfo entries, we can simply make a copy of the object
+ /// and then fixup the files and strings for efficiency.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param II The inline info that contains file indexes and string offsets
+ /// that come from \a SrcGC. The entries will be updated by copying any files
+ /// and strings over into this object.
LLVM_ABI void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
+ /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
+ ///
+ /// When segmented GSYM files are saved to disk, they will use \a Path as a
+ /// prefix and then have the first function info address appended to the path
+ /// when each segment is saved. Each segmented GSYM file has only the
+ /// strings and files that are needed to save the function infos that are in
+ /// each segment. These smaller files are easy to compress and download
+ /// separately and allow for efficient lookups with very large GSYM files and
+ /// segmenting them allows servers to download only the segments that are
+ /// needed.
+ ///
+ /// \param Path The path prefix to use when saving the GSYM files.
+ /// \param ByteOrder The endianness to use when saving the file.
+ /// \param SegmentSize The size in bytes to segment the GSYM file into.
LLVM_ABI llvm::Error saveSegments(StringRef Path,
llvm::endianness ByteOrder,
uint64_t SegmentSize) const;
+ /// Let this creator know that this is a segment of another GsymCreator.
+ ///
+ /// When we have a segment, we know that function infos will be added in
+ /// ascending address range order without having to be finalized. We also
+ /// don't need to sort and unique entries during the finalize function call.
void setIsSegment() { IsSegment = true; }
- /// Version-specific: calculate header and table sizes.
+ /// Calculate the byte size of the GSYM header and tables sizes.
+ ///
+ /// Version-specific because V1 and V2 have different header and table
+ /// layouts.
+ ///
+ /// This is used to help split GSYM files into segments.
+ ///
+ /// \returns Size in bytes of the GSYM header and tables.
virtual uint64_t calculateHeaderAndTableSize() const = 0;
- /// Version-specific: create a new empty creator of the same version.
+ /// Create a new empty creator of the same version.
+ ///
+ /// Used by createSegment() to create segment creators of the correct
+ /// version type.
virtual std::unique_ptr<GsymCreator> createNew(bool Quiet) const = 0;
public:
LLVM_ABI GsymCreator(bool Quiet = false);
virtual ~GsymCreator() = default;
- /// Version-specific: encode to a FileWriter.
+ /// Encode a GSYM into the file writer stream at the current position.
+ ///
+ /// Version-specific because V1 and V2 have completely different file
+ /// layouts.
+ ///
+ /// \param O The stream to save the binary data to
+ /// \returns An error object that indicates success or failure of the save.
virtual llvm::Error encode(FileWriter &O) const = 0;
- /// Version-specific: load call site info from YAML.
+ /// Load call site information from a YAML file.
+ ///
+ /// This function reads call site information from a specified YAML file and
+ /// adds it to the GSYM data.
+ ///
+ /// \param YAMLFile The path to the YAML file containing call site
+ /// information.
virtual llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) = 0;
+ /// Save a GSYM file to a stand alone file.
+ ///
+ /// \param Path The file path to save the GSYM file to.
+ /// \param ByteOrder The endianness to use when saving the file.
+ /// \param SegmentSize The size in bytes to segment the GSYM file into. If
+ /// this option is set this function will create N segments
+ /// that are all around \a SegmentSize bytes in size. This
+ /// allows a very large GSYM file to be broken up into
+ /// shards. Each GSYM file will have its own file table,
+ /// and string table that only have the files and strings
+ /// needed for the shard. If this argument has no value,
+ /// a single GSYM file that contains all function
+ /// information will be created.
+ /// \returns An error object that indicates success or failure of the save.
LLVM_ABI llvm::Error
save(StringRef Path, llvm::endianness ByteOrder,
std::optional<uint64_t> SegmentSize = std::nullopt) const;
+ /// Insert a string into the GSYM string table.
+ ///
+ /// All strings used by GSYM files must be uniqued by adding them to this
+ /// string pool and using the returned offset for any string values.
+ ///
+ /// \param S The string to insert into the string table.
+ /// \param Copy If true, then make a backing copy of the string. If false,
+ /// the string is owned by another object that will stay around
+ /// long enough for the GsymCreator to save the GSYM file.
+ /// \returns The unique 32 bit offset into the string table.
LLVM_ABI uint32_t insertString(StringRef S, bool Copy = true);
+
+ /// Retrieve a string from the GSYM string table given its offset.
+ ///
+ /// The offset is assumed to be a valid offset into the string table,
+ /// otherwise an assert will be triggered.
+ ///
+ /// \param Offset The offset of the string to retrieve, previously returned by
+ /// insertString.
+ /// \returns The string at the given offset in the string table.
LLVM_ABI StringRef getString(uint32_t Offset);
+ /// Insert a file into this GSYM creator.
+ ///
+ /// Inserts a file by adding a FileEntry into the "Files" member variable if
+ /// the file has not already been added. The file path is split into
+ /// directory and filename which are both added to the string table. This
+ /// allows paths to be stored efficiently by reusing the directories that are
+ /// common between multiple files.
+ ///
+ /// \param Path The path to the file to insert.
+ /// \param Style The path style for the "Path" parameter.
+ /// \returns The unique file index for the inserted file.
LLVM_ABI uint32_t
insertFile(StringRef Path,
sys::path::Style Style = sys::path::Style::native);
+ /// Add a function info to this GSYM creator.
+ ///
+ /// All information in the FunctionInfo object must use the
+ /// GsymCreator::insertString(...) function when creating string table
+ /// offsets for names and other strings.
+ ///
+ /// \param FI The function info object to emplace into our functions list.
LLVM_ABI void addFunctionInfo(FunctionInfo &&FI);
+
+ /// Get the current number of FunctionInfo objects contained in this
+ /// object.
LLVM_ABI size_t getNumFunctionInfos() const;
+ /// Thread safe iteration over all function infos.
+ ///
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
LLVM_ABI void
forEachFunctionInfo(
std::function<bool(FunctionInfo &)> const &Callback);
+
+ /// Thread safe const iteration over all function infos.
+ ///
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
LLVM_ABI void forEachFunctionInfo(
std::function<bool(const FunctionInfo &)> const &Callback) const;
+ /// Finalize the data in the GSYM creator prior to saving the data out.
+ ///
+ /// Finalize must be called after all FunctionInfo objects have been added
+ /// and before GsymCreator::save() is called.
+ ///
+ /// \param OS Output stream to report duplicate function infos, overlapping
+ /// function infos, and function infos that were merged or removed.
+ /// \returns An error object that indicates success or failure of the
+ /// finalize.
LLVM_ABI llvm::Error finalize(OutputAggregator &OS);
+
+ /// Organize merged FunctionInfo's
+ ///
+ /// This method processes the list of function infos (Funcs) to identify and
+ /// group functions with overlapping address ranges.
+ ///
+ /// \param Out Output stream to report information about how merged
+ /// FunctionInfo's were handled.
LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
+ /// Set the UUID value.
+ ///
+ /// \param UUIDBytes The new UUID bytes.
void setUUID(llvm::ArrayRef<uint8_t> UUIDBytes) {
UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
}
+ /// Set the base address to use for the GSYM file.
+ ///
+ /// Sets the base address to use for the GSYM file. Object files typically
+ /// get loaded from a base address when the OS loads them into memory. Using
+ /// GSYM files for symbolication becomes easier if the base address in the
+ /// GSYM header is the same address as it allows addresses to be easily slid
+ /// and allows symbolication without needing to find the original base
+ /// address in the original object file.
+ ///
+ /// \param Addr The address to use as the base address of the GSYM file
+ /// when it is saved to disk.
void setBaseAddress(uint64_t Addr) { BaseAddress = Addr; }
+ /// Set valid .text address ranges that all functions must be contained in.
void SetValidTextRanges(AddressRanges &TextRanges) {
ValidTextRanges = TextRanges;
}
+ /// Get the valid text ranges.
const std::optional<AddressRanges> GetValidTextRanges() const {
return ValidTextRanges;
}
+ /// Check if an address is a valid code address.
+ ///
+ /// Any functions whose addresses do not exist within these function bounds
+ /// will not be converted into the final GSYM. This allows the object file
+ /// to figure out the valid file address ranges of all the code sections
+ /// and ensure we don't add invalid functions to the final output. Many
+ /// linkers have issues when dead stripping functions from DWARF debug info
+ /// where they set the DW_AT_low_pc to zero, but newer DWARF has the
+ /// DW_AT_high_pc as an offset from the DW_AT_low_pc and these size
+ /// attributes have no relocations that can be applied. This results in DWARF
+ /// where many functions have an DW_AT_low_pc of zero and a valid offset size
+ /// for DW_AT_high_pc. If we extract all valid ranges from an object file
+ /// that are marked with executable permissions, we can properly ensure that
+ /// these functions are removed.
+ ///
+ /// \param Addr An address to check.
+ ///
+ /// \returns True if the address is in the valid text ranges or if no valid
+ /// text ranges have been set, false otherwise.
LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const;
+
+ /// Whether the transformation should be quiet, i.e. not output warnings.
bool isQuiet() const { return Quiet; }
+ /// Create a segmented GSYM creator starting with function info index
+ /// \a FuncIdx.
+ ///
+ /// This function will create a GsymCreator object that will encode into
+ /// roughly \a SegmentSize bytes and return it. It is used by the private
+ /// saveSegments(...) function and also is used by the GSYM unit tests to test
+ /// segmenting of GSYM files. The returned GsymCreator can be finalized and
+ /// encoded.
+ ///
+ /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
+ /// into.
+ /// \param [in,out] FuncIdx The index of the first function info to encode
+ /// into the returned GsymCreator. This index will be updated so it can be
+ /// used in subsequent calls to this function to allow more segments to be
+ /// created.
+ /// \returns An expected unique pointer to a GsymCreator or an error. The
+ /// returned unique pointer can be NULL if there are no more functions to
+ /// encode.
LLVM_ABI llvm::Expected<std::unique_ptr<GsymCreator>>
createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 35a1a58be9bfa..cfbef93e01770 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -30,10 +30,21 @@ class raw_ostream;
namespace gsym {
-/// GsymReader is the base class for reading GSYM data.
+/// GsymReader is used to read GSYM data from a file or buffer.
///
-/// This class contains all shared state and logic for V1 and V2 readers.
-/// Subclasses implement version-specific parsing (parse()) and header access.
+/// This class is optimized for very quick lookups when the endianness matches
+/// the host system. The header, address table, address info offsets, and file
+/// table are designed to be mmap'ed as read only into memory and used without
+/// any parsing needed. If the endianness doesn't match, we swap these objects
+/// and tables into version-specific SwappedData and then point the ArrayRefs
+/// to the swapped internal data.
+///
+/// This base class contains all shared state and logic. Subclasses
+/// (GsymReaderV1, GsymReaderV2) implement version-specific parsing via
+/// parse() and header access via getHeader().
+///
+/// GsymReader objects must use one of the static functions to create an
+/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
class GsymReader {
protected:
std::unique_ptr<MemoryBuffer> MemBuffer;
@@ -43,19 +54,43 @@ class GsymReader {
ArrayRef<FileEntry> Files;
StringTable StrTab;
- // Cached header values, populated by subclass parse().
+ /// Cached header values, populated by subclass parse().
+ /// These allow shared methods to access common header fields without
+ /// needing the version-specific header type.
uint64_t CachedBaseAddress = 0;
uint32_t CachedNumAddresses = 0;
uint8_t CachedAddrOffSize = 0;
LLVM_ABI GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
+ /// Get an appropriate address info offsets array.
+ ///
+ /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+ /// byte offsets from the base address. The table is stored internally as an
+ /// array of bytes that are in the correct endianness. When we access this
+ /// table we must get an array that matches those sizes. This templatized
+ /// helper function is used when accessing address offsets in the AddrOffsets
+ /// member variable.
+ ///
+ /// \returns An ArrayRef of an appropriate address offset size.
template <class T> ArrayRef<T>
getAddrOffsets() const {
return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
AddrOffsets.size()/sizeof(T));
}
+ /// Get an appropriate address from the address table.
+ ///
+ /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+ /// byte address offsets from the base address. The table is stored
+ /// internally as an array of bytes that are in the correct endianness. In
+ /// order to extract an address from the address table we must access the
+ /// address offset using the correct size and then add it to the base
+ /// address.
+ ///
+ /// \param Index An index into the AddrOffsets array.
+ /// \returns A virtual address that matches the original object file for the
+ /// address at the specified index, or std::nullopt if Index is out of bounds.
template <class T>
std::optional<uint64_t> addressForIndex(size_t Index) const {
ArrayRef<T> AIO = getAddrOffsets<T>();
@@ -64,6 +99,15 @@ class GsymReader {
return std::nullopt;
}
+ /// Lookup an address offset in the AddrOffsets table.
+ ///
+ /// Given an address offset, look it up using a binary search of the
+ /// AddrOffsets table.
+ ///
+ /// \param AddrOffset An address offset, that has already been computed by
+ /// subtracting the base address.
+ /// \returns The matching address offset index. This index will be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
template <class T>
std::optional<uint64_t>
getAddressOffsetIndex(const uint64_t AddrOffset) const {
@@ -71,11 +115,17 @@ class GsymReader {
const auto Begin = AIO.begin();
const auto End = AIO.end();
auto Iter = std::lower_bound(Begin, End, AddrOffset);
+ // Watch for addresses that fall between the base address and the first
+ // address offset.
if (Iter == Begin && AddrOffset < *Begin)
return std::nullopt;
if (Iter == End || AddrOffset < *Iter)
--Iter;
+ // GSYM files have sorted function infos with the most information (line
+ // table and/or inline info) first in the array of function infos, so
+ // always backup as much as possible as long as the address offset is the
+ // same as the previous entry.
while (Iter != Begin) {
auto Prev = Iter - 1;
if (*Prev == *Iter)
@@ -87,10 +137,57 @@ class GsymReader {
return std::distance(Begin, Iter);
}
+ /// Given an address, find the address index.
+ ///
+ /// Binary search the address table and find the matching address index.
+ ///
+ /// \param Addr A virtual address that matches the original object file
+ /// to lookup.
+ /// \returns An index into the address table. This index can be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
+ /// Returns an error if the address isn't in the GSYM with details of why.
LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
+
+ /// Given an address index, get the offset for the FunctionInfo.
+ ///
+ /// Looking up an address is done by finding the corresponding address
+ /// index for the address. This index is then used to get the offset of the
+ /// FunctionInfo data that we will decode using this function.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns An optional GSYM data offset for the offset of the FunctionInfo
+ /// that needs to be decoded.
LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
+
+ /// Given an address, find the correct function info data and function
+ /// address.
+ ///
+ /// Binary search the address table and find the matching address info
+ /// and make sure that the function info contains the address. GSYM allows
+ /// functions to overlap, and the most debug info is contained in the first
+ /// entries due to the sorting when GSYM files are created. We can have
+ /// multiple function info that start at the same address only if their
+ /// address range doesn't match. So find the first entry that matches \a Addr
+ /// and iterate forward until we find one that contains the address.
+ ///
+ /// \param[in] Addr A virtual address that matches the original object file
+ /// to lookup.
+ ///
+ /// \param[out] FuncStartAddr A virtual address that is the base address of
+ /// the function that is used for decoding the FunctionInfo.
+ ///
+ /// \returns A valid data extractor on success, or an error if we fail to
+ /// find the address in a function info or correctly decode the data.
LLVM_ABI llvm::Expected<llvm::DataExtractor>
getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
+
+ /// Get the function data and address given an address index.
+ ///
+ /// \param AddrIdx An address index from the address table.
+ ///
+ /// \returns An expected DataExtractor for the function info data at the
+ /// given index, or an error object that indicates the reason for failing to
+ /// look up the address.
LLVM_ABI llvm::Expected<llvm::DataExtractor>
getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
@@ -98,50 +195,214 @@ class GsymReader {
LLVM_ABI GsymReader(GsymReader &&RHS);
virtual ~GsymReader() = default;
- /// Open a GSYM file, auto-detecting the format version.
+ /// Construct a GsymReader from a file on disk, auto-detecting the format
+ /// version.
+ ///
+ /// \param Path The file path the GSYM file to read.
+ /// \returns An expected unique_ptr to a GsymReader or an error object that
+ /// indicates reason for failing to read the GSYM.
LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
openFile(StringRef Path);
/// Construct a GsymReader from a buffer, auto-detecting the format version.
+ ///
+ /// \param Bytes A set of bytes that will be copied and owned by the
+ /// returned object on success.
+ /// \returns An expected unique_ptr to a GsymReader or an error object that
+ /// indicates reason for failing to read the GSYM.
LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
copyBuffer(StringRef Bytes);
+ /// Get a string from the string table.
+ ///
+ /// \param Offset The string table offset for the string to retrieve.
+ /// \returns The string from the strin table.
StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+ /// Get the a file entry for the suppplied file index.
+ ///
+ /// Used to convert any file indexes in the FunctionInfo data back into
+ /// files. This function can be used for iteration, but is more commonly used
+ /// for random access when doing lookups.
+ ///
+ /// \param Index An index into the file table.
+ /// \returns An optional FileInfo that will be valid if the file index is
+ /// valid, or std::nullopt if the file index is out of bounds,
std::optional<FileEntry> getFile(uint32_t Index) const {
if (Index < Files.size())
return Files[Index];
return std::nullopt;
}
+ /// Get the number of addresses in this Gsym file.
uint32_t getNumAddresses() const { return CachedNumAddresses; }
+ /// Get the full function info for an address.
+ ///
+ /// This should be called when a client will store a copy of the complete
+ /// FunctionInfo for a given address. For one off lookups, use the lookup()
+ /// function below.
+ ///
+ /// Symbolication server processes might want to parse the entire function
+ /// info for a given address and cache it if the process stays around to
+ /// service many symbolication addresses, like for parsing profiling
+ /// information.
+ ///
+ /// \param Addr A virtual address from the orignal object file to lookup.
+ ///
+ /// \returns An expected FunctionInfo that contains the function info object
+ /// or an error object that indicates reason for failing to lookup the
+ /// address.
LLVM_ABI llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
+
+ /// Get the full function info given an address index.
+ ///
+ /// \param AddrIdx A address index for an address in the address table.
+ ///
+ /// \returns An expected FunctionInfo that contains the function info object
+ /// or an error object that indicates reason for failing get the function
+ /// info object.
LLVM_ABI llvm::Expected<FunctionInfo>
getFunctionInfoAtIndex(uint64_t AddrIdx) const;
+ /// Lookup an address in the a GSYM.
+ ///
+ /// Lookup just the information needed for a specific address \a Addr. This
+ /// function is faster that calling getFunctionInfo() as it will only return
+ /// information that pertains to \a Addr and allows the parsing to skip any
+ /// extra information encoded for other addresses. For example the line table
+ /// parsing can stop when a matching LineEntry has been fouhnd, and the
+ /// InlineInfo can stop parsing early once a match has been found and also
+ /// skip information that doesn't match. This avoids memory allocations and
+ /// is much faster for lookups.
+ ///
+ /// \param Addr A virtual address from the orignal object file to lookup.
+ ///
+ /// \param MergedFuncsData A pointer to an optional DataExtractor that, if
+ /// non-null, will be set to the raw data of the MergedFunctionInfo, if
+ /// present.
+ ///
+ /// \returns An expected LookupResult that contains only the information
+ /// needed for the current address, or an error object that indicates reason
+ /// for failing to lookup the address.
LLVM_ABI llvm::Expected<LookupResult>
lookup(uint64_t Addr,
std::optional<DataExtractor> *MergedFuncsData = nullptr) const;
+ /// Lookup all merged functions for a given address.
+ ///
+ /// This function performs a lookup for the specified address and then
+ /// retrieves additional LookupResults from any merged functions associated
+ /// with the primary LookupResult.
+ ///
+ /// \param Addr The address to lookup.
+ ///
+ /// \returns A vector of LookupResult objects, where the first element is the
+ /// primary result, followed by results for any merged functions
LLVM_ABI llvm::Expected<std::vector<LookupResult>>
lookupAll(uint64_t Addr) const;
+ /// Gets an address from the address table.
+ ///
+ /// Addresses are stored as offsets from the base address.
+ ///
+ /// \param Index A index into the address table.
+ /// \returns A resolved virtual address for adddress in the address table
+ /// or std::nullopt if Index is out of bounds.
LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
- /// Dump the entire GSYM data. Version-specific (header format differs).
+ /// Dump the entire Gsym data contained in this object.
+ ///
+ /// Version-specific because the header format differs between V1 and V2.
+ ///
+ /// \param OS The output stream to dump to.
virtual void dump(raw_ostream &OS) = 0;
+ /// Dump a FunctionInfo object.
+ ///
+ /// This function will convert any string table indexes and file indexes
+ /// into human readable format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param FI The object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const FunctionInfo &FI,
uint32_t Indent = 0);
+
+ /// Dump a MergedFunctionsInfo object.
+ ///
+ /// This function will dump a MergedFunctionsInfo object - basically by
+ /// dumping the contained FunctionInfo objects with indentation.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param MFI The object to dump.
LLVM_ABI void dump(raw_ostream &OS, const MergedFunctionsInfo &MFI);
+
+ /// Dump a CallSiteInfo object.
+ ///
+ /// This function will output the details of a CallSiteInfo object in a
+ /// human-readable format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param CSI The CallSiteInfo object to dump.
LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfo &CSI);
+
+ /// Dump a CallSiteInfoCollection object.
+ ///
+ /// This function will iterate over a collection of CallSiteInfo objects and
+ /// dump each one.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param CSIC The CallSiteInfoCollection object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item from within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const CallSiteInfoCollection &CSIC,
uint32_t Indent = 0);
+
+ /// Dump a LineTable object.
+ ///
+ /// This function will convert any string table indexes and file indexes
+ /// into human readable format.
+ ///
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param LT The object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used when dumping as an
+ /// item from within MergedFunctionsInfo.
LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
uint32_t Indent = 0);
+
+ /// Dump a InlineInfo object.
+ ///
+ /// This function will convert any string table indexes and file indexes
+ /// into human readable format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param II The object to dump.
+ ///
+ /// \param Indent The indentation as number of spaces. Used for recurive
+ /// dumping.
LLVM_ABI void dump(raw_ostream &OS, const InlineInfo &II,
uint32_t Indent = 0);
+
+ /// Dump a FileEntry object.
+ ///
+ /// This function will convert any string table indexes into human readable
+ /// format.
+ ///
+ /// \param OS The output stream to dump to.
+ ///
+ /// \param FE The object to dump.
LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
};
>From 2dcad2bfcdff0fecf268d29dca72eb9bc94a54f3 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 14:51:50 -0700
Subject: [PATCH 25/45] Reorder declarations to match base commit and fix
LLVM_ABI usage
Reorder function declarations in GsymReader.h and GsymCreator.h to
match the original order from the base commit (2632ffeab1b4), minimizing
diff noise. Also remove LLVM_ABI from protected methods that did not
have it in the original.
User prompts since last commit:
1. "Actually, comparing to the original GsymReader.h and GsymCreator.h,
can you adjust the order of the functions so that they be the same as
the base commit, so that minimal change shows up on diff view?
Currently I see a lot of changes on the diff view just because
function orders have changed.
Also, if the function has LLVM_ABI in the base commit, keep it here.
If the function does NOT have LLVM_ABI in the base commit, don't add
it here."
2. "memorize, commit and push"
---
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 168 ++++-----
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 324 +++++++++---------
2 files changed, 248 insertions(+), 244 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 541703f80a4e9..9b60def594416 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -154,24 +154,25 @@ class GsymCreator {
bool Finalized = false;
bool Quiet;
+
/// Get the first function start address.
///
/// \returns The start address of the first FunctionInfo or std::nullopt if
/// there are no function infos.
- LLVM_ABI std::optional<uint64_t> getFirstFunctionAddress() const;
+ std::optional<uint64_t> getFirstFunctionAddress() const;
/// Get the last function address.
///
/// \returns The start address of the last FunctionInfo or std::nullopt if
/// there are no function infos.
- LLVM_ABI std::optional<uint64_t> getLastFunctionAddress() const;
+ std::optional<uint64_t> getLastFunctionAddress() const;
/// Get the base address to use for this GSYM file.
///
/// \returns The base address to put into the header and to use when creating
/// the address offset table or std::nullpt if there are no valid
/// function infos or if the base address wasn't specified.
- LLVM_ABI std::optional<uint64_t> getBaseAddress() const;
+ std::optional<uint64_t> getBaseAddress() const;
/// Get the size of an address offset in the address offset table.
///
@@ -181,7 +182,7 @@ class GsymCreator {
/// on the current contents of the GSYM file.
///
/// \returns The size in byets of the address offsets.
- LLVM_ABI uint8_t getAddressOffsetSize() const;
+ uint8_t getAddressOffsetSize() const;
/// Get the maximum address offset for the current address offset size.
///
@@ -191,15 +192,17 @@ class GsymCreator {
///
/// \returns The maximum address offset value that will be encoded into a GSYM
/// file.
- LLVM_ABI uint64_t getMaxAddressOffset() const;
+ uint64_t getMaxAddressOffset() const;
- /// Inserts a FileEntry into the file table.
+ /// Calculate the byte size of the GSYM header and tables sizes.
///
- /// This is used to insert a file entry in a thread safe way into this object.
+ /// Version-specific because V1 and V2 have different header and table
+ /// layouts.
///
- /// \param FE A file entry object that contains valid string table offsets
- /// from this object already.
- LLVM_ABI uint32_t insertFileEntry(FileEntry FE);
+ /// This is used to help split GSYM files into segments.
+ ///
+ /// \returns Size in bytes the GSYM header and tables.
+ virtual uint64_t calculateHeaderAndTableSize() const = 0;
/// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
///
@@ -213,8 +216,7 @@ class GsymCreator {
/// \returns The number of bytes it will take to encode the function info in
/// this GsymCreator. This helps calculate the size of the current GSYM
/// segment file.
- LLVM_ABI uint64_t copyFunctionInfo(const GsymCreator &SrcGC,
- size_t FuncInfoIdx);
+ uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
/// Copy a string from \a SrcGC into this object.
///
@@ -226,7 +228,7 @@ class GsymCreator {
/// \param SrcGC The source gsym creator to copy from.
/// \param StrOff The string table offset from \a SrcGC to copy.
/// \returns The new string table offset of the string within this object.
- LLVM_ABI uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
+ uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
/// Copy a file from \a SrcGC into this object.
///
@@ -242,7 +244,15 @@ class GsymCreator {
/// file index of zero will always return zero as the zero is a reserved file
/// index that means no file.
/// \returns The new file index of the file within this object.
- LLVM_ABI uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
+ uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
+
+ /// Inserts a FileEntry into the file table.
+ ///
+ /// This is used to insert a file entry in a thread safe way into this object.
+ ///
+ /// \param FE A file entry object that contains valid string table offsets
+ /// from this object already.
+ uint32_t insertFileEntry(FileEntry FE);
/// Fixup any string and file references by updating any file indexes and
/// strings offsets in the InlineInfo parameter.
@@ -254,7 +264,7 @@ class GsymCreator {
/// \param II The inline info that contains file indexes and string offsets
/// that come from \a SrcGC. The entries will be updated by coping any files
/// and strings over into this object.
- LLVM_ABI void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
+ void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
/// Save this GSYM file into segments that are roughly \a SegmentSize in size.
///
@@ -270,26 +280,17 @@ class GsymCreator {
/// \param Path The path prefix to use when saving the GSYM files.
/// \param ByteOrder The endianness to use when saving the file.
/// \param SegmentSize The size in bytes to segment the GSYM file into.
- LLVM_ABI llvm::Error saveSegments(StringRef Path,
- llvm::endianness ByteOrder,
- uint64_t SegmentSize) const;
+ llvm::Error saveSegments(StringRef Path, llvm::endianness ByteOrder,
+ uint64_t SegmentSize) const;
/// Let this creator know that this is a segment of another GsymCreator.
///
/// When we have a segment, we know that function infos will be added in
/// ascending address range order without having to be finalized. We also
/// don't need to sort and unique entries during the finalize function call.
- void setIsSegment() { IsSegment = true; }
-
- /// Calculate the byte size of the GSYM header and tables sizes.
- ///
- /// Version-specific because V1 and V2 have different header and table
- /// layouts.
- ///
- /// This is used to help split GSYM files into segments.
- ///
- /// \returns Size in bytes the GSYM header and tables.
- virtual uint64_t calculateHeaderAndTableSize() const = 0;
+ void setIsSegment() {
+ IsSegment = true;
+ }
/// Create a new empty creator of the same version.
///
@@ -301,24 +302,6 @@ class GsymCreator {
LLVM_ABI GsymCreator(bool Quiet = false);
virtual ~GsymCreator() = default;
- /// Encode a GSYM into the file writer stream at the current position.
- ///
- /// Version-specific because V1 and V2 have completely different file
- /// layouts.
- ///
- /// \param O The stream to save the binary data to
- /// \returns An error object that indicates success or failure of the save.
- virtual llvm::Error encode(FileWriter &O) const = 0;
-
- /// Load call site information from a YAML file.
- ///
- /// This function reads call site information from a specified YAML file and
- /// adds it to the GSYM data.
- ///
- /// \param YAMLFile The path to the YAML file containing call site
- /// information.
- virtual llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) = 0;
-
/// Save a GSYM file to a stand alone file.
///
/// \param Path The file path to save the GSYM file to.
@@ -337,6 +320,15 @@ class GsymCreator {
save(StringRef Path, llvm::endianness ByteOrder,
std::optional<uint64_t> SegmentSize = std::nullopt) const;
+ /// Encode a GSYM into the file writer stream at the current position.
+ ///
+ /// Version-specific because V1 and V2 have completely different file
+ /// layouts.
+ ///
+ /// \param O The stream to save the binary data to
+ /// \returns An error object that indicates success or failure of the save.
+ virtual llvm::Error encode(FileWriter &O) const = 0;
+
/// Insert a string into the GSYM string table.
///
/// All strings used by GSYM files must be uniqued by adding them to this
@@ -371,8 +363,7 @@ class GsymCreator {
/// \param Style The path style for the "Path" parameter.
/// \returns The unique file index for the inserted file.
LLVM_ABI uint32_t
- insertFile(StringRef Path,
- sys::path::Style Style = sys::path::Style::native);
+ insertFile(StringRef Path, sys::path::Style Style = sys::path::Style::native);
/// Add a function info to this GSYM creator.
///
@@ -383,24 +374,23 @@ class GsymCreator {
/// \param FI The function info object to emplace into our functions list.
LLVM_ABI void addFunctionInfo(FunctionInfo &&FI);
- /// Get the current number of FunctionInfo objects contained in this
- /// object.
- LLVM_ABI size_t getNumFunctionInfos() const;
-
- /// Thread safe iteration over all function infos.
+ /// Load call site information from a YAML file.
///
- /// \param Callback A callback function that will get called with each
- /// FunctionInfo. If the callback returns false, stop iterating.
- LLVM_ABI void
- forEachFunctionInfo(
- std::function<bool(FunctionInfo &)> const &Callback);
+ /// This function reads call site information from a specified YAML file and
+ /// adds it to the GSYM data.
+ ///
+ /// \param YAMLFile The path to the YAML file containing call site
+ /// information.
+ virtual llvm::Error loadCallSitesFromYAML(StringRef YAMLFile) = 0;
- /// Thread safe const iteration over all function infos.
+ /// Organize merged FunctionInfo's
///
- /// \param Callback A callback function that will get called with each
- /// FunctionInfo. If the callback returns false, stop iterating.
- LLVM_ABI void forEachFunctionInfo(
- std::function<bool(const FunctionInfo &)> const &Callback) const;
+ /// This method processes the list of function infos (Funcs) to identify and
+ /// group functions with overlapping address ranges.
+ ///
+ /// \param Out Output stream to report information about how merged
+ /// FunctionInfo's were handled.
+ LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
/// Finalize the data in the GSYM creator prior to saving the data out.
///
@@ -413,15 +403,6 @@ class GsymCreator {
/// finalize.
LLVM_ABI llvm::Error finalize(OutputAggregator &OS);
- /// Organize merged FunctionInfo's
- ///
- /// This method processes the list of function infos (Funcs) to identify and
- /// group functions with overlapping address ranges.
- ///
- /// \param Out Output stream to report information about how merged
- /// FunctionInfo's were handled.
- LLVM_ABI void prepareMergedFunctions(OutputAggregator &Out);
-
/// Set the UUID value.
///
/// \param UUIDBytes The new UUID bytes.
@@ -429,18 +410,23 @@ class GsymCreator {
UUID.assign(UUIDBytes.begin(), UUIDBytes.end());
}
- /// Set the base address to use for the GSYM file.
+ /// Thread safe iteration over all function infos.
///
- /// Setting the base address to use for the GSYM file. Object files typically
- /// get loaded from a base address when the OS loads them into memory. Using
- /// GSYM files for symbolication becomes easier if the base address in the
- /// GSYM header is the same address as it allows addresses to be easily slid
- /// and allows symbolication without needing to find the original base
- /// address in the original object file.
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
+ LLVM_ABI void
+ forEachFunctionInfo(std::function<bool(FunctionInfo &)> const &Callback);
+
+ /// Thread safe const iteration over all function infos.
///
- /// \param Addr The address to use as the base address of the GSYM file
- /// when it is saved to disk.
- void setBaseAddress(uint64_t Addr) { BaseAddress = Addr; }
+ /// \param Callback A callback function that will get called with each
+ /// FunctionInfo. If the callback returns false, stop iterating.
+ LLVM_ABI void forEachFunctionInfo(
+ std::function<bool(const FunctionInfo &)> const &Callback) const;
+
+ /// Get the current number of FunctionInfo objects contained in this
+ /// object.
+ LLVM_ABI size_t getNumFunctionInfos() const;
/// Set valid .text address ranges that all functions must be contained in.
void SetValidTextRanges(AddressRanges &TextRanges) {
@@ -473,9 +459,25 @@ class GsymCreator {
/// text ranges have been set, false otherwise.
LLVM_ABI bool IsValidTextAddress(uint64_t Addr) const;
+ /// Set the base address to use for the GSYM file.
+ ///
+ /// Setting the base address to use for the GSYM file. Object files typically
+ /// get loaded from a base address when the OS loads them into memory. Using
+ /// GSYM files for symbolication becomes easier if the base address in the
+ /// GSYM header is the same address as it allows addresses to be easily slid
+ /// and allows symbolication without needing to find the original base
+ /// address in the original object file.
+ ///
+ /// \param Addr The address to use as the base address of the GSYM file
+ /// when it is saved to disk.
+ void setBaseAddress(uint64_t Addr) {
+ BaseAddress = Addr;
+ }
+
/// Whether the transformation should be quiet, i.e. not output warnings.
bool isQuiet() const { return Quiet; }
+
/// Create a segmented GSYM creator starting with function info index
/// \a FuncIdx.
///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index cfbef93e01770..c1a5697399be8 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -61,135 +61,7 @@ class GsymReader {
uint32_t CachedNumAddresses = 0;
uint8_t CachedAddrOffSize = 0;
- LLVM_ABI GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
-
- /// Get an appropriate address info offsets array.
- ///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte offsets from the base address. The table is stored internally as a
- /// array of bytes that are in the correct endianness. When we access this
- /// table we must get an array that matches those sizes. This templatized
- /// helper function is used when accessing address offsets in the AddrOffsets
- /// member variable.
- ///
- /// \returns An ArrayRef of an appropriate address offset size.
- template <class T> ArrayRef<T>
- getAddrOffsets() const {
- return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
- AddrOffsets.size()/sizeof(T));
- }
-
- /// Get an appropriate address from the address table.
- ///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte address offsets from the base address. The table is stored
- /// internally as a array of bytes that are in the correct endianness. In
- /// order to extract an address from the address table we must access the
- /// address offset using the correct size and then add it to the base
- /// address.
- ///
- /// \param Index An index into the AddrOffsets array.
- /// \returns An virtual address that matches the original object file for the
- /// address as the specified index, or std::nullopt if Index is out of bounds.
- template <class T>
- std::optional<uint64_t> addressForIndex(size_t Index) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- if (Index < AIO.size())
- return AIO[Index] + CachedBaseAddress;
- return std::nullopt;
- }
-
- /// Lookup an address offset in the AddrOffsets table.
- ///
- /// Given an address offset, look it up using a binary search of the
- /// AddrOffsets table.
- ///
- /// \param AddrOffset An address offset, that has already been computed by
- /// subtracting the base address.
- /// \returns The matching address offset index. This index will be used to
- /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
- template <class T>
- std::optional<uint64_t>
- getAddressOffsetIndex(const uint64_t AddrOffset) const {
- ArrayRef<T> AIO = getAddrOffsets<T>();
- const auto Begin = AIO.begin();
- const auto End = AIO.end();
- auto Iter = std::lower_bound(Begin, End, AddrOffset);
- // Watch for addresses that fall between the base address and the first
- // address offset.
- if (Iter == Begin && AddrOffset < *Begin)
- return std::nullopt;
- if (Iter == End || AddrOffset < *Iter)
- --Iter;
-
- // GSYM files have sorted function infos with the most information (line
- // table and/or inline info) first in the array of function infos, so
- // always backup as much as possible as long as the address offset is the
- // same as the previous entry.
- while (Iter != Begin) {
- auto Prev = Iter - 1;
- if (*Prev == *Iter)
- Iter = Prev;
- else
- break;
- }
-
- return std::distance(Begin, Iter);
- }
-
- /// Given an address, find the address index.
- ///
- /// Binary search the address table and find the matching address index.
- ///
- /// \param Addr A virtual address that matches the original object file
- /// to lookup.
- /// \returns An index into the address table. This index can be used to
- /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
- /// Returns an error if the address isn't in the GSYM with details of why.
- LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
-
- /// Given an address index, get the offset for the FunctionInfo.
- ///
- /// Looking up an address is done by finding the corresponding address
- /// index for the address. This index is then used to get the offset of the
- /// FunctionInfo data that we will decode using this function.
- ///
- /// \param Index An index into the address table.
- /// \returns An optional GSYM data offset for the offset of the FunctionInfo
- /// that needs to be decoded.
- LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
-
- /// Given an address, find the correct function info data and function
- /// address.
- ///
- /// Binary search the address table and find the matching address info
- /// and make sure that the function info contains the address. GSYM allows
- /// functions to overlap, and the most debug info is contained in the first
- /// entries due to the sorting when GSYM files are created. We can have
- /// multiple function info that start at the same address only if their
- /// address range doesn't match. So find the first entry that matches \a Addr
- /// and iterate forward until we find one that contains the address.
- ///
- /// \param[in] Addr A virtual address that matches the original object file
- /// to lookup.
- ///
- /// \param[out] FuncStartAddr A virtual address that is the base address of
- /// the function that is used for decoding the FunctionInfo.
- ///
- /// \returns An valid data extractor on success, or an error if we fail to
- /// find the address in a function info or corrrectly decode the data
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
-
- /// Get the function data and address given an address index.
- ///
- /// \param AddrIdx A address index from the address table.
- ///
- /// \returns An expected FunctionInfo that contains the function info object
- /// or an error object that indicates reason for failing to lookup the
- /// address.
- LLVM_ABI llvm::Expected<llvm::DataExtractor>
- getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
+ GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
public:
LLVM_ABI GsymReader(GsymReader &&RHS);
@@ -213,30 +85,6 @@ class GsymReader {
LLVM_ABI static llvm::Expected<std::unique_ptr<GsymReader>>
copyBuffer(StringRef Bytes);
- /// Get a string from the string table.
- ///
- /// \param Offset The string table offset for the string to retrieve.
- /// \returns The string from the strin table.
- StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
-
- /// Get the a file entry for the suppplied file index.
- ///
- /// Used to convert any file indexes in the FunctionInfo data back into
- /// files. This function can be used for iteration, but is more commonly used
- /// for random access when doing lookups.
- ///
- /// \param Index An index into the file table.
- /// \returns An optional FileInfo that will be valid if the file index is
- /// valid, or std::nullopt if the file index is out of bounds,
- std::optional<FileEntry> getFile(uint32_t Index) const {
- if (Index < Files.size())
- return Files[Index];
- return std::nullopt;
- }
-
- /// Get the number of addresses in this Gsym file.
- uint32_t getNumAddresses() const { return CachedNumAddresses; }
-
/// Get the full function info for an address.
///
/// This should be called when a client will store a copy of the complete
@@ -302,14 +150,26 @@ class GsymReader {
LLVM_ABI llvm::Expected<std::vector<LookupResult>>
lookupAll(uint64_t Addr) const;
- /// Gets an address from the address table.
+ /// Get a string from the string table.
///
- /// Addresses are stored as offsets from the base address.
+ /// \param Offset The string table offset for the string to retrieve.
+ /// \returns The string from the strin table.
+ StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
+
+ /// Get the a file entry for the suppplied file index.
///
- /// \param Index A index into the address table.
- /// \returns A resolved virtual address for adddress in the address table
- /// or std::nullopt if Index is out of bounds.
- LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
+ /// Used to convert any file indexes in the FunctionInfo data back into
+ /// files. This function can be used for iteration, but is more commonly used
+ /// for random access when doing lookups.
+ ///
+ /// \param Index An index into the file table.
+ /// \returns An optional FileInfo that will be valid if the file index is
+ /// valid, or std::nullopt if the file index is out of bounds,
+ std::optional<FileEntry> getFile(uint32_t Index) const {
+ if (Index < Files.size())
+ return Files[Index];
+ return std::nullopt;
+ }
/// Dump the entire Gsym data contained in this object.
///
@@ -378,8 +238,7 @@ class GsymReader {
///
/// \param Indent The indentation as number of spaces. Used when dumping as an
/// item from within MergedFunctionsInfo.
- LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT,
- uint32_t Indent = 0);
+ LLVM_ABI void dump(raw_ostream &OS, const LineTable &LT, uint32_t Indent = 0);
/// Dump a InlineInfo object.
///
@@ -404,6 +263,149 @@ class GsymReader {
///
/// \param FE The object to dump.
LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
+
+ /// Get the number of addresses in this Gsym file.
+ uint32_t getNumAddresses() const {
+ return CachedNumAddresses;
+ }
+
+ /// Gets an address from the address table.
+ ///
+ /// Addresses are stored as offsets from the base address.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns A resolved virtual address for the address in the address table
+ /// or std::nullopt if Index is out of bounds.
+ LLVM_ABI std::optional<uint64_t> getAddress(size_t Index) const;
+
+protected:
+
+ /// Get an appropriate address info offsets array.
+ ///
+ /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+ /// byte offsets from the base address. The table is stored internally as an
+ /// array of bytes that are in the correct endianness. When we access this
+ /// table we must get an array that matches those sizes. This templatized
+ /// helper function is used when accessing address offsets in the AddrOffsets
+ /// member variable.
+ ///
+ /// \returns An ArrayRef of an appropriate address offset size.
+ template <class T> ArrayRef<T>
+ getAddrOffsets() const {
+ return ArrayRef<T>(reinterpret_cast<const T *>(AddrOffsets.data()),
+ AddrOffsets.size()/sizeof(T));
+ }
+
+ /// Get an appropriate address from the address table.
+ ///
+ /// The address table in the GSYM file is stored as an array of 1, 2, 4 or 8
+ /// byte address offsets from the base address. The table is stored
+ /// internally as an array of bytes that are in the correct endianness. In
+ /// order to extract an address from the address table we must access the
+ /// address offset using the correct size and then add it to the base
+ /// address.
+ ///
+ /// \param Index An index into the AddrOffsets array.
+ /// \returns A virtual address that matches the original object file for the
+ /// address at the specified index, or std::nullopt if Index is out of bounds.
+ template <class T>
+ std::optional<uint64_t> addressForIndex(size_t Index) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ if (Index < AIO.size())
+ return AIO[Index] + CachedBaseAddress;
+ return std::nullopt;
+ }
+ /// Lookup an address offset in the AddrOffsets table.
+ ///
+ /// Given an address offset, look it up using a binary search of the
+ /// AddrOffsets table.
+ ///
+ /// \param AddrOffset An address offset, that has already been computed by
+ /// subtracting the base address.
+ /// \returns The matching address offset index. This index will be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
+ template <class T>
+ std::optional<uint64_t>
+ getAddressOffsetIndex(const uint64_t AddrOffset) const {
+ ArrayRef<T> AIO = getAddrOffsets<T>();
+ const auto Begin = AIO.begin();
+ const auto End = AIO.end();
+ auto Iter = std::lower_bound(Begin, End, AddrOffset);
+ // Watch for addresses that fall between the base address and the first
+ // address offset.
+ if (Iter == Begin && AddrOffset < *Begin)
+ return std::nullopt;
+ if (Iter == End || AddrOffset < *Iter)
+ --Iter;
+
+ // GSYM files have sorted function infos with the most information (line
+ // table and/or inline info) first in the array of function infos, so
+ // always backup as much as possible as long as the address offset is the
+ // same as the previous entry.
+ while (Iter != Begin) {
+ auto Prev = Iter - 1;
+ if (*Prev == *Iter)
+ Iter = Prev;
+ else
+ break;
+ }
+
+ return std::distance(Begin, Iter);
+ }
+
+ /// Given an address, find the address index.
+ ///
+ /// Binary search the address table and find the matching address index.
+ ///
+ /// \param Addr A virtual address that matches the original object file
+ /// to lookup.
+ /// \returns An index into the address table. This index can be used to
+ /// extract the FunctionInfo data's offset from the AddrInfoOffsets array.
+ /// Returns an error if the address isn't in the GSYM with details of why.
+ LLVM_ABI Expected<uint64_t> getAddressIndex(const uint64_t Addr) const;
+
+ /// Given an address index, get the offset for the FunctionInfo.
+ ///
+ /// Looking up an address is done by finding the corresponding address
+ /// index for the address. This index is then used to get the offset of the
+ /// FunctionInfo data that we will decode using this function.
+ ///
+ /// \param Index An index into the address table.
+ /// \returns An optional GSYM data offset for the offset of the FunctionInfo
+ /// that needs to be decoded.
+ LLVM_ABI std::optional<uint64_t> getAddressInfoOffset(size_t Index) const;
+
+ /// Given an address, find the correct function info data and function
+ /// address.
+ ///
+ /// Binary search the address table and find the matching address info
+ /// and make sure that the function info contains the address. GSYM allows
+ /// functions to overlap, and the most debug info is contained in the first
+ /// entries due to the sorting when GSYM files are created. We can have
+ /// multiple function info that start at the same address only if their
+ /// address range doesn't match. So find the first entry that matches \a Addr
+ /// and iterate forward until we find one that contains the address.
+ ///
+ /// \param[in] Addr A virtual address that matches the original object file
+ /// to lookup.
+ ///
+ /// \param[out] FuncStartAddr A virtual address that is the base address of
+ /// the function that is used for decoding the FunctionInfo.
+ ///
+ /// \returns A valid data extractor on success, or an error if we fail to
+ /// find the address in a function info or correctly decode the data.
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataForAddress(uint64_t Addr, uint64_t &FuncStartAddr) const;
+
+ /// Get the function data and address given an address index.
+ ///
+ /// \param AddrIdx An address index from the address table.
+ ///
+ /// \returns An expected DataExtractor for the function info data, or an
+ /// error object that indicates the reason for failing to look up the
+ /// address.
+ LLVM_ABI llvm::Expected<llvm::DataExtractor>
+ getFunctionInfoDataAtIndex(uint64_t AddrIdx, uint64_t &FuncStartAddr) const;
};
} // namespace gsym
>From 071efdd44695d3c270169400b7e735ed91a1fb51 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 15:45:29 -0700
Subject: [PATCH 26/45] Rename Padding2 to StrTableEncoding in HeaderV2
Add StrTableEncodingType enum class with a single Default value (0).
Replace the Padding2 field with StrTableEncoding to allow future
string table encoding formats. Update validation to use a switch
on the enum.
User prompts since last commit:
1. "In header v2, could you change Padding2 to StrTableEncoding, which
is an enum classes that has only one value. Add a comment for that
field saying 'allow for future encoding for string table'."
2. "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 13 ++++++++++---
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 3 ++-
llvm/lib/DebugInfo/GSYM/HeaderV2.cpp | 17 +++++++++++------
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 4 ++--
4 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index f6072b0da58a0..b29c81da6684a 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -25,6 +25,12 @@ class FileWriter;
constexpr uint32_t GSYM_VERSION_2 = 2;
+/// Encoding format for the string table.
+enum class StrTableEncodingType : uint8_t {
+ /// A list of NULL-terminated strings (same as V1).
+ Default = 0,
+};
+
/// The GSYM V2 header.
///
/// The GSYM V2 header is found at the start of a stand alone GSYM file, or as
@@ -90,8 +96,8 @@ struct HeaderV2 {
/// The size in bytes of each string table reference (strp) in FunctionInfo
/// and other data structures within GlobalData.
uint8_t StrpSize;
- /// Padding for alignment. Must be set to zero.
- uint8_t Padding2;
+ /// String table encoding. Allows for future encoding for string table.
+ uint8_t StrTableEncoding;
/// The starting point of the GlobalData array. This is a list of GlobalData
/// entries, each describing a section in the GSYM file (e.g. AddrOffsets,
/// FunctionInfo, UUID, StringTable). The array is terminated by an entry
@@ -110,7 +116,8 @@ struct HeaderV2 {
/// - check that the address offset size is supported
/// - check that the address info offset size is supported
/// - check that the strp size is supported
- /// - check that padding fields are zero
+ /// - check that the padding field is zero
+ /// - check that the string table encoding is supported
///
/// \returns An error if anything is wrong in the header, or Error::success()
/// if there are no errors.
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index f4c512b2db5e5..70d396241a668 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -150,7 +150,8 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
Hdr.AddrOffSize = AddrOffSize;
Hdr.AddrInfoOffSize = AddrInfoOffSize;
Hdr.StrpSize = StrpSize;
- Hdr.Padding2 = 0;
+ Hdr.StrTableEncoding =
+ static_cast<uint8_t>(StrTableEncodingType::Default);
if (auto Err = Hdr.encode(O))
return Err;
diff --git a/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp b/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
index 4372e20ea9e61..bb2239e7b8006 100644
--- a/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
@@ -69,9 +69,14 @@ llvm::Error HeaderV2::checkForError() const {
if (Padding != 0)
return createStringError(std::errc::invalid_argument,
"padding must be zero, got %u", Padding);
- if (Padding2 != 0)
+ switch (static_cast<StrTableEncodingType>(StrTableEncoding)) {
+ case StrTableEncodingType::Default:
+ break;
+ default:
return createStringError(std::errc::invalid_argument,
- "padding2 must be zero, got %u", Padding2);
+ "unsupported string table encoding %u",
+ StrTableEncoding);
+ }
return Error::success();
}
@@ -80,7 +85,7 @@ llvm::Expected<HeaderV2> HeaderV2::decode(DataExtractor &Data) {
// The fixed portion of the HeaderV2 is 24 bytes:
// Magic(4) + Version(2) + Padding(2) + BaseAddress(8) +
// NumAddresses(4) + AddrOffSize(1) + AddrInfoOffSize(1) +
- // StrpSize(1) + Padding2(1)
+ // StrpSize(1) + StrTableEncoding(1) = 24 bytes
constexpr uint64_t FixedHeaderSize = 24;
if (!Data.isValidOffsetForDataOfSize(Offset, FixedHeaderSize))
return createStringError(std::errc::invalid_argument,
@@ -94,7 +99,7 @@ llvm::Expected<HeaderV2> HeaderV2::decode(DataExtractor &Data) {
H.AddrOffSize = Data.getU8(&Offset);
H.AddrInfoOffSize = Data.getU8(&Offset);
H.StrpSize = Data.getU8(&Offset);
- H.Padding2 = Data.getU8(&Offset);
+ H.StrTableEncoding = Data.getU8(&Offset);
if (llvm::Error Err = H.checkForError())
return std::move(Err);
return H;
@@ -111,7 +116,7 @@ llvm::Error HeaderV2::encode(FileWriter &O) const {
O.writeU8(AddrOffSize);
O.writeU8(AddrInfoOffSize);
O.writeU8(StrpSize);
- O.writeU8(Padding2);
+ O.writeU8(StrTableEncoding);
return Error::success();
}
@@ -121,5 +126,5 @@ bool llvm::gsym::operator==(const HeaderV2 &LHS, const HeaderV2 &RHS) {
LHS.NumAddresses == RHS.NumAddresses &&
LHS.AddrOffSize == RHS.AddrOffSize &&
LHS.AddrInfoOffSize == RHS.AddrInfoOffSize &&
- LHS.StrpSize == RHS.StrpSize && LHS.Padding2 == RHS.Padding2;
+ LHS.StrpSize == RHS.StrpSize && LHS.StrTableEncoding == RHS.StrTableEncoding;
}
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index d91dfefc2d769..0710b2959e756 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -139,7 +139,7 @@ static void TestV2HeaderAndGlobalData(llvm::endianness ByteOrder,
EXPECT_EQ(Hdr.AddrOffSize, ExpectedAddrOffSize);
EXPECT_EQ(Hdr.AddrInfoOffSize, 4u); // Small file, should be 4 bytes.
EXPECT_EQ(Hdr.StrpSize, 4u); // Small string table, should be 4 bytes.
- EXPECT_EQ(Hdr.Padding2, 0u);
+ EXPECT_EQ(Hdr.StrTableEncoding, 0u);
// Decode GlobalData entries starting at offset 24 (after fixed header).
uint64_t Offset = 24;
@@ -452,7 +452,7 @@ static SmallString<512> buildMinimalV2Binary(uint64_t BaseAddr,
FW.writeU8(AddrOffSize); // AddrOffSize
FW.writeU8(AddrInfoOffSize); // AddrInfoOffSize
FW.writeU8(4); // StrpSize
- FW.writeU8(0); // Padding2
+ FW.writeU8(0); // StrTableEncoding
// GlobalData entries.
auto writeGD = [&](GlobalInfoType Type, uint64_t Off, uint64_t Size) {
>From 82ea70319d92b29e16e6eb8fa34ec852abf7504c Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 15:53:56 -0700
Subject: [PATCH 27/45] Remove GlobalDataStart[0] from HeaderV2, use
sizeof(HeaderV2) instead
Remove the zero-length array field and replace all hardcoded 24 with
sizeof(HeaderV2) in GsymReaderV2.cpp, GsymCreatorV2.cpp, and
HeaderV2.cpp. Add a comment in HeaderV2.h explaining that the
GlobalData array follows at offset sizeof(HeaderV2).
User prompts since last commit:
1. "Change GlobalData[0]'s type to GlobalData, and rename the field to
GlobalDataStart."
2. "Will it be easier to use GlobalDataStart in the code, or to use
the size of the HeaderV2 struct?"
3. "In that case, maybe we should remove the GlobalDataStart field and
just use sizeof(HeaderV2)"
4. "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 11 +++++------
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 4 ++--
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 4 ++--
llvm/lib/DebugInfo/GSYM/HeaderV2.cpp | 4 ++--
4 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index b29c81da6684a..e51599472bb55 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -98,12 +98,11 @@ struct HeaderV2 {
uint8_t StrpSize;
/// String table encoding. Allows for future encoding for string table.
uint8_t StrTableEncoding;
- /// The starting point of the GlobalData array. This is a list of GlobalData
- /// entries, each describing a section in the GSYM file (e.g. AddrOffsets,
- /// FunctionInfo, UUID, StringTable). The array is terminated by an entry
- /// with Type set to EndOfList and FileOffset, FileSize, and Padding all
- /// set to zero.
- uint8_t GlobalData[0];
+ // The GlobalData array immediately follows the header at offset
+ // sizeof(HeaderV2). Each GlobalData entry describes a section in the GSYM
+ // file (e.g. AddrOffsets, FunctionInfo, UUID, StringTable). The array is
+ // terminated by an entry with Type set to EndOfList and all other fields
+ // set to zero. See GlobalData.h for details.
/// Check if a header is valid and return an error if anything is wrong.
///
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 70d396241a668..44a9b27d6f0b6 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -21,7 +21,7 @@ std::unique_ptr<GsymCreator> GsymCreatorV2::createNew(bool Quiet) const {
}
uint64_t GsymCreatorV2::calculateHeaderAndTableSize() const {
- constexpr uint64_t HeaderSize = 24;
+ const uint64_t HeaderSize = sizeof(HeaderV2);
const size_t NumFuncs = Funcs.size();
const uint32_t NumEntries = 5 + (UUID.empty() ? 0 : 1) + 1;
uint64_t Size = HeaderSize + NumEntries * 24;
@@ -91,7 +91,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t GlobalDataArraySize =
static_cast<uint64_t>(NumGlobalDataEntries) * 24;
- constexpr uint64_t HeaderSize = 24;
+ const uint64_t HeaderSize = sizeof(HeaderV2);
uint64_t CurOffset = HeaderSize + GlobalDataArraySize;
// AddrOffsets section.
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index 318de3411a036..3efad0969cef6 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -111,7 +111,7 @@ llvm::Error GsymReaderV2::parse() {
const StringRef Buf = MemBuffer->getBuffer();
const uint64_t BufSize = Buf.size();
- if (BufSize < 24)
+ if (BufSize < sizeof(HeaderV2))
return createStringError(std::errc::invalid_argument,
"not enough data for a GSYM V2 header");
@@ -154,7 +154,7 @@ llvm::Error GsymReaderV2::parse() {
CachedAddrOffSize = Hdr->AddrOffSize;
// Parse GlobalData entries to find section locations.
- uint64_t Offset = 24;
+ uint64_t Offset = sizeof(HeaderV2);
uint64_t AddrOffsetsOff = 0, AddrOffsetsSize = 0;
uint64_t AddrInfoOffsetsOff = 0, AddrInfoOffsetsSize = 0;
uint64_t StringTableOff = 0, StringTableSize = 0;
diff --git a/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp b/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
index bb2239e7b8006..ee1b27dadbe31 100644
--- a/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/HeaderV2.cpp
@@ -85,8 +85,8 @@ llvm::Expected<HeaderV2> HeaderV2::decode(DataExtractor &Data) {
// The fixed portion of the HeaderV2 is 24 bytes:
// Magic(4) + Version(2) + Padding(2) + BaseAddress(8) +
// NumAddresses(4) + AddrOffSize(1) + AddrInfoOffSize(1) +
- // StrpSize(1) + StrTableEncoding(1) = 24 bytes
- constexpr uint64_t FixedHeaderSize = 24;
+ // StrpSize(1) + StrTableEncoding(1)
+ const uint64_t FixedHeaderSize = sizeof(HeaderV2);
if (!Data.isValidOffsetForDataOfSize(Offset, FixedHeaderSize))
return createStringError(std::errc::invalid_argument,
"not enough data for a gsym::HeaderV2");
>From f783ce000c8be5708e30bdb6b5a0a24a0d723b85 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 16:43:10 -0700
Subject: [PATCH 28/45] Update documentation for GlobalInfoTypes
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 18 ++++++++++++++++++
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 12 ++----------
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index dd5721f024704..4190e66c79971 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -16,11 +16,29 @@ namespace gsym {
enum class GlobalInfoType : uint32_t {
EndOfList = 0u,
+ // The address offsets table. It's a list of function addresses, each with
+ // HeaderV2::BaseAddress subtracted from it, hence "offset".
+ //
+ // This table and the address info offsets table (see below) have the same
+ // number of items, and the items of the two tables map one-to-one by
+ // index.
+ //
+ // Given an address, this table is used to do a binary search to find the
+ // index into the address info offsets table, where the location of the
+ // FunctionInfo for the same function can be found in the GSYM.
AddrOffsets = 1u,
+ // The address info offsets table. It's a list of file offsets in the GSYM
+ // where the FunctionInfo for each function can be found.
AddrInfoOffsets = 2u,
+ // The string table. It contains all the strings used by the rest of the GSYM.
+ // The exact storage of the strings is determined by
+ // HeaderV2::StrTableEncoding.
StringTable = 3u,
+ // The file table. It's a list of files, referred by FunctionInfo objects.
FileTable = 4u,
+ // A list of FunctionInfo objects, terminated by EndOfList.
FunctionInfo = 5u,
+ // Optional UUID of the GSYM.
UUID = 6u,
};
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index e51599472bb55..653afbf30c67f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -47,16 +47,8 @@ enum class StrTableEncodingType : uint8_t {
/// file offset, and size. The sections can appear in any order in the file
/// since each GlobalData entry contains an absolute file offset. The
/// GlobalData array is terminated by an entry with type EndOfList and all
-/// other fields set to zero.
-///
-/// The GlobalInfoType values are:
-/// EndOfList = 0 (terminates GlobalData array)
-/// AddrOffsets = 1 (address offset table)
-/// AddrInfoOffsets = 2 (address info offset table)
-/// StringTable = 3 (string table)
-/// FileTable = 4 (file table)
-/// FunctionInfo = 5 (FunctionInfo data blob)
-/// UUID = 6 (binary UUID)
+/// other fields set to zero. See GlobalInfoType (in GlobalData.h) for all
+/// section types.
///
/// The header structure is encoded exactly as it appears in the structure
/// definition with no gaps between members. Alignment should not change from
>From cba7e0e097fbf977b7abf88ce4f888e5a9722097 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 16:56:01 -0700
Subject: [PATCH 29/45] Extract shared encode() helpers into GsymCreator base
class
Add validateForEncoding(), encodeAddrOffsets(), and encodeFileTable()
to the base class, reducing duplication in V1 and V2 encode() methods.
User prompts since last commit:
1. "In v1/v2 readers' encode() methods, there are some code that they
share and are almost identical. See one example below. Can you see
what can be refactored into a common function in the base class, so
that the code can be reused?
Do this with minimal change to the code that is being refactored."
2. "memorize, commit, push"
---
.../include/llvm/DebugInfo/GSYM/GsymCreator.h | 23 ++++++++
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 50 +++++++++++++++++
llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp | 54 +++----------------
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 44 +++------------
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 2 +-
5 files changed, 86 insertions(+), 87 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 9b60def594416..c4be4d2f1e9cc 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -194,6 +194,29 @@ class GsymCreator {
/// file.
uint64_t getMaxAddressOffset() const;
+ /// Validate that the creator is ready for encoding.
+ ///
+ /// Checks that functions exist, the creator is finalized, the function count
+ /// fits in 32 bits, and the base address is valid.
+ ///
+ /// \param[out] BaseAddr Set to the base address on success.
+ /// \returns An error if validation fails, or Error::success().
+ llvm::Error validateForEncoding(std::optional<uint64_t> &BaseAddr) const;
+
+ /// Write the address offsets table to the output stream.
+ ///
+ /// \param O The file writer to write to.
+ /// \param AddrOffSize The byte width of each address offset.
+ /// \param BaseAddr The base address to subtract from each function address.
+ void encodeAddrOffsets(FileWriter &O, uint8_t AddrOffSize,
+ uint64_t BaseAddr) const;
+
+ /// Write the file table to the output stream.
+ ///
+ /// \param O The file writer to write to.
+ /// \returns An error if the file table is too large, or Error::success().
+ llvm::Error encodeFileTable(FileWriter &O) const;
+
/// Calculate the byte size of the GSYM header and tables sizes.
///
/// Version-specific because V1 and V2 have different header and table
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 961ff212cf892..8ef4ac210ed64 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -149,6 +149,56 @@ std::optional<uint64_t> GsymCreator::getBaseAddress() const {
return getFirstFunctionAddress();
}
+llvm::Error GsymCreator::validateForEncoding(
+ std::optional<uint64_t> &BaseAddr) const {
+ if (Funcs.empty())
+ return createStringError(std::errc::invalid_argument,
+ "no functions to encode");
+ if (!Finalized)
+ return createStringError(std::errc::invalid_argument,
+ "GsymCreator wasn't finalized prior to encoding");
+ if (Funcs.size() > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument,
+ "too many FunctionInfos");
+ BaseAddr = getBaseAddress();
+ if (!BaseAddr)
+ return createStringError(std::errc::invalid_argument,
+ "invalid base address");
+ return Error::success();
+}
+
+void GsymCreator::encodeAddrOffsets(FileWriter &O, uint8_t AddrOffSize,
+ uint64_t BaseAddr) const {
+ const uint64_t MaxAddressOffset = getMaxAddressOffset();
+ O.alignTo(AddrOffSize);
+ for (const auto &FI : Funcs) {
+ uint64_t AddrOffset = FI.startAddress() - BaseAddr;
+ assert(AddrOffset <= MaxAddressOffset);
+ (void)MaxAddressOffset;
+ switch (AddrOffSize) {
+ case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
+ case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
+ case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
+ case 8: O.writeU64(AddrOffset); break;
+ }
+ }
+}
+
+llvm::Error GsymCreator::encodeFileTable(FileWriter &O) const {
+ O.alignTo(4);
+ assert(!Files.empty());
+ assert(Files[0].Dir == 0);
+ assert(Files[0].Base == 0);
+ if (Files.size() > UINT32_MAX)
+ return createStringError(std::errc::invalid_argument, "too many files");
+ O.writeU32(static_cast<uint32_t>(Files.size()));
+ for (const auto &File : Files) {
+ O.writeU32(File.Dir);
+ O.writeU32(File.Base);
+ }
+ return Error::success();
+}
+
uint64_t GsymCreator::getMaxAddressOffset() const {
switch (getAddressOffsetSize()) {
case 1: return UINT8_MAX;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
index 71e22d14401ca..8d6702c165212 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
@@ -35,21 +35,9 @@ llvm::Error GsymCreatorV1::loadCallSitesFromYAML(StringRef YAMLFile) {
llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
std::lock_guard<std::mutex> Guard(Mutex);
- if (Funcs.empty())
- return createStringError(std::errc::invalid_argument,
- "no functions to encode");
- if (!Finalized)
- return createStringError(std::errc::invalid_argument,
- "GsymCreator wasn't finalized prior to encoding");
-
- if (Funcs.size() > UINT32_MAX)
- return createStringError(std::errc::invalid_argument,
- "too many FunctionInfos");
-
- std::optional<uint64_t> BaseAddress = getBaseAddress();
- if (!BaseAddress)
- return createStringError(std::errc::invalid_argument,
- "invalid base address");
+ std::optional<uint64_t> BaseAddress;
+ if (auto Err = validateForEncoding(BaseAddress))
+ return Err;
Header Hdr;
Hdr.Magic = GSYM_MAGIC;
Hdr.Version = GSYM_VERSION;
@@ -69,45 +57,15 @@ llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
if (Err)
return Err;
- const uint64_t MaxAddressOffset = getMaxAddressOffset();
- O.alignTo(Hdr.AddrOffSize);
- for (const auto &FuncInfo : Funcs) {
- uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
- assert(AddrOffset <= MaxAddressOffset);
- (void)MaxAddressOffset;
- switch (Hdr.AddrOffSize) {
- case 1:
- O.writeU8(static_cast<uint8_t>(AddrOffset));
- break;
- case 2:
- O.writeU16(static_cast<uint16_t>(AddrOffset));
- break;
- case 4:
- O.writeU32(static_cast<uint32_t>(AddrOffset));
- break;
- case 8:
- O.writeU64(AddrOffset);
- break;
- }
- }
+ encodeAddrOffsets(O, Hdr.AddrOffSize, Hdr.BaseAddress);
O.alignTo(4);
const off_t AddrInfoOffsetsOffset = O.tell();
for (size_t i = 0, n = Funcs.size(); i < n; ++i)
O.writeU32(0);
- O.alignTo(4);
- assert(!Files.empty());
- assert(Files[0].Dir == 0);
- assert(Files[0].Base == 0);
- size_t NumFiles = Files.size();
- if (NumFiles > UINT32_MAX)
- return createStringError(std::errc::invalid_argument, "too many files");
- O.writeU32(static_cast<uint32_t>(NumFiles));
- for (auto File : Files) {
- O.writeU32(File.Dir);
- O.writeU32(File.Base);
- }
+ if (auto Err = encodeFileTable(O))
+ return Err;
const off_t StrtabOffset = O.tell();
StrTab.write(O.get_stream());
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 44a9b27d6f0b6..0ad3203655447 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -52,20 +52,9 @@ static void writeGlobalDataEntry(FileWriter &O, GlobalInfoType Type,
llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
std::lock_guard<std::mutex> Guard(Mutex);
- if (Funcs.empty())
- return createStringError(std::errc::invalid_argument,
- "no functions to encode");
- if (!Finalized)
- return createStringError(std::errc::invalid_argument,
- "GsymCreatorV2 wasn't finalized prior to encoding");
- if (Funcs.size() > UINT32_MAX)
- return createStringError(std::errc::invalid_argument,
- "too many FunctionInfos");
-
- std::optional<uint64_t> BaseAddr = getBaseAddress();
- if (!BaseAddr)
- return createStringError(std::errc::invalid_argument,
- "invalid base address");
+ std::optional<uint64_t> BaseAddr;
+ if (auto Err = validateForEncoding(BaseAddr))
+ return Err;
const uint8_t AddrOffSize = getAddressOffsetSize();
@@ -172,20 +161,8 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
writeGlobalDataEntry(O, GlobalInfoType::EndOfList, 0, 0);
// Write AddrOffsets section.
- O.alignTo(AddrOffSize);
assert(O.tell() == AddrOffsetsOffset);
- const uint64_t MaxAddressOffset = getMaxAddressOffset();
- for (const auto &FI : Funcs) {
- uint64_t AddrOffset = FI.startAddress() - *BaseAddr;
- assert(AddrOffset <= MaxAddressOffset);
- (void)MaxAddressOffset;
- switch (AddrOffSize) {
- case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
- case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
- case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
- case 8: O.writeU64(AddrOffset); break;
- }
- }
+ encodeAddrOffsets(O, AddrOffSize, *BaseAddr);
// Write AddrInfoOffsets section.
O.alignTo(AddrInfoOffSize);
@@ -203,18 +180,9 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
}
// Write FileTable section.
- O.alignTo(4);
assert(O.tell() == FileTableOffset);
- assert(!Files.empty());
- assert(Files[0].Dir == 0);
- assert(Files[0].Base == 0);
- if (Files.size() > UINT32_MAX)
- return createStringError(std::errc::invalid_argument, "too many files");
- O.writeU32(static_cast<uint32_t>(Files.size()));
- for (const auto &File : Files) {
- O.writeU32(File.Dir);
- O.writeU32(File.Base);
- }
+ if (auto Err = encodeFileTable(O))
+ return Err;
// Write StringTable section.
assert(O.tell() == StringTableOffset);
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index 0710b2959e756..3d62848afb4fb 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -85,7 +85,7 @@ TEST(GSYMV2Test, TestCreatorV2EncodeErrorNotFinalized) {
const uint32_t Name = GC.insertString("foo");
GC.addFunctionInfo(FunctionInfo(0x1000, 0x100, Name));
auto Result = encodeV2(GC, llvm::endianness::little);
- checkError("GsymCreatorV2 wasn't finalized prior to encoding",
+ checkError("GsymCreator wasn't finalized prior to encoding",
Result.takeError());
}
>From 2a6a76e97fd8afd4ad3d483f626e43f9f1405042 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 17:41:25 -0700
Subject: [PATCH 30/45] Add encode() and decode() to GlobalData, use in
GsymCreatorV2
Add GlobalData::encode() and GlobalData::decode() methods with a new
GlobalData.cpp. Replace the static writeGlobalDataEntry() helper in
GsymCreatorV2::encode() with inline GlobalData{...}.encode(O) calls.
User prompts since last commit:
1. "First let's add encode() and decode() methods into GlobalData,
then in GsymCreatorV2::encode(), use GlobalData::encode() to write
to O instead of calling writeGlobalDataEntry(), which should be
removed."
2. "Can you add a getGlobalData<T>(GlobalInfoType) method to return
the data for a given section? ..."
[Reverted by user request]
3. "Do you think it will be better if the code first create a
std::vector of GlobalData, then use a for-loop to call their
encode()? I'm actually asking you to comparing, not asking you to
agree."
4. "If we move the vector construction to before, i.e. build it as we
compute the layout, will that be better?"
5. "So overall the existing approach (calling encode on local
GlobalData{}) without a for-loop is equally good?"
6. "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 19 +++++++++
llvm/lib/DebugInfo/GSYM/CMakeLists.txt | 1 +
llvm/lib/DebugInfo/GSYM/GlobalData.cpp | 35 ++++++++++++++++
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 42 +++++++++----------
4 files changed, 75 insertions(+), 22 deletions(-)
create mode 100644 llvm/lib/DebugInfo/GSYM/GlobalData.cpp
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index 4190e66c79971..612cb4c676cc9 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -9,10 +9,15 @@
#ifndef LLVM_DEBUGINFO_GSYM_GLOBALDATA_H
#define LLVM_DEBUGINFO_GSYM_GLOBALDATA_H
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Error.h"
#include <cstdint>
namespace llvm {
+class DataExtractor;
+
namespace gsym {
+class FileWriter;
enum class GlobalInfoType : uint32_t {
EndOfList = 0u,
@@ -50,6 +55,20 @@ struct GlobalData {
uint32_t Padding;
uint64_t FileOffset;
uint64_t FileSize;
+
+ /// Encode this GlobalData entry into a FileWriter stream.
+ ///
+ /// \param O The binary stream to write the data to.
+ /// \returns An error object that indicates success or failure.
+ LLVM_ABI llvm::Error encode(FileWriter &O) const;
+
+ /// Decode a GlobalData entry from a binary data stream.
+ ///
+ /// \param Data The binary stream to read from.
+ /// \param Offset The offset to start reading from. Updated on success.
+ /// \returns A GlobalData entry or an error.
+ LLVM_ABI static llvm::Expected<GlobalData> decode(DataExtractor &Data,
+ uint64_t &Offset);
};
} // namespace gsym
diff --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index fcf0f8641e0aa..605329f7fcd4a 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -4,6 +4,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
HeaderV2.cpp
FileWriter.cpp
FunctionInfo.cpp
+ GlobalData.cpp
GsymCreator.cpp
GsymCreatorV1.cpp
GsymCreatorV2.cpp
diff --git a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
new file mode 100644
index 0000000000000..c5e662bf64615
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
@@ -0,0 +1,35 @@
+//===- GlobalData.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/GlobalData.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/Support/DataExtractor.h"
+
+using namespace llvm;
+using namespace gsym;
+
+llvm::Error GlobalData::encode(FileWriter &O) const {
+ O.writeU32(static_cast<uint32_t>(Type));
+ O.writeU32(Padding);
+ O.writeU64(FileOffset);
+ O.writeU64(FileSize);
+ return Error::success();
+}
+
+llvm::Expected<GlobalData> GlobalData::decode(DataExtractor &Data,
+ uint64_t &Offset) {
+ if (!Data.isValidOffsetForDataOfSize(Offset, 24))
+ return createStringError(std::errc::invalid_argument,
+ "not enough data for a GlobalData entry");
+ GlobalData GD;
+ GD.Type = static_cast<GlobalInfoType>(Data.getU32(&Offset));
+ GD.Padding = Data.getU32(&Offset);
+ GD.FileOffset = Data.getU64(&Offset);
+ GD.FileSize = Data.getU64(&Offset);
+ return GD;
+}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 0ad3203655447..e78a09cee0d0d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -41,15 +41,6 @@ llvm::Error GsymCreatorV2::loadCallSitesFromYAML(StringRef YAMLFile) {
"call site loading not yet supported in V2");
}
-/// Write a single GlobalData entry to the output stream.
-static void writeGlobalDataEntry(FileWriter &O, GlobalInfoType Type,
- uint64_t FileOffset, uint64_t FileSize) {
- O.writeU32(static_cast<uint32_t>(Type));
- O.writeU32(0); // Padding
- O.writeU64(FileOffset);
- O.writeU64(FileSize);
-}
-
llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
std::lock_guard<std::mutex> Guard(Mutex);
std::optional<uint64_t> BaseAddr;
@@ -145,20 +136,27 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
return Err;
// Write GlobalData entries.
- writeGlobalDataEntry(O, GlobalInfoType::AddrOffsets,
- AddrOffsetsOffset, AddrOffsetsSize);
- writeGlobalDataEntry(O, GlobalInfoType::AddrInfoOffsets,
- AddrInfoOffsetsOffset, AddrInfoOffsetsSize);
- writeGlobalDataEntry(O, GlobalInfoType::FileTable,
- FileTableOffset, FileTableSize);
- writeGlobalDataEntry(O, GlobalInfoType::StringTable,
- StringTableOffset, StringTableSize);
- writeGlobalDataEntry(O, GlobalInfoType::FunctionInfo,
- FISectionOffset, FISectionSize);
+ if (auto Err = GlobalData{GlobalInfoType::AddrOffsets, 0,
+ AddrOffsetsOffset, AddrOffsetsSize}.encode(O))
+ return Err;
+ if (auto Err = GlobalData{GlobalInfoType::AddrInfoOffsets, 0,
+ AddrInfoOffsetsOffset, AddrInfoOffsetsSize}.encode(O))
+ return Err;
+ if (auto Err = GlobalData{GlobalInfoType::FileTable, 0,
+ FileTableOffset, FileTableSize}.encode(O))
+ return Err;
+ if (auto Err = GlobalData{GlobalInfoType::StringTable, 0,
+ StringTableOffset, StringTableSize}.encode(O))
+ return Err;
+ if (auto Err = GlobalData{GlobalInfoType::FunctionInfo, 0,
+ FISectionOffset, FISectionSize}.encode(O))
+ return Err;
if (HasUUID)
- writeGlobalDataEntry(O, GlobalInfoType::UUID,
- UUIDOffset, UUIDSectionSize);
- writeGlobalDataEntry(O, GlobalInfoType::EndOfList, 0, 0);
+ if (auto Err = GlobalData{GlobalInfoType::UUID, 0,
+ UUIDOffset, UUIDSectionSize}.encode(O))
+ return Err;
+ if (auto Err = GlobalData{GlobalInfoType::EndOfList, 0, 0, 0}.encode(O))
+ return Err;
// Write AddrOffsets section.
assert(O.tell() == AddrOffsetsOffset);
>From db20d318c87c53eec6437d88529b3b2ff33a1fb4 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 17:50:06 -0700
Subject: [PATCH 31/45] Use GlobalData::encode()/decode(), simplify call sites
- Change GlobalData::encode() to return void (can't fail)
- Remove error checking from 7 encode() call sites in GsymCreatorV2
- Use GlobalData::decode() in parseGlobalDataEntries() in GsymReaderV2
- Add padding validation in GlobalData::decode()
User prompts since last commit:
1. "Is there anywhere the GlobalData::decode() can be used to
simplify code?"
2. "yes please. also, for the 7 call sites of the GlobalData::encode()
that we just talked about, can you remove the error checking,
because I think it's safe to not do that, and that will make the
code much more readable."
3. "I still want the encode method to return an error, but we don't
have to check it in those 7 call sites."
4. "Actually, you are right, change the return type to void"
5. "In GlobalData::decode() please check that the padding is all zero."
6. "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 3 +-
llvm/lib/DebugInfo/GSYM/GlobalData.cpp | 7 ++-
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 32 ++++++--------
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 43 +++++++++----------
4 files changed, 39 insertions(+), 46 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index 612cb4c676cc9..ddc5bc18728bb 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -59,8 +59,7 @@ struct GlobalData {
/// Encode this GlobalData entry into a FileWriter stream.
///
/// \param O The binary stream to write the data to.
- /// \returns An error object that indicates success or failure.
- LLVM_ABI llvm::Error encode(FileWriter &O) const;
+ LLVM_ABI void encode(FileWriter &O) const;
/// Decode a GlobalData entry from a binary data stream.
///
diff --git a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
index c5e662bf64615..7429e85c51e6c 100644
--- a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
@@ -13,12 +13,11 @@
using namespace llvm;
using namespace gsym;
-llvm::Error GlobalData::encode(FileWriter &O) const {
+void GlobalData::encode(FileWriter &O) const {
O.writeU32(static_cast<uint32_t>(Type));
O.writeU32(Padding);
O.writeU64(FileOffset);
O.writeU64(FileSize);
- return Error::success();
}
llvm::Expected<GlobalData> GlobalData::decode(DataExtractor &Data,
@@ -31,5 +30,9 @@ llvm::Expected<GlobalData> GlobalData::decode(DataExtractor &Data,
GD.Padding = Data.getU32(&Offset);
GD.FileOffset = Data.getU64(&Offset);
GD.FileSize = Data.getU64(&Offset);
+ if (GD.Padding != 0)
+ return createStringError(std::errc::invalid_argument,
+ "GlobalData entry padding must be zero, got %u",
+ GD.Padding);
return GD;
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index e78a09cee0d0d..cf768e97482c8 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -136,27 +136,19 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
return Err;
// Write GlobalData entries.
- if (auto Err = GlobalData{GlobalInfoType::AddrOffsets, 0,
- AddrOffsetsOffset, AddrOffsetsSize}.encode(O))
- return Err;
- if (auto Err = GlobalData{GlobalInfoType::AddrInfoOffsets, 0,
- AddrInfoOffsetsOffset, AddrInfoOffsetsSize}.encode(O))
- return Err;
- if (auto Err = GlobalData{GlobalInfoType::FileTable, 0,
- FileTableOffset, FileTableSize}.encode(O))
- return Err;
- if (auto Err = GlobalData{GlobalInfoType::StringTable, 0,
- StringTableOffset, StringTableSize}.encode(O))
- return Err;
- if (auto Err = GlobalData{GlobalInfoType::FunctionInfo, 0,
- FISectionOffset, FISectionSize}.encode(O))
- return Err;
+ GlobalData{GlobalInfoType::AddrOffsets, 0,
+ AddrOffsetsOffset, AddrOffsetsSize}.encode(O);
+ GlobalData{GlobalInfoType::AddrInfoOffsets, 0,
+ AddrInfoOffsetsOffset, AddrInfoOffsetsSize}.encode(O);
+ GlobalData{GlobalInfoType::FileTable, 0,
+ FileTableOffset, FileTableSize}.encode(O);
+ GlobalData{GlobalInfoType::StringTable, 0,
+ StringTableOffset, StringTableSize}.encode(O);
+ GlobalData{GlobalInfoType::FunctionInfo, 0,
+ FISectionOffset, FISectionSize}.encode(O);
if (HasUUID)
- if (auto Err = GlobalData{GlobalInfoType::UUID, 0,
- UUIDOffset, UUIDSectionSize}.encode(O))
- return Err;
- if (auto Err = GlobalData{GlobalInfoType::EndOfList, 0, 0, 0}.encode(O))
- return Err;
+ GlobalData{GlobalInfoType::UUID, 0, UUIDOffset, UUIDSectionSize}.encode(O);
+ GlobalData{GlobalInfoType::EndOfList, 0, 0, 0}.encode(O);
// Write AddrOffsets section.
assert(O.tell() == AddrOffsetsOffset);
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index 3efad0969cef6..c159d6a465f5f 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -58,44 +58,43 @@ parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
uint64_t &StringTableOff, uint64_t &StringTableSize,
uint64_t &FileTableOff, uint64_t &FileTableSize,
uint64_t &FuncInfoOff, uint64_t &FuncInfoSize) {
- while (Offset + 24 <= BufSize) {
- auto Type = static_cast<GlobalInfoType>(DE.getU32(&Offset));
- uint32_t Pad = DE.getU32(&Offset);
- uint64_t FileOffset = DE.getU64(&Offset);
- uint64_t FileSize = DE.getU64(&Offset);
- (void)Pad;
-
- if (Type == GlobalInfoType::EndOfList)
+ while (Offset + sizeof(GlobalData) <= BufSize) {
+ auto GDOrErr = GlobalData::decode(DE, Offset);
+ if (!GDOrErr)
+ return GDOrErr.takeError();
+ const GlobalData &GD = *GDOrErr;
+
+ if (GD.Type == GlobalInfoType::EndOfList)
return Error::success();
- if (FileOffset + FileSize > BufSize)
+ if (GD.FileOffset + GD.FileSize > BufSize)
return createStringError(std::errc::invalid_argument,
"GlobalData section type %u extends beyond "
"buffer (offset=%" PRIu64 ", size=%" PRIu64
", bufsize=%" PRIu64 ")",
- static_cast<uint32_t>(Type), FileOffset,
- FileSize, BufSize);
+ static_cast<uint32_t>(GD.Type), GD.FileOffset,
+ GD.FileSize, BufSize);
- switch (Type) {
+ switch (GD.Type) {
case GlobalInfoType::AddrOffsets:
- AddrOffsetsOff = FileOffset;
- AddrOffsetsSize = FileSize;
+ AddrOffsetsOff = GD.FileOffset;
+ AddrOffsetsSize = GD.FileSize;
break;
case GlobalInfoType::AddrInfoOffsets:
- AddrInfoOffsetsOff = FileOffset;
- AddrInfoOffsetsSize = FileSize;
+ AddrInfoOffsetsOff = GD.FileOffset;
+ AddrInfoOffsetsSize = GD.FileSize;
break;
case GlobalInfoType::StringTable:
- StringTableOff = FileOffset;
- StringTableSize = FileSize;
+ StringTableOff = GD.FileOffset;
+ StringTableSize = GD.FileSize;
break;
case GlobalInfoType::FileTable:
- FileTableOff = FileOffset;
- FileTableSize = FileSize;
+ FileTableOff = GD.FileOffset;
+ FileTableSize = GD.FileSize;
break;
case GlobalInfoType::FunctionInfo:
- FuncInfoOff = FileOffset;
- FuncInfoSize = FileSize;
+ FuncInfoOff = GD.FileOffset;
+ FuncInfoSize = GD.FileSize;
break;
case GlobalInfoType::UUID:
break;
>From 5549662c00aeefd921b46c32b622caf94e12ca41 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 17:52:44 -0700
Subject: [PATCH 32/45] Move UUID to be the first GlobalData section in V2
layout
UUID is now positioned right after the GlobalData array (before
AddrOffsets) and listed first in the GlobalData entry list.
User prompts since last commit:
1. "Move UUID to be the first GlobalData section (both the UUID's
position in the file, and in the GlobalData list)"
2. "memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 26 ++++++++++++-----------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index cf768e97482c8..321a4ea8c9784 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -74,6 +74,12 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t HeaderSize = sizeof(HeaderV2);
uint64_t CurOffset = HeaderSize + GlobalDataArraySize;
+ // UUID section (first, no alignment requirement).
+ const uint64_t UUIDOffset = CurOffset;
+ const uint64_t UUIDSectionSize = UUID.size();
+ if (HasUUID)
+ CurOffset += UUIDSectionSize;
+
// AddrOffsets section.
CurOffset = llvm::alignTo(CurOffset, AddrOffSize);
const uint64_t AddrOffsetsOffset = CurOffset;
@@ -116,10 +122,6 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const uint64_t FISectionOffset = CurOffset;
CurOffset += FISectionSize;
- // UUID section.
- const uint64_t UUIDOffset = CurOffset;
- const uint64_t UUIDSectionSize = UUID.size();
-
// Build and write the header.
HeaderV2 Hdr;
Hdr.Magic = GSYM_MAGIC;
@@ -136,6 +138,8 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
return Err;
// Write GlobalData entries.
+ if (HasUUID)
+ GlobalData{GlobalInfoType::UUID, 0, UUIDOffset, UUIDSectionSize}.encode(O);
GlobalData{GlobalInfoType::AddrOffsets, 0,
AddrOffsetsOffset, AddrOffsetsSize}.encode(O);
GlobalData{GlobalInfoType::AddrInfoOffsets, 0,
@@ -146,10 +150,14 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
StringTableOffset, StringTableSize}.encode(O);
GlobalData{GlobalInfoType::FunctionInfo, 0,
FISectionOffset, FISectionSize}.encode(O);
- if (HasUUID)
- GlobalData{GlobalInfoType::UUID, 0, UUIDOffset, UUIDSectionSize}.encode(O);
GlobalData{GlobalInfoType::EndOfList, 0, 0, 0}.encode(O);
+ // Write UUID section.
+ if (HasUUID) {
+ assert(O.tell() == UUIDOffset);
+ O.writeData(ArrayRef<uint8_t>(UUID.data(), UUID.size()));
+ }
+
// Write AddrOffsets section.
assert(O.tell() == AddrOffsetsOffset);
encodeAddrOffsets(O, AddrOffSize, *BaseAddr);
@@ -184,11 +192,5 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
O.writeData(ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(FIBuf.data()),
FIBuf.size()));
- // Write UUID section.
- if (HasUUID) {
- assert(O.tell() == UUIDOffset);
- O.writeData(ArrayRef<uint8_t>(UUID.data(), UUID.size()));
- }
-
return Error::success();
}
>From 88a50ee6fd5b2b33b8c18cdba6a38b27c3dfcceb Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 17:59:10 -0700
Subject: [PATCH 33/45] Remove --reader-version, rename --creator-version to
--output-version
Remove the --reader-version option (reader always auto-detects).
Rename --creator-version to --output-version. Remove the now-trivial
openGsymFile() wrapper and call GsymReader::openFile() directly.
User prompts since last commit:
1. "Remove the --reader-version option. Rename --creator-version to
--output-version"
2. "I don't think we need openGsymFile() any more"
3. "memorize, commit, push"
---
llvm/tools/llvm-gsymutil/Opts.td | 10 ++--
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp | 61 +++++-----------------
2 files changed, 15 insertions(+), 56 deletions(-)
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 948cd8b366267..214beb9f39547 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -51,11 +51,7 @@ defm merged_functions_filter :
"When used with --address/--addresses-from-stdin and --merged-functions,\n"
"filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
"Can be specified multiple times.">;
-defm reader_version :
- Eq<"reader-version",
- "Force the GSYM reader version (auto, v1, v2). Default: auto-detect from file.">,
- Flags<[HelpHidden]>;
-defm creator_version :
- Eq<"creator-version",
- "Force the GSYM creator version (v1, v2). Default: v1.">,
+defm output_version :
+ Eq<"output-version",
+ "Set the GSYM output version (v1, v2). Default: v1.">,
Flags<[HelpHidden]>;
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index a89f19dd5eb61..e4de60455a510 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -45,8 +45,6 @@
#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReader.h"
-#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
-#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
@@ -110,11 +108,8 @@ static bool LoadDwarfCallSites = false;
static std::string CallSiteYamlPath;
static std::vector<std::string> MergedFunctionsFilters;
-enum class ReaderVersion { Auto, V1, V2 };
-static ReaderVersion ForceReaderVersion = ReaderVersion::Auto;
-
-enum class CreatorVersion { V1, V2 };
-static CreatorVersion ForceCreatorVersion = CreatorVersion::V1;
+enum class OutputVersion { V1, V2 };
+static OutputVersion ForceOutputVersion = OutputVersion::V1;
static void parseArgs(int argc, char **argv) {
GSYMUtilOptTable Tbl;
@@ -227,31 +222,15 @@ static void parseArgs(int argc, char **argv) {
}
}
- if (const llvm::opt::Arg *A = Args.getLastArg(OPT_reader_version_EQ)) {
- StringRef Val = A->getValue();
- if (Val == "auto")
- ForceReaderVersion = ReaderVersion::Auto;
- else if (Val == "v1")
- ForceReaderVersion = ReaderVersion::V1;
- else if (Val == "v2")
- ForceReaderVersion = ReaderVersion::V2;
- else {
- llvm::errs() << ToolName
- << ": for the --reader-version option: '" << Val
- << "' is invalid. Use 'auto', 'v1', or 'v2'.\n";
- std::exit(1);
- }
- }
-
- if (const llvm::opt::Arg *A = Args.getLastArg(OPT_creator_version_EQ)) {
+ if (const llvm::opt::Arg *A = Args.getLastArg(OPT_output_version_EQ)) {
StringRef Val = A->getValue();
if (Val == "v1")
- ForceCreatorVersion = CreatorVersion::V1;
+ ForceOutputVersion = OutputVersion::V1;
else if (Val == "v2")
- ForceCreatorVersion = CreatorVersion::V2;
+ ForceOutputVersion = OutputVersion::V2;
else {
llvm::errs() << ToolName
- << ": for the --creator-version option: '" << Val
+ << ": for the --output-version option: '" << Val
<< "' is invalid. Use 'v1' or 'v2'.\n";
std::exit(1);
}
@@ -393,7 +372,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj, const std::string &OutFile,
NumThreads > 0 ? NumThreads : std::thread::hardware_concurrency();
std::unique_ptr<GsymCreator> GsymPtr;
- if (ForceCreatorVersion == CreatorVersion::V2)
+ if (ForceOutputVersion == OutputVersion::V2)
GsymPtr = std::make_unique<GsymCreatorV2>(Quiet);
else
GsymPtr = std::make_unique<GsymCreatorV1>(Quiet);
@@ -539,22 +518,6 @@ static llvm::Error handleBuffer(StringRef Filename, MemoryBufferRef Buffer,
return Error::success();
}
-/// Open a GSYM file, auto-detecting the version unless forced.
-static Expected<std::unique_ptr<GsymReader>> openGsymFile(StringRef Path) {
- if (ForceReaderVersion == ReaderVersion::Auto)
- return GsymReader::openFile(Path);
- if (ForceReaderVersion == ReaderVersion::V2) {
- auto R = GsymReaderV2::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV2>(std::move(*R));
- }
- auto R = GsymReaderV1::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
-}
-
/// Check if a file starts with the GSYM magic bytes.
static bool isGSYMFile(StringRef Filename) {
auto BuffOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/false,
@@ -629,13 +592,13 @@ static void fixupFunctionInfo(const GsymReader &Reader,
static llvm::Error handleGSYMConversion(StringRef Filename,
const std::string &OutFile,
OutputAggregator &Out) {
- auto ReaderOrErr = openGsymFile(Filename);
+ auto ReaderOrErr = GsymReader::openFile(Filename);
if (!ReaderOrErr)
return ReaderOrErr.takeError();
auto &Reader = **ReaderOrErr;
std::unique_ptr<GsymCreator> CreatorPtr;
- if (ForceCreatorVersion == CreatorVersion::V2)
+ if (ForceOutputVersion == OutputVersion::V2)
CreatorPtr = std::make_unique<GsymCreatorV2>(Quiet);
else
CreatorPtr = std::make_unique<GsymCreatorV1>(Quiet);
@@ -653,7 +616,7 @@ static llvm::Error handleGSYMConversion(StringRef Filename,
if (auto Err = Creator.finalize(Out))
return Err;
- Out << "Output file (" << (ForceCreatorVersion == CreatorVersion::V2 ? "v2" : "v1")
+ Out << "Output file (" << (ForceOutputVersion == OutputVersion::V2 ? "v2" : "v1")
<< "): " << OutFile << "\n";
if (auto Err = Creator.save(OutFile, llvm::endianness::native))
@@ -851,7 +814,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
llvm::StringRef{StrippedInputLine}.split(' ');
if (GSYMPath != CurrentGSYMPath) {
- auto GsymOrErr = openGsymFile(GSYMPath);
+ auto GsymOrErr = GsymReader::openFile(GSYMPath);
if (!GsymOrErr)
error(GSYMPath, GsymOrErr.takeError());
CurrentGsym = std::move(*GsymOrErr);
@@ -876,7 +839,7 @@ int llvm_gsymutil_main(int argc, char **argv, const llvm::ToolContext &) {
// Dump or access data inside GSYM files
for (const auto &GSYMPath : InputFilenames) {
- auto Gsym = openGsymFile(GSYMPath);
+ auto Gsym = GsymReader::openFile(GSYMPath);
if (!Gsym)
error(GSYMPath, Gsym.takeError());
>From cafa90a32cfbfad4f19f5ff990ec3f430a343c8d Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Thu, 26 Mar 2026 18:05:11 -0700
Subject: [PATCH 34/45] Rename GSYM_VERSION to GSYM_VERSION_1, explicitly check
V1 in reader
Rename GSYM_VERSION to GSYM_VERSION_1 to match the GSYM_VERSION_2
pattern. Also update GsymReader::openFile() and copyBuffer() to
explicitly check for V1 instead of falling through, returning an
error for unsupported versions.
User prompts since last commit:
1. "In GsymReaderBase::openFile(), for the V1 case, it should not just
use the 'else', but actually check for V1, and for any other
version, return an error"
2. "Which is more consistent with the code base? 'if {return} else if
{return} else return', or the current 'if {return} if {return}
return'?"
3. "Change GSYM_VERSION to GSYM_VERSION_1 so that we have
GSYM_VERSION_2, GSYM_VERSION_1, and that's a pattern for future
versions."
4. "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/Header.h | 2 +-
llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp | 2 +-
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 24 ++++++++++++++--------
llvm/lib/DebugInfo/GSYM/Header.cpp | 2 +-
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 24 +++++++++++-----------
5 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/Header.h b/llvm/include/llvm/DebugInfo/GSYM/Header.h
index 317b9bbdca80e..c326e7a309e35 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/Header.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/Header.h
@@ -24,7 +24,7 @@ class FileWriter;
constexpr uint32_t GSYM_MAGIC = 0x4753594d; // 'GSYM'
constexpr uint32_t GSYM_CIGAM = 0x4d595347; // 'MYSG'
-constexpr uint32_t GSYM_VERSION = 1;
+constexpr uint32_t GSYM_VERSION_1 = 1;
constexpr size_t GSYM_MAX_UUID_SIZE = 20;
/// The GSYM header.
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
index 8d6702c165212..56ceeb3c75405 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV1.cpp
@@ -40,7 +40,7 @@ llvm::Error GsymCreatorV1::encode(FileWriter &O) const {
return Err;
Header Hdr;
Hdr.Magic = GSYM_MAGIC;
- Hdr.Version = GSYM_VERSION;
+ Hdr.Version = GSYM_VERSION_1;
Hdr.AddrOffSize = getAddressOffsetSize();
Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
Hdr.BaseAddress = *BaseAddress;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 104059debd2bf..ee3b227ee2748 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -61,10 +61,14 @@ GsymReader::openFile(StringRef Path) {
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReaderV1::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ if (*VersionOrErr == GSYM_VERSION_1) {
+ auto R = GsymReaderV1::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
+ }
+ return createStringError(std::errc::invalid_argument,
+ "unsupported GSYM version %u", *VersionOrErr);
}
llvm::Expected<std::unique_ptr<GsymReader>>
@@ -78,10 +82,14 @@ GsymReader::copyBuffer(StringRef Bytes) {
return R.takeError();
return std::make_unique<GsymReaderV2>(std::move(*R));
}
- auto R = GsymReaderV1::copyBuffer(Bytes);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ if (*VersionOrErr == GSYM_VERSION_1) {
+ auto R = GsymReaderV1::copyBuffer(Bytes);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
+ }
+ return createStringError(std::errc::invalid_argument,
+ "unsupported GSYM version %u", *VersionOrErr);
}
std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
diff --git a/llvm/lib/DebugInfo/GSYM/Header.cpp b/llvm/lib/DebugInfo/GSYM/Header.cpp
index 0b3fb9c498949..14128d5ada267 100644
--- a/llvm/lib/DebugInfo/GSYM/Header.cpp
+++ b/llvm/lib/DebugInfo/GSYM/Header.cpp
@@ -42,7 +42,7 @@ llvm::Error Header::checkForError() const {
if (Magic != GSYM_MAGIC)
return createStringError(std::errc::invalid_argument,
"invalid GSYM magic 0x%8.8x", Magic);
- if (Version != GSYM_VERSION)
+ if (Version != GSYM_VERSION_1)
return createStringError(std::errc::invalid_argument,
"unsupported GSYM version %u", Version);
switch (AddrOffSize) {
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 2d95ad51802f5..57d30680760dd 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -865,7 +865,7 @@ static void TestHeaderDecodeError(StringRef Bytes,
// Populate a GSYM header with valid values.
static void InitHeader(Header &H) {
H.Magic = GSYM_MAGIC;
- H.Version = GSYM_VERSION;
+ H.Version = GSYM_VERSION_1;
H.AddrOffSize = 4;
H.UUIDSize = 16;
H.BaseAddress = 0x1000;
@@ -910,7 +910,7 @@ TEST(GSYMTest, TestHeaderDecodeErrors) {
FW.fixup32(GSYM_MAGIC, offsetof(Header, Magic));
FW.fixup32(12, offsetof(Header, Version));
TestHeaderDecodeError(OutStrm.str(), "unsupported GSYM version 12");
- FW.fixup32(GSYM_VERSION, offsetof(Header, Version));
+ FW.fixup32(GSYM_VERSION_1, offsetof(Header, Version));
FW.fixup32(12, offsetof(Header, AddrOffSize));
TestHeaderDecodeError(OutStrm.str(), "invalid address offset size 12");
FW.fixup32(4, offsetof(Header, AddrOffSize));
@@ -1048,11 +1048,11 @@ TEST(GSYMTest, TestGsymCreatorV11ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
@@ -1071,11 +1071,11 @@ TEST(GSYMTest, TestGsymCreatorV12ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
@@ -1094,11 +1094,11 @@ TEST(GSYMTest, TestGsymCreatorV14ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
@@ -1117,11 +1117,11 @@ TEST(GSYMTest, TestGsymCreatorV18ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
2, // NumAddresses
ArrayRef<uint8_t>(UUID));
@@ -2449,11 +2449,11 @@ TEST(GSYMTest, TestGsymCreatorV1MultipleSymbolsWithNoSize) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
1, // NumAddresses
ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION, AddrOffSize,
+ TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
BaseAddr,
1, // NumAddresses
ArrayRef<uint8_t>(UUID));
>From 30928a3f27ee1aa61d029101711c5c6b9ef8c560 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 08:26:06 -0700
Subject: [PATCH 35/45] Refactor parseGlobalDataEntries to return map instead
of output params
Return Expected<std::map<GlobalInfoType, GlobalData>> instead of taking
10 output parameters. The map is used as a local in parse() with named
const GlobalData & references for each required section.
Prompts:
- "What do you think about the following: Add std::map<GlobalInfoDataType,
GlobalData> GlobalDataEntries into GsymReader, add accessors like
GetGlobalDataFileOffset(GlobalInfoType) and SetGlobalDataFileOffset(
GlobalInfoType, offset) into GsymReader (the base class), then use these
methods in parseGlobalDataEntries(). Can these methods be used in other
places? Will such refactoring make the code simpler?"
- "Let's do std::map<GlobalInfoType, GlobalData> as a local variable in
parse(). That way the output parameters are eliminated. We can table the
accessors for later."
- "const auto &AddrOffsetsGD don't use auto, use the actual offset type,
uint64_t I think"
- "memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 106 +++++++++--------------
1 file changed, 40 insertions(+), 66 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index c159d6a465f5f..3c37b0055df9c 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -10,6 +10,7 @@
#include <assert.h>
#include <inttypes.h>
+#include <map>
#include "llvm/DebugInfo/GSYM/GlobalData.h"
#include "llvm/Support/DataExtractor.h"
@@ -49,15 +50,11 @@ GsymReaderV2::create(std::unique_ptr<MemoryBuffer> &MB) {
return std::move(GR);
}
-/// Helper to parse GlobalData entries and populate section offsets/sizes.
-static llvm::Error
+/// Helper to parse GlobalData entries from a GSYM V2 file.
+static llvm::Expected<std::map<GlobalInfoType, GlobalData>>
parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
- uint64_t BufSize,
- uint64_t &AddrOffsetsOff, uint64_t &AddrOffsetsSize,
- uint64_t &AddrInfoOffsetsOff, uint64_t &AddrInfoOffsetsSize,
- uint64_t &StringTableOff, uint64_t &StringTableSize,
- uint64_t &FileTableOff, uint64_t &FileTableSize,
- uint64_t &FuncInfoOff, uint64_t &FuncInfoSize) {
+ uint64_t BufSize) {
+ std::map<GlobalInfoType, GlobalData> Sections;
while (Offset + sizeof(GlobalData) <= BufSize) {
auto GDOrErr = GlobalData::decode(DE, Offset);
if (!GDOrErr)
@@ -65,7 +62,7 @@ parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
const GlobalData &GD = *GDOrErr;
if (GD.Type == GlobalInfoType::EndOfList)
- return Error::success();
+ return Sections;
if (GD.FileOffset + GD.FileSize > BufSize)
return createStringError(std::errc::invalid_argument,
@@ -75,32 +72,7 @@ parseGlobalDataEntries(DataExtractor &DE, uint64_t &Offset,
static_cast<uint32_t>(GD.Type), GD.FileOffset,
GD.FileSize, BufSize);
- switch (GD.Type) {
- case GlobalInfoType::AddrOffsets:
- AddrOffsetsOff = GD.FileOffset;
- AddrOffsetsSize = GD.FileSize;
- break;
- case GlobalInfoType::AddrInfoOffsets:
- AddrInfoOffsetsOff = GD.FileOffset;
- AddrInfoOffsetsSize = GD.FileSize;
- break;
- case GlobalInfoType::StringTable:
- StringTableOff = GD.FileOffset;
- StringTableSize = GD.FileSize;
- break;
- case GlobalInfoType::FileTable:
- FileTableOff = GD.FileOffset;
- FileTableSize = GD.FileSize;
- break;
- case GlobalInfoType::FunctionInfo:
- FuncInfoOff = GD.FileOffset;
- FuncInfoSize = GD.FileSize;
- break;
- case GlobalInfoType::UUID:
- break;
- case GlobalInfoType::EndOfList:
- llvm_unreachable("handled above");
- }
+ Sections[GD.Type] = GD;
}
return createStringError(std::errc::invalid_argument,
"GlobalData array not terminated by EndOfList");
@@ -154,73 +126,74 @@ llvm::Error GsymReaderV2::parse() {
// Parse GlobalData entries to find section locations.
uint64_t Offset = sizeof(HeaderV2);
- uint64_t AddrOffsetsOff = 0, AddrOffsetsSize = 0;
- uint64_t AddrInfoOffsetsOff = 0, AddrInfoOffsetsSize = 0;
- uint64_t StringTableOff = 0, StringTableSize = 0;
- uint64_t FileTableOff = 0, FileTableSize = 0;
- uint64_t FuncInfoOff = 0, FuncInfoSize = 0;
-
- if (auto Err = parseGlobalDataEntries(
- DE, Offset, BufSize, AddrOffsetsOff, AddrOffsetsSize,
- AddrInfoOffsetsOff, AddrInfoOffsetsSize, StringTableOff,
- StringTableSize, FileTableOff, FileTableSize, FuncInfoOff,
- FuncInfoSize))
- return Err;
+ auto SectionsOrErr = parseGlobalDataEntries(DE, Offset, BufSize);
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+ auto &Sections = *SectionsOrErr;
- if (!AddrOffsetsSize)
+ if (!Sections.count(GlobalInfoType::AddrOffsets))
return createStringError(std::errc::invalid_argument,
"missing AddrOffsets section");
- if (!AddrInfoOffsetsSize)
+ if (!Sections.count(GlobalInfoType::AddrInfoOffsets))
return createStringError(std::errc::invalid_argument,
"missing AddrInfoOffsets section");
- if (!StringTableSize)
+ if (!Sections.count(GlobalInfoType::StringTable))
return createStringError(std::errc::invalid_argument,
"missing StringTable section");
- if (!FileTableSize)
+ if (!Sections.count(GlobalInfoType::FileTable))
return createStringError(std::errc::invalid_argument,
"missing FileTable section");
- if (AddrOffsetsSize !=
+ const GlobalData &AddrOffsetsGD = Sections[GlobalInfoType::AddrOffsets];
+ const GlobalData &AddrInfoOffsetsGD = Sections[GlobalInfoType::AddrInfoOffsets];
+ const GlobalData &StringTableGD = Sections[GlobalInfoType::StringTable];
+ const GlobalData &FileTableGD = Sections[GlobalInfoType::FileTable];
+
+ if (AddrOffsetsGD.FileSize !=
static_cast<uint64_t>(Hdr->NumAddresses) * Hdr->AddrOffSize)
return createStringError(std::errc::invalid_argument,
"AddrOffsets section size mismatch");
- if (AddrInfoOffsetsSize !=
+ if (AddrInfoOffsetsGD.FileSize !=
static_cast<uint64_t>(Hdr->NumAddresses) * Hdr->AddrInfoOffSize)
return createStringError(std::errc::invalid_argument,
"AddrInfoOffsets section size mismatch");
if (!Swap) {
AddrOffsets = ArrayRef<uint8_t>(
- reinterpret_cast<const uint8_t *>(Buf.data() + AddrOffsetsOff),
- AddrOffsetsSize);
+ reinterpret_cast<const uint8_t *>(Buf.data() +
+ AddrOffsetsGD.FileOffset),
+ AddrOffsetsGD.FileSize);
if (Hdr->AddrInfoOffSize == 4) {
AddrInfoOffsets = ArrayRef<uint32_t>(
- reinterpret_cast<const uint32_t *>(Buf.data() + AddrInfoOffsetsOff),
+ reinterpret_cast<const uint32_t *>(Buf.data() +
+ AddrInfoOffsetsGD.FileOffset),
Hdr->NumAddresses);
} else {
return createStringError(std::errc::not_supported,
"8-byte AddrInfoOffsets not yet supported");
}
- if (FileTableSize < 4)
+ if (FileTableGD.FileSize < 4)
return createStringError(std::errc::invalid_argument,
"FileTable section too small");
uint32_t NumFiles;
- memcpy(&NumFiles, Buf.data() + FileTableOff, 4);
- if (FileTableSize < 4 + NumFiles * sizeof(FileEntry))
+ memcpy(&NumFiles, Buf.data() + FileTableGD.FileOffset, 4);
+ if (FileTableGD.FileSize < 4 + NumFiles * sizeof(FileEntry))
return createStringError(std::errc::invalid_argument,
"FileTable section too small for %u files",
NumFiles);
Files = ArrayRef<FileEntry>(
- reinterpret_cast<const FileEntry *>(Buf.data() + FileTableOff + 4),
+ reinterpret_cast<const FileEntry *>(Buf.data() +
+ FileTableGD.FileOffset + 4),
NumFiles);
- StrTab.Data = Buf.substr(StringTableOff, StringTableSize);
+ StrTab.Data = Buf.substr(StringTableGD.FileOffset,
+ StringTableGD.FileSize);
} else {
- uint64_t AOff = AddrOffsetsOff;
- Swap->AddrOffsets.resize(AddrOffsetsSize);
+ uint64_t AOff = AddrOffsetsGD.FileOffset;
+ Swap->AddrOffsets.resize(AddrOffsetsGD.FileSize);
switch (Hdr->AddrOffSize) {
case 1:
if (!DE.getU8(&AOff, Swap->AddrOffsets.data(), Hdr->NumAddresses))
@@ -252,7 +225,7 @@ llvm::Error GsymReaderV2::parse() {
AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);
if (Hdr->AddrInfoOffSize == 4) {
- uint64_t AIOff = AddrInfoOffsetsOff;
+ uint64_t AIOff = AddrInfoOffsetsGD.FileOffset;
Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);
if (!DE.getU32(&AIOff, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))
return createStringError(std::errc::invalid_argument,
@@ -263,7 +236,7 @@ llvm::Error GsymReaderV2::parse() {
"8-byte AddrInfoOffsets not yet supported");
}
- uint64_t FTOff = FileTableOff;
+ uint64_t FTOff = FileTableGD.FileOffset;
uint32_t NumFiles = DE.getU32(&FTOff);
if (NumFiles > 0) {
Swap->Files.resize(NumFiles);
@@ -273,7 +246,8 @@ llvm::Error GsymReaderV2::parse() {
Files = ArrayRef<FileEntry>(Swap->Files);
}
- StrTab.Data = Buf.substr(StringTableOff, StringTableSize);
+ StrTab.Data = Buf.substr(StringTableGD.FileOffset,
+ StringTableGD.FileSize);
}
return Error::success();
}
>From 6b37ac9b7f8e4e8bb58ad6c20dd65b8fb79dc578 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 08:47:07 -0700
Subject: [PATCH 36/45] Simplify OFFSET header in GsymReaderV2::dump()
Replace switch statement with format("%2u", CachedAddrOffSize * 8) to
compute the bit-width directly, eliminating the "??" default case. Note
that a 1-byte offset size now prints right-aligned as " 8" rather than "8 ".
Prompts:
- "I fixed the ?? in GsymReaderV2::dump(). Memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 11 +----------
1 file changed, 1 insertion(+), 10 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index 3c37b0055df9c..3a0f4cfefb3e9 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -261,16 +261,7 @@ void GsymReaderV2::dump(raw_ostream &OS) {
const auto &Header = getHeader();
OS << Header << "\n";
OS << "Address Table:\n";
- OS << "INDEX OFFSET";
-
- switch (CachedAddrOffSize) {
- case 1: OS << "8 "; break;
- case 2: OS << "16"; break;
- case 4: OS << "32"; break;
- case 8: OS << "64"; break;
- default: OS << "??"; break;
- }
- OS << " (ADDRESS)\n";
+ OS << "INDEX OFFSET" << format("%2u", CachedAddrOffSize * 8) << " (ADDRESS)\n";
OS << "====== =============================== \n";
for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
OS << format("[%4u] ", I);
>From 9e1963f8189d4eecdc6f3e322c8a1c70f8b56937 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 10:07:57 -0700
Subject: [PATCH 37/45] GlobalData::encode() returns llvm::Error, validates
Padding==0
Also refactor GsymCreatorV2::encode() to build GlobalData entries in a
SmallVector and write them in a single loop that checks the returned Error,
rather than adding an if-error-return block at every encode() call site.
Prompts:
- "In GlobalData::encode(), change return type to llvm::Error, and check
that Padding should be zero."
- "Is there any way to make the repeated callsites look nicer?"
- "I think that might be better" (re: SmallVector + loop approach)
- "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 3 +-
llvm/lib/DebugInfo/GSYM/GlobalData.cpp | 7 ++++-
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 28 +++++++++++--------
3 files changed, 24 insertions(+), 14 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index ddc5bc18728bb..5aa1378eb39dd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -59,7 +59,8 @@ struct GlobalData {
/// Encode this GlobalData entry into a FileWriter stream.
///
/// \param O The binary stream to write the data to.
- LLVM_ABI void encode(FileWriter &O) const;
+ /// \returns An error if the entry is invalid (e.g., non-zero padding).
+ LLVM_ABI llvm::Error encode(FileWriter &O) const;
/// Decode a GlobalData entry from a binary data stream.
///
diff --git a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
index 7429e85c51e6c..0f0b59a7e6ddc 100644
--- a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
@@ -13,11 +13,16 @@
using namespace llvm;
using namespace gsym;
-void GlobalData::encode(FileWriter &O) const {
+llvm::Error GlobalData::encode(FileWriter &O) const {
+ if (Padding != 0)
+ return createStringError(std::errc::invalid_argument,
+ "GlobalData entry padding must be zero, got %u",
+ Padding);
O.writeU32(static_cast<uint32_t>(Type));
O.writeU32(Padding);
O.writeU64(FileOffset);
O.writeU64(FileSize);
+ return Error::success();
}
llvm::Expected<GlobalData> GlobalData::decode(DataExtractor &Data,
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 321a4ea8c9784..7c11a1b7930f5 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -138,19 +138,23 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
return Err;
// Write GlobalData entries.
+ SmallVector<GlobalData, 8> GDEntries;
if (HasUUID)
- GlobalData{GlobalInfoType::UUID, 0, UUIDOffset, UUIDSectionSize}.encode(O);
- GlobalData{GlobalInfoType::AddrOffsets, 0,
- AddrOffsetsOffset, AddrOffsetsSize}.encode(O);
- GlobalData{GlobalInfoType::AddrInfoOffsets, 0,
- AddrInfoOffsetsOffset, AddrInfoOffsetsSize}.encode(O);
- GlobalData{GlobalInfoType::FileTable, 0,
- FileTableOffset, FileTableSize}.encode(O);
- GlobalData{GlobalInfoType::StringTable, 0,
- StringTableOffset, StringTableSize}.encode(O);
- GlobalData{GlobalInfoType::FunctionInfo, 0,
- FISectionOffset, FISectionSize}.encode(O);
- GlobalData{GlobalInfoType::EndOfList, 0, 0, 0}.encode(O);
+ GDEntries.push_back({GlobalInfoType::UUID, 0, UUIDOffset, UUIDSectionSize});
+ GDEntries.push_back({GlobalInfoType::AddrOffsets, 0,
+ AddrOffsetsOffset, AddrOffsetsSize});
+ GDEntries.push_back({GlobalInfoType::AddrInfoOffsets, 0,
+ AddrInfoOffsetsOffset, AddrInfoOffsetsSize});
+ GDEntries.push_back({GlobalInfoType::FileTable, 0,
+ FileTableOffset, FileTableSize});
+ GDEntries.push_back({GlobalInfoType::StringTable, 0,
+ StringTableOffset, StringTableSize});
+ GDEntries.push_back({GlobalInfoType::FunctionInfo, 0,
+ FISectionOffset, FISectionSize});
+ GDEntries.push_back({GlobalInfoType::EndOfList, 0, 0, 0});
+ for (const GlobalData &GD : GDEntries)
+ if (auto Err = GD.encode(O))
+ return Err;
// Write UUID section.
if (HasUUID) {
>From 75dbd6ee980eb197a56d19c14b2d5ac6e91c86c7 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 10:17:11 -0700
Subject: [PATCH 38/45] Restore original function order in GsymCreator.cpp to
minimize diff
Functions like copyString, insertString, getString, addFunctionInfo,
forEachFunctionInfo, etc. were moved earlier in the file during the
V1/V2 refactoring but their content didn't change. Restoring the
original order eliminates ~140 lines of spurious add/remove in the
diff against the base commit.
New functions (validateForEncoding, encodeAddrOffsets, encodeFileTable)
are placed where the removed calculateHeaderAndTableSize was.
Prompts:
- "In GsymCreator, there are copyString(), insertString(), getString(),
and other functions, which are just moving their location in the file.
Can you move them in the file, so that the diff view don't show them
as added/removed?"
- "Can you check the same problem in all other files that we changed?"
- "memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 240 ++++++++++++------------
1 file changed, 120 insertions(+), 120 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 8ef4ac210ed64..e4054cd54e092 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -68,6 +68,100 @@ llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
return encode(O);
}
+void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
+ if (Funcs.size() < 2)
+ return;
+
+ llvm::stable_sort(Funcs);
+ std::vector<FunctionInfo> TopLevelFuncs;
+ TopLevelFuncs.emplace_back(std::move(Funcs.front()));
+
+ for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
+ FunctionInfo &TopFunc = TopLevelFuncs.back();
+ FunctionInfo &MatchFunc = Funcs[Idx];
+ if (TopFunc.Range == MatchFunc.Range) {
+ if (!TopFunc.MergedFunctions)
+ TopFunc.MergedFunctions = MergedFunctionsInfo();
+ else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
+ continue;
+ TopFunc.MergedFunctions->MergedFunctions.emplace_back(
+ std::move(MatchFunc));
+ } else
+ TopLevelFuncs.emplace_back(std::move(MatchFunc));
+ }
+
+ uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
+ if (mergedCount != 0)
+ Out << "Have " << mergedCount
+ << " merged functions as children of other functions\n";
+
+ std::swap(Funcs, TopLevelFuncs);
+}
+
+llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
+ std::lock_guard<std::mutex> Guard(Mutex);
+ if (Finalized)
+ return createStringError(std::errc::invalid_argument, "already finalized");
+ Finalized = true;
+
+ StrTab.finalizeInOrder();
+
+ const auto NumBefore = Funcs.size();
+ if (!IsSegment) {
+ if (NumBefore > 1) {
+ llvm::stable_sort(Funcs);
+ std::vector<FunctionInfo> FinalizedFuncs;
+ FinalizedFuncs.reserve(Funcs.size());
+ FinalizedFuncs.emplace_back(std::move(Funcs.front()));
+ for (size_t Idx=1; Idx < NumBefore; ++Idx) {
+ FunctionInfo &Prev = FinalizedFuncs.back();
+ FunctionInfo &Curr = Funcs[Idx];
+ const bool ranges_equal = Prev.Range == Curr.Range;
+ if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
+ if (ranges_equal) {
+ if (!(Prev == Curr)) {
+ if (Prev.hasRichInfo() && Curr.hasRichInfo())
+ Out.Report(
+ "Duplicate address ranges with different debug info.",
+ [&](raw_ostream &OS) {
+ OS << "warning: same address range contains "
+ "different debug "
+ << "info. Removing:\n"
+ << Prev << "\nIn favor of this one:\n"
+ << Curr << "\n";
+ });
+ std::swap(Prev, Curr);
+ }
+ } else {
+ Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
+ OS << "warning: function ranges overlap:\n"
+ << Prev << "\n"
+ << Curr << "\n";
+ });
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ } else {
+ if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
+ std::swap(Prev, Curr);
+ } else {
+ FinalizedFuncs.emplace_back(std::move(Curr));
+ }
+ }
+ }
+ std::swap(Funcs, FinalizedFuncs);
+ }
+ if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
+ if (auto Range =
+ ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
+ Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
+ }
+ }
+ Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
+ << Funcs.size() << " total\n";
+ }
+ return Error::success();
+}
+
uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
if (StrOff == 0)
return 0;
@@ -149,6 +243,32 @@ std::optional<uint64_t> GsymCreator::getBaseAddress() const {
return getFirstFunctionAddress();
}
+uint64_t GsymCreator::getMaxAddressOffset() const {
+ switch (getAddressOffsetSize()) {
+ case 1: return UINT8_MAX;
+ case 2: return UINT16_MAX;
+ case 4: return UINT32_MAX;
+ case 8: return UINT64_MAX;
+ }
+ llvm_unreachable("invalid address offset");
+}
+
+uint8_t GsymCreator::getAddressOffsetSize() const {
+ const std::optional<uint64_t> BaseAddress = getBaseAddress();
+ const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
+ if (BaseAddress && LastFuncAddr) {
+ const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
+ if (AddrDelta <= UINT8_MAX)
+ return 1;
+ else if (AddrDelta <= UINT16_MAX)
+ return 2;
+ else if (AddrDelta <= UINT32_MAX)
+ return 4;
+ return 8;
+ }
+ return 1;
+}
+
llvm::Error GsymCreator::validateForEncoding(
std::optional<uint64_t> &BaseAddr) const {
if (Funcs.empty())
@@ -199,126 +319,6 @@ llvm::Error GsymCreator::encodeFileTable(FileWriter &O) const {
return Error::success();
}
-uint64_t GsymCreator::getMaxAddressOffset() const {
- switch (getAddressOffsetSize()) {
- case 1: return UINT8_MAX;
- case 2: return UINT16_MAX;
- case 4: return UINT32_MAX;
- case 8: return UINT64_MAX;
- }
- llvm_unreachable("invalid address offset");
-}
-
-uint8_t GsymCreator::getAddressOffsetSize() const {
- const std::optional<uint64_t> BaseAddress = getBaseAddress();
- const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
- if (BaseAddress && LastFuncAddr) {
- const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
- if (AddrDelta <= UINT8_MAX)
- return 1;
- else if (AddrDelta <= UINT16_MAX)
- return 2;
- else if (AddrDelta <= UINT32_MAX)
- return 4;
- return 8;
- }
- return 1;
-}
-
-void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
- if (Funcs.size() < 2)
- return;
-
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> TopLevelFuncs;
- TopLevelFuncs.emplace_back(std::move(Funcs.front()));
-
- for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
- FunctionInfo &TopFunc = TopLevelFuncs.back();
- FunctionInfo &MatchFunc = Funcs[Idx];
- if (TopFunc.Range == MatchFunc.Range) {
- if (!TopFunc.MergedFunctions)
- TopFunc.MergedFunctions = MergedFunctionsInfo();
- else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
- continue;
- TopFunc.MergedFunctions->MergedFunctions.emplace_back(
- std::move(MatchFunc));
- } else
- TopLevelFuncs.emplace_back(std::move(MatchFunc));
- }
-
- uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
- if (mergedCount != 0)
- Out << "Have " << mergedCount
- << " merged functions as children of other functions\n";
-
- std::swap(Funcs, TopLevelFuncs);
-}
-
-llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
- std::lock_guard<std::mutex> Guard(Mutex);
- if (Finalized)
- return createStringError(std::errc::invalid_argument, "already finalized");
- Finalized = true;
-
- StrTab.finalizeInOrder();
-
- const auto NumBefore = Funcs.size();
- if (!IsSegment) {
- if (NumBefore > 1) {
- llvm::stable_sort(Funcs);
- std::vector<FunctionInfo> FinalizedFuncs;
- FinalizedFuncs.reserve(Funcs.size());
- FinalizedFuncs.emplace_back(std::move(Funcs.front()));
- for (size_t Idx=1; Idx < NumBefore; ++Idx) {
- FunctionInfo &Prev = FinalizedFuncs.back();
- FunctionInfo &Curr = Funcs[Idx];
- const bool ranges_equal = Prev.Range == Curr.Range;
- if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
- if (ranges_equal) {
- if (!(Prev == Curr)) {
- if (Prev.hasRichInfo() && Curr.hasRichInfo())
- Out.Report(
- "Duplicate address ranges with different debug info.",
- [&](raw_ostream &OS) {
- OS << "warning: same address range contains "
- "different debug "
- << "info. Removing:\n"
- << Prev << "\nIn favor of this one:\n"
- << Curr << "\n";
- });
- std::swap(Prev, Curr);
- }
- } else {
- Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
- OS << "warning: function ranges overlap:\n"
- << Prev << "\n"
- << Curr << "\n";
- });
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- } else {
- if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
- std::swap(Prev, Curr);
- } else {
- FinalizedFuncs.emplace_back(std::move(Curr));
- }
- }
- }
- std::swap(Funcs, FinalizedFuncs);
- }
- if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
- if (auto Range =
- ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
- Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};
- }
- }
- Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
- << Funcs.size() << " total\n";
- }
- return Error::success();
-}
-
void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
II.Name = copyString(SrcGC, II.Name);
II.CallFile = copyFile(SrcGC, II.CallFile);
>From f2853fa8e6931780210877089eb522e4bfdf9d14 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 10:36:26 -0700
Subject: [PATCH 39/45] Restore accidentally stripped inline comments in kept
functions
Restore inline comments that were removed from functions that were
moved (but not modified) during the V1/V2 refactoring.
GsymCreator.cpp: 112 comment lines restored across 14 functions
(insertFile, insertFileEntry, copyFile, prepareMergedFunctions,
finalize, copyString, insertString, IsValidTextAddress,
getFirstFunctionAddress, getLastFunctionAddress, fixupInlineInfo,
copyFunctionInfo, saveSegments, createSegment).
GsymReader.cpp: 18 comment lines restored across 3 functions
(getFunctionInfoDataForAddress, lookupAll, dump).
GSYMTest.cpp and llvm-gsymutil.cpp were audited and found to not
need changes (class renames and relocated comments respectively).
User messages since last commit:
- "Can you tell me why for GsymCreator.cpp 117 lines are added, but only ~75 lines are 'restored'?"
- "memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 117 +++++++++++++++++++++++-
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 20 +++-
2 files changed, 134 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index e4054cd54e092..51f4609e94747 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -29,6 +29,10 @@ GsymCreator::GsymCreator(bool Quiet)
uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
+ // We must insert the strings first, then call the FileEntry constructor.
+ // If we inline the insertString() function call into the constructor, the
+ // call order is undefined due to parameter lists not having any ordering
+ // requirements.
const uint32_t Dir = insertString(directory);
const uint32_t Base = insertString(filename);
return insertFileEntry(FileEntry(Dir, Base));
@@ -37,6 +41,7 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
std::lock_guard<std::mutex> Guard(Mutex);
const auto NextIndex = Files.size();
+ // Find FE in hash map and insert if not present.
auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
if (R.second)
Files.emplace_back(FE);
@@ -44,9 +49,13 @@ uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
}
uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
+ // File index zero is reserved for a FileEntry with no directory and no
+ // filename. Any other file and we need to copy the strings for the directory
+ // and filename.
if (FileIdx == 0)
return 0;
const FileEntry SrcFE = SrcGC.Files[FileIdx];
+ // Copy the strings for the file and then add the newly converted file entry.
uint32_t Dir =
SrcFE.Dir == 0
? 0
@@ -69,28 +78,41 @@ llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,
}
void GsymCreator::prepareMergedFunctions(OutputAggregator &Out) {
+ // Nothing to do if we have less than 2 functions.
if (Funcs.size() < 2)
return;
+ // Sort the function infos by address range first, preserving input order
llvm::stable_sort(Funcs);
std::vector<FunctionInfo> TopLevelFuncs;
+
+ // Add the first function info to the top level functions
TopLevelFuncs.emplace_back(std::move(Funcs.front()));
+ // Now if the next function info has the same address range as the top level,
+ // then merge it into the top level function, otherwise add it to the top
+ // level.
for (size_t Idx = 1; Idx < Funcs.size(); ++Idx) {
FunctionInfo &TopFunc = TopLevelFuncs.back();
FunctionInfo &MatchFunc = Funcs[Idx];
if (TopFunc.Range == MatchFunc.Range) {
+ // Both have the same range - add the 2nd func as a child of the 1st func
if (!TopFunc.MergedFunctions)
TopFunc.MergedFunctions = MergedFunctionsInfo();
+ // Avoid adding duplicate functions to MergedFunctions. Since functions
+ // are already ordered within the Funcs array, we can just check equality
+ // against the last function in the merged array.
else if (TopFunc.MergedFunctions->MergedFunctions.back() == MatchFunc)
continue;
TopFunc.MergedFunctions->MergedFunctions.emplace_back(
std::move(MatchFunc));
} else
+ // No match, add the function as a top-level function
TopLevelFuncs.emplace_back(std::move(MatchFunc));
}
uint32_t mergedCount = Funcs.size() - TopLevelFuncs.size();
+ // If any functions were merged, print a message about it.
if (mergedCount != 0)
Out << "Have " << mergedCount
<< " merged functions as children of other functions\n";
@@ -104,11 +126,39 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
return createStringError(std::errc::invalid_argument, "already finalized");
Finalized = true;
+ // Don't let the string table indexes change by finalizing in order.
StrTab.finalizeInOrder();
+ // Remove duplicates function infos that have both entries from debug info
+ // (DWARF or Breakpad) and entries from the SymbolTable.
+ //
+ // Also handle overlapping function. Usually there shouldn't be any, but they
+ // can and do happen in some rare cases.
+ //
+ // (a) (b) (c)
+ // ^ ^ ^ ^
+ // |X |Y |X ^ |X
+ // | | | |Y | ^
+ // | | | v v |Y
+ // v v v v
+ //
+ // In (a) and (b), Y is ignored and X will be reported for the full range.
+ // In (c), both functions will be included in the result and lookups for an
+ // address in the intersection will return Y because of binary search.
+ //
+ // Note that in case of (b), we cannot include Y in the result because then
+ // we wouldn't find any function for range (end of Y, end of X)
+ // with binary search
+
const auto NumBefore = Funcs.size();
+ // Only sort and unique if this isn't a segment. If this is a segment we
+ // already finalized the main GsymCreator with all of the function infos
+ // and then the already sorted and uniqued function infos were added to this
+ // object.
if (!IsSegment) {
if (NumBefore > 1) {
+ // Sort function infos so we can emit sorted functions. Use stable sort to
+ // ensure determinism.
llvm::stable_sort(Funcs);
std::vector<FunctionInfo> FinalizedFuncs;
FinalizedFuncs.reserve(Funcs.size());
@@ -116,9 +166,19 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
for (size_t Idx=1; Idx < NumBefore; ++Idx) {
FunctionInfo &Prev = FinalizedFuncs.back();
FunctionInfo &Curr = Funcs[Idx];
+ // Empty ranges won't intersect, but we still need to
+ // catch the case where we have multiple symbols at the
+ // same address and coalesce them.
const bool ranges_equal = Prev.Range == Curr.Range;
if (ranges_equal || Prev.Range.intersects(Curr.Range)) {
+ // Overlapping ranges or empty identical ranges.
if (ranges_equal) {
+ // Same address range. Check if one is from debug
+ // info and the other is from a symbol table. If
+ // so, then keep the one with debug info. Our
+ // sorting guarantees that entries with matching
+ // address ranges that have debug info are last in
+ // the sort.
if (!(Prev == Curr)) {
if (Prev.hasRichInfo() && Curr.hasRichInfo())
Out.Report(
@@ -130,10 +190,15 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
<< Prev << "\nIn favor of this one:\n"
<< Curr << "\n";
});
+
+ // We want to swap the current entry with the previous since
+ // later entries with the same range always have more debug info
+ // or different debug info.
std::swap(Prev, Curr);
}
} else {
Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {
+ // print warnings about overlaps
OS << "warning: function ranges overlap:\n"
<< Prev << "\n"
<< Curr << "\n";
@@ -142,6 +207,9 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
}
} else {
if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {
+ // Symbols on macOS don't have address ranges, so if the range
+ // doesn't match and the size is zero, then we replace the empty
+ // symbol function info with the current one.
std::swap(Prev, Curr);
} else {
FinalizedFuncs.emplace_back(std::move(Curr));
@@ -150,6 +218,11 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
}
std::swap(Funcs, FinalizedFuncs);
}
+ // If our last function info entry doesn't have a size and if we have valid
+ // text ranges, we should set the size of the last entry since any search for
+ // a high address might match our last entry. By fixing up this size, we can
+ // help ensure we don't cause lookups to always return the last symbol that
+ // has no size when doing lookups.
if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {
if (auto Range =
ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {
@@ -163,6 +236,7 @@ llvm::Error GsymCreator::finalize(OutputAggregator &Out) {
}
uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
+ // String offset at zero is always the empty string, no copying needed.
if (StrOff == 0)
return 0;
return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
@@ -172,14 +246,24 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
if (S.empty())
return 0;
+ // The hash can be calculated outside the lock.
CachedHashStringRef CHStr(S);
std::lock_guard<std::mutex> Guard(Mutex);
if (Copy) {
+ // We need to provide backing storage for the string if requested
+ // since StringTableBuilder stores references to strings. Any string
+ // that comes from a section in an object file doesn't need to be
+ // copied, but any string created by code will need to be copied.
+ // This allows GsymCreator to be really fast when parsing DWARF and
+ // other object files as most strings don't need to be copied.
if (!StrTab.contains(CHStr))
CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
CHStr.hash()};
}
const uint32_t StrOff = StrTab.add(CHStr);
+ // Save a mapping of string offsets to the cached string reference in case
+ // we need to segment the GSYM file and copy string from one string table to
+ // another.
StringOffsetMap.try_emplace(StrOff, CHStr);
return StrOff;
}
@@ -222,16 +306,24 @@ size_t GsymCreator::getNumFunctionInfos() const {
bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {
if (ValidTextRanges)
return ValidTextRanges->contains(Addr);
- return true;
+ return true; // No valid text ranges have been set, so accept all ranges.
}
std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
+ // If we have finalized then Funcs are sorted. If we are a segment then
+ // Funcs will be sorted as well since function infos get added from an
+ // already finalized GsymCreator object where its functions were sorted and
+ // uniqued.
if ((Finalized || IsSegment) && !Funcs.empty())
return std::optional<uint64_t>(Funcs.front().startAddress());
return std::nullopt;
}
std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
+ // If we have finalized then Funcs are sorted. If we are a segment then
+ // Funcs will be sorted as well since function infos get added from an
+ // already finalized GsymCreator object where its functions were sorted and
+ // uniqued.
if ((Finalized || IsSegment) && !Funcs.empty())
return std::optional<uint64_t>(Funcs.back().startAddress());
return std::nullopt;
@@ -319,6 +411,9 @@ llvm::Error GsymCreator::encodeFileTable(FileWriter &O) const {
return Error::success();
}
+// This function takes an InlineInfo class that was copy constructed from an
+// InlineInfo from the \a SrcGC and updates all members that point to strings
+// and files to point to strings and files from this GsymCreator.
void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
II.Name = copyString(SrcGC, II.Name);
II.CallFile = copyFile(SrcGC, II.CallFile);
@@ -328,13 +423,20 @@ void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC,
size_t FuncIdx) {
+ // To copy a function info we need to copy any files and strings over into
+ // this GsymCreator and then copy the function info and update the string
+ // table offsets to match the new offsets.
const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
FunctionInfo DstFI;
DstFI.Range = SrcFI.Range;
DstFI.Name = copyString(SrcGC, SrcFI.Name);
+ // Copy the line table if there is one.
if (SrcFI.OptLineTable) {
+ // Copy the entire line table.
DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
+ // Fixup all LineEntry::File entries which are indexes in the file table
+ // from SrcGC and must be converted to file indexes from this GsymCreator.
LineTable &DstLT = DstFI.OptLineTable.value();
const size_t NumLines = DstLT.size();
for (size_t I=0; I<NumLines; ++I) {
@@ -342,8 +444,11 @@ uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC,
LE.File = copyFile(SrcGC, LE.File);
}
}
+ // Copy the inline information if needed.
if (SrcFI.Inline) {
+ // Make a copy of the source inline information.
DstFI.Inline = SrcFI.Inline.value();
+ // Fixup all strings and files in the copied inline information.
fixupInlineInfo(SrcGC, *DstFI.Inline);
}
std::lock_guard<std::mutex> Guard(Mutex);
@@ -366,7 +471,8 @@ llvm::Error GsymCreator::saveSegments(StringRef Path,
if (ExpectedGC) {
GsymCreator *GC = ExpectedGC->get();
if (!GC)
- break;
+ break; // We had no more functions to encode.
+ // Don't collect any messages at all
OutputAggregator Out(nullptr);
llvm::Error Err = GC->finalize(Out);
if (Err)
@@ -389,17 +495,24 @@ llvm::Error GsymCreator::saveSegments(StringRef Path,
llvm::Expected<std::unique_ptr<GsymCreator>>
GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+ // No function entries, return empty unique pointer
if (FuncIdx >= Funcs.size())
return std::unique_ptr<GsymCreator>();
std::unique_ptr<GsymCreator> GC = createNew(/*Quiet=*/true);
+ // Tell the creator that this is a segment.
GC->setIsSegment();
+ // Set the base address if there is one.
if (BaseAddress)
GC->setBaseAddress(*BaseAddress);
+ // Copy the UUID value from this object into the new creator.
GC->setUUID(UUID);
const size_t NumFuncs = Funcs.size();
+ // Track how big the function infos are for the current segment so we can
+ // emit segments that are close to the requested size. It is quick math to
+ // determine the current header and tables sizes, so we can do that each loop.
uint64_t SegmentFuncInfosSize = 0;
for (; FuncIdx < NumFuncs; ++FuncIdx) {
const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index ee3b227ee2748..3275bc46b0146 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -145,20 +145,32 @@ GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,
if (!ExpectedAddrIdx)
return ExpectedAddrIdx.takeError();
const uint64_t FirstAddrIdx = *ExpectedAddrIdx;
+ // The AddrIdx is the first index of the function info entries that match
+ // \a Addr. We need to iterate over all function info objects that start with
+ // the same address until we find a range that contains \a Addr.
std::optional<uint64_t> FirstFuncStartAddr;
const size_t NumAddresses = getNumAddresses();
for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {
auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);
+ // If there was an error, return the error.
if (!ExpextedData)
return ExpextedData;
+ // Remember the first function start address if it hasn't already been set.
+ // If it is already valid, check to see if it matches the first function
+ // start address and only continue if it matches.
if (FirstFuncStartAddr.has_value()) {
if (*FirstFuncStartAddr != FuncStartAddr)
- break;
+ break; // Done with consecutive function entries with same address.
} else {
FirstFuncStartAddr = FuncStartAddr;
}
+ // Make sure the current function address range contains \a Addr.
+ // Some symbols on Darwin don't have valid sizes, so if we run into a
+ // symbol with zero size, then we have found a match for our address.
+ // The first thing in the encoding of a FunctionInfo object is the function
+ // size.
uint64_t Offset = 0;
uint32_t FuncSize = ExpextedData->getU32(&Offset);
if (FuncSize == 0 ||
@@ -224,18 +236,23 @@ GsymReader::lookupAll(uint64_t Addr) const {
std::vector<LookupResult> Results;
std::optional<DataExtractor> MergedFunctionsData;
+ // First perform a lookup to get the primary function info result.
auto MainResult = lookup(Addr, &MergedFunctionsData);
if (!MainResult)
return MainResult.takeError();
+ // Add the main result as the first entry.
Results.push_back(std::move(*MainResult));
+ // Now process any merged functions data that was found during the lookup.
if (MergedFunctionsData) {
+ // Get data extractors for each merged function.
auto ExpectedMergedFuncExtractors =
MergedFunctionsInfo::getFuncsDataExtractors(*MergedFunctionsData);
if (!ExpectedMergedFuncExtractors)
return ExpectedMergedFuncExtractors.takeError();
+ // Process each merged function data.
for (DataExtractor &MergedData : *ExpectedMergedFuncExtractors) {
if (auto FI = FunctionInfo::lookup(MergedData, *this,
MainResult->FuncRange.start(), Addr)) {
@@ -347,6 +364,7 @@ void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {
void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {
if (FE) {
+ // If we have the file from index 0, then don't print anything
if (FE->Dir == 0 && FE->Base == 0)
return;
StringRef Dir = getString(FE->Dir);
>From 51806dcce7c40f1ed7a737d6a5fcc9ccbaf95bea Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 10:45:18 -0700
Subject: [PATCH 40/45] Restore accidentally removed blank lines in kept code
Restore 5 blank lines that were stripped from unchanged code during
the V1/V2 refactoring:
- GsymReader.h: before `class GsymReader {`
- GsymReader.cpp: end of getAddressIndex(), two in dump(FunctionInfo),
one in dump(CallSiteInfo)
User messages since last commit:
- "There are some removals of single empty lines without other changes
next to it. Can you find which file/line has these issues?"
- "Did you not remove the empty line between 276-277 and 278-279 in
the new file (old line 446 and 449)?"
- "Can you update your search criteria so that they can actually return
these 4 locations (and so maybe there are more locations)?"
- "Yes please"
- "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 1 +
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index c1a5697399be8..e26a177e72abd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -45,6 +45,7 @@ namespace gsym {
///
/// GsymReader objects must use one of the static functions to create an
/// instance: GsymReader::openFile(...) and GsymReader::copyBuffer(...).
+
class GsymReader {
protected:
std::unique_ptr<MemoryBuffer> MemBuffer;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 3275bc46b0146..2edaa58147b6d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -136,6 +136,7 @@ GsymReader::getAddressIndex(const uint64_t Addr) const {
}
return createStringError(std::errc::invalid_argument,
"address 0x%" PRIx64 " is not in GSYM", Addr);
+
}
llvm::Expected<DataExtractor>
@@ -274,8 +275,10 @@ void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI,
dump(OS, *FI.OptLineTable, Indent);
if (FI.Inline)
dump(OS, *FI.Inline, Indent);
+
if (FI.CallSites)
dump(OS, *FI.CallSites, Indent);
+
if (FI.MergedFunctions) {
assert(Indent == 0 && "MergedFunctionsInfo should only exist at top level");
dump(OS, *FI.MergedFunctions);
@@ -304,6 +307,7 @@ void GsymReader::dump(raw_ostream &OS, const CallSiteInfo &CSI) {
else {
if (CSI.Flags & CallSiteInfo::Flags::InternalCall)
addFlag("InternalCall");
+
if (CSI.Flags & CallSiteInfo::Flags::ExternalCall)
addFlag("ExternalCall");
}
>From 17a0ef693634c45673f38ded0976ce910df5f60f Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 10:52:02 -0700
Subject: [PATCH 41/45] Revert formatting-only changes in kept code
Revert 2 cosmetic-only changes that should be done separately:
- GsymCreator.cpp: undo line-wrapping of copyFunctionInfo signature
- Opts.td: restore original trailing whitespace on merged_functions_filter
User messages since last commit:
- "The change around copyFunctionInfo in GsymCreator.cpp is just
formatting change, right?"
- "Comparing to the base commit, can you see if there other such
formatting-only changes? Give me file/line"
- "Revert both changes. Cosmetic/formating changes should be done
separately."
- "memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp | 3 +--
llvm/tools/llvm-gsymutil/Opts.td | 4 ++--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 51f4609e94747..de94d7e5f601d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -421,8 +421,7 @@ void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
fixupInlineInfo(SrcGC, ChildII);
}
-uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC,
- size_t FuncIdx) {
+uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
// To copy a function info we need to copy any files and strings over into
// this GsymCreator and then copy the function info and update the string
// table offsets to match the new offsets.
diff --git a/llvm/tools/llvm-gsymutil/Opts.td b/llvm/tools/llvm-gsymutil/Opts.td
index 214beb9f39547..40f9c69ab95d5 100644
--- a/llvm/tools/llvm-gsymutil/Opts.td
+++ b/llvm/tools/llvm-gsymutil/Opts.td
@@ -46,8 +46,8 @@ def addresses_from_stdin :
defm json_summary_file :
Eq<"json-summary-file",
"Output a categorized summary of errors into the JSON file specified.">;
-defm merged_functions_filter :
- Eq<"merged-functions-filter",
+defm merged_functions_filter :
+ Eq<"merged-functions-filter",
"When used with --address/--addresses-from-stdin and --merged-functions,\n"
"filters the merged functions output to only show functions matching any of the specified regex patterns.\n"
"Can be specified multiple times.">;
>From fd4d267c9d3d3c04db7915b51553ac6fc7843f38 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Fri, 27 Mar 2026 11:05:47 -0700
Subject: [PATCH 42/45] Left-align offset size in GsymReaderV2::dump() header
Change format from %2u to %-2u so the bit-width number is
left-aligned (e.g. "8 " instead of " 8"), keeping the
(ADDRESS) text properly positioned.
User messages since last commit:
- "I just changed GsymReaderV2::dump() to print the offset size to be
left-aligned, can you verify that?"
- "memorize, commit, push"
---
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index 3a0f4cfefb3e9..09dd7cbb3757d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -261,7 +261,7 @@ void GsymReaderV2::dump(raw_ostream &OS) {
const auto &Header = getHeader();
OS << Header << "\n";
OS << "Address Table:\n";
- OS << "INDEX OFFSET" << format("%2u", CachedAddrOffSize * 8) << " (ADDRESS)\n";
+ OS << "INDEX OFFSET" << format("%-2u", CachedAddrOffSize * 8) << " (ADDRESS)\n";
OS << "====== =============================== \n";
for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
OS << format("[%4u] ", I);
>From cb00a542cd81db6e931086fbccf90d6938d69b7a Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Mon, 30 Mar 2026 09:00:21 -0700
Subject: [PATCH 43/45] Fix comments and switches
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 7 +-
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 23 +++---
llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h | 3 +-
llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp | 2 +-
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 71 +++++++++++--------
5 files changed, 58 insertions(+), 48 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index 5aa1378eb39dd..b63b431ffc9b5 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -22,11 +22,10 @@ class FileWriter;
enum class GlobalInfoType : uint32_t {
EndOfList = 0u,
// The address offsets table. It's a list of function addresses subtracted by
- // HeaderV2::BaseAddress, hence "offset".
+ // the base address, hence "offset".
//
- // This table and the address
- // info offsets table (see below) have the same number of items. The items are
- // 1-1 mapped.
+ // This table and the address info offsets table (see below) have the same
+ // number of items. The items are 1-1 mapped.
//
// Given an address, this table is used to do a binary search to find the
// index into the address info offsets table, where the location of the
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index e26a177e72abd..ed75bb7229bda 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -283,12 +283,12 @@ class GsymReader {
/// Get an appropriate address info offsets array.
///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte offsets from the base address. The table is stored internally as a
- /// array of bytes that are in the correct endianness. When we access this
- /// table we must get an array that matches those sizes. This templatized
- /// helper function is used when accessing address offsets in the AddrOffsets
- /// member variable.
+ /// The address table in the GSYM file is stored as array of 1-8 byte offsets
+ /// from the base address. The table is stored internally as an array of
+ /// bytes that are in the correct endianness. When we access this table we
+ /// must get an array that matches those sizes. This templatized helper
+ /// function is used when accessing address offsets in the AddrOffsets member
+ /// variable.
///
/// \returns An ArrayRef of an appropriate address offset size.
template <class T> ArrayRef<T>
@@ -299,12 +299,11 @@ class GsymReader {
/// Get an appropriate address from the address table.
///
- /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8
- /// byte address offsets from the base address. The table is stored
- /// internally as a array of bytes that are in the correct endianness. In
- /// order to extract an address from the address table we must access the
- /// address offset using the correct size and then add it to the base
- /// address.
+ /// The address table in the GSYM file is stored as array of 1-8 byte address
+ /// offsets from the base address. The table is stored internally as an array
+ /// of bytes that are in the correct endianness. In order to extract an
+ /// address from the address table we must access the address offset using
+ /// the correct size and then add it to the base address.
///
/// \param Index An index into the AddrOffsets array.
/// \returns An virtual address that matches the original object file for the
diff --git a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
index 653afbf30c67f..80aabefefe7d4 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/HeaderV2.h
@@ -27,7 +27,8 @@ constexpr uint32_t GSYM_VERSION_2 = 2;
/// Encoding format for the string table.
enum class StrTableEncodingType : uint8_t {
- /// A list of NULL-terminated strings (same as V1).
+ /// A list of NULL-terminated strings (same as V1). The first string at
+ /// offset zero must be the empty C string.
Default = 0,
};
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
index 2145f23570b35..96206e8b11f41 100644
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -735,7 +735,7 @@ llvm::Error DwarfTransformer::verify(StringRef GsymPath,
OutputAggregator &Out) {
Out << "Verifying GSYM file \"" << GsymPath << "\":\n";
- auto GsymOrErr = GsymReader::openFile(GsymPath);
+ llvm::Expected<std::unique_ptr<GsymReader>> GsymOrErr = GsymReader::openFile(GsymPath);
if (!GsymOrErr)
return GsymOrErr.takeError();
auto &Gsym = *GsymOrErr;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 2edaa58147b6d..9f570a7100c36 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -29,8 +29,9 @@ GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)
GsymReader::GsymReader(GsymReader &&RHS) = default;
-/// Detect the GSYM version from raw bytes.
-static Expected<uint16_t> detectVersion(StringRef Data) {
+/// Check magic bytes and return the GSYM version from raw bytes.
+/// If magic bytes are invalid, return error.
+static Expected<uint16_t> checkMagicAndDetectVersion(StringRef Data) {
if (Data.size() < 6)
return createStringError(std::errc::invalid_argument,
"data too small to be a GSYM file");
@@ -52,44 +53,54 @@ GsymReader::openFile(StringRef Path) {
if (!BufOrErr)
return createStringError(BufOrErr.getError(), "failed to open '%s'",
Path.str().c_str());
- auto VersionOrErr = detectVersion((*BufOrErr)->getBuffer());
+ auto VersionOrErr = checkMagicAndDetectVersion((*BufOrErr)->getBuffer());
if (!VersionOrErr)
return VersionOrErr.takeError();
- if (*VersionOrErr == GSYM_VERSION_2) {
- auto R = GsymReaderV2::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV2>(std::move(*R));
- }
- if (*VersionOrErr == GSYM_VERSION_1) {
- auto R = GsymReaderV1::openFile(Path);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ switch (*VersionOrErr) {
+ case GSYM_VERSION_1:
+ {
+ auto R = GsymReaderV1::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
+ }
+ case GSYM_VERSION_2:
+ {
+ auto R = GsymReaderV2::openFile(Path);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*R));
+ }
+ default:
+ return createStringError(std::errc::invalid_argument,
+ "unsupported GSYM version %u", *VersionOrErr);
}
- return createStringError(std::errc::invalid_argument,
- "unsupported GSYM version %u", *VersionOrErr);
}
llvm::Expected<std::unique_ptr<GsymReader>>
GsymReader::copyBuffer(StringRef Bytes) {
- auto VersionOrErr = detectVersion(Bytes);
+ auto VersionOrErr = checkMagicAndDetectVersion(Bytes);
if (!VersionOrErr)
return VersionOrErr.takeError();
- if (*VersionOrErr == GSYM_VERSION_2) {
- auto R = GsymReaderV2::copyBuffer(Bytes);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV2>(std::move(*R));
- }
- if (*VersionOrErr == GSYM_VERSION_1) {
- auto R = GsymReaderV1::copyBuffer(Bytes);
- if (!R)
- return R.takeError();
- return std::make_unique<GsymReaderV1>(std::move(*R));
+ switch (*VersionOrErr) {
+ case GSYM_VERSION_1:
+ {
+ auto R = GsymReaderV1::copyBuffer(Bytes);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV1>(std::move(*R));
+ }
+ case GSYM_VERSION_2:
+ {
+ auto R = GsymReaderV2::copyBuffer(Bytes);
+ if (!R)
+ return R.takeError();
+ return std::make_unique<GsymReaderV2>(std::move(*R));
+ }
+ default:
+ return createStringError(std::errc::invalid_argument,
+ "unsupported GSYM version %u", *VersionOrErr);
}
- return createStringError(std::errc::invalid_argument,
- "unsupported GSYM version %u", *VersionOrErr);
}
std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
>From 88ae5f48674bb72f5d0dd13d82cc7ece87d9c9c7 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Mon, 30 Mar 2026 09:21:55 -0700
Subject: [PATCH 44/45] Remove Padding field from GlobalData, simplify encode()
Remove the uint32_t Padding field from GlobalData since we use
DataExtractor encode/decode (not mmap+cast), making alignment
padding unnecessary. Change encode() return type from llvm::Error
to void. Inline GlobalData construction at callsite instead of
using SmallVector. Each GlobalData entry shrinks from 24 to 20
bytes on disk.
User messages:
- "Is it correct that in the GlobalData, we can remove the Padding field?"
- "Let's remove it entirely. Can you implement the change and then run tests?"
- "Can you also change the return type of GlobalData::encode() to void?"
- "Change the callsite to not use vector, and instead directly construct GlobalData inline and call encode()"
- "Instead of the magic number 20, is it better to use sizeof(GlobalData)?" (Answer: no, sizeof is 24 due to compiler alignment padding)
- "20 is fine."
- "memorize, commit, push"
- "Which comments can I resolve?"
- "Can you click 'Resolve conversation' for me for these three comments?"
- "Done. Can you update the commit's message to include the above messages from me?"
---
llvm/include/llvm/DebugInfo/GSYM/GlobalData.h | 4 +--
llvm/lib/DebugInfo/GSYM/GlobalData.cpp | 15 ++--------
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 30 ++++++++-----------
llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp | 7 ++---
4 files changed, 18 insertions(+), 38 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
index b63b431ffc9b5..80dec19507a12 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GlobalData.h
@@ -51,15 +51,13 @@ enum class GlobalInfoType : uint32_t {
/// sections can be located at arbitrary file offsets.
struct GlobalData {
GlobalInfoType Type;
- uint32_t Padding;
uint64_t FileOffset;
uint64_t FileSize;
/// Encode this GlobalData entry into a FileWriter stream.
///
/// \param O The binary stream to write the data to.
- /// \returns An error if the entry is invalid (e.g., non-zero padding).
- LLVM_ABI llvm::Error encode(FileWriter &O) const;
+ LLVM_ABI void encode(FileWriter &O) const;
/// Decode a GlobalData entry from a binary data stream.
///
diff --git a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
index 0f0b59a7e6ddc..5bf4719974933 100644
--- a/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GlobalData.cpp
@@ -13,31 +13,20 @@
using namespace llvm;
using namespace gsym;
-llvm::Error GlobalData::encode(FileWriter &O) const {
- if (Padding != 0)
- return createStringError(std::errc::invalid_argument,
- "GlobalData entry padding must be zero, got %u",
- Padding);
+void GlobalData::encode(FileWriter &O) const {
O.writeU32(static_cast<uint32_t>(Type));
- O.writeU32(Padding);
O.writeU64(FileOffset);
O.writeU64(FileSize);
- return Error::success();
}
llvm::Expected<GlobalData> GlobalData::decode(DataExtractor &Data,
uint64_t &Offset) {
- if (!Data.isValidOffsetForDataOfSize(Offset, 24))
+ if (!Data.isValidOffsetForDataOfSize(Offset, 20))
return createStringError(std::errc::invalid_argument,
"not enough data for a GlobalData entry");
GlobalData GD;
GD.Type = static_cast<GlobalInfoType>(Data.getU32(&Offset));
- GD.Padding = Data.getU32(&Offset);
GD.FileOffset = Data.getU64(&Offset);
GD.FileSize = Data.getU64(&Offset);
- if (GD.Padding != 0)
- return createStringError(std::errc::invalid_argument,
- "GlobalData entry padding must be zero, got %u",
- GD.Padding);
return GD;
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index 7c11a1b7930f5..fb94796fe8aa0 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -69,7 +69,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
const bool HasUUID = !UUID.empty();
const uint32_t NumGlobalDataEntries = 5 + (HasUUID ? 1 : 0) + 1;
const uint64_t GlobalDataArraySize =
- static_cast<uint64_t>(NumGlobalDataEntries) * 24;
+ static_cast<uint64_t>(NumGlobalDataEntries) * 20;
const uint64_t HeaderSize = sizeof(HeaderV2);
uint64_t CurOffset = HeaderSize + GlobalDataArraySize;
@@ -138,23 +138,19 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
return Err;
// Write GlobalData entries.
- SmallVector<GlobalData, 8> GDEntries;
if (HasUUID)
- GDEntries.push_back({GlobalInfoType::UUID, 0, UUIDOffset, UUIDSectionSize});
- GDEntries.push_back({GlobalInfoType::AddrOffsets, 0,
- AddrOffsetsOffset, AddrOffsetsSize});
- GDEntries.push_back({GlobalInfoType::AddrInfoOffsets, 0,
- AddrInfoOffsetsOffset, AddrInfoOffsetsSize});
- GDEntries.push_back({GlobalInfoType::FileTable, 0,
- FileTableOffset, FileTableSize});
- GDEntries.push_back({GlobalInfoType::StringTable, 0,
- StringTableOffset, StringTableSize});
- GDEntries.push_back({GlobalInfoType::FunctionInfo, 0,
- FISectionOffset, FISectionSize});
- GDEntries.push_back({GlobalInfoType::EndOfList, 0, 0, 0});
- for (const GlobalData &GD : GDEntries)
- if (auto Err = GD.encode(O))
- return Err;
+ GlobalData{GlobalInfoType::UUID, UUIDOffset, UUIDSectionSize}.encode(O);
+ GlobalData{GlobalInfoType::AddrOffsets,
+ AddrOffsetsOffset, AddrOffsetsSize}.encode(O);
+ GlobalData{GlobalInfoType::AddrInfoOffsets,
+ AddrInfoOffsetsOffset, AddrInfoOffsetsSize}.encode(O);
+ GlobalData{GlobalInfoType::FileTable,
+ FileTableOffset, FileTableSize}.encode(O);
+ GlobalData{GlobalInfoType::StringTable,
+ StringTableOffset, StringTableSize}.encode(O);
+ GlobalData{GlobalInfoType::FunctionInfo,
+ FISectionOffset, FISectionSize}.encode(O);
+ GlobalData{GlobalInfoType::EndOfList, 0, 0}.encode(O);
// Write UUID section.
if (HasUUID) {
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
index 3d62848afb4fb..70eeef705f585 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMV2Test.cpp
@@ -66,7 +66,6 @@ static GlobalData decodeGlobalDataEntry(StringRef Data, uint64_t &Offset,
DataExtractor DE(Data, ByteOrder == llvm::endianness::little, 8);
GlobalData GD;
GD.Type = static_cast<GlobalInfoType>(DE.getU32(&Offset));
- GD.Padding = DE.getU32(&Offset);
GD.FileOffset = DE.getU64(&Offset);
GD.FileSize = DE.getU64(&Offset);
return GD;
@@ -153,7 +152,6 @@ static void TestV2HeaderAndGlobalData(llvm::endianness ByteOrder,
while (Offset < Data.size()) {
GlobalData GD = decodeGlobalDataEntry(Data, Offset, ByteOrder);
- EXPECT_EQ(GD.Padding, 0u);
switch (GD.Type) {
case GlobalInfoType::EndOfList:
@@ -386,12 +384,12 @@ static SmallString<512> buildMinimalV2Binary(uint64_t BaseAddr,
raw_svector_ostream OS(Str);
FileWriter FW(OS, llvm::endianness::native);
- // We'll build: header (24) + GlobalData entries (6 entries * 24 = 144) +
+ // We'll build: header (24) + GlobalData entries (6 entries * 20 = 120) +
// sections. Total GlobalData entries: AddrOffsets, AddrInfoOffsets,
// StringTable, FileTable, FunctionInfo, EndOfList = 6.
constexpr uint64_t HeaderSize = 24;
constexpr uint64_t NumGlobalEntries = 6;
- constexpr uint64_t GlobalDataSize = NumGlobalEntries * 24;
+ constexpr uint64_t GlobalDataSize = NumGlobalEntries * 20;
constexpr uint8_t AddrOffSize = 1;
constexpr uint8_t AddrInfoOffSize = 4;
constexpr uint32_t NumAddresses = 1;
@@ -457,7 +455,6 @@ static SmallString<512> buildMinimalV2Binary(uint64_t BaseAddr,
// GlobalData entries.
auto writeGD = [&](GlobalInfoType Type, uint64_t Off, uint64_t Size) {
FW.writeU32(static_cast<uint32_t>(Type));
- FW.writeU32(0); // Padding
FW.writeU64(Off);
FW.writeU64(Size);
};
>From 1a6da8c60982051ef4391041b8e7744e3de39541 Mon Sep 17 00:00:00 2001
From: Roy Shi <royshi at meta.com>
Date: Mon, 30 Mar 2026 09:56:49 -0700
Subject: [PATCH 45/45] Add virtual accessors to GsymReader, refactor tests for
V1+V2
Add 5 pure virtual accessors to GsymReader base class:
getBaseAddress(), getNumAddresses(), getAddressOffsetByteSize(),
getAddressInfoOffsetByteSize(), getStringOffsetByteSize().
Implement in GsymReaderV1 and GsymReaderV2. Remove cached fields
from base class, update all call sites to use virtual accessors.
Refactor 7 V1-specific tests into template functions that work with
both GsymCreatorV1 and GsymCreatorV2, adding 7 new V2 test cases.
Fix missing O.alignTo(AddrOffSize) in GsymCreatorV2::encode() before
AddrOffsets section, exposed by 8-byte offset V2 test with UUID.
User messages:
- "K let's add the three virtual accessors, update call sites to use them, and refactor the tests so that they can be run on both V1 and V2 classes."
- "Can you also add getAddressInfoOffsetByteSize() and getStringOffsetByteSize()?"
- "I changed a few lines. Can you rebuild & retest?"
- "memorize, commit, push"
---
llvm/include/llvm/DebugInfo/GSYM/GsymReader.h | 29 +--
.../llvm/DebugInfo/GSYM/GsymReaderV1.h | 6 +
.../llvm/DebugInfo/GSYM/GsymReaderV2.h | 6 +
llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp | 1 +
llvm/lib/DebugInfo/GSYM/GsymReader.cpp | 14 +-
llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp | 15 +-
llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp | 15 +-
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp | 184 ++++++++++--------
8 files changed, 147 insertions(+), 123 deletions(-)
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index ed75bb7229bda..cf5a4735cfc4f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -55,19 +55,27 @@ class GsymReader {
ArrayRef<FileEntry> Files;
StringTable StrTab;
- /// Cached header values, populated by subclass parse().
- /// These allow shared methods to access common header fields without
- /// needing the version-specific header type.
- uint64_t CachedBaseAddress = 0;
- uint32_t CachedNumAddresses = 0;
- uint8_t CachedAddrOffSize = 0;
-
GsymReader(std::unique_ptr<MemoryBuffer> Buffer);
public:
LLVM_ABI GsymReader(GsymReader &&RHS);
virtual ~GsymReader() = default;
+ /// Get the base address of this GSYM file.
+ virtual uint64_t getBaseAddress() const = 0;
+
+ /// Get the number of addresses in this GSYM file.
+ virtual uint64_t getNumAddresses() const = 0;
+
+ /// Get the address offset byte size for this GSYM file.
+ virtual uint64_t getAddressOffsetByteSize() const = 0;
+
+ /// Get the address info offset byte size for this GSYM file.
+ virtual uint64_t getAddressInfoOffsetByteSize() const = 0;
+
+ /// Get the string offset byte size for this GSYM file.
+ virtual uint64_t getStringOffsetByteSize() const = 0;
+
/// Construct a GsymReader from a file on disk, auto-detecting the format
/// version.
///
@@ -265,11 +273,6 @@ class GsymReader {
/// \param FE The object to dump.
LLVM_ABI void dump(raw_ostream &OS, std::optional<FileEntry> FE);
- /// Get the number of addresses in this Gsym file.
- uint32_t getNumAddresses() const {
- return CachedNumAddresses;
- }
-
/// Gets an address from the address table.
///
/// Addresses are stored as offsets from the base address.
@@ -312,7 +315,7 @@ class GsymReader {
std::optional<uint64_t> addressForIndex(size_t Index) const {
ArrayRef<T> AIO = getAddrOffsets<T>();
if (Index < AIO.size())
- return AIO[Index] + CachedBaseAddress;
+ return AIO[Index] + getBaseAddress();
return std::nullopt;
}
/// Lookup an address offset in the AddrOffsets table.
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
index f489beecdf336..87cc390c6a985 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV1.h
@@ -43,6 +43,12 @@ class GsymReaderV1 : public GsymReader {
LLVM_ABI const Header &getHeader() const;
+ uint64_t getBaseAddress() const override { return getHeader().BaseAddress; }
+ uint64_t getNumAddresses() const override { return getHeader().NumAddresses; }
+ uint64_t getAddressOffsetByteSize() const override { return getHeader().AddrOffSize; }
+ uint64_t getAddressInfoOffsetByteSize() const override { return 4; }
+ uint64_t getStringOffsetByteSize() const override { return 4; }
+
using GsymReader::dump;
LLVM_ABI void dump(raw_ostream &OS) override;
};
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
index d4851140dfc2a..e15f8c5a75518 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReaderV2.h
@@ -43,6 +43,12 @@ class GsymReaderV2 : public GsymReader {
LLVM_ABI const HeaderV2 &getHeader() const;
+ uint64_t getBaseAddress() const override { return getHeader().BaseAddress; }
+ uint64_t getNumAddresses() const override { return getHeader().NumAddresses; }
+ uint64_t getAddressOffsetByteSize() const override { return getHeader().AddrOffSize; }
+ uint64_t getAddressInfoOffsetByteSize() const override { return getHeader().AddrInfoOffSize; }
+ uint64_t getStringOffsetByteSize() const override { return getHeader().StrpSize; }
+
using GsymReader::dump;
LLVM_ABI void dump(raw_ostream &OS) override;
};
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
index fb94796fe8aa0..95728b76d1ea1 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreatorV2.cpp
@@ -159,6 +159,7 @@ llvm::Error GsymCreatorV2::encode(FileWriter &O) const {
}
// Write AddrOffsets section.
+ O.alignTo(AddrOffSize);
assert(O.tell() == AddrOffsetsOffset);
encodeAddrOffsets(O, AddrOffSize, *BaseAddr);
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 9f570a7100c36..6efed33030f92 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -104,7 +104,7 @@ GsymReader::copyBuffer(StringRef Bytes) {
}
std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {
- switch (CachedAddrOffSize) {
+ switch (getAddressOffsetByteSize()) {
case 1: return addressForIndex<uint8_t>(Index);
case 2: return addressForIndex<uint16_t>(Index);
case 4: return addressForIndex<uint32_t>(Index);
@@ -121,10 +121,12 @@ std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {
Expected<uint64_t>
GsymReader::getAddressIndex(const uint64_t Addr) const {
- if (Addr >= CachedBaseAddress) {
- const uint64_t AddrOffset = Addr - CachedBaseAddress;
+ const uint64_t BaseAddress = getBaseAddress();
+ if (Addr >= BaseAddress) {
+ const uint64_t AddrOffset = Addr - BaseAddress;
std::optional<uint64_t> AddrOffsetIndex;
- switch (CachedAddrOffSize) {
+ const uint64_t AddressOffsetByteSize = getAddressOffsetByteSize();
+ switch (AddressOffsetByteSize) {
case 1:
AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);
break;
@@ -139,8 +141,8 @@ GsymReader::getAddressIndex(const uint64_t Addr) const {
break;
default:
return createStringError(std::errc::invalid_argument,
- "unsupported address offset size %u",
- CachedAddrOffSize);
+ "unsupported address offset size %" PRIu64,
+ AddressOffsetByteSize);
}
if (AddrOffsetIndex)
return *AddrOffsetIndex;
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
index 5d7c371f9cf81..9d3442f221f8e 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV1.cpp
@@ -84,11 +84,6 @@ llvm::Error GsymReaderV1::parse() {
if (Error Err = Hdr->checkForError())
return Err;
- // Populate cached header values in the base class.
- CachedBaseAddress = Hdr->BaseAddress;
- CachedNumAddresses = Hdr->NumAddresses;
- CachedAddrOffSize = Hdr->AddrOffSize;
-
if (!Swap) {
if (FileData.padToAlignment(Hdr->AddrOffSize) ||
FileData.readArray(AddrOffsets,
@@ -182,7 +177,7 @@ void GsymReaderV1::dump(raw_ostream &OS) {
OS << "Address Table:\n";
OS << "INDEX OFFSET";
- switch (CachedAddrOffSize) {
+ switch (getAddressOffsetByteSize()) {
case 1: OS << "8 "; break;
case 2: OS << "16"; break;
case 4: OS << "32"; break;
@@ -191,9 +186,9 @@ void GsymReaderV1::dump(raw_ostream &OS) {
}
OS << " (ADDRESS)\n";
OS << "====== =============================== \n";
- for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
+ for (uint32_t I = 0; I < getNumAddresses(); ++I) {
OS << format("[%4u] ", I);
- switch (CachedAddrOffSize) {
+ switch (getAddressOffsetByteSize()) {
case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
@@ -205,7 +200,7 @@ void GsymReaderV1::dump(raw_ostream &OS) {
OS << "\nAddress Info Offsets:\n";
OS << "INDEX Offset\n";
OS << "====== ==========\n";
- for (uint32_t I = 0; I < CachedNumAddresses; ++I)
+ for (uint32_t I = 0; I < getNumAddresses(); ++I)
OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
OS << "\nFiles:\n";
OS << "INDEX DIRECTORY BASENAME PATH\n";
@@ -218,7 +213,7 @@ void GsymReaderV1::dump(raw_ostream &OS) {
}
OS << "\n" << StrTab << "\n";
- for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
+ for (uint32_t I = 0; I < getNumAddresses(); ++I) {
OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
if (auto FI = getFunctionInfoAtIndex(I))
dump(OS, *FI);
diff --git a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
index 09dd7cbb3757d..440ba580fee41 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReaderV2.cpp
@@ -119,11 +119,6 @@ llvm::Error GsymReaderV2::parse() {
if (Error Err = Hdr->checkForError())
return Err;
- // Populate cached header values in the base class.
- CachedBaseAddress = Hdr->BaseAddress;
- CachedNumAddresses = Hdr->NumAddresses;
- CachedAddrOffSize = Hdr->AddrOffSize;
-
// Parse GlobalData entries to find section locations.
uint64_t Offset = sizeof(HeaderV2);
auto SectionsOrErr = parseGlobalDataEntries(DE, Offset, BufSize);
@@ -261,11 +256,11 @@ void GsymReaderV2::dump(raw_ostream &OS) {
const auto &Header = getHeader();
OS << Header << "\n";
OS << "Address Table:\n";
- OS << "INDEX OFFSET" << format("%-2u", CachedAddrOffSize * 8) << " (ADDRESS)\n";
+ OS << "INDEX OFFSET" << format("%-2u", static_cast<unsigned>(getAddressOffsetByteSize() * 8)) << " (ADDRESS)\n"; // cast: accessor returns uint64_t, but %u consumes an unsigned int
OS << "====== =============================== \n";
- for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
+ for (uint32_t I = 0; I < getNumAddresses(); ++I) {
OS << format("[%4u] ", I);
- switch (CachedAddrOffSize) {
+ switch (getAddressOffsetByteSize()) {
case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;
case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;
case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;
@@ -277,7 +272,7 @@ void GsymReaderV2::dump(raw_ostream &OS) {
OS << "\nAddress Info Offsets:\n";
OS << "INDEX Offset\n";
OS << "====== ==========\n";
- for (uint32_t I = 0; I < CachedNumAddresses; ++I)
+ for (uint32_t I = 0; I < getNumAddresses(); ++I)
OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";
OS << "\nFiles:\n";
OS << "INDEX DIRECTORY BASENAME PATH\n";
@@ -290,7 +285,7 @@ void GsymReaderV2::dump(raw_ostream &OS) {
}
OS << "\n" << StrTab << "\n";
- for (uint32_t I = 0; I < CachedNumAddresses; ++I) {
+ for (uint32_t I = 0; I < getNumAddresses(); ++I) {
OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";
if (auto FI = getFunctionInfoAtIndex(I))
dump(OS, *FI);
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 57d30680760dd..82df30942c89e 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -15,7 +15,9 @@
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/FunctionInfo.h"
#include "llvm/DebugInfo/GSYM/GsymCreatorV1.h"
+#include "llvm/DebugInfo/GSYM/GsymCreatorV2.h"
#include "llvm/DebugInfo/GSYM/GsymReaderV1.h"
+#include "llvm/DebugInfo/GSYM/GsymReaderV2.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/OutputAggregator.h"
@@ -1003,9 +1005,9 @@ TEST(GSYMTest, TestGsymCreatorV1EncodeErrors) {
"attempted to encode invalid InlineInfo object");
}
-static void Compare(const GsymCreatorV1 &GC, const GsymReaderV1 &GR) {
- // Verify that all of the data in a GsymCreatorV1 is correctly decoded from
- // a GsymReaderV1. To do this, we iterator over
+static void Compare(const GsymCreator &GC, const GsymReader &GR) {
+ // Verify that all of the data in a GsymCreator is correctly decoded from
+ // a GsymReader. To do this, we iterate over
GC.forEachFunctionInfo([&](const FunctionInfo &FI) -> bool {
auto DecodedFI = GR.getFunctionInfo(FI.Range.start());
EXPECT_TRUE(bool(DecodedFI));
@@ -1014,30 +1016,26 @@ static void Compare(const GsymCreatorV1 &GC, const GsymReaderV1 &GR) {
});
}
-static void TestEncodeDecode(const GsymCreatorV1 &GC, llvm::endianness ByteOrder,
- uint16_t Version, uint8_t AddrOffSize,
- uint64_t BaseAddress, uint32_t NumAddresses,
- ArrayRef<uint8_t> UUID) {
+static void TestEncodeDecode(const GsymCreator &GC, llvm::endianness ByteOrder,
+ uint8_t AddrOffSize, uint64_t BaseAddress,
+ uint32_t NumAddresses) {
SmallString<512> Str;
raw_svector_ostream OutStrm(Str);
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
+ auto GR = GsymReader::copyBuffer(OutStrm.str());
ASSERT_TRUE(bool(GR));
- const Header &Hdr = GR->getHeader();
- EXPECT_EQ(Hdr.Version, Version);
- EXPECT_EQ(Hdr.AddrOffSize, AddrOffSize);
- EXPECT_EQ(Hdr.UUIDSize, UUID.size());
- EXPECT_EQ(Hdr.BaseAddress, BaseAddress);
- EXPECT_EQ(Hdr.NumAddresses, NumAddresses);
- EXPECT_EQ(ArrayRef<uint8_t>(Hdr.UUID, Hdr.UUIDSize), UUID);
- Compare(GC, GR.get());
+ EXPECT_EQ((*GR)->getAddressOffsetByteSize(), AddrOffSize);
+ EXPECT_EQ((*GR)->getBaseAddress(), BaseAddress);
+ EXPECT_EQ((*GR)->getNumAddresses(), NumAddresses);
+ Compare(GC, **GR);
}
-TEST(GSYMTest, TestGsymCreatorV11ByteAddrOffsets) {
+template <typename CreatorT>
+static void TestGsymCreator1ByteAddrOffsetsImpl() {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ CreatorT GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -1048,19 +1046,21 @@ TEST(GSYMTest, TestGsymCreatorV11ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
+ TestEncodeDecode(GC, llvm::endianness::little, AddrOffSize, BaseAddr, 2);
+ TestEncodeDecode(GC, llvm::endianness::big, AddrOffSize, BaseAddr, 2);
}
-TEST(GSYMTest, TestGsymCreatorV12ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV11ByteAddrOffsets) {
+ TestGsymCreator1ByteAddrOffsetsImpl<GsymCreatorV1>();
+}
+TEST(GSYMTest, TestGsymCreatorV21ByteAddrOffsets) {
+ TestGsymCreator1ByteAddrOffsetsImpl<GsymCreatorV2>();
+}
+
+template <typename CreatorT>
+static void TestGsymCreator2ByteAddrOffsetsImpl() {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ CreatorT GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 2;
@@ -1071,19 +1071,21 @@ TEST(GSYMTest, TestGsymCreatorV12ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
+ TestEncodeDecode(GC, llvm::endianness::little, AddrOffSize, BaseAddr, 2);
+ TestEncodeDecode(GC, llvm::endianness::big, AddrOffSize, BaseAddr, 2);
}
-TEST(GSYMTest, TestGsymCreatorV14ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV12ByteAddrOffsets) {
+ TestGsymCreator2ByteAddrOffsetsImpl<GsymCreatorV1>();
+}
+TEST(GSYMTest, TestGsymCreatorV22ByteAddrOffsets) {
+ TestGsymCreator2ByteAddrOffsetsImpl<GsymCreatorV2>();
+}
+
+template <typename CreatorT>
+static void TestGsymCreator4ByteAddrOffsetsImpl() {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ CreatorT GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 4;
@@ -1094,19 +1096,21 @@ TEST(GSYMTest, TestGsymCreatorV14ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
+ TestEncodeDecode(GC, llvm::endianness::little, AddrOffSize, BaseAddr, 2);
+ TestEncodeDecode(GC, llvm::endianness::big, AddrOffSize, BaseAddr, 2);
}
-TEST(GSYMTest, TestGsymCreatorV18ByteAddrOffsets) {
+TEST(GSYMTest, TestGsymCreatorV14ByteAddrOffsets) {
+ TestGsymCreator4ByteAddrOffsetsImpl<GsymCreatorV1>();
+}
+TEST(GSYMTest, TestGsymCreatorV24ByteAddrOffsets) {
+ TestGsymCreator4ByteAddrOffsetsImpl<GsymCreatorV2>();
+}
+
+template <typename CreatorT>
+static void TestGsymCreator8ByteAddrOffsetsImpl() {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ CreatorT GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 8;
@@ -1117,33 +1121,35 @@ TEST(GSYMTest, TestGsymCreatorV18ByteAddrOffsets) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 2, // NumAddresses
- ArrayRef<uint8_t>(UUID));
+ TestEncodeDecode(GC, llvm::endianness::little, AddrOffSize, BaseAddr, 2);
+ TestEncodeDecode(GC, llvm::endianness::big, AddrOffSize, BaseAddr, 2);
+}
+
+TEST(GSYMTest, TestGsymCreatorV18ByteAddrOffsets) {
+ TestGsymCreator8ByteAddrOffsetsImpl<GsymCreatorV1>();
+}
+TEST(GSYMTest, TestGsymCreatorV28ByteAddrOffsets) {
+ TestGsymCreator8ByteAddrOffsetsImpl<GsymCreatorV2>();
}
-static void VerifyFunctionInfo(const GsymReaderV1 &GR, uint64_t Addr,
+static void VerifyFunctionInfo(const GsymReader &GR, uint64_t Addr,
const FunctionInfo &FI) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_THAT_EXPECTED(ExpFI, Succeeded());
ASSERT_EQ(FI, ExpFI.get());
}
-static void VerifyFunctionInfoError(const GsymReaderV1 &GR, uint64_t Addr,
+static void VerifyFunctionInfoError(const GsymReader &GR, uint64_t Addr,
std::string ErrMessage) {
auto ExpFI = GR.getFunctionInfo(Addr);
ASSERT_FALSE(bool(ExpFI));
checkError(ErrMessage, ExpFI.takeError());
}
-TEST(GSYMTest, TestGsymReaderV1) {
+template <typename CreatorT>
+static void TestGsymReaderImpl() {
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ CreatorT GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint64_t Func1Addr = BaseAddr;
@@ -1162,8 +1168,8 @@ TEST(GSYMTest, TestGsymReaderV1) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- if (auto ExpectedGR = GsymReaderV1::copyBuffer(OutStrm.str())) {
- const GsymReaderV1 &GR = ExpectedGR.get();
+ if (auto ExpectedGR = GsymReader::copyBuffer(OutStrm.str())) {
+ const GsymReader &GR = **ExpectedGR;
VerifyFunctionInfoError(GR, Func1Addr-1, "address 0xfff is not in GSYM");
FunctionInfo Func1(Func1Addr, FuncSize, Func1Name);
@@ -1182,13 +1188,17 @@ TEST(GSYMTest, TestGsymReaderV1) {
}
}
-TEST(GSYMTest, TestGsymLookups) {
+TEST(GSYMTest, TestGsymReaderV1) { TestGsymReaderImpl<GsymCreatorV1>(); }
+TEST(GSYMTest, TestGsymReaderV2) { TestGsymReaderImpl<GsymCreatorV2>(); }
+
+template <typename CreatorT>
+static void TestGsymLookupsImpl() {
// Test creating a GSYM file with a function that has a inline information.
// Verify that lookups work correctly. Lookups do not decode the entire
// FunctionInfo or InlineInfo, they only extract information needed for the
// lookup to happen which avoids allocations which can slow down
// symbolication.
- GsymCreatorV1 GC;
+ CreatorT GC;
FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
const auto ByteOrder = llvm::endianness::native;
FI.OptLineTable = LineTable();
@@ -1228,58 +1238,62 @@ TEST(GSYMTest, TestGsymLookups) {
FileWriter FW(OutStrm, ByteOrder);
llvm::Error Err = GC.encode(FW);
ASSERT_FALSE((bool)Err);
- Expected<GsymReaderV1> GR = GsymReaderV1::copyBuffer(OutStrm.str());
- ASSERT_TRUE(bool(GR));
+ auto GROrErr = GsymReader::copyBuffer(OutStrm.str());
+ ASSERT_TRUE(bool(GROrErr));
+ const GsymReader &GR = **GROrErr;
// Verify inline info is correct when doing lookups.
- auto LR = GR->lookup(0x1000);
+ auto LR = GR.lookup(0x1000);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5}));
- LR = GR->lookup(0x100F);
+ LR = GR.lookup(0x100F);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5, 15}));
- LR = GR->lookup(0x1010);
+ LR = GR.lookup(0x1010);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 10},
SourceLocation{"main", "/tmp", "main.c", 6, 16}));
- LR = GR->lookup(0x1012);
+ LR = GR.lookup(0x1012);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"inline2", "/tmp", "foo.h", 20},
SourceLocation{"inline1", "/tmp", "foo.h", 33, 2},
SourceLocation{"main", "/tmp", "main.c", 6, 18}));
- LR = GR->lookup(0x1014);
+ LR = GR.lookup(0x1014);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 11, 4},
SourceLocation{"main", "/tmp", "main.c", 6, 20}));
- LR = GR->lookup(0x1016);
+ LR = GR.lookup(0x1016);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"inline3", "/tmp", "foo.h", 30},
SourceLocation{"inline1", "/tmp", "foo.h", 35, 6},
SourceLocation{"main", "/tmp", "main.c", 6, 22}));
- LR = GR->lookup(0x1018);
+ LR = GR.lookup(0x1018);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 12, 8},
SourceLocation{"main", "/tmp", "main.c", 6, 24}));
- LR = GR->lookup(0x1020);
+ LR = GR.lookup(0x1020);
ASSERT_THAT_EXPECTED(LR, Succeeded());
EXPECT_THAT(LR->Locations,
testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 8, 32}));
}
+TEST(GSYMTest, TestGsymLookups) { TestGsymLookupsImpl<GsymCreatorV1>(); }
+TEST(GSYMTest, TestGsymLookupsV2) { TestGsymLookupsImpl<GsymCreatorV2>(); }
+
TEST(GSYMTest, TestDWARFFunctionWithAddresses) {
// Create a single compile unit with a single function and make sure it gets
@@ -2433,12 +2447,13 @@ TEST(GSYMTest, TestDWARFDeadStripAddr8) {
EXPECT_EQ(MethodName, "main");
}
-TEST(GSYMTest, TestGsymCreatorV1MultipleSymbolsWithNoSize) {
+template <typename CreatorT>
+static void TestGsymCreatorMultipleSymbolsWithNoSizeImpl() {
// Multiple symbols at the same address with zero size were being emitted
// instead of being combined into a single entry. This function tests to make
// sure we only get one symbol.
uint8_t UUID[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
- GsymCreatorV1 GC;
+ CreatorT GC;
GC.setUUID(UUID);
constexpr uint64_t BaseAddr = 0x1000;
constexpr uint8_t AddrOffSize = 1;
@@ -2449,14 +2464,15 @@ TEST(GSYMTest, TestGsymCreatorV1MultipleSymbolsWithNoSize) {
OutputAggregator Null(nullptr);
Error Err = GC.finalize(Null);
ASSERT_FALSE(Err);
- TestEncodeDecode(GC, llvm::endianness::little, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 1, // NumAddresses
- ArrayRef<uint8_t>(UUID));
- TestEncodeDecode(GC, llvm::endianness::big, GSYM_VERSION_1, AddrOffSize,
- BaseAddr,
- 1, // NumAddresses
- ArrayRef<uint8_t>(UUID));
+ TestEncodeDecode(GC, llvm::endianness::little, AddrOffSize, BaseAddr, 1);
+ TestEncodeDecode(GC, llvm::endianness::big, AddrOffSize, BaseAddr, 1);
+}
+
+TEST(GSYMTest, TestGsymCreatorV1MultipleSymbolsWithNoSize) {
+ TestGsymCreatorMultipleSymbolsWithNoSizeImpl<GsymCreatorV1>();
+}
+TEST(GSYMTest, TestGsymCreatorV2MultipleSymbolsWithNoSize) {
+ TestGsymCreatorMultipleSymbolsWithNoSizeImpl<GsymCreatorV2>();
}
// Helper function to quickly create a FunctionInfo in a GsymCreatorV1 for testing.
More information about the llvm-commits
mailing list