[llvm] a14e3c2 - Revert "Add the ability to segment GSYM files."
Douglas Yung via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 3 00:26:01 PST 2023
Author: Douglas Yung
Date: 2023-03-03T00:25:06-08:00
New Revision: a14e3c2aa79f31a3f644ed3e124c84d4e63ab198
URL: https://github.com/llvm/llvm-project/commit/a14e3c2aa79f31a3f644ed3e124c84d4e63ab198
DIFF: https://github.com/llvm/llvm-project/commit/a14e3c2aa79f31a3f644ed3e124c84d4e63ab198.diff
LOG: Revert "Add the ability to segment GSYM files."
This reverts commit fe758254181a824d73ad960b651b42f671f8936b.
This change was causing several buildbot failures:
- https://lab.llvm.org/buildbot/#/builders/38/builds/10105
- https://lab.llvm.org/buildbot/#/builders/192/builds/562
- https://lab.llvm.org/buildbot/#/builders/109/builds/58893
- https://lab.llvm.org/buildbot/#/builders/16/builds/44360
- https://lab.llvm.org/buildbot/#/builders/247/builds/2095
- https://lab.llvm.org/buildbot/#/builders/196/builds/27236
- https://lab.llvm.org/buildbot/#/builders/54/builds/3714
Added:
Modified:
llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
index 74811240e0b5e..84b5687597227 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
@@ -113,8 +113,6 @@ class FileWriter {
return OS;
}
- llvm::support::endianness getByteOrder() const { return ByteOrder; }
-
private:
FileWriter(const FileWriter &rhs) = delete;
void operator=(const FileWriter &rhs) = delete;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 8198ffa8621b2..713e3c239e95f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -90,10 +90,6 @@ struct FunctionInfo {
uint32_t Name; ///< String table offset in the string table.
std::optional<LineTable> OptLineTable;
std::optional<InlineInfo> Inline;
- /// If we encode a FunctionInfo during segmenting so we know its size, we can
- /// cache that encoding here so we don't need to re-encode it when saving the
- /// GSYM file.
- SmallString<32> EncodingCache;
FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
: Range(Addr, Addr + Size), Name(N) {}
@@ -144,17 +140,6 @@ struct FunctionInfo {
/// function info that was successfully written into the stream.
llvm::Expected<uint64_t> encode(FileWriter &O) const;
- /// Encode this function info into the internal byte cache and return the size
- /// in bytes.
- ///
- /// When segmenting GSYM files we need to know how big each FunctionInfo will
- /// encode into so we can generate segments of the right size. We don't want
- /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
- /// and re-use then when calling FunctionInfo::encode(...).
- ///
- /// \returns The size in bytes of the FunctionInfo if it were to be encoded
- /// into a byte stream.
- uint64_t cacheEncoding();
/// Lookup an address within a FunctionInfo object's data stream.
///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index c7cee0593c035..2eac8b43f006c 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -137,8 +137,6 @@ class GsymCreator {
StringTableBuilder StrTab;
StringSet<> StringStorage;
DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
- // Needed for mapping string offsets back to the string stored in \a StrTab.
- DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
std::vector<llvm::gsym::FileEntry> Files;
std::vector<uint8_t> UUID;
std::optional<AddressRanges> ValidTextRanges;
@@ -147,149 +145,6 @@ class GsymCreator {
bool Finalized = false;
bool Quiet;
-
- /// Get the first function start address.
- ///
- /// \returns The start address of the first FunctionInfo or std::nullopt if
- /// there are no function infos.
- std::optional<uint64_t> getFirstFunctionAddress() const;
-
- /// Get the last function address.
- ///
- /// \returns The start address of the last FunctionInfo or std::nullopt if
- /// there are no function infos.
- std::optional<uint64_t> getLastFunctionAddress() const;
-
- /// Get the base address to use for this GSYM file.
- ///
- /// \returns The base address to put into the header and to use when creating
- /// the address offset table or std::nullpt if there are no valid
- /// function infos or if the base address wasn't specified.
- std::optional<uint64_t> getBaseAddress() const;
-
- /// Get the size of an address offset in the address offset table.
- ///
- /// GSYM files store offsets from the base address in the address offset table
- /// and we store the size of the address offsets in the GSYM header. This
- /// function will calculate the size in bytes of these address offsets based
- /// on the current contents of the GSYM file.
- ///
- /// \returns The size in byets of the address offsets.
- uint8_t getAddressOffsetSize() const;
-
- /// Get the maximum address offset for the current address offset size.
- ///
- /// This is used when creating the address offset table to ensure we have
- /// values that are in range so we don't end up truncating address offsets
- /// when creating GSYM files as the code evolves.
- ///
- /// \returns The maximum address offset value that will be encoded into a GSYM
- /// file.
- uint64_t getMaxAddressOffset() const;
-
- /// Calculate the byte size of the GSYM header and tables sizes.
- ///
- /// This function will calculate the exact size in bytes of the encocded GSYM
- /// for the following items:
- /// - The GSYM header
- /// - The Address offset table
- /// - The Address info offset table
- /// - The file table
- /// - The string table
- ///
- /// This is used to help split GSYM files into segments.
- ///
- /// \returns Size in bytes the GSYM header and tables.
- uint64_t calculateHeaderAndTableSize() const;
-
- /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
- ///
- /// Copy the function info and only the needed files and strings and add a
- /// converted FunctionInfo into this object. This is used to segment GSYM
- /// files into separate files while only transferring the files and strings
- /// that are needed from \a SrcGC.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
- /// \returns The number of bytes it will take to encode the function info in
- /// this GsymCreator. This helps calculate the size of the current GSYM
- /// segment file.
- uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
-
- /// Copy a string from \a SrcGC into this object.
- ///
- /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
- /// If a string has already been copied, the uniqued string table offset will
- /// be returned, otherwise the string will be copied and a unique offset will
- /// be returned.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param StrOff The string table offset from \a SrcGC to copy.
- /// \returns The new string table offset of the string within this object.
- uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
-
- /// Copy a file from \a SrcGC into this object.
- ///
- /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
- /// consist of two string table entries, one for the directory and one for the
- /// filename, this function will copy any needed strings ensure the file is
- /// uniqued within this object. If a file already exists in this GSYM creator
- /// the uniqued index will be returned, else the stirngs will be copied and
- /// the new file index will be returned.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param FileIdx The 1 based file table index within \a SrcGC to copy. A
- /// file index of zero will always return zero as the zero is a reserved file
- /// index that means no file.
- /// \returns The new file index of the file within this object.
- uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
-
- /// Inserts a FileEntry into the file table.
- ///
- /// This is used to insert a file entry in a thread safe way into this object.
- ///
- /// \param FE A file entry object that contains valid string table offsets
- /// from this object already.
- uint32_t insertFileEntry(FileEntry FE);
-
- /// Fixup any string and file references by updating any file indexes and
- /// strings offsets in the InlineInfo parameter.
- ///
- /// When copying InlineInfo entries, we can simply make a copy of the object
- /// and then fixup the files and strings for efficiency.
- ///
- /// \param SrcGC The source gsym creator to copy from.
- /// \param II The inline info that contains file indexes and string offsets
- /// that come from \a SrcGC. The entries will be updated by coping any files
- /// and strings over into this object.
- void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
-
- /// Get the first function info address from this GSYM file.
- ///
- /// This is used to add a suffix to segmented GSYM files to indicate the first
- /// address for the first function info within the file.
- ///
- /// \returns The first function info address.
- uint64_t getFirstFunctionInfoAddress() const;
-
- /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
- ///
- /// When segemented GSYM files are saved to disk, they will use \a Path as a
- /// prefix and then have the first function info address appended to the path
- /// when each segment is saved. Each segmented GSYM file has a only the
- /// strings and files that are needed to save the function infos that are in
- /// each segment. These smaller files are easy to compress and download
- /// separately and allow for efficient lookups with very large GSYM files and
- /// segmenting them allows servers to download only the segments that are
- /// needed.
- ///
- /// \param Path The path prefix to use when saving the GSYM files.
- /// \param ByteOrder The endianness to use when saving the file.
- /// \param SegmentSize The size in bytes to segment the GSYM file into.
- llvm::Error saveSegments(StringRef Path,
- llvm::support::endianness ByteOrder,
- uint64_t SegmentSize) const;
-
public:
GsymCreator(bool Quiet = false);
@@ -297,18 +152,8 @@ class GsymCreator {
///
/// \param Path The file path to save the GSYM file to.
/// \param ByteOrder The endianness to use when saving the file.
- /// \param SegmentSize The size in bytes to segment the GSYM file into. If
- /// this option is set this function will create N segments
- /// that are all around \a SegmentSize bytes in size. This
- /// allows a very large GSYM file to be broken up into
- /// shards. Each GSYM file will have its own file table,
- /// and string table that only have the files and strings
- /// needed for the shared. If this argument has no value,
- /// a single GSYM file that contains all function
- /// information will be created.
/// \returns An error object that indicates success or failure of the save.
- llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize = std::nullopt) const;
+ llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;
/// Encode a GSYM into the file writer stream at the current position.
///
@@ -446,28 +291,6 @@ class GsymCreator {
/// Whether the transformation should be quiet, i.e. not output warnings.
bool isQuiet() const { return Quiet; }
-
-
- /// Create a segmented GSYM creator starting with function info index
- /// \a FuncIdx.
- ///
- /// This function will create a GsymCreator object that will encode into
- /// roughly \a SegmentSize bytes and return it. It is used by the private
- /// saveSegments(...) function and also is used by the GSYM unit tests to test
- /// segmenting of GSYM files. The returned GsymCreator can be finalized and
- /// encoded.
- ///
- /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
- /// into.
- /// \param [in,out] FuncIdx The index of the first function info to encode
- /// into the returned GsymCreator. This index will be updated so it can be
- /// used in subsequent calls to this function to allow more segments to be
- /// created.
- /// \returns An expected unique pointer to a GsymCreator or an error. The
- /// returned unique pointer can be NULL if there are no more functions to
- /// encode.
- llvm::Expected<std::unique_ptr<GsymCreator>>
- createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 9ccc96fbb4d5c..44e58f5220028 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -52,16 +52,6 @@ struct LookupResult {
std::string getSourceFile(uint32_t Index) const;
};
-inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
- if (LHS.LookupAddr != RHS.LookupAddr)
- return false;
- if (LHS.FuncRange != RHS.FuncRange)
- return false;
- if (LHS.FuncName != RHS.FuncName)
- return false;
- return LHS.Locations == RHS.Locations;
-}
-
raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);
} // namespace gsym
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 145a43d3b381b..51058fc09cf19 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -96,83 +96,57 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
return std::move(FI);
}
-uint64_t FunctionInfo::cacheEncoding() {
- EncodingCache.clear();
- if (!isValid())
- return 0;
- raw_svector_ostream OutStrm(EncodingCache);
- FileWriter FW(OutStrm, support::endian::system_endianness());
- llvm::Expected<uint64_t> Result = encode(FW);
- if (!Result) {
- EncodingCache.clear();
- consumeError(Result.takeError());
- return 0;
- }
- return EncodingCache.size();
-}
-
-llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
+llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
if (!isValid())
return createStringError(std::errc::invalid_argument,
"attempted to encode invalid FunctionInfo object");
// Align FunctionInfo data to a 4 byte alignment.
- Out.alignTo(4);
- const uint64_t FuncInfoOffset = Out.tell();
- // Check if we have already encoded this function info into EncodingCache.
- // This will be non empty when creating segmented GSYM files as we need to
- // precompute exactly how big FunctionInfo objects encode into so we can
- // accurately make segments of a specific size.
- if (!EncodingCache.empty() &&
- support::endian::system_endianness() == Out.getByteOrder()) {
- // We already encoded this object, just write out the bytes.
- Out.writeData(llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(),
- EncodingCache.size()));
- return FuncInfoOffset;
- }
+ O.alignTo(4);
+ const uint64_t FuncInfoOffset = O.tell();
// Write the size in bytes of this function as a uint32_t. This can be zero
// if we just have a symbol from a symbol table and that symbol has no size.
- Out.writeU32(size());
+ O.writeU32(size());
// Write the name of this function as a uint32_t string table offset.
- Out.writeU32(Name);
+ O.writeU32(Name);
if (OptLineTable) {
- Out.writeU32(InfoType::LineTableInfo);
+ O.writeU32(InfoType::LineTableInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
- Out.writeU32(0);
- const auto StartOffset = Out.tell();
- llvm::Error err = OptLineTable->encode(Out, Range.start());
+ O.writeU32(0);
+ const auto StartOffset = O.tell();
+ llvm::Error err = OptLineTable->encode(O, Range.start());
if (err)
return std::move(err);
- const auto Length = Out.tell() - StartOffset;
+ const auto Length = O.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"LineTable length is greater than UINT32_MAX");
// Fixup the size of the LineTable data with the correct size.
- Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}
// Write out the inline function info if we have any and if it is valid.
if (Inline) {
- Out.writeU32(InfoType::InlineInfo);
+ O.writeU32(InfoType::InlineInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
- Out.writeU32(0);
- const auto StartOffset = Out.tell();
- llvm::Error err = Inline->encode(Out, Range.start());
+ O.writeU32(0);
+ const auto StartOffset = O.tell();
+ llvm::Error err = Inline->encode(O, Range.start());
if (err)
return std::move(err);
- const auto Length = Out.tell() - StartOffset;
+ const auto Length = O.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"InlineInfo length is greater than UINT32_MAX");
// Fixup the size of the InlineInfo data with the correct size.
- Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}
// Terminate the data chunks with and end of list with zero size
- Out.writeU32(InfoType::EndOfList);
- Out.writeU32(0);
+ O.writeU32(InfoType::EndOfList);
+ O.writeU32(0);
return FuncInfoOffset;
}
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index d83edc103a2f5..8281938770cf1 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -34,10 +34,8 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
// requirements.
const uint32_t Dir = insertString(directory);
const uint32_t Base = insertString(filename);
- return insertFileEntry(FileEntry(Dir, Base));
-}
+ FileEntry FE(Dir, Base);
-uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
std::lock_guard<std::mutex> Guard(Mutex);
const auto NextIndex = Files.size();
// Find FE in hash map and insert if not present.
@@ -47,26 +45,8 @@ uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
return R.first->second;
}
-uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
- // File index zero is reserved for a FileEntry with no directory and no
- // filename. Any other file and we need to copy the strings for the directory
- // and filename.
- if (FileIdx == 0)
- return 0;
- const FileEntry SrcFE = SrcGC.Files[FileIdx];
- // Copy the strings for the file and then add the newly converted file entry.
- uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
- uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
- FileEntry DstFE(Dir, Base);
- return insertFileEntry(DstFE);
-}
-
-
llvm::Error GsymCreator::save(StringRef Path,
- llvm::support::endianness ByteOrder,
- std::optional<uint64_t> SegmentSize) const {
- if (SegmentSize)
- return saveSegments(Path, ByteOrder, *SegmentSize);
+ llvm::support::endianness ByteOrder) const {
std::error_code EC;
raw_fd_ostream OutStrm(Path, EC);
if (EC)
@@ -88,17 +68,16 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
return createStringError(std::errc::invalid_argument,
"too many FunctionInfos");
- std::optional<uint64_t> BaseAddress = getBaseAddress();
- // Base address should be valid if we have any functions.
- if (!BaseAddress)
- return createStringError(std::errc::invalid_argument,
- "invalid base address");
+ const uint64_t MinAddr =
+ BaseAddress ? *BaseAddress : Funcs.front().startAddress();
+ const uint64_t MaxAddr = Funcs.back().startAddress();
+ const uint64_t AddrDelta = MaxAddr - MinAddr;
Header Hdr;
Hdr.Magic = GSYM_MAGIC;
Hdr.Version = GSYM_VERSION;
- Hdr.AddrOffSize = getAddressOffsetSize();
+ Hdr.AddrOffSize = 0;
Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
- Hdr.BaseAddress = *BaseAddress;
+ Hdr.BaseAddress = MinAddr;
Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
Hdr.StrtabOffset = 0; // We will fix this up later.
Hdr.StrtabSize = 0; // We will fix this up later.
@@ -106,6 +85,15 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
if (UUID.size() > sizeof(Hdr.UUID))
return createStringError(std::errc::invalid_argument,
"invalid UUID size %u", (uint32_t)UUID.size());
+ // Set the address offset size correctly in the GSYM header.
+ if (AddrDelta <= UINT8_MAX)
+ Hdr.AddrOffSize = 1;
+ else if (AddrDelta <= UINT16_MAX)
+ Hdr.AddrOffSize = 2;
+ else if (AddrDelta <= UINT32_MAX)
+ Hdr.AddrOffSize = 4;
+ else
+ Hdr.AddrOffSize = 8;
// Copy the UUID value if we have one.
if (UUID.size() > 0)
memcpy(Hdr.UUID, UUID.data(), UUID.size());
@@ -114,16 +102,10 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
if (Err)
return Err;
- const uint64_t MaxAddressOffset = getMaxAddressOffset();
// Write out the address offsets.
O.alignTo(Hdr.AddrOffSize);
for (const auto &FuncInfo : Funcs) {
uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
- // Make sure we calculated the address offsets byte size correctly by
- // verifying the current address offset is within ranges. We have seen bugs
- // introduced when the code changes that can cause problems here so it is
- // good to catch this during testing.
- assert(AddrOffset <= MaxAddressOffset);
switch (Hdr.AddrOffSize) {
case 1:
O.writeU8(static_cast<uint8_t>(AddrOffset));
@@ -160,7 +142,7 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
O.writeU32(File.Base);
}
- // Write out the string table.
+ // Write out the sting table.
const off_t StrtabOffset = O.tell();
StrTab.write(O.get_stream());
const off_t StrtabSize = O.tell() - StrtabOffset;
@@ -318,13 +300,6 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
return Error::success();
}
-uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
- // String offset at zero is always the empty string, no copying needed.
- if (StrOff == 0)
- return 0;
- return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
-}
-
uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
if (S.empty())
return 0;
@@ -343,13 +318,7 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
CHStr.hash()};
}
- const uint32_t StrOff = StrTab.add(CHStr);
- // Save a mapping of string offsets to the cached string reference in case
- // we need to segment the GSYM file and copy string from one string table to
- // another.
- if (StringOffsetMap.count(StrOff) == 0)
- StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
- return StrOff;
+ return StrTab.add(CHStr);
}
void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
@@ -391,187 +360,3 @@ bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
std::lock_guard<std::mutex> Guard(Mutex);
return Ranges.contains(Addr);
}
-
-std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
- if (Finalized && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.front().startAddress());
- // This code gets used by the segmentation of GSYM files to help determine the
- // size of the GSYM header while continually adding new FunctionInfo objects
- // to this object, so we haven't finalized this object yet.
- if (Ranges.empty())
- return std::nullopt;
- return std::optional<uint64_t>(Ranges.begin()->start());
-}
-
-std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
- if (Finalized && !Funcs.empty())
- return std::optional<uint64_t>(Funcs.back().startAddress());
- // This code gets used by the segmentation of GSYM files to help determine the
- // size of the GSYM header while continually adding new FunctionInfo objects
- // to this object, so we haven't finalized this object yet.
- if (Ranges.empty())
- return std::nullopt;
- return std::optional<uint64_t>((Ranges.end() - 1)->end());
-}
-
-std::optional<uint64_t> GsymCreator::getBaseAddress() const {
- if (BaseAddress)
- return BaseAddress;
- return getFirstFunctionAddress();
-}
-
-uint64_t GsymCreator::getMaxAddressOffset() const {
- switch (getAddressOffsetSize()) {
- case 1: return UINT8_MAX;
- case 2: return UINT16_MAX;
- case 4: return UINT32_MAX;
- case 8: return UINT64_MAX;
- }
- llvm_unreachable("invalid address offset");
-}
-
-uint8_t GsymCreator::getAddressOffsetSize() const {
- const std::optional<uint64_t> BaseAddress = getBaseAddress();
- const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
- if (BaseAddress && LastFuncAddr) {
- const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
- if (AddrDelta <= UINT8_MAX)
- return 1;
- else if (AddrDelta <= UINT16_MAX)
- return 2;
- else if (AddrDelta <= UINT32_MAX)
- return 4;
- return 8;
- }
- return 1;
-}
-
-uint64_t GsymCreator::calculateHeaderAndTableSize() const {
- uint64_t Size = sizeof(Header);
- const size_t NumFuncs = Funcs.size();
- // Add size of address offset table
- Size += NumFuncs * getAddressOffsetSize();
- // Add size of address info offsets which are 32 bit integers in version 1.
- Size += NumFuncs * sizeof(uint32_t);
- // Add file table size
- Size += Files.size() * sizeof(FileEntry);
- // Add string table size
- Size += StrTab.getSize();
-
- return Size;
-}
-
-// This function takes a InlineInfo class that was copy constructed from an
-// InlineInfo from the \a SrcGC and updates all members that point to strings
-// and files to point to strings and files from this GsymCreator.
-void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
- II.Name = copyString(SrcGC, II.Name);
- II.CallFile = copyFile(SrcGC, II.CallFile);
- for (auto &ChildII: II.Children)
- fixupInlineInfo(SrcGC, ChildII);
-}
-
-uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
- // To copy a function info we need to copy any files and strings over into
- // this GsymCreator and then copy the function info and update the string
- // table offsets to match the new offsets.
- const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
- Ranges.insert(SrcFI.Range);
-
- FunctionInfo DstFI;
- DstFI.Range = SrcFI.Range;
- DstFI.Name = copyString(SrcGC, SrcFI.Name);
- // Copy the line table if there is one.
- if (SrcFI.OptLineTable) {
- // Copy the entire line table.
- DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
- // Fixup all LineEntry::File entries which are indexes in the the file table
- // from SrcGC and must be converted to file indexes from this GsymCreator.
- LineTable &DstLT = DstFI.OptLineTable.value();
- const size_t NumLines = DstLT.size();
- for (size_t I=0; I<NumLines; ++I) {
- LineEntry &LE = DstLT.get(I);
- LE.File = copyFile(SrcGC, LE.File);
- }
- }
- // Copy the inline information if needed.
- if (SrcFI.Inline) {
- // Make a copy of the source inline information.
- DstFI.Inline = SrcFI.Inline.value();
- // Fixup all strings and files in the copied inline information.
- fixupInlineInfo(SrcGC, *DstFI.Inline);
- }
- std::lock_guard<std::mutex> Guard(Mutex);
- Funcs.push_back(DstFI);
- return Funcs.back().cacheEncoding();
-}
-
-llvm::Error GsymCreator::saveSegments(StringRef Path,
- llvm::support::endianness ByteOrder,
- uint64_t SegmentSize) const {
- if (SegmentSize == 0)
- return createStringError(std::errc::invalid_argument,
- "invalid segment size zero");
-
- size_t FuncIdx = 0;
- const size_t NumFuncs = Funcs.size();
- while (FuncIdx < NumFuncs) {
- llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
- createSegment(SegmentSize, FuncIdx);
- if (ExpectedGC) {
- GsymCreator *GC = ExpectedGC->get();
- if (GC == NULL)
- break; // We had not more functions to encode.
- raw_null_ostream ErrorStrm;
- llvm::Error Err = GC->finalize(ErrorStrm);
- if (Err)
- return Err;
- std::string SegmentedGsymPath;
- raw_string_ostream SGP(SegmentedGsymPath);
- std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
- if (FirstFuncAddr) {
- SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
- SGP.flush();
- Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
- if (Err)
- return Err;
- }
- } else {
- return ExpectedGC.takeError();
- }
- }
- return Error::success();
-}
-
-llvm::Expected<std::unique_ptr<GsymCreator>>
-GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
- // No function entries, return empty unique pointer
- if (FuncIdx >= Funcs.size())
- return std::unique_ptr<GsymCreator>();
-
- std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
- // Set the base address if there is one.
- if (BaseAddress)
- GC->setBaseAddress(*BaseAddress);
- // Copy the UUID value from this object into the new creator.
- GC->setUUID(UUID);
- const size_t NumFuncs = Funcs.size();
- // Track how big the function infos are for the current segment so we can
- // emit segments that are close to the requested size. It is quick math to
- // determine the current header and tables sizes, so we can do that each loop.
- uint64_t SegmentFuncInfosSize = 0;
- for (; FuncIdx < NumFuncs; ++FuncIdx) {
- const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
- if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
- if (SegmentFuncInfosSize == 0)
- return createStringError(std::errc::invalid_argument,
- "a segment size of %" PRIu64 " is to small to "
- "fit any function infos, specify a larger value",
- SegmentSize);
-
- break;
- }
- SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
- }
- return GC;
-}
diff --git a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
index faefc7db1bd1b..dbd1dc2e4a716 100644
--- a/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
+++ b/llvm/tools/llvm-gsymutil/llvm-gsymutil.cpp
@@ -30,7 +30,6 @@
#include <inttypes.h>
#include <iostream>
#include <map>
-#include <optional>
#include <string>
#include <system_error>
#include <vector>
@@ -108,13 +107,6 @@ static opt<unsigned>
"number of cores on the current machine."),
cl::value_desc("n"), cat(ConversionOptions));
-static opt<uint64_t>
- SegmentSize("segment-size",
- desc("Specify the size in bytes of the size the final GSYM file "
- "should be segmented into. This allows GSYM files to be "
- "split across multiple files."),
- cl::value_desc("s"), cat(ConversionOptions));
-
static opt<bool>
Quiet("quiet", desc("Do not output warnings about the debug information"),
cat(ConversionOptions));
@@ -318,11 +310,7 @@ static llvm::Error handleObjectFile(ObjectFile &Obj,
// Save the GSYM file to disk.
support::endianness Endian =
Obj.makeTriple().isLittleEndian() ? support::little : support::big;
-
- std::optional<uint64_t> OptSegmentSize;
- if (SegmentSize > 0)
- OptSegmentSize = SegmentSize;
- if (auto Err = Gsym.save(OutFile, Endian, OptSegmentSize))
+ if (auto Err = Gsym.save(OutFile, Endian))
return Err;
// Verify the DWARF if requested. This will ensure all the info in the DWARF
diff --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 77e60af090bba..366c6a5ff74db 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -2443,211 +2443,3 @@ TEST(GSYMTest, TestGsymCreatorMultipleSymbolsWithNoSize) {
1, // NumAddresses
ArrayRef<uint8_t>(UUID));
}
-
-// Helper function to quickly create a FunctionInfo in a GsymCreator for testing.
-static void AddFunctionInfo(GsymCreator &GC, const char *FuncName,
- uint64_t FuncAddr, const char *SourcePath,
- const char *HeaderPath) {
- FunctionInfo FI(FuncAddr, 0x30, GC.insertString(FuncName));
- FI.OptLineTable = LineTable();
- const uint32_t SourceFileIdx = GC.insertFile(SourcePath);
- const uint32_t HeaderFileIdx = GC.insertFile(HeaderPath);
- FI.OptLineTable->push(LineEntry(FuncAddr+0x00, SourceFileIdx, 5));
- FI.OptLineTable->push(LineEntry(FuncAddr+0x10, HeaderFileIdx, 10));
- FI.OptLineTable->push(LineEntry(FuncAddr+0x12, HeaderFileIdx, 20));
- FI.OptLineTable->push(LineEntry(FuncAddr+0x14, HeaderFileIdx, 11));
- FI.OptLineTable->push(LineEntry(FuncAddr+0x16, HeaderFileIdx, 30));
- FI.OptLineTable->push(LineEntry(FuncAddr+0x18, HeaderFileIdx, 12));
- FI.OptLineTable->push(LineEntry(FuncAddr+0x20, SourceFileIdx, 8));
- FI.Inline = InlineInfo();
-
- std::string InlineName1(FuncName); InlineName1.append("1");
- std::string InlineName2(FuncName); InlineName2.append("2");
- std::string InlineName3(FuncName); InlineName3.append("3");
-
- FI.Inline->Name = GC.insertString(InlineName1);
- FI.Inline->CallFile = SourceFileIdx;
- FI.Inline->CallLine = 6;
- FI.Inline->Ranges.insert(AddressRange(FuncAddr + 0x10, FuncAddr + 0x20));
- InlineInfo Inline2;
- Inline2.Name = GC.insertString(InlineName2);
- Inline2.CallFile = HeaderFileIdx;
- Inline2.CallLine = 33;
- Inline2.Ranges.insert(AddressRange(FuncAddr + 0x12, FuncAddr + 0x14));
- FI.Inline->Children.emplace_back(Inline2);
- InlineInfo Inline3;
- Inline3.Name = GC.insertString(InlineName3);
- Inline3.CallFile = HeaderFileIdx;
- Inline3.CallLine = 35;
- Inline3.Ranges.insert(AddressRange(FuncAddr + 0x16, FuncAddr + 0x18));
- FI.Inline->Children.emplace_back(Inline3);
- GC.addFunctionInfo(std::move(FI));
-}
-
-// Finalize a GsymCreator, encode it and decode it and return the error or
-// GsymReader that was successfully decoded.
-static Expected<GsymReader> FinalizeEncodeAndDecode(GsymCreator &GC) {
- Error FinalizeErr = GC.finalize(llvm::nulls());
- if (FinalizeErr)
- return FinalizeErr;
- SmallString<1024> Str;
- raw_svector_ostream OutStrm(Str);
- const auto ByteOrder = support::endian::system_endianness();
- FileWriter FW(OutStrm, ByteOrder);
- llvm::Error Err = GC.encode(FW);
- if (Err)
- return Err;
- return GsymReader::copyBuffer(OutStrm.str());
-}
-
-TEST(GSYMTest, TestGsymSegmenting) {
- // Test creating a GSYM file with function infos and segment the information.
- // We verify segmenting is working by creating a full GSYM and also by
- // encoding multiple segments, then we verify that we get the same information
- // when doing lookups on the full GSYM that was decoded from encoding the
- // entire GSYM and also by decoding information from the segments themselves.
- GsymCreator GC;
- GC.setBaseAddress(0);
- AddFunctionInfo(GC, "main", 0x1000, "/tmp/main.c", "/tmp/main.h");
- AddFunctionInfo(GC, "foo", 0x2000, "/tmp/foo.c", "/tmp/foo.h");
- AddFunctionInfo(GC, "bar", 0x3000, "/tmp/bar.c", "/tmp/bar.h");
- AddFunctionInfo(GC, "baz", 0x4000, "/tmp/baz.c", "/tmp/baz.h");
- Expected<GsymReader> GR = FinalizeEncodeAndDecode(GC);
- ASSERT_THAT_EXPECTED(GR, Succeeded());
- //GR->dump(outs());
-
- // Create segmented GSYM files where each file contains 1 function. We will
- // then test doing lookups on the "GR", or the full GSYM file and then test
- // doing lookups on the GsymReader objects for each segment to ensure we get
- // the exact same information. So after all of the code below we will have
- // GsymReader objects that each contain one function. We name the creators
- // and readers to match the one and only address they contain.
- // GC1000 and GR1000 are for [0x1000-0x1030)
- // GC2000 and GR2000 are for [0x2000-0x2030)
- // GC3000 and GR3000 are for [0x3000-0x3030)
- // GC4000 and GR4000 are for [0x4000-0x4030)
-
- // Create the segments and verify that FuncIdx, an in/out parameter, gets
- // updated as expected.
- size_t FuncIdx = 0;
- // Make sure we get an error if the segment size is too small to encode a
- // single function info.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCError =
- GC.createSegment(57, FuncIdx);
- ASSERT_FALSE((bool)GCError);
- checkError("a segment size of 57 is to small to fit any function infos, "
- "specify a larger value", GCError.takeError());
- // Make sure that the function index didn't get incremented when we didn't
- // encode any values into the segmented GsymCreator.
- ASSERT_EQ(FuncIdx, (size_t)0);
-
- llvm::Expected<std::unique_ptr<GsymCreator>> GC1000 =
- GC.createSegment(128, FuncIdx);
- ASSERT_THAT_EXPECTED(GC1000, Succeeded());
- ASSERT_EQ(FuncIdx, (size_t)1);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC2000 =
- GC.createSegment(128, FuncIdx);
- ASSERT_THAT_EXPECTED(GC2000, Succeeded());
- ASSERT_EQ(FuncIdx, (size_t)2);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC3000 =
- GC.createSegment(128, FuncIdx);
- ASSERT_THAT_EXPECTED(GC3000, Succeeded());
- ASSERT_EQ(FuncIdx, (size_t)3);
- llvm::Expected<std::unique_ptr<GsymCreator>> GC4000 =
- GC.createSegment(128, FuncIdx);
- ASSERT_THAT_EXPECTED(GC4000, Succeeded());
- ASSERT_EQ(FuncIdx, (size_t)4);
- // When there are no function infos left to encode we expect to get no error
- // and get a NULL GsymCreator in the return value from createSegment.
- llvm::Expected<std::unique_ptr<GsymCreator>> GCNull =
- GC.createSegment(128, FuncIdx);
- ASSERT_THAT_EXPECTED(GCNull, Succeeded());
- ASSERT_TRUE(GC1000.get() != nullptr);
- ASSERT_TRUE(GC2000.get() != nullptr);
- ASSERT_TRUE(GC3000.get() != nullptr);
- ASSERT_TRUE(GC4000.get() != nullptr);
- ASSERT_TRUE(GCNull.get() == nullptr);
- // Encode and decode the GsymReader for each segment and verify they succeed.
- Expected<GsymReader> GR1000 = FinalizeEncodeAndDecode(*GC1000.get());
- ASSERT_THAT_EXPECTED(GR1000, Succeeded());
- Expected<GsymReader> GR2000 = FinalizeEncodeAndDecode(*GC2000.get());
- ASSERT_THAT_EXPECTED(GR2000, Succeeded());
- Expected<GsymReader> GR3000 = FinalizeEncodeAndDecode(*GC3000.get());
- ASSERT_THAT_EXPECTED(GR3000, Succeeded());
- Expected<GsymReader> GR4000 = FinalizeEncodeAndDecode(*GC4000.get());
- ASSERT_THAT_EXPECTED(GR4000, Succeeded());
-
- // Verify that all lookups match the range [0x1000-0x1030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR1000.
- for (uint64_t Addr = 0x1000; Addr < 0x1030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
- auto MainLR = GR->lookup(Addr);
- ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- auto SegmentLR = GR1000->lookup(Addr);
- ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
- // Make sure the lookup results match.
- EXPECT_EQ(MainLR.get(), SegmentLR.get());
- // Make sure that the lookups on the functions that are not in the segment
- // fail as expected.
- ASSERT_THAT_EXPECTED(GR1000->lookup(0x2000), Failed());
- ASSERT_THAT_EXPECTED(GR1000->lookup(0x3000), Failed());
- ASSERT_THAT_EXPECTED(GR1000->lookup(0x4000), Failed());
- }
-
- // Verify that all lookups match the range [0x2000-0x2030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR2000.
- for (uint64_t Addr = 0x2000; Addr < 0x2030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
- auto MainLR = GR->lookup(Addr);
- ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- auto SegmentLR = GR2000->lookup(Addr);
- ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
- // Make sure the lookup results match.
- EXPECT_EQ(MainLR.get(), SegmentLR.get());
- // Make sure that the lookups on the functions that are not in the segment
- // fail as expected.
- ASSERT_THAT_EXPECTED(GR2000->lookup(0x1000), Failed());
- ASSERT_THAT_EXPECTED(GR2000->lookup(0x3000), Failed());
- ASSERT_THAT_EXPECTED(GR2000->lookup(0x4000), Failed());
-
- }
-
- // Verify that all lookups match the range [0x3000-0x3030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR3000.
- for (uint64_t Addr = 0x3000; Addr < 0x3030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
- auto MainLR = GR->lookup(Addr);
- ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- auto SegmentLR = GR3000->lookup(Addr);
- ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
- // Make sure the lookup results match.
- EXPECT_EQ(MainLR.get(), SegmentLR.get());
- // Make sure that the lookups on the functions that are not in the segment
- // fail as expected.
- ASSERT_THAT_EXPECTED(GR3000->lookup(0x1000), Failed());
- ASSERT_THAT_EXPECTED(GR3000->lookup(0x2000), Failed());
- ASSERT_THAT_EXPECTED(GR3000->lookup(0x4000), Failed());
-}
-
- // Verify that all lookups match the range [0x4000-0x4030) when doing lookups
- // in the GsymReader that contains all functions and from the segmented
- // GsymReader in GR4000.
- for (uint64_t Addr = 0x4000; Addr < 0x4030; ++Addr) {
- // Lookup in the main GsymReader that contains all function infos
- auto MainLR = GR->lookup(Addr);
- ASSERT_THAT_EXPECTED(MainLR, Succeeded());
- // Lookup in the GsymReader for that contains 0x4000
- auto SegmentLR = GR4000->lookup(Addr);
- ASSERT_THAT_EXPECTED(SegmentLR, Succeeded());
- // Make sure the lookup results match.
- EXPECT_EQ(MainLR.get(), SegmentLR.get());
- // Make sure that the lookups on the functions that are not in the segment
- // fail as expected.
- ASSERT_THAT_EXPECTED(GR4000->lookup(0x1000), Failed());
- ASSERT_THAT_EXPECTED(GR4000->lookup(0x2000), Failed());
- ASSERT_THAT_EXPECTED(GR4000->lookup(0x3000), Failed());
- }
-}
More information about the llvm-commits mailing list