[llvm] efda523 - Fix compress/decompress in LLVM Offloading API (#150064)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 13:34:06 PDT 2025
Author: David Salinas
Date: 2025-10-06T16:34:02-04:00
New Revision: efda523188c45ecedad408cf48267fef97dfb2bc
URL: https://github.com/llvm/llvm-project/commit/efda523188c45ecedad408cf48267fef97dfb2bc
DIFF: https://github.com/llvm/llvm-project/commit/efda523188c45ecedad408cf48267fef97dfb2bc.diff
LOG: Fix compress/decompress in LLVM Offloading API (#150064)
Co-authored-by: dsalinas_amdeng <david.salinas at amd.com>
Added:
Modified:
llvm/include/llvm/Object/OffloadBundle.h
llvm/lib/Object/OffloadBundle.cpp
llvm/tools/llvm-objdump/OffloadDump.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Object/OffloadBundle.h b/llvm/include/llvm/Object/OffloadBundle.h
index 18be62b10c518..bbb313c06c441 100644
--- a/llvm/include/llvm/Object/OffloadBundle.h
+++ b/llvm/include/llvm/Object/OffloadBundle.h
@@ -32,29 +32,41 @@ namespace llvm {
namespace object {
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Total file size (4 bytes in V2, 8 bytes in V3).
+// - Uncompressed Size (4 bytes in V1/V2, 8 bytes in V3).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
class CompressedOffloadBundle {
private:
- static inline const size_t MagicSize = 4;
- static inline const size_t VersionFieldSize = sizeof(uint16_t);
- static inline const size_t MethodFieldSize = sizeof(uint16_t);
- static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
- static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
- static inline const size_t HashFieldSize = sizeof(uint64_t);
- static inline const size_t V1HeaderSize =
- MagicSize + VersionFieldSize + MethodFieldSize +
- UncompressedSizeFieldSize + HashFieldSize;
- static inline const size_t V2HeaderSize =
- MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
- UncompressedSizeFieldSize + HashFieldSize;
static inline const llvm::StringRef MagicNumber = "CCOB";
- static inline const uint16_t Version = 2;
public:
- LLVM_ABI static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ struct CompressedBundleHeader {
+ unsigned Version;
+ llvm::compression::Format CompressionFormat;
+ std::optional<size_t> FileSize;
+ size_t UncompressedFileSize;
+ uint64_t Hash;
+
+ static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+ };
+
+ static inline const uint16_t DefaultVersion = 3;
+
+ static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
- bool Verbose = false);
- LLVM_ABI static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
- decompress(llvm::MemoryBufferRef &Input, bool Verbose = false);
+ uint16_t Version, raw_ostream *VerboseStream = nullptr);
+ static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ decompress(const llvm::MemoryBuffer &Input,
+ raw_ostream *VerboseStream = nullptr);
};
/// Bundle entry in binary clang-offload-bundler format.
@@ -62,12 +74,12 @@ struct OffloadBundleEntry {
uint64_t Offset = 0u;
uint64_t Size = 0u;
uint64_t IDLength = 0u;
- StringRef ID;
+ std::string ID;
OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, StringRef T)
- : Offset(O), Size(S), IDLength(I), ID(T) {}
+ : Offset(O), Size(S), IDLength(I), ID(T.str()) {}
void dumpInfo(raw_ostream &OS) {
OS << "Offset = " << Offset << ", Size = " << Size
- << ", ID Length = " << IDLength << ", ID = " << ID;
+ << ", ID Length = " << IDLength << ", ID = " << ID << "\n";
}
void dumpURI(raw_ostream &OS, StringRef FilePath) {
OS << ID.data() << "\tfile://" << FilePath << "#offset=" << Offset
@@ -81,16 +93,21 @@ class OffloadBundleFatBin {
uint64_t Size = 0u;
StringRef FileName;
uint64_t NumberOfEntries;
+ bool Decompressed;
SmallVector<OffloadBundleEntry> Entries;
public:
+ std::unique_ptr<MemoryBuffer> DecompressedBuffer;
+
SmallVector<OffloadBundleEntry> getEntries() { return Entries; }
uint64_t getSize() const { return Size; }
StringRef getFileName() const { return FileName; }
uint64_t getNumEntries() const { return NumberOfEntries; }
+ bool isDecompressed() const { return Decompressed; }
LLVM_ABI static Expected<std::unique_ptr<OffloadBundleFatBin>>
- create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName);
+ create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName,
+ bool Decompress = false);
LLVM_ABI Error extractBundle(const ObjectFile &Source);
LLVM_ABI Error dumpEntryToCodeObject();
@@ -106,9 +123,14 @@ class OffloadBundleFatBin {
Entry.dumpURI(outs(), FileName);
}
- OffloadBundleFatBin(MemoryBufferRef Source, StringRef File)
- : FileName(File), NumberOfEntries(0),
- Entries(SmallVector<OffloadBundleEntry>()) {}
+ OffloadBundleFatBin(MemoryBufferRef Source, StringRef File,
+ bool Decompress = false)
+ : FileName(File), NumberOfEntries(0), Decompressed(Decompress),
+ Entries(SmallVector<OffloadBundleEntry>()) {
+ if (Decompress)
+ DecompressedBuffer =
+ MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);
+ }
};
enum UriTypeT { FILE_URI, MEMORY_URI };
@@ -191,6 +213,10 @@ LLVM_ABI Error extractOffloadBundleFatBinary(
LLVM_ABI Error extractCodeObject(const ObjectFile &Source, int64_t Offset,
int64_t Size, StringRef OutputFileName);
+/// Extract code object memory from the given \p Source object file at \p Offset
+/// and of \p Size, and copy into \p OutputFileName.
+LLVM_ABI Error extractCodeObject(MemoryBufferRef Buffer, int64_t Offset,
+ int64_t Size, StringRef OutputFileName);
/// Extracts an Offload Bundle Entry given by URI
LLVM_ABI Error extractOffloadBundleByURI(StringRef URIstr);
diff --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
index 329dcbf2d939a..046cde8640b49 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -25,38 +25,71 @@
using namespace llvm;
using namespace llvm::object;
-static llvm::TimerGroup
- OffloadBundlerTimerGroup("Offload Bundler Timer Group",
- "Timer group for offload bundler");
+static TimerGroup OffloadBundlerTimerGroup("Offload Bundler Timer Group",
+ "Timer group for offload bundler");
// Extract an Offload bundle (usually a Offload Bundle) from a fat_bin
-// section
+// section.
Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
StringRef FileName,
SmallVectorImpl<OffloadBundleFatBin> &Bundles) {
size_t Offset = 0;
size_t NextbundleStart = 0;
+ StringRef Magic;
+ std::unique_ptr<MemoryBuffer> Buffer;
// There could be multiple offloading bundles stored at this section.
- while (NextbundleStart != StringRef::npos) {
- std::unique_ptr<MemoryBuffer> Buffer =
+ while ((NextbundleStart != StringRef::npos) &&
+ (Offset < Contents.getBuffer().size())) {
+ Buffer =
MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "",
/*RequiresNullTerminator=*/false);
- // Create the FatBinBindle object. This will also create the Bundle Entry
- // list info.
- auto FatBundleOrErr =
- OffloadBundleFatBin::create(*Buffer, SectionOffset + Offset, FileName);
- if (!FatBundleOrErr)
- return FatBundleOrErr.takeError();
-
- // Add current Bundle to list.
- Bundles.emplace_back(std::move(**FatBundleOrErr));
-
- // Find the next bundle by searching for the magic string
- StringRef Str = Buffer->getBuffer();
- NextbundleStart = Str.find(StringRef("__CLANG_OFFLOAD_BUNDLE__"), 24);
+ if (identify_magic((*Buffer).getBuffer()) ==
+ file_magic::offload_bundle_compressed) {
+ Magic = "CCOB";
+ // Decompress this bundle first.
+ NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+ if (NextbundleStart == StringRef::npos)
+ NextbundleStart = (*Buffer).getBuffer().size();
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+ MemoryBuffer::getMemBuffer(
+ (*Buffer).getBuffer().take_front(NextbundleStart), FileName,
+ false);
+ if (std::error_code EC = CodeOrErr.getError())
+ return createFileError(FileName, EC);
+
+ Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+ CompressedOffloadBundle::decompress(**CodeOrErr, nullptr);
+ if (!DecompressedBufferOrErr)
+ return createStringError("failed to decompress input: " +
+ toString(DecompressedBufferOrErr.takeError()));
+
+ auto FatBundleOrErr = OffloadBundleFatBin::create(
+ **DecompressedBufferOrErr, Offset, FileName, true);
+ if (!FatBundleOrErr)
+ return FatBundleOrErr.takeError();
+
+ // Add current Bundle to list.
+ Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+ } else if (identify_magic((*Buffer).getBuffer()) ==
+ file_magic::offload_bundle) {
+ // Create the OffloadBundleFatBin object. This will also create the Bundle
+ // Entry list info.
+ auto FatBundleOrErr = OffloadBundleFatBin::create(
+ *Buffer, SectionOffset + Offset, FileName);
+ if (!FatBundleOrErr)
+ return FatBundleOrErr.takeError();
+
+ // Add current Bundle to list.
+ Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+ Magic = "__CLANG_OFFLOAD_BUNDLE__";
+ NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+ }
if (NextbundleStart != StringRef::npos)
Offset += NextbundleStart;
@@ -82,7 +115,7 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
NumberOfEntries = NumOfEntries;
- // For each Bundle Entry (code object)
+ // For each Bundle Entry (code object).
for (uint64_t I = 0; I < NumOfEntries; I++) {
uint64_t EntrySize;
uint64_t EntryOffset;
@@ -112,19 +145,22 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
Expected<std::unique_ptr<OffloadBundleFatBin>>
OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
- StringRef FileName) {
+ StringRef FileName, bool Decompress) {
if (Buf.getBufferSize() < 24)
return errorCodeToError(object_error::parse_failed);
// Check for magic bytes.
- if (identify_magic(Buf.getBuffer()) != file_magic::offload_bundle)
+ if ((identify_magic(Buf.getBuffer()) != file_magic::offload_bundle) &&
+ (identify_magic(Buf.getBuffer()) !=
+ file_magic::offload_bundle_compressed))
return errorCodeToError(object_error::parse_failed);
std::unique_ptr<OffloadBundleFatBin> TheBundle(
new OffloadBundleFatBin(Buf, FileName));
- // Read the Bundle Entries
- Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
+ // Read the Bundle Entries.
+ Error Err =
+ TheBundle->readEntries(Buf.getBuffer(), Decompress ? 0 : SectionOffset);
if (Err)
return Err;
@@ -132,7 +168,7 @@ OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
}
Error OffloadBundleFatBin::extractBundle(const ObjectFile &Source) {
- // This will extract all entries in the Bundle
+ // This will extract all entries in the Bundle.
for (OffloadBundleEntry &Entry : Entries) {
if (Entry.Size == 0)
@@ -161,40 +197,21 @@ Error object::extractOffloadBundleFatBinary(
return Buffer.takeError();
// If it does not start with the reserved suffix, just skip this section.
- if ((llvm::identify_magic(*Buffer) == llvm::file_magic::offload_bundle) ||
+ if ((llvm::identify_magic(*Buffer) == file_magic::offload_bundle) ||
(llvm::identify_magic(*Buffer) ==
- llvm::file_magic::offload_bundle_compressed)) {
+ file_magic::offload_bundle_compressed)) {
uint64_t SectionOffset = 0;
if (Obj.isELF()) {
SectionOffset = ELFSectionRef(Sec).getOffset();
- } else if (Obj.isCOFF()) // TODO: add COFF Support
+ } else if (Obj.isCOFF()) // TODO: add COFF Support.
return createStringError(object_error::parse_failed,
- "COFF object files not supported.\n");
+ "COFF object files not supported");
MemoryBufferRef Contents(*Buffer, Obj.getFileName());
-
- if (llvm::identify_magic(*Buffer) ==
- llvm::file_magic::offload_bundle_compressed) {
- // Decompress the input if necessary.
- Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
- CompressedOffloadBundle::decompress(Contents, false);
-
- if (!DecompressedBufferOrErr)
- return createStringError(
- inconvertibleErrorCode(),
- "Failed to decompress input: " +
- llvm::toString(DecompressedBufferOrErr.takeError()));
-
- MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
- if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset,
- Obj.getFileName(), Bundles))
- return Err;
- } else {
- if (Error Err = extractOffloadBundle(Contents, SectionOffset,
- Obj.getFileName(), Bundles))
- return Err;
- }
+ if (Error Err = extractOffloadBundle(Contents, SectionOffset,
+ Obj.getFileName(), Bundles))
+ return Err;
}
}
return Error::success();
@@ -222,8 +239,22 @@ Error object::extractCodeObject(const ObjectFile &Source, int64_t Offset,
return Error::success();
}
+Error object::extractCodeObject(const MemoryBufferRef Buffer, int64_t Offset,
+ int64_t Size, StringRef OutputFileName) {
+ Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+ FileOutputBuffer::create(OutputFileName, Size);
+ if (!BufferOrErr)
+ return BufferOrErr.takeError();
+
+ std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+ std::copy(Buffer.getBufferStart() + Offset,
+ Buffer.getBufferStart() + Offset + Size, Buf->getBufferStart());
+
+ return Buf->commit();
+}
+
// given a file name, offset, and size, extract data into a code object file,
-// into file <SourceFile>-offset<Offset>-size<Size>.co
+// into file "<SourceFile>-offset<Offset>-size<Size>.co".
Error object::extractOffloadBundleByURI(StringRef URIstr) {
// create a URI object
Expected<std::unique_ptr<OffloadBundleURI>> UriOrErr(
@@ -236,7 +267,7 @@ Error object::extractOffloadBundleByURI(StringRef URIstr) {
OutputFile +=
"-offset" + itostr(Uri.Offset) + "-size" + itostr(Uri.Size) + ".co";
- // Create an ObjectFile object from uri.file_uri
+ // Create an ObjectFile object from uri.file_uri.
auto ObjOrErr = ObjectFile::createObjectFile(Uri.FileName);
if (!ObjOrErr)
return ObjOrErr.takeError();
@@ -249,7 +280,7 @@ Error object::extractOffloadBundleByURI(StringRef URIstr) {
return Error::success();
}
-// Utility function to format numbers with commas
+// Utility function to format numbers with commas.
static std::string formatWithCommas(unsigned long long Value) {
std::string Num = std::to_string(Value);
int InsertPosition = Num.length() - 3;
@@ -260,87 +291,278 @@ static std::string formatWithCommas(unsigned long long Value) {
return Num;
}
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
- bool Verbose) {
- StringRef Blob = Input.getBuffer();
+Expected<std::unique_ptr<MemoryBuffer>>
+CompressedOffloadBundle::compress(compression::Params P,
+ const MemoryBuffer &Input, uint16_t Version,
+ raw_ostream *VerboseStream) {
+ if (!compression::zstd::isAvailable() && !compression::zlib::isAvailable())
+ return createStringError("compression not supported.");
+ Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
+ OffloadBundlerTimerGroup);
+ if (VerboseStream)
+ HashTimer.startTimer();
+ MD5 Hash;
+ MD5::MD5Result Result;
+ Hash.update(Input.getBuffer());
+ Hash.final(Result);
+ uint64_t TruncatedHash = Result.low();
+ if (VerboseStream)
+ HashTimer.stopTimer();
+
+ SmallVector<uint8_t, 0> CompressedBuffer;
+ auto BufferUint8 = ArrayRef<uint8_t>(
+ reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
+ Input.getBuffer().size());
+ Timer CompressTimer("Compression Timer", "Compression time",
+ OffloadBundlerTimerGroup);
+ if (VerboseStream)
+ CompressTimer.startTimer();
+ compression::compress(P, BufferUint8, CompressedBuffer);
+ if (VerboseStream)
+ CompressTimer.stopTimer();
+
+ uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
+
+ // Store sizes in 64-bit variables first.
+ uint64_t UncompressedSize64 = Input.getBuffer().size();
+ uint64_t TotalFileSize64;
+
+ // Calculate total file size based on version.
+ if (Version == 2) {
+ // For V2, ensure the sizes don't exceed 32-bit limit.
+ if (UncompressedSize64 > std::numeric_limits<uint32_t>::max())
+ return createStringError("uncompressed size (%llu) exceeds version 2 "
+ "unsigned 32-bit integer limit",
+ UncompressedSize64);
+ TotalFileSize64 = MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
+ sizeof(CompressionMethod) + sizeof(uint32_t) +
+ sizeof(TruncatedHash) + CompressedBuffer.size();
+ if (TotalFileSize64 > std::numeric_limits<uint32_t>::max())
+ return createStringError("total file size (%llu) exceeds version 2 "
+ "unsigned 32-bit integer limit",
+ TotalFileSize64);
+
+ } else { // Version 3.
+ TotalFileSize64 = MagicNumber.size() + sizeof(uint64_t) + sizeof(Version) +
+ sizeof(CompressionMethod) + sizeof(uint64_t) +
+ sizeof(TruncatedHash) + CompressedBuffer.size();
+ }
+
+ SmallVector<char, 0> FinalBuffer;
+ raw_svector_ostream OS(FinalBuffer);
+ OS << MagicNumber;
+ OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
+ OS.write(reinterpret_cast<const char *>(&CompressionMethod),
+ sizeof(CompressionMethod));
+
+ // Write size fields according to version.
+ if (Version == 2) {
+ uint32_t TotalFileSize32 = static_cast<uint32_t>(TotalFileSize64);
+ uint32_t UncompressedSize32 = static_cast<uint32_t>(UncompressedSize64);
+ OS.write(reinterpret_cast<const char *>(&TotalFileSize32),
+ sizeof(TotalFileSize32));
+ OS.write(reinterpret_cast<const char *>(&UncompressedSize32),
+ sizeof(UncompressedSize32));
+ } else { // Version 3.
+ OS.write(reinterpret_cast<const char *>(&TotalFileSize64),
+ sizeof(TotalFileSize64));
+ OS.write(reinterpret_cast<const char *>(&UncompressedSize64),
+ sizeof(UncompressedSize64));
+ }
+
+ OS.write(reinterpret_cast<const char *>(&TruncatedHash),
+ sizeof(TruncatedHash));
+ OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
+ CompressedBuffer.size());
+
+ if (VerboseStream) {
+ auto MethodUsed = P.format == compression::Format::Zstd ? "zstd" : "zlib";
+ double CompressionRate =
+ static_cast<double>(UncompressedSize64) / CompressedBuffer.size();
+ double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
+ double CompressionSpeedMBs =
+ (UncompressedSize64 / (1024.0 * 1024.0)) / CompressionTimeSeconds;
+ *VerboseStream << "Compressed bundle format version: " << Version << "\n"
+ << "Total file size (including headers): "
+ << formatWithCommas(TotalFileSize64) << " bytes\n"
+ << "Compression method used: " << MethodUsed << "\n"
+ << "Compression level: " << P.level << "\n"
+ << "Binary size before compression: "
+ << formatWithCommas(UncompressedSize64) << " bytes\n"
+ << "Binary size after compression: "
+ << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
+ << "Compression rate: " << format("%.2lf", CompressionRate)
+ << "\n"
+ << "Compression ratio: "
+ << format("%.2lf%%", 100.0 / CompressionRate) << "\n"
+ << "Compression speed: "
+ << format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
+ << "Truncated MD5 hash: " << format_hex(TruncatedHash, 16)
+ << "\n";
+ }
+
+ return MemoryBuffer::getMemBufferCopy(
+ StringRef(FinalBuffer.data(), FinalBuffer.size()));
+}
+
+// Use packed structs to avoid padding, such that the structs map the serialized
+// format.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+ struct CommonFields {
+ uint32_t Magic;
+ uint16_t Version;
+ uint16_t Method;
+ };
+
+ struct V1Header {
+ CommonFields Common;
+ uint32_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ struct V2Header {
+ CommonFields Common;
+ uint32_t FileSize;
+ uint32_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ struct V3Header {
+ CommonFields Common;
+ uint64_t FileSize;
+ uint64_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ CommonFields Common;
+ V1Header V1;
+ V2Header V2;
+ V3Header V3;
+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version.
+static size_t getHeaderSize(uint16_t Version) {
+ switch (Version) {
+ case 1:
+ return sizeof(RawCompressedBundleHeader::V1Header);
+ case 2:
+ return sizeof(RawCompressedBundleHeader::V2Header);
+ case 3:
+ return sizeof(RawCompressedBundleHeader::V3Header);
+ default:
+ llvm_unreachable("Unsupported version");
+ }
+}
- if (Blob.size() < V1HeaderSize)
- return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+ assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
+ assert(identify_magic(Blob) == file_magic::offload_bundle_compressed);
+
+ RawCompressedBundleHeader Header;
+ std::memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+ CompressedBundleHeader Normalized;
+ Normalized.Version = Header.Common.Version;
+
+ size_t RequiredSize = getHeaderSize(Normalized.Version);
+
+ if (Blob.size() < RequiredSize)
+ return createStringError("compressed bundle header size too small");
+
+ switch (Normalized.Version) {
+ case 1:
+ Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
+ Normalized.Hash = Header.V1.Hash;
+ break;
+ case 2:
+ Normalized.FileSize = Header.V2.FileSize;
+ Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
+ Normalized.Hash = Header.V2.Hash;
+ break;
+ case 3:
+ Normalized.FileSize = Header.V3.FileSize;
+ Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
+ Normalized.Hash = Header.V3.Hash;
+ break;
+ default:
+ return createStringError("unknown compressed bundle version");
+ }
- if (llvm::identify_magic(Blob) !=
- llvm::file_magic::offload_bundle_compressed) {
- if (Verbose)
- llvm::errs() << "Uncompressed bundle.\n";
- return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+ // Determine compression format.
+ switch (Header.Common.Method) {
+ case static_cast<uint16_t>(compression::Format::Zlib):
+ case static_cast<uint16_t>(compression::Format::Zstd):
+ Normalized.CompressionFormat =
+ static_cast<compression::Format>(Header.Common.Method);
+ break;
+ default:
+ return createStringError("unknown compressing method");
}
- size_t CurrentOffset = MagicSize;
+ return Normalized;
+}
- uint16_t ThisVersion;
- memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
- CurrentOffset += VersionFieldSize;
+Expected<std::unique_ptr<MemoryBuffer>>
+CompressedOffloadBundle::decompress(const MemoryBuffer &Input,
+ raw_ostream *VerboseStream) {
+ StringRef Blob = Input.getBuffer();
- uint16_t CompressionMethod;
- memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
- CurrentOffset += MethodFieldSize;
+ // Check minimum header size (using V1 as it's the smallest).
+ if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
+ return MemoryBuffer::getMemBufferCopy(Blob);
- uint32_t TotalFileSize;
- if (ThisVersion >= 2) {
- if (Blob.size() < V2HeaderSize)
- return createStringError(inconvertibleErrorCode(),
- "Compressed bundle header size too small");
- memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
- CurrentOffset += FileSizeFieldSize;
+ if (identify_magic(Blob) != file_magic::offload_bundle_compressed) {
+ if (VerboseStream)
+ *VerboseStream << "Uncompressed bundle\n";
+ return MemoryBuffer::getMemBufferCopy(Blob);
}
- uint32_t UncompressedSize;
- memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
- CurrentOffset += UncompressedSizeFieldSize;
-
- uint64_t StoredHash;
- memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += HashFieldSize;
-
- llvm::compression::Format CompressionFormat;
- if (CompressionMethod ==
- static_cast<uint16_t>(llvm::compression::Format::Zlib))
- CompressionFormat = llvm::compression::Format::Zlib;
- else if (CompressionMethod ==
- static_cast<uint16_t>(llvm::compression::Format::Zstd))
- CompressionFormat = llvm::compression::Format::Zstd;
- else
- return createStringError(inconvertibleErrorCode(),
- "Unknown compressing method");
-
- llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
- OffloadBundlerTimerGroup);
- if (Verbose)
+ Expected<CompressedBundleHeader> HeaderOrErr =
+ CompressedBundleHeader::tryParse(Blob);
+ if (!HeaderOrErr)
+ return HeaderOrErr.takeError();
+
+ const CompressedBundleHeader &Normalized = *HeaderOrErr;
+ unsigned ThisVersion = Normalized.Version;
+ size_t HeaderSize = getHeaderSize(ThisVersion);
+
+ compression::Format CompressionFormat = Normalized.CompressionFormat;
+
+ size_t TotalFileSize = Normalized.FileSize.value_or(0);
+ size_t UncompressedSize = Normalized.UncompressedFileSize;
+ auto StoredHash = Normalized.Hash;
+
+ Timer DecompressTimer("Decompression Timer", "Decompression time",
+ OffloadBundlerTimerGroup);
+ if (VerboseStream)
DecompressTimer.startTimer();
SmallVector<uint8_t, 0> DecompressedData;
- StringRef CompressedData = Blob.substr(CurrentOffset);
- if (llvm::Error DecompressionError = llvm::compression::decompress(
- CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
+ StringRef CompressedData =
+ Blob.substr(HeaderSize, TotalFileSize - HeaderSize);
+
+ if (Error DecompressionError = compression::decompress(
+ CompressionFormat, arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))
- return createStringError(inconvertibleErrorCode(),
- "Could not decompress embedded file contents: " +
- llvm::toString(std::move(DecompressionError)));
+ return createStringError("could not decompress embedded file contents: " +
+ toString(std::move(DecompressionError)));
- if (Verbose) {
+ if (VerboseStream) {
DecompressTimer.stopTimer();
double DecompressionTimeSeconds =
DecompressTimer.getTotalTime().getWallTime();
// Recalculate MD5 hash for integrity check.
- llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
- "Hash recalculation time",
- OffloadBundlerTimerGroup);
+ Timer HashRecalcTimer("Hash Recalculation Timer", "Hash recalculation time",
+ OffloadBundlerTimerGroup);
HashRecalcTimer.startTimer();
- llvm::MD5 Hash;
- llvm::MD5::MD5Result Result;
- Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData));
+ MD5 Hash;
+ MD5::MD5Result Result;
+ Hash.update(ArrayRef<uint8_t>(DecompressedData));
Hash.final(Result);
uint64_t RecalculatedHash = Result.low();
HashRecalcTimer.stopTimer();
@@ -351,118 +573,28 @@ CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
double DecompressionSpeedMBs =
(UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
- llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
+ *VerboseStream << "Compressed bundle format version: " << ThisVersion
+ << "\n";
if (ThisVersion >= 2)
- llvm::errs() << "Total file size (from header): "
- << formatWithCommas(TotalFileSize) << " bytes\n";
- llvm::errs() << "Decompression method: "
- << (CompressionFormat == llvm::compression::Format::Zlib
- ? "zlib"
- : "zstd")
- << "\n"
- << "Size before decompression: "
- << formatWithCommas(CompressedData.size()) << " bytes\n"
- << "Size after decompression: "
- << formatWithCommas(UncompressedSize) << " bytes\n"
- << "Compression rate: "
- << llvm::format("%.2lf", CompressionRate) << "\n"
- << "Compression ratio: "
- << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
- << "Decompression speed: "
- << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
- << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
- << "Recalculated hash: "
- << llvm::format_hex(RecalculatedHash, 16) << "\n"
- << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
+ *VerboseStream << "Total file size (from header): "
+ << formatWithCommas(TotalFileSize) << " bytes\n";
+ *VerboseStream
+ << "Decompression method: "
+ << (CompressionFormat == compression::Format::Zlib ? "zlib" : "zstd")
+ << "\n"
+ << "Size before decompression: "
+ << formatWithCommas(CompressedData.size()) << " bytes\n"
+ << "Size after decompression: " << formatWithCommas(UncompressedSize)
+ << " bytes\n"
+ << "Compression rate: " << format("%.2lf", CompressionRate) << "\n"
+ << "Compression ratio: " << format("%.2lf%%", 100.0 / CompressionRate)
+ << "\n"
+ << "Decompression speed: "
+ << format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
+ << "Stored hash: " << format_hex(StoredHash, 16) << "\n"
+ << "Recalculated hash: " << format_hex(RecalculatedHash, 16) << "\n"
+ << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
}
- return llvm::MemoryBuffer::getMemBufferCopy(
- llvm::toStringRef(DecompressedData));
-}
-
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::compress(llvm::compression::Params P,
- const llvm::MemoryBuffer &Input,
- bool Verbose) {
- if (!llvm::compression::zstd::isAvailable() &&
- !llvm::compression::zlib::isAvailable())
- return createStringError(llvm::inconvertibleErrorCode(),
- "Compression not supported");
-
- llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
- OffloadBundlerTimerGroup);
- if (Verbose)
- HashTimer.startTimer();
- llvm::MD5 Hash;
- llvm::MD5::MD5Result Result;
- Hash.update(Input.getBuffer());
- Hash.final(Result);
- uint64_t TruncatedHash = Result.low();
- if (Verbose)
- HashTimer.stopTimer();
-
- SmallVector<uint8_t, 0> CompressedBuffer;
- auto BufferUint8 = llvm::ArrayRef<uint8_t>(
- reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
- Input.getBuffer().size());
-
- llvm::Timer CompressTimer("Compression Timer", "Compression time",
- OffloadBundlerTimerGroup);
- if (Verbose)
- CompressTimer.startTimer();
- llvm::compression::compress(P, BufferUint8, CompressedBuffer);
- if (Verbose)
- CompressTimer.stopTimer();
-
- uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
- uint32_t UncompressedSize = Input.getBuffer().size();
- uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
- sizeof(Version) + sizeof(CompressionMethod) +
- sizeof(UncompressedSize) + sizeof(TruncatedHash) +
- CompressedBuffer.size();
-
- SmallVector<char, 0> FinalBuffer;
- llvm::raw_svector_ostream OS(FinalBuffer);
- OS << MagicNumber;
- OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
- OS.write(reinterpret_cast<const char *>(&CompressionMethod),
- sizeof(CompressionMethod));
- OS.write(reinterpret_cast<const char *>(&TotalFileSize),
- sizeof(TotalFileSize));
- OS.write(reinterpret_cast<const char *>(&UncompressedSize),
- sizeof(UncompressedSize));
- OS.write(reinterpret_cast<const char *>(&TruncatedHash),
- sizeof(TruncatedHash));
- OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
- CompressedBuffer.size());
-
- if (Verbose) {
- auto MethodUsed =
- P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
- double CompressionRate =
- static_cast<double>(UncompressedSize) / CompressedBuffer.size();
- double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
- double CompressionSpeedMBs =
- (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
-
- llvm::errs() << "Compressed bundle format version: " << Version << "\n"
- << "Total file size (including headers): "
- << formatWithCommas(TotalFileSize) << " bytes\n"
- << "Compression method used: " << MethodUsed << "\n"
- << "Compression level: " << P.level << "\n"
- << "Binary size before compression: "
- << formatWithCommas(UncompressedSize) << " bytes\n"
- << "Binary size after compression: "
- << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
- << "Compression rate: "
- << llvm::format("%.2lf", CompressionRate) << "\n"
- << "Compression ratio: "
- << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
- << "Compression speed: "
- << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
- << "Truncated MD5 hash: "
- << llvm::format_hex(TruncatedHash, 16) << "\n";
- }
- return llvm::MemoryBuffer::getMemBufferCopy(
- llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
+ return MemoryBuffer::getMemBufferCopy(toStringRef(DecompressedData));
}
diff --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp
index 8a0deb35ba151..a77537dd90eeb 100644
--- a/llvm/tools/llvm-objdump/OffloadDump.cpp
+++ b/llvm/tools/llvm-objdump/OffloadDump.cpp
@@ -87,21 +87,30 @@ void llvm::dumpOffloadBundleFatBinary(const ObjectFile &O, StringRef ArchName) {
if (Error Err = llvm::object::extractOffloadBundleFatBinary(O, FoundBundles))
reportError(O.getFileName(), "while extracting offload FatBin bundles: " +
toString(std::move(Err)));
-
for (const auto &[BundleNum, Bundle] : llvm::enumerate(FoundBundles)) {
for (OffloadBundleEntry &Entry : Bundle.getEntries()) {
- if (!ArchName.empty() && !Entry.ID.contains(ArchName))
+ if (!ArchName.empty() && Entry.ID.find(ArchName) != std::string::npos)
continue;
// create file name for this object file: <source-filename>.<Bundle
// Number>.<EntryID>
- std::string str = Bundle.getFileName().str() + "." + itostr(BundleNum) +
- "." + Entry.ID.str();
- if (Error Err = object::extractCodeObject(O, Entry.Offset, Entry.Size,
- StringRef(str)))
- reportError(O.getFileName(),
- "while extracting offload Bundle Entries: " +
- toString(std::move(Err)));
+ std::string str =
+ Bundle.getFileName().str() + "." + itostr(BundleNum) + "." + Entry.ID;
+
+ if (Bundle.isDecompressed()) {
+ if (Error Err = object::extractCodeObject(
+ Bundle.DecompressedBuffer->getMemBufferRef(), Entry.Offset,
+ Entry.Size, StringRef(str)))
+ reportError(O.getFileName(),
+ "while extracting offload Bundle Entries: " +
+ toString(std::move(Err)));
+ } else {
+ if (Error Err = object::extractCodeObject(O, Entry.Offset, Entry.Size,
+ StringRef(str)))
+ reportError(O.getFileName(),
+ "while extracting offload Bundle Entries: " +
+ toString(std::move(Err)));
+ }
outs() << "Extracting offload bundle: " << str << "\n";
}
}
More information about the llvm-commits
mailing list