[llvm] efda523 - Fix compress/decompress in LLVM Offloading API (#150064)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 6 13:34:06 PDT 2025


Author: David Salinas
Date: 2025-10-06T16:34:02-04:00
New Revision: efda523188c45ecedad408cf48267fef97dfb2bc

URL: https://github.com/llvm/llvm-project/commit/efda523188c45ecedad408cf48267fef97dfb2bc
DIFF: https://github.com/llvm/llvm-project/commit/efda523188c45ecedad408cf48267fef97dfb2bc.diff

LOG: Fix compress/decompress in LLVM Offloading API (#150064)

Co-authored-by: dsalinas_amdeng <david.salinas at amd.com>

Added: 
    

Modified: 
    llvm/include/llvm/Object/OffloadBundle.h
    llvm/lib/Object/OffloadBundle.cpp
    llvm/tools/llvm-objdump/OffloadDump.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Object/OffloadBundle.h b/llvm/include/llvm/Object/OffloadBundle.h
index 18be62b10c518..bbb313c06c441 100644
--- a/llvm/include/llvm/Object/OffloadBundle.h
+++ b/llvm/include/llvm/Object/OffloadBundle.h
@@ -32,29 +32,41 @@ namespace llvm {
 
 namespace object {
 
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Total file size (4 bytes in V2, 8 bytes in V3).
+// - Uncompressed Size (4 bytes in V1/V2, 8 bytes in V3).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
 class CompressedOffloadBundle {
 private:
-  static inline const size_t MagicSize = 4;
-  static inline const size_t VersionFieldSize = sizeof(uint16_t);
-  static inline const size_t MethodFieldSize = sizeof(uint16_t);
-  static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
-  static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
-  static inline const size_t HashFieldSize = sizeof(uint64_t);
-  static inline const size_t V1HeaderSize =
-      MagicSize + VersionFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSize + HashFieldSize;
-  static inline const size_t V2HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSize + HashFieldSize;
   static inline const llvm::StringRef MagicNumber = "CCOB";
-  static inline const uint16_t Version = 2;
 
 public:
-  LLVM_ABI static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  struct CompressedBundleHeader {
+    unsigned Version;
+    llvm::compression::Format CompressionFormat;
+    std::optional<size_t> FileSize;
+    size_t UncompressedFileSize;
+    uint64_t Hash;
+
+    static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+  };
+
+  static inline const uint16_t DefaultVersion = 3;
+
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
   compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
-           bool Verbose = false);
-  LLVM_ABI static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  decompress(llvm::MemoryBufferRef &Input, bool Verbose = false);
+           uint16_t Version, raw_ostream *VerboseStream = nullptr);
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  decompress(const llvm::MemoryBuffer &Input,
+             raw_ostream *VerboseStream = nullptr);
 };
 
 /// Bundle entry in binary clang-offload-bundler format.
@@ -62,12 +74,12 @@ struct OffloadBundleEntry {
   uint64_t Offset = 0u;
   uint64_t Size = 0u;
   uint64_t IDLength = 0u;
-  StringRef ID;
+  std::string ID;
   OffloadBundleEntry(uint64_t O, uint64_t S, uint64_t I, StringRef T)
-      : Offset(O), Size(S), IDLength(I), ID(T) {}
+      : Offset(O), Size(S), IDLength(I), ID(T.str()) {}
   void dumpInfo(raw_ostream &OS) {
     OS << "Offset = " << Offset << ", Size = " << Size
-       << ", ID Length = " << IDLength << ", ID = " << ID;
+       << ", ID Length = " << IDLength << ", ID = " << ID << "\n";
   }
   void dumpURI(raw_ostream &OS, StringRef FilePath) {
     OS << ID.data() << "\tfile://" << FilePath << "#offset=" << Offset
@@ -81,16 +93,21 @@ class OffloadBundleFatBin {
   uint64_t Size = 0u;
   StringRef FileName;
   uint64_t NumberOfEntries;
+  bool Decompressed;
   SmallVector<OffloadBundleEntry> Entries;
 
 public:
+  std::unique_ptr<MemoryBuffer> DecompressedBuffer;
+
   SmallVector<OffloadBundleEntry> getEntries() { return Entries; }
   uint64_t getSize() const { return Size; }
   StringRef getFileName() const { return FileName; }
   uint64_t getNumEntries() const { return NumberOfEntries; }
+  bool isDecompressed() const { return Decompressed; }
 
   LLVM_ABI static Expected<std::unique_ptr<OffloadBundleFatBin>>
-  create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName);
+  create(MemoryBufferRef, uint64_t SectionOffset, StringRef FileName,
+         bool Decompress = false);
   LLVM_ABI Error extractBundle(const ObjectFile &Source);
 
   LLVM_ABI Error dumpEntryToCodeObject();
@@ -106,9 +123,14 @@ class OffloadBundleFatBin {
       Entry.dumpURI(outs(), FileName);
   }
 
-  OffloadBundleFatBin(MemoryBufferRef Source, StringRef File)
-      : FileName(File), NumberOfEntries(0),
-        Entries(SmallVector<OffloadBundleEntry>()) {}
+  OffloadBundleFatBin(MemoryBufferRef Source, StringRef File,
+                      bool Decompress = false)
+      : FileName(File), NumberOfEntries(0), Decompressed(Decompress),
+        Entries(SmallVector<OffloadBundleEntry>()) {
+    if (Decompress)
+      DecompressedBuffer =
+          MemoryBuffer::getMemBufferCopy(Source.getBuffer(), File);
+  }
 };
 
 enum UriTypeT { FILE_URI, MEMORY_URI };
@@ -191,6 +213,10 @@ LLVM_ABI Error extractOffloadBundleFatBinary(
 LLVM_ABI Error extractCodeObject(const ObjectFile &Source, int64_t Offset,
                                  int64_t Size, StringRef OutputFileName);
 
+/// Extract code object memory from the given \p Source object file at \p Offset
+/// and of \p Size, and copy into \p OutputFileName.
+LLVM_ABI Error extractCodeObject(MemoryBufferRef Buffer, int64_t Offset,
+                                 int64_t Size, StringRef OutputFileName);
 /// Extracts an Offload Bundle Entry given by URI
 LLVM_ABI Error extractOffloadBundleByURI(StringRef URIstr);
 

diff  --git a/llvm/lib/Object/OffloadBundle.cpp b/llvm/lib/Object/OffloadBundle.cpp
index 329dcbf2d939a..046cde8640b49 100644
--- a/llvm/lib/Object/OffloadBundle.cpp
+++ b/llvm/lib/Object/OffloadBundle.cpp
@@ -25,38 +25,71 @@
 using namespace llvm;
 using namespace llvm::object;
 
-static llvm::TimerGroup
-    OffloadBundlerTimerGroup("Offload Bundler Timer Group",
-                             "Timer group for offload bundler");
+static TimerGroup OffloadBundlerTimerGroup("Offload Bundler Timer Group",
+                                           "Timer group for offload bundler");
 
 // Extract an Offload bundle (usually a Offload Bundle) from a fat_bin
-// section
+// section.
 Error extractOffloadBundle(MemoryBufferRef Contents, uint64_t SectionOffset,
                            StringRef FileName,
                            SmallVectorImpl<OffloadBundleFatBin> &Bundles) {
 
   size_t Offset = 0;
   size_t NextbundleStart = 0;
+  StringRef Magic;
+  std::unique_ptr<MemoryBuffer> Buffer;
 
   // There could be multiple offloading bundles stored at this section.
-  while (NextbundleStart != StringRef::npos) {
-    std::unique_ptr<MemoryBuffer> Buffer =
+  while ((NextbundleStart != StringRef::npos) &&
+         (Offset < Contents.getBuffer().size())) {
+    Buffer =
         MemoryBuffer::getMemBuffer(Contents.getBuffer().drop_front(Offset), "",
                                    /*RequiresNullTerminator=*/false);
 
-    // Create the FatBinBindle object. This will also create the Bundle Entry
-    // list info.
-    auto FatBundleOrErr =
-        OffloadBundleFatBin::create(*Buffer, SectionOffset + Offset, FileName);
-    if (!FatBundleOrErr)
-      return FatBundleOrErr.takeError();
-
-    // Add current Bundle to list.
-    Bundles.emplace_back(std::move(**FatBundleOrErr));
-
-    // Find the next bundle by searching for the magic string
-    StringRef Str = Buffer->getBuffer();
-    NextbundleStart = Str.find(StringRef("__CLANG_OFFLOAD_BUNDLE__"), 24);
+    if (identify_magic((*Buffer).getBuffer()) ==
+        file_magic::offload_bundle_compressed) {
+      Magic = "CCOB";
+      // Decompress this bundle first.
+      NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+      if (NextbundleStart == StringRef::npos)
+        NextbundleStart = (*Buffer).getBuffer().size();
+
+      ErrorOr<std::unique_ptr<MemoryBuffer>> CodeOrErr =
+          MemoryBuffer::getMemBuffer(
+              (*Buffer).getBuffer().take_front(NextbundleStart), FileName,
+              false);
+      if (std::error_code EC = CodeOrErr.getError())
+        return createFileError(FileName, EC);
+
+      Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+          CompressedOffloadBundle::decompress(**CodeOrErr, nullptr);
+      if (!DecompressedBufferOrErr)
+        return createStringError("failed to decompress input: " +
+                                 toString(DecompressedBufferOrErr.takeError()));
+
+      auto FatBundleOrErr = OffloadBundleFatBin::create(
+          **DecompressedBufferOrErr, Offset, FileName, true);
+      if (!FatBundleOrErr)
+        return FatBundleOrErr.takeError();
+
+      // Add current Bundle to list.
+      Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+    } else if (identify_magic((*Buffer).getBuffer()) ==
+               file_magic::offload_bundle) {
+      // Create the OffloadBundleFatBin object. This will also create the Bundle
+      // Entry list info.
+      auto FatBundleOrErr = OffloadBundleFatBin::create(
+          *Buffer, SectionOffset + Offset, FileName);
+      if (!FatBundleOrErr)
+        return FatBundleOrErr.takeError();
+
+      // Add current Bundle to list.
+      Bundles.emplace_back(std::move(**FatBundleOrErr));
+
+      Magic = "__CLANG_OFFLOAD_BUNDLE__";
+      NextbundleStart = (*Buffer).getBuffer().find(Magic, Magic.size());
+    }
 
     if (NextbundleStart != StringRef::npos)
       Offset += NextbundleStart;
@@ -82,7 +115,7 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
 
   NumberOfEntries = NumOfEntries;
 
-  // For each Bundle Entry (code object)
+  // For each Bundle Entry (code object).
   for (uint64_t I = 0; I < NumOfEntries; I++) {
     uint64_t EntrySize;
     uint64_t EntryOffset;
@@ -112,19 +145,22 @@ Error OffloadBundleFatBin::readEntries(StringRef Buffer,
 
 Expected<std::unique_ptr<OffloadBundleFatBin>>
 OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
-                            StringRef FileName) {
+                            StringRef FileName, bool Decompress) {
   if (Buf.getBufferSize() < 24)
     return errorCodeToError(object_error::parse_failed);
 
   // Check for magic bytes.
-  if (identify_magic(Buf.getBuffer()) != file_magic::offload_bundle)
+  if ((identify_magic(Buf.getBuffer()) != file_magic::offload_bundle) &&
+      (identify_magic(Buf.getBuffer()) !=
+       file_magic::offload_bundle_compressed))
     return errorCodeToError(object_error::parse_failed);
 
   std::unique_ptr<OffloadBundleFatBin> TheBundle(
       new OffloadBundleFatBin(Buf, FileName));
 
-  // Read the Bundle Entries
-  Error Err = TheBundle->readEntries(Buf.getBuffer(), SectionOffset);
+  // Read the Bundle Entries.
+  Error Err =
+      TheBundle->readEntries(Buf.getBuffer(), Decompress ? 0 : SectionOffset);
   if (Err)
     return Err;
 
@@ -132,7 +168,7 @@ OffloadBundleFatBin::create(MemoryBufferRef Buf, uint64_t SectionOffset,
 }
 
 Error OffloadBundleFatBin::extractBundle(const ObjectFile &Source) {
-  // This will extract all entries in the Bundle
+  // This will extract all entries in the Bundle.
   for (OffloadBundleEntry &Entry : Entries) {
 
     if (Entry.Size == 0)
@@ -161,40 +197,21 @@ Error object::extractOffloadBundleFatBinary(
       return Buffer.takeError();
 
     // If it does not start with the reserved suffix, just skip this section.
-    if ((llvm::identify_magic(*Buffer) == llvm::file_magic::offload_bundle) ||
+    if ((llvm::identify_magic(*Buffer) == file_magic::offload_bundle) ||
         (llvm::identify_magic(*Buffer) ==
-         llvm::file_magic::offload_bundle_compressed)) {
+         file_magic::offload_bundle_compressed)) {
 
       uint64_t SectionOffset = 0;
       if (Obj.isELF()) {
         SectionOffset = ELFSectionRef(Sec).getOffset();
-      } else if (Obj.isCOFF()) // TODO: add COFF Support
+      } else if (Obj.isCOFF()) // TODO: add COFF Support.
         return createStringError(object_error::parse_failed,
-                                 "COFF object files not supported.\n");
+                                 "COFF object files not supported");
 
       MemoryBufferRef Contents(*Buffer, Obj.getFileName());
-
-      if (llvm::identify_magic(*Buffer) ==
-          llvm::file_magic::offload_bundle_compressed) {
-        // Decompress the input if necessary.
-        Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
-            CompressedOffloadBundle::decompress(Contents, false);
-
-        if (!DecompressedBufferOrErr)
-          return createStringError(
-              inconvertibleErrorCode(),
-              "Failed to decompress input: " +
-                  llvm::toString(DecompressedBufferOrErr.takeError()));
-
-        MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
-        if (Error Err = extractOffloadBundle(DecompressedInput, SectionOffset,
-                                             Obj.getFileName(), Bundles))
-          return Err;
-      } else {
-        if (Error Err = extractOffloadBundle(Contents, SectionOffset,
-                                             Obj.getFileName(), Bundles))
-          return Err;
-      }
+      if (Error Err = extractOffloadBundle(Contents, SectionOffset,
+                                           Obj.getFileName(), Bundles))
+        return Err;
     }
   }
   return Error::success();
@@ -222,8 +239,22 @@ Error object::extractCodeObject(const ObjectFile &Source, int64_t Offset,
   return Error::success();
 }
 
+Error object::extractCodeObject(const MemoryBufferRef Buffer, int64_t Offset,
+                                int64_t Size, StringRef OutputFileName) {
+  Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
+      FileOutputBuffer::create(OutputFileName, Size);
+  if (!BufferOrErr)
+    return BufferOrErr.takeError();
+
+  std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
+  std::copy(Buffer.getBufferStart() + Offset,
+            Buffer.getBufferStart() + Offset + Size, Buf->getBufferStart());
+
+  return Buf->commit();
+}
+
 // given a file name, offset, and size, extract data into a code object file,
-// into file <SourceFile>-offset<Offset>-size<Size>.co
+// into file "<SourceFile>-offset<Offset>-size<Size>.co".
 Error object::extractOffloadBundleByURI(StringRef URIstr) {
   // create a URI object
   Expected<std::unique_ptr<OffloadBundleURI>> UriOrErr(
@@ -236,7 +267,7 @@ Error object::extractOffloadBundleByURI(StringRef URIstr) {
   OutputFile +=
       "-offset" + itostr(Uri.Offset) + "-size" + itostr(Uri.Size) + ".co";
 
-  // Create an ObjectFile object from uri.file_uri
+  // Create an ObjectFile object from uri.file_uri.
   auto ObjOrErr = ObjectFile::createObjectFile(Uri.FileName);
   if (!ObjOrErr)
     return ObjOrErr.takeError();
@@ -249,7 +280,7 @@ Error object::extractOffloadBundleByURI(StringRef URIstr) {
   return Error::success();
 }
 
-// Utility function to format numbers with commas
+// Utility function to format numbers with commas.
 static std::string formatWithCommas(unsigned long long Value) {
   std::string Num = std::to_string(Value);
   int InsertPosition = Num.length() - 3;
@@ -260,87 +291,278 @@ static std::string formatWithCommas(unsigned long long Value) {
   return Num;
 }
 
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
-                                    bool Verbose) {
-  StringRef Blob = Input.getBuffer();
+Expected<std::unique_ptr<MemoryBuffer>>
+CompressedOffloadBundle::compress(compression::Params P,
+                                  const MemoryBuffer &Input, uint16_t Version,
+                                  raw_ostream *VerboseStream) {
+  if (!compression::zstd::isAvailable() && !compression::zlib::isAvailable())
+    return createStringError("compression not supported.");
+  Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
+                  OffloadBundlerTimerGroup);
+  if (VerboseStream)
+    HashTimer.startTimer();
+  MD5 Hash;
+  MD5::MD5Result Result;
+  Hash.update(Input.getBuffer());
+  Hash.final(Result);
+  uint64_t TruncatedHash = Result.low();
+  if (VerboseStream)
+    HashTimer.stopTimer();
+
+  SmallVector<uint8_t, 0> CompressedBuffer;
+  auto BufferUint8 = ArrayRef<uint8_t>(
+      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
+      Input.getBuffer().size());
+  Timer CompressTimer("Compression Timer", "Compression time",
+                      OffloadBundlerTimerGroup);
+  if (VerboseStream)
+    CompressTimer.startTimer();
+  compression::compress(P, BufferUint8, CompressedBuffer);
+  if (VerboseStream)
+    CompressTimer.stopTimer();
+
+  uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
+
+  // Store sizes in 64-bit variables first.
+  uint64_t UncompressedSize64 = Input.getBuffer().size();
+  uint64_t TotalFileSize64;
+
+  // Calculate total file size based on version.
+  if (Version == 2) {
+    // For V2, ensure the sizes don't exceed 32-bit limit.
+    if (UncompressedSize64 > std::numeric_limits<uint32_t>::max())
+      return createStringError("uncompressed size (%llu) exceeds version 2 "
+                               "unsigned 32-bit integer limit",
+                               UncompressedSize64);
+    TotalFileSize64 = MagicNumber.size() + sizeof(uint32_t) + sizeof(Version) +
+                      sizeof(CompressionMethod) + sizeof(uint32_t) +
+                      sizeof(TruncatedHash) + CompressedBuffer.size();
+    if (TotalFileSize64 > std::numeric_limits<uint32_t>::max())
+      return createStringError("total file size (%llu) exceeds version 2 "
+                               "unsigned 32-bit integer limit",
+                               TotalFileSize64);
+
+  } else { // Version 3.
+    TotalFileSize64 = MagicNumber.size() + sizeof(uint64_t) + sizeof(Version) +
+                      sizeof(CompressionMethod) + sizeof(uint64_t) +
+                      sizeof(TruncatedHash) + CompressedBuffer.size();
+  }
+
+  SmallVector<char, 0> FinalBuffer;
+  raw_svector_ostream OS(FinalBuffer);
+  OS << MagicNumber;
+  OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
+  OS.write(reinterpret_cast<const char *>(&CompressionMethod),
+           sizeof(CompressionMethod));
+
+  // Write size fields according to version.
+  if (Version == 2) {
+    uint32_t TotalFileSize32 = static_cast<uint32_t>(TotalFileSize64);
+    uint32_t UncompressedSize32 = static_cast<uint32_t>(UncompressedSize64);
+    OS.write(reinterpret_cast<const char *>(&TotalFileSize32),
+             sizeof(TotalFileSize32));
+    OS.write(reinterpret_cast<const char *>(&UncompressedSize32),
+             sizeof(UncompressedSize32));
+  } else { // Version 3.
+    OS.write(reinterpret_cast<const char *>(&TotalFileSize64),
+             sizeof(TotalFileSize64));
+    OS.write(reinterpret_cast<const char *>(&UncompressedSize64),
+             sizeof(UncompressedSize64));
+  }
+
+  OS.write(reinterpret_cast<const char *>(&TruncatedHash),
+           sizeof(TruncatedHash));
+  OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
+           CompressedBuffer.size());
+
+  if (VerboseStream) {
+    auto MethodUsed = P.format == compression::Format::Zstd ? "zstd" : "zlib";
+    double CompressionRate =
+        static_cast<double>(UncompressedSize64) / CompressedBuffer.size();
+    double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
+    double CompressionSpeedMBs =
+        (UncompressedSize64 / (1024.0 * 1024.0)) / CompressionTimeSeconds;
+    *VerboseStream << "Compressed bundle format version: " << Version << "\n"
+                   << "Total file size (including headers): "
+                   << formatWithCommas(TotalFileSize64) << " bytes\n"
+                   << "Compression method used: " << MethodUsed << "\n"
+                   << "Compression level: " << P.level << "\n"
+                   << "Binary size before compression: "
+                   << formatWithCommas(UncompressedSize64) << " bytes\n"
+                   << "Binary size after compression: "
+                   << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
+                   << "Compression rate: " << format("%.2lf", CompressionRate)
+                   << "\n"
+                   << "Compression ratio: "
+                   << format("%.2lf%%", 100.0 / CompressionRate) << "\n"
+                   << "Compression speed: "
+                   << format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
+                   << "Truncated MD5 hash: " << format_hex(TruncatedHash, 16)
+                   << "\n";
+  }
+
+  return MemoryBuffer::getMemBufferCopy(
+      StringRef(FinalBuffer.data(), FinalBuffer.size()));
+}
+
+// Use packed structs to avoid padding, such that the structs map the serialized
+// format.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+  struct CommonFields {
+    uint32_t Magic;
+    uint16_t Version;
+    uint16_t Method;
+  };
+
+  struct V1Header {
+    CommonFields Common;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V2Header {
+    CommonFields Common;
+    uint32_t FileSize;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V3Header {
+    CommonFields Common;
+    uint64_t FileSize;
+    uint64_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  CommonFields Common;
+  V1Header V1;
+  V2Header V2;
+  V3Header V3;
+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version.
+static size_t getHeaderSize(uint16_t Version) {
+  switch (Version) {
+  case 1:
+    return sizeof(RawCompressedBundleHeader::V1Header);
+  case 2:
+    return sizeof(RawCompressedBundleHeader::V2Header);
+  case 3:
+    return sizeof(RawCompressedBundleHeader::V3Header);
+  default:
+    llvm_unreachable("Unsupported version");
+  }
+}
 
-  if (Blob.size() < V1HeaderSize)
-    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+  assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
+  assert(identify_magic(Blob) == file_magic::offload_bundle_compressed);
+
+  RawCompressedBundleHeader Header;
+  std::memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+  CompressedBundleHeader Normalized;
+  Normalized.Version = Header.Common.Version;
+
+  size_t RequiredSize = getHeaderSize(Normalized.Version);
+
+  if (Blob.size() < RequiredSize)
+    return createStringError("compressed bundle header size too small");
+
+  switch (Normalized.Version) {
+  case 1:
+    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
+    Normalized.Hash = Header.V1.Hash;
+    break;
+  case 2:
+    Normalized.FileSize = Header.V2.FileSize;
+    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
+    Normalized.Hash = Header.V2.Hash;
+    break;
+  case 3:
+    Normalized.FileSize = Header.V3.FileSize;
+    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
+    Normalized.Hash = Header.V3.Hash;
+    break;
+  default:
+    return createStringError("unknown compressed bundle version");
+  }
 
-  if (llvm::identify_magic(Blob) !=
-      llvm::file_magic::offload_bundle_compressed) {
-    if (Verbose)
-      llvm::errs() << "Uncompressed bundle.\n";
-    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+  // Determine compression format.
+  switch (Header.Common.Method) {
+  case static_cast<uint16_t>(compression::Format::Zlib):
+  case static_cast<uint16_t>(compression::Format::Zstd):
+    Normalized.CompressionFormat =
+        static_cast<compression::Format>(Header.Common.Method);
+    break;
+  default:
+    return createStringError("unknown compressing method");
   }
 
-  size_t CurrentOffset = MagicSize;
+  return Normalized;
+}
 
-  uint16_t ThisVersion;
-  memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += VersionFieldSize;
+Expected<std::unique_ptr<MemoryBuffer>>
+CompressedOffloadBundle::decompress(const MemoryBuffer &Input,
+                                    raw_ostream *VerboseStream) {
+  StringRef Blob = Input.getBuffer();
 
-  uint16_t CompressionMethod;
-  memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += MethodFieldSize;
+  // Check minimum header size (using V1 as it's the smallest).
+  if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
+    return MemoryBuffer::getMemBufferCopy(Blob);
 
-  uint32_t TotalFileSize;
-  if (ThisVersion >= 2) {
-    if (Blob.size() < V2HeaderSize)
-      return createStringError(inconvertibleErrorCode(),
-                               "Compressed bundle header size too small");
-    memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
-    CurrentOffset += FileSizeFieldSize;
+  if (identify_magic(Blob) != file_magic::offload_bundle_compressed) {
+    if (VerboseStream)
+      *VerboseStream << "Uncompressed bundle\n";
+    return MemoryBuffer::getMemBufferCopy(Blob);
   }
 
-  uint32_t UncompressedSize;
-  memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
-  CurrentOffset += UncompressedSizeFieldSize;
-
-  uint64_t StoredHash;
-  memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
-  CurrentOffset += HashFieldSize;
-
-  llvm::compression::Format CompressionFormat;
-  if (CompressionMethod ==
-      static_cast<uint16_t>(llvm::compression::Format::Zlib))
-    CompressionFormat = llvm::compression::Format::Zlib;
-  else if (CompressionMethod ==
-           static_cast<uint16_t>(llvm::compression::Format::Zstd))
-    CompressionFormat = llvm::compression::Format::Zstd;
-  else
-    return createStringError(inconvertibleErrorCode(),
-                             "Unknown compressing method");
-
-  llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
-                              OffloadBundlerTimerGroup);
-  if (Verbose)
+  Expected<CompressedBundleHeader> HeaderOrErr =
+      CompressedBundleHeader::tryParse(Blob);
+  if (!HeaderOrErr)
+    return HeaderOrErr.takeError();
+
+  const CompressedBundleHeader &Normalized = *HeaderOrErr;
+  unsigned ThisVersion = Normalized.Version;
+  size_t HeaderSize = getHeaderSize(ThisVersion);
+
+  compression::Format CompressionFormat = Normalized.CompressionFormat;
+
+  size_t TotalFileSize = Normalized.FileSize.value_or(0);
+  size_t UncompressedSize = Normalized.UncompressedFileSize;
+  auto StoredHash = Normalized.Hash;
+
+  Timer DecompressTimer("Decompression Timer", "Decompression time",
+                        OffloadBundlerTimerGroup);
+  if (VerboseStream)
     DecompressTimer.startTimer();
 
   SmallVector<uint8_t, 0> DecompressedData;
-  StringRef CompressedData = Blob.substr(CurrentOffset);
-  if (llvm::Error DecompressionError = llvm::compression::decompress(
-          CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
+  StringRef CompressedData =
+      Blob.substr(HeaderSize, TotalFileSize - HeaderSize);
+
+  if (Error DecompressionError = compression::decompress(
+          CompressionFormat, arrayRefFromStringRef(CompressedData),
           DecompressedData, UncompressedSize))
-    return createStringError(inconvertibleErrorCode(),
-                             "Could not decompress embedded file contents: " +
-                                 llvm::toString(std::move(DecompressionError)));
+    return createStringError("could not decompress embedded file contents: " +
+                             toString(std::move(DecompressionError)));
 
-  if (Verbose) {
+  if (VerboseStream) {
     DecompressTimer.stopTimer();
 
     double DecompressionTimeSeconds =
         DecompressTimer.getTotalTime().getWallTime();
 
     // Recalculate MD5 hash for integrity check.
-    llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
-                                "Hash recalculation time",
-                                OffloadBundlerTimerGroup);
+    Timer HashRecalcTimer("Hash Recalculation Timer", "Hash recalculation time",
+                          OffloadBundlerTimerGroup);
     HashRecalcTimer.startTimer();
-    llvm::MD5 Hash;
-    llvm::MD5::MD5Result Result;
-    Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData));
+    MD5 Hash;
+    MD5::MD5Result Result;
+    Hash.update(ArrayRef<uint8_t>(DecompressedData));
     Hash.final(Result);
     uint64_t RecalculatedHash = Result.low();
     HashRecalcTimer.stopTimer();
@@ -351,118 +573,28 @@ CompressedOffloadBundle::decompress(llvm::MemoryBufferRef &Input,
     double DecompressionSpeedMBs =
         (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
 
-    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
+    *VerboseStream << "Compressed bundle format version: " << ThisVersion
+                   << "\n";
     if (ThisVersion >= 2)
-      llvm::errs() << "Total file size (from header): "
-                   << formatWithCommas(TotalFileSize) << " bytes\n";
-    llvm::errs() << "Decompression method: "
-                 << (CompressionFormat == llvm::compression::Format::Zlib
-                         ? "zlib"
-                         : "zstd")
-                 << "\n"
-                 << "Size before decompression: "
-                 << formatWithCommas(CompressedData.size()) << " bytes\n"
-                 << "Size after decompression: "
-                 << formatWithCommas(UncompressedSize) << " bytes\n"
-                 << "Compression rate: "
-                 << llvm::format("%.2lf", CompressionRate) << "\n"
-                 << "Compression ratio: "
-                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
-                 << "Decompression speed: "
-                 << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
-                 << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
-                 << "Recalculated hash: "
-                 << llvm::format_hex(RecalculatedHash, 16) << "\n"
-                 << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
+      *VerboseStream << "Total file size (from header): "
+                     << formatWithCommas(TotalFileSize) << " bytes\n";
+    *VerboseStream
+        << "Decompression method: "
+        << (CompressionFormat == compression::Format::Zlib ? "zlib" : "zstd")
+        << "\n"
+        << "Size before decompression: "
+        << formatWithCommas(CompressedData.size()) << " bytes\n"
+        << "Size after decompression: " << formatWithCommas(UncompressedSize)
+        << " bytes\n"
+        << "Compression rate: " << format("%.2lf", CompressionRate) << "\n"
+        << "Compression ratio: " << format("%.2lf%%", 100.0 / CompressionRate)
+        << "\n"
+        << "Decompression speed: "
+        << format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
+        << "Stored hash: " << format_hex(StoredHash, 16) << "\n"
+        << "Recalculated hash: " << format_hex(RecalculatedHash, 16) << "\n"
+        << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
   }
 
-  return llvm::MemoryBuffer::getMemBufferCopy(
-      llvm::toStringRef(DecompressedData));
-}
-
-llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::compress(llvm::compression::Params P,
-                                  const llvm::MemoryBuffer &Input,
-                                  bool Verbose) {
-  if (!llvm::compression::zstd::isAvailable() &&
-      !llvm::compression::zlib::isAvailable())
-    return createStringError(llvm::inconvertibleErrorCode(),
-                             "Compression not supported");
-
-  llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
-                        OffloadBundlerTimerGroup);
-  if (Verbose)
-    HashTimer.startTimer();
-  llvm::MD5 Hash;
-  llvm::MD5::MD5Result Result;
-  Hash.update(Input.getBuffer());
-  Hash.final(Result);
-  uint64_t TruncatedHash = Result.low();
-  if (Verbose)
-    HashTimer.stopTimer();
-
-  SmallVector<uint8_t, 0> CompressedBuffer;
-  auto BufferUint8 = llvm::ArrayRef<uint8_t>(
-      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
-      Input.getBuffer().size());
-
-  llvm::Timer CompressTimer("Compression Timer", "Compression time",
-                            OffloadBundlerTimerGroup);
-  if (Verbose)
-    CompressTimer.startTimer();
-  llvm::compression::compress(P, BufferUint8, CompressedBuffer);
-  if (Verbose)
-    CompressTimer.stopTimer();
-
-  uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
-  uint32_t UncompressedSize = Input.getBuffer().size();
-  uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
-                           sizeof(Version) + sizeof(CompressionMethod) +
-                           sizeof(UncompressedSize) + sizeof(TruncatedHash) +
-                           CompressedBuffer.size();
-
-  SmallVector<char, 0> FinalBuffer;
-  llvm::raw_svector_ostream OS(FinalBuffer);
-  OS << MagicNumber;
-  OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
-  OS.write(reinterpret_cast<const char *>(&CompressionMethod),
-           sizeof(CompressionMethod));
-  OS.write(reinterpret_cast<const char *>(&TotalFileSize),
-           sizeof(TotalFileSize));
-  OS.write(reinterpret_cast<const char *>(&UncompressedSize),
-           sizeof(UncompressedSize));
-  OS.write(reinterpret_cast<const char *>(&TruncatedHash),
-           sizeof(TruncatedHash));
-  OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
-           CompressedBuffer.size());
-
-  if (Verbose) {
-    auto MethodUsed =
-        P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
-    double CompressionRate =
-        static_cast<double>(UncompressedSize) / CompressedBuffer.size();
-    double CompressionTimeSeconds = CompressTimer.getTotalTime().getWallTime();
-    double CompressionSpeedMBs =
-        (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
-
-    llvm::errs() << "Compressed bundle format version: " << Version << "\n"
-                 << "Total file size (including headers): "
-                 << formatWithCommas(TotalFileSize) << " bytes\n"
-                 << "Compression method used: " << MethodUsed << "\n"
-                 << "Compression level: " << P.level << "\n"
-                 << "Binary size before compression: "
-                 << formatWithCommas(UncompressedSize) << " bytes\n"
-                 << "Binary size after compression: "
-                 << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
-                 << "Compression rate: "
-                 << llvm::format("%.2lf", CompressionRate) << "\n"
-                 << "Compression ratio: "
-                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
-                 << "Compression speed: "
-                 << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
-                 << "Truncated MD5 hash: "
-                 << llvm::format_hex(TruncatedHash, 16) << "\n";
-  }
-  return llvm::MemoryBuffer::getMemBufferCopy(
-      llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
+  return MemoryBuffer::getMemBufferCopy(toStringRef(DecompressedData));
 }

diff  --git a/llvm/tools/llvm-objdump/OffloadDump.cpp b/llvm/tools/llvm-objdump/OffloadDump.cpp
index 8a0deb35ba151..a77537dd90eeb 100644
--- a/llvm/tools/llvm-objdump/OffloadDump.cpp
+++ b/llvm/tools/llvm-objdump/OffloadDump.cpp
@@ -87,21 +87,30 @@ void llvm::dumpOffloadBundleFatBinary(const ObjectFile &O, StringRef ArchName) {
   if (Error Err = llvm::object::extractOffloadBundleFatBinary(O, FoundBundles))
     reportError(O.getFileName(), "while extracting offload FatBin bundles: " +
                                      toString(std::move(Err)));
-
   for (const auto &[BundleNum, Bundle] : llvm::enumerate(FoundBundles)) {
     for (OffloadBundleEntry &Entry : Bundle.getEntries()) {
-      if (!ArchName.empty() && !Entry.ID.contains(ArchName))
+      if (!ArchName.empty() && Entry.ID.find(ArchName) != std::string::npos)
         continue;
 
       // create file name for this object file:  <source-filename>.<Bundle
       // Number>.<EntryID>
-      std::string str = Bundle.getFileName().str() + "." + itostr(BundleNum) +
-                        "." + Entry.ID.str();
-      if (Error Err = object::extractCodeObject(O, Entry.Offset, Entry.Size,
-                                                StringRef(str)))
-        reportError(O.getFileName(),
-                    "while extracting offload Bundle Entries: " +
-                        toString(std::move(Err)));
+      std::string str =
+          Bundle.getFileName().str() + "." + itostr(BundleNum) + "." + Entry.ID;
+
+      if (Bundle.isDecompressed()) {
+        if (Error Err = object::extractCodeObject(
+                Bundle.DecompressedBuffer->getMemBufferRef(), Entry.Offset,
+                Entry.Size, StringRef(str)))
+          reportError(O.getFileName(),
+                      "while extracting offload Bundle Entries: " +
+                          toString(std::move(Err)));
+      } else {
+        if (Error Err = object::extractCodeObject(O, Entry.Offset, Entry.Size,
+                                                  StringRef(str)))
+          reportError(O.getFileName(),
+                      "while extracting offload Bundle Entries: " +
+                          toString(std::move(Err)));
+      }
       outs() << "Extracting offload bundle: " << str << "\n";
     }
   }


        


More information about the llvm-commits mailing list