[clang] [OffloadBundler] Expose function to parse compressed bundle headers (PR #130284)

via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 7 05:48:40 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Juan Manuel Martinez Caamaño (jmmartinez)

<details>
<summary>Changes</summary>

In COMGR we hash the header of compressed bundles. For this we take the first bytes of the buffer (according to the maximum header size) and hash them. This is not future-proof if we ever introduce new versions of the header.

This patch proposes an API to obtain a version-independent header.

---
Full diff: https://github.com/llvm/llvm-project/pull/130284.diff


2 Files Affected:

- (modified) clang/include/clang/Driver/OffloadBundler.h (+10-40) 
- (modified) clang/lib/Driver/OffloadBundler.cpp (+117-61) 


``````````diff
diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index 31c11e25ecd9f..2b5bf710f80b1 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -107,50 +107,20 @@ struct OffloadTargetInfo {
 // - Compressed Data (variable length).
 class CompressedOffloadBundle {
 private:
-  static inline const size_t MagicSize = 4;
-  static inline const size_t VersionFieldSize = sizeof(uint16_t);
-  static inline const size_t MethodFieldSize = sizeof(uint16_t);
-  // Legacy size fields for V1/V2
-  static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
-  static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
-  // New size fields for V3
-  static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
-  static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
-  static inline const size_t HashFieldSize = sizeof(uint64_t);
-
-  // Keep V1 header size for backward compatibility
-  static inline const size_t V1HeaderSize =
-      MagicSize + VersionFieldSize + MethodFieldSize +
-      UncompressedSizeFieldSizeV2 + HashFieldSize;
-
-  // Keep V2 header size for backward compatibility
-  static inline const size_t V2HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
-      UncompressedSizeFieldSizeV2 + HashFieldSize;
-
-  // Add V3 header size with 64-bit fields
-  static inline const size_t V3HeaderSize =
-      MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
-      UncompressedSizeFieldSizeV3 + HashFieldSize;
-
   static inline const llvm::StringRef MagicNumber = "CCOB";
 
 public:
-  static inline const uint16_t DefaultVersion = 2;
+  struct CompressedBundleHeader {
+    unsigned Version;
+    llvm::compression::Format CompressionFormat;
+    std::optional<size_t> FileSize;
+    size_t UncompressedFileSize;
+    uint64_t Hash;
 
-  // Helper method to get header size based on version
-  static size_t getHeaderSize(uint16_t Version) {
-    switch (Version) {
-    case 1:
-      return V1HeaderSize;
-    case 2:
-      return V2HeaderSize;
-    case 3:
-      return V3HeaderSize;
-    default:
-      llvm_unreachable("Unsupported version");
-    }
-  }
+    static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+  };
+
+  static inline const uint16_t DefaultVersion = 2;
 
   static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
   compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 12d763e5c65b6..6a6adc277f032 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/EndianStream.h"
@@ -1121,13 +1122,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
       llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
 }
 
+// Use packed structs to avoid padding, such that the structs map the serialized
+// format.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+  struct CommonFields {
+    uint32_t Magic;
+    uint16_t Version;
+    uint16_t Method;
+  };
+
+  struct V1Header {
+    CommonFields Common;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V2Header {
+    CommonFields Common;
+    uint32_t FileSize;
+    uint32_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  struct V3Header {
+    CommonFields Common;
+    uint64_t FileSize;
+    uint64_t UncompressedFileSize;
+    uint64_t Hash;
+  };
+
+  CommonFields Common;
+  V1Header V1;
+  V2Header V2;
+  V3Header V3;
+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version
+static size_t getHeaderSize(uint16_t Version) {
+  switch (Version) {
+  case 1:
+    return sizeof(RawCompressedBundleHeader::V1Header);
+  case 2:
+    return sizeof(RawCompressedBundleHeader::V2Header);
+  case 3:
+    return sizeof(RawCompressedBundleHeader::V3Header);
+  default:
+    llvm_unreachable("Unsupported version");
+  }
+}
+
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+  assert(Blob.size() > sizeof(RawCompressedBundleHeader::CommonFields));
+  assert(llvm::identify_magic(Blob) ==
+         llvm::file_magic::offload_bundle_compressed);
+
+  RawCompressedBundleHeader Header;
+  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+  CompressedBundleHeader Normalized;
+  Normalized.Version = Header.Common.Version;
+
+  size_t RequiredSize = getHeaderSize(Normalized.Version);
+  if (Blob.size() < RequiredSize)
+    return createStringError(inconvertibleErrorCode(),
+                             "Compressed bundle header size too small");
+
+  switch (Normalized.Version) {
+  case 1:
+    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
+    Normalized.Hash = Header.V1.Hash;
+    break;
+  case 2:
+    Normalized.FileSize = Header.V2.FileSize;
+    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
+    Normalized.Hash = Header.V2.Hash;
+    break;
+  case 3:
+    Normalized.FileSize = Header.V3.FileSize;
+    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
+    Normalized.Hash = Header.V3.Hash;
+    break;
+  default:
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressed bundle version");
+  }
+
+  // Determine compression format
+  switch (Header.Common.Method) {
+  case static_cast<uint16_t>(compression::Format::Zlib):
+  case static_cast<uint16_t>(compression::Format::Zstd):
+    Normalized.CompressionFormat =
+        static_cast<compression::Format>(Header.Common.Method);
+    break;
+  default:
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressing method");
+  }
+
+  return Normalized;
+}
+
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
 CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
                                     bool Verbose) {
   StringRef Blob = Input.getBuffer();
 
   // Check minimum header size (using V1 as it's the smallest)
-  if (Blob.size() < V1HeaderSize)
+  if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
 
   if (llvm::identify_magic(Blob) !=
@@ -1137,68 +1241,20 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
   }
 
-  size_t CurrentOffset = MagicSize;
-
-  // Read version
-  uint16_t ThisVersion;
-  memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += VersionFieldSize;
-
-  // Verify header size based on version
-  if (ThisVersion >= 2 && ThisVersion <= 3) {
-    size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
-    if (Blob.size() < RequiredSize)
-      return createStringError(inconvertibleErrorCode(),
-                               "Compressed bundle header size too small");
-  }
-
-  // Read compression method
-  uint16_t CompressionMethod;
-  memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
-  CurrentOffset += MethodFieldSize;
-
-  // Read total file size (version 2+)
-  uint64_t TotalFileSize = 0;
-  if (ThisVersion >= 2) {
-    if (ThisVersion == 2) {
-      uint32_t TotalFileSize32;
-      memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
-      TotalFileSize = TotalFileSize32;
-      CurrentOffset += FileSizeFieldSizeV2;
-    } else { // Version 3
-      memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
-      CurrentOffset += FileSizeFieldSizeV3;
-    }
-  }
+  Expected<CompressedBundleHeader> HeaderOrErr =
+      CompressedBundleHeader::tryParse(Blob);
+  if (!HeaderOrErr)
+    return HeaderOrErr.takeError();
 
-  // Read uncompressed size
-  uint64_t UncompressedSize = 0;
-  if (ThisVersion <= 2) {
-    uint32_t UncompressedSize32;
-    memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
-    UncompressedSize = UncompressedSize32;
-    CurrentOffset += UncompressedSizeFieldSizeV2;
-  } else { // Version 3
-    memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
-    CurrentOffset += UncompressedSizeFieldSizeV3;
-  }
+  const CompressedBundleHeader &Normalized = *HeaderOrErr;
+  unsigned ThisVersion = Normalized.Version;
+  size_t HeaderSize = getHeaderSize(ThisVersion);
 
-  // Read hash
-  uint64_t StoredHash;
-  memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
-  CurrentOffset += HashFieldSize;
+  llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;
 
-  // Determine compression format
-  llvm::compression::Format CompressionFormat;
-  if (CompressionMethod ==
-      static_cast<uint16_t>(llvm::compression::Format::Zlib))
-    CompressionFormat = llvm::compression::Format::Zlib;
-  else if (CompressionMethod ==
-           static_cast<uint16_t>(llvm::compression::Format::Zstd))
-    CompressionFormat = llvm::compression::Format::Zstd;
-  else
-    return createStringError(inconvertibleErrorCode(),
-                             "Unknown compressing method");
+  size_t TotalFileSize = Normalized.FileSize.value_or(0);
+  size_t UncompressedSize = Normalized.UncompressedFileSize;
+  auto StoredHash = Normalized.Hash;
 
   llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
                               *ClangOffloadBundlerTimerGroup);
@@ -1206,7 +1262,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     DecompressTimer.startTimer();
 
   SmallVector<uint8_t, 0> DecompressedData;
-  StringRef CompressedData = Blob.substr(CurrentOffset);
+  StringRef CompressedData = Blob.substr(HeaderSize);
   if (llvm::Error DecompressionError = llvm::compression::decompress(
           CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
           DecompressedData, UncompressedSize))

``````````

</details>


https://github.com/llvm/llvm-project/pull/130284


More information about the cfe-commits mailing list