[clang] [OffloadBundler] Expose function to parse compressed bundle headers (PR #130284)
Juan Manuel Martinez Caamaño via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 7 05:48:07 PST 2025
https://github.com/jmmartinez created https://github.com/llvm/llvm-project/pull/130284
In COMGR we hash the header of compressed bundles. For this we take the first bytes of the buffer (according to the maximum header size) and hash them. This is not future-proof if we ever introduce new versions of the header.
This patch proposes an API to obtain a version-independent header.
>From 27adc9ef08bf2ac98319885fb461975f37a98bc2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= <juamarti at amd.com>
Date: Fri, 7 Mar 2025 12:29:31 +0100
Subject: [PATCH] [OffloadBundler] Expose API to parse compressed bundle
headers
In COMGR we hash the header of compressed bundles. For this we take the
first bytes of the buffer (according to the maximum header size) and
hash them.
To have a more stable API, and to be able to pick only the hash field (which is
the only one we are actually interested in) of the header, we propose
a version-independent header representation that is common to all versions.
---
clang/include/clang/Driver/OffloadBundler.h | 50 ++----
clang/lib/Driver/OffloadBundler.cpp | 178 +++++++++++++-------
2 files changed, 127 insertions(+), 101 deletions(-)
diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index 31c11e25ecd9f..2b5bf710f80b1 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -107,50 +107,20 @@ struct OffloadTargetInfo {
// - Compressed Data (variable length).
class CompressedOffloadBundle {
private:
- static inline const size_t MagicSize = 4;
- static inline const size_t VersionFieldSize = sizeof(uint16_t);
- static inline const size_t MethodFieldSize = sizeof(uint16_t);
- // Legacy size fields for V1/V2
- static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
- static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
- // New size fields for V3
- static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
- static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
- static inline const size_t HashFieldSize = sizeof(uint64_t);
-
- // Keep V1 header size for backward compatibility
- static inline const size_t V1HeaderSize =
- MagicSize + VersionFieldSize + MethodFieldSize +
- UncompressedSizeFieldSizeV2 + HashFieldSize;
-
- // Keep V2 header size for backward compatibility
- static inline const size_t V2HeaderSize =
- MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
- UncompressedSizeFieldSizeV2 + HashFieldSize;
-
- // Add V3 header size with 64-bit fields
- static inline const size_t V3HeaderSize =
- MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
- UncompressedSizeFieldSizeV3 + HashFieldSize;
-
static inline const llvm::StringRef MagicNumber = "CCOB";
public:
- static inline const uint16_t DefaultVersion = 2;
+ struct CompressedBundleHeader { // Version-independent view of a parsed compressed bundle header.
+ unsigned Version; // Header format version read from the bundle (1, 2 or 3).
+ llvm::compression::Format CompressionFormat; // Zlib or Zstd, decoded from the header's method field.
+ std::optional<size_t> FileSize; // Total file size; left unset for version 1 headers, which have no such field.
+ size_t UncompressedFileSize; // Size of the payload once decompressed.
+ uint64_t Hash; // Hash value stored in the header.
- // Helper method to get header size based on version
- static size_t getHeaderSize(uint16_t Version) {
- switch (Version) {
- case 1:
- return V1HeaderSize;
- case 2:
- return V2HeaderSize;
- case 3:
- return V3HeaderSize;
- default:
- llvm_unreachable("Unsupported version");
- }
- }
+ static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef); // Parses the header at the start of the given buffer; returns an error for truncated or unrecognized headers.
+ };
+
+ static inline const uint16_t DefaultVersion = 2;
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 12d763e5c65b6..6a6adc277f032 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -29,6 +29,7 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
@@ -1121,13 +1122,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}
+// Packed so that the structs carry no padding and mirror the serialized header
+// byte for byte; the union overlays every version on the shared CommonFields.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+ struct CommonFields { // Prefix shared by all header versions.
+ uint32_t Magic; // 4-byte magic number.
+ uint16_t Version; // Selects which of V1/V2/V3 describes the remaining fields.
+ uint16_t Method; // Compression method, matched against compression::Format values.
+ };
+
+ struct V1Header { // Version 1: no total file size field.
+ CommonFields Common;
+ uint32_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ struct V2Header { // Version 2: 32-bit size fields.
+ CommonFields Common;
+ uint32_t FileSize;
+ uint32_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ struct V3Header { // Version 3: 64-bit size fields.
+ CommonFields Common;
+ uint64_t FileSize;
+ uint64_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ CommonFields Common; // Valid for every version; read first to find Version.
+ V1Header V1;
+ V2Header V2;
+ V3Header V3;
+};
+LLVM_PACKED_END
+
+// Size in bytes of the serialized header for the given format version.
+// Only versions 1, 2 and 3 exist; any other value is a caller bug and ends in
+// llvm_unreachable, so validate the version before calling this.
+static size_t getHeaderSize(uint16_t Version) {
+  if (Version == 1)
+    return sizeof(RawCompressedBundleHeader::V1Header);
+  if (Version == 2)
+    return sizeof(RawCompressedBundleHeader::V2Header);
+  if (Version == 3)
+    return sizeof(RawCompressedBundleHeader::V3Header);
+  llvm_unreachable("Unsupported version");
+}
+
+/// Parse the header at the start of a compressed offload bundle into its
+/// version-independent form.
+///
+/// \param Blob buffer that begins with the serialized bundle header.
+/// \returns the normalized header, or an error when \p Blob is shorter than
+/// the header it announces, does not start with the compressed-bundle magic,
+/// or uses an unknown format version or compression method.
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+  // This is a public entry point: report malformed input as an error rather
+  // than asserting, so release builds never read an uninitialized header.
+  if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
+    return createStringError(inconvertibleErrorCode(),
+                             "Compressed bundle header size too small");
+  if (llvm::identify_magic(Blob) !=
+      llvm::file_magic::offload_bundle_compressed)
+    return createStringError(inconvertibleErrorCode(),
+                             "Invalid compressed bundle magic");
+
+  // Blob may be shorter than the largest layout in the union; copy only what
+  // is present. The per-version size check below guards every later read.
+  RawCompressedBundleHeader Header;
+  memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+  CompressedBundleHeader Normalized;
+  Normalized.Version = Header.Common.Version;
+
+  // Reject unknown versions before calling getHeaderSize(), which ends in
+  // llvm_unreachable for anything other than 1, 2 or 3.
+  if (Normalized.Version < 1 || Normalized.Version > 3)
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressed bundle version");
+
+  size_t RequiredSize = getHeaderSize(Normalized.Version);
+  if (Blob.size() < RequiredSize)
+    return createStringError(inconvertibleErrorCode(),
+                             "Compressed bundle header size too small");
+
+  switch (Normalized.Version) {
+  case 1:
+    // Version 1 has no total file size field; FileSize stays unset.
+    Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
+    Normalized.Hash = Header.V1.Hash;
+    break;
+  case 2:
+    Normalized.FileSize = Header.V2.FileSize;
+    Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
+    Normalized.Hash = Header.V2.Hash;
+    break;
+  case 3:
+    Normalized.FileSize = Header.V3.FileSize;
+    Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
+    Normalized.Hash = Header.V3.Hash;
+    break;
+  default:
+    // Not reachable after the range check above; kept as a defensive fallback.
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressed bundle version");
+  }
+
+  // Determine compression format
+  switch (Header.Common.Method) {
+  case static_cast<uint16_t>(compression::Format::Zlib):
+  case static_cast<uint16_t>(compression::Format::Zstd):
+    Normalized.CompressionFormat =
+        static_cast<compression::Format>(Header.Common.Method);
+    break;
+  default:
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressing method");
+  }
+
+  return Normalized;
+}
+
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
bool Verbose) {
StringRef Blob = Input.getBuffer();
// Check minimum header size (using V1 as it's the smallest)
- if (Blob.size() < V1HeaderSize)
+ if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
if (llvm::identify_magic(Blob) !=
@@ -1137,68 +1241,20 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}
- size_t CurrentOffset = MagicSize;
-
- // Read version
- uint16_t ThisVersion;
- memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
- CurrentOffset += VersionFieldSize;
-
- // Verify header size based on version
- if (ThisVersion >= 2 && ThisVersion <= 3) {
- size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
- if (Blob.size() < RequiredSize)
- return createStringError(inconvertibleErrorCode(),
- "Compressed bundle header size too small");
- }
-
- // Read compression method
- uint16_t CompressionMethod;
- memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
- CurrentOffset += MethodFieldSize;
-
- // Read total file size (version 2+)
- uint64_t TotalFileSize = 0;
- if (ThisVersion >= 2) {
- if (ThisVersion == 2) {
- uint32_t TotalFileSize32;
- memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
- TotalFileSize = TotalFileSize32;
- CurrentOffset += FileSizeFieldSizeV2;
- } else { // Version 3
- memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += FileSizeFieldSizeV3;
- }
- }
+ Expected<CompressedBundleHeader> HeaderOrErr =
+ CompressedBundleHeader::tryParse(Blob);
+ if (!HeaderOrErr)
+ return HeaderOrErr.takeError();
- // Read uncompressed size
- uint64_t UncompressedSize = 0;
- if (ThisVersion <= 2) {
- uint32_t UncompressedSize32;
- memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
- UncompressedSize = UncompressedSize32;
- CurrentOffset += UncompressedSizeFieldSizeV2;
- } else { // Version 3
- memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += UncompressedSizeFieldSizeV3;
- }
+ const CompressedBundleHeader &Normalized = *HeaderOrErr;
+ unsigned ThisVersion = Normalized.Version;
+ size_t HeaderSize = getHeaderSize(ThisVersion);
- // Read hash
- uint64_t StoredHash;
- memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += HashFieldSize;
+ llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;
- // Determine compression format
- llvm::compression::Format CompressionFormat;
- if (CompressionMethod ==
- static_cast<uint16_t>(llvm::compression::Format::Zlib))
- CompressionFormat = llvm::compression::Format::Zlib;
- else if (CompressionMethod ==
- static_cast<uint16_t>(llvm::compression::Format::Zstd))
- CompressionFormat = llvm::compression::Format::Zstd;
- else
- return createStringError(inconvertibleErrorCode(),
- "Unknown compressing method");
+ size_t TotalFileSize = Normalized.FileSize.value_or(0);
+ size_t UncompressedSize = Normalized.UncompressedFileSize;
+ auto StoredHash = Normalized.Hash;
llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
*ClangOffloadBundlerTimerGroup);
@@ -1206,7 +1262,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
DecompressTimer.startTimer();
SmallVector<uint8_t, 0> DecompressedData;
- StringRef CompressedData = Blob.substr(CurrentOffset);
+ StringRef CompressedData = Blob.substr(HeaderSize);
if (llvm::Error DecompressionError = llvm::compression::decompress(
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))
More information about the cfe-commits
mailing list