[clang] 614d855 - [OffloadBundler] Expose function to parse compressed bundle headers (#130284)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 19 01:10:45 PDT 2025
Author: Juan Manuel Martinez Caamaño
Date: 2025-03-19T09:10:40+01:00
New Revision: 614d8557dcd41aae31bd50e93fed3487f235ef11
URL: https://github.com/llvm/llvm-project/commit/614d8557dcd41aae31bd50e93fed3487f235ef11
DIFF: https://github.com/llvm/llvm-project/commit/614d8557dcd41aae31bd50e93fed3487f235ef11.diff
LOG: [OffloadBundler] Expose function to parse compressed bundle headers (#130284)
In COMGR we hash the header of compressed bundles. For this we take the
first bytes of the buffer (according to the maximum header size) and
hash them.
To have a more stable API, and to be able to pick only the hash field (which is
the only one we are actually interested in) of the header, we propose
a version-independent header representation that is common to all versions.
Added:
Modified:
clang/include/clang/Driver/OffloadBundler.h
clang/lib/Driver/OffloadBundler.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index cbe507c000601..667156a524b79 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -107,50 +107,20 @@ struct OffloadTargetInfo {
// - Compressed Data (variable length).
class CompressedOffloadBundle {
private:
- static inline const size_t MagicSize = 4;
- static inline const size_t VersionFieldSize = sizeof(uint16_t);
- static inline const size_t MethodFieldSize = sizeof(uint16_t);
- // Legacy size fields for V1/V2
- static inline const size_t FileSizeFieldSizeV2 = sizeof(uint32_t);
- static inline const size_t UncompressedSizeFieldSizeV2 = sizeof(uint32_t);
- // New size fields for V3
- static inline const size_t FileSizeFieldSizeV3 = sizeof(uint64_t);
- static inline const size_t UncompressedSizeFieldSizeV3 = sizeof(uint64_t);
- static inline const size_t HashFieldSize = sizeof(uint64_t);
-
- // Keep V1 header size for backward compatibility
- static inline const size_t V1HeaderSize =
- MagicSize + VersionFieldSize + MethodFieldSize +
- UncompressedSizeFieldSizeV2 + HashFieldSize;
-
- // Keep V2 header size for backward compatibility
- static inline const size_t V2HeaderSize =
- MagicSize + VersionFieldSize + FileSizeFieldSizeV2 + MethodFieldSize +
- UncompressedSizeFieldSizeV2 + HashFieldSize;
-
- // Add V3 header size with 64-bit fields
- static inline const size_t V3HeaderSize =
- MagicSize + VersionFieldSize + FileSizeFieldSizeV3 + MethodFieldSize +
- UncompressedSizeFieldSizeV3 + HashFieldSize;
-
static inline const llvm::StringRef MagicNumber = "CCOB";
public:
- static inline const uint16_t DefaultVersion = 2;
+ struct CompressedBundleHeader {
+ unsigned Version;
+ llvm::compression::Format CompressionFormat;
+ std::optional<size_t> FileSize;
+ size_t UncompressedFileSize;
+ uint64_t Hash;
- // Helper method to get header size based on version
- static size_t getHeaderSize(uint16_t Version) {
- switch (Version) {
- case 1:
- return V1HeaderSize;
- case 2:
- return V2HeaderSize;
- case 3:
- return V3HeaderSize;
- default:
- llvm_unreachable("Unsupported version");
- }
- }
+ static llvm::Expected<CompressedBundleHeader> tryParse(llvm::StringRef);
+ };
+
+ static inline const uint16_t DefaultVersion = 2;
static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 6dfb72fb223ce..859e44fb9bdb2 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -29,6 +29,7 @@
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
@@ -1127,13 +1128,116 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
}
+// Use packed structs to avoid padding, such that the structs map the serialized
+// format.
+LLVM_PACKED_START
+union RawCompressedBundleHeader {
+ struct CommonFields {
+ uint32_t Magic;
+ uint16_t Version;
+ uint16_t Method;
+ };
+
+ struct V1Header {
+ CommonFields Common;
+ uint32_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ struct V2Header {
+ CommonFields Common;
+ uint32_t FileSize;
+ uint32_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ struct V3Header {
+ CommonFields Common;
+ uint64_t FileSize;
+ uint64_t UncompressedFileSize;
+ uint64_t Hash;
+ };
+
+ CommonFields Common;
+ V1Header V1;
+ V2Header V2;
+ V3Header V3;
+};
+LLVM_PACKED_END
+
+// Helper method to get header size based on version
+static size_t getHeaderSize(uint16_t Version) {
+ switch (Version) {
+ case 1:
+ return sizeof(RawCompressedBundleHeader::V1Header);
+ case 2:
+ return sizeof(RawCompressedBundleHeader::V2Header);
+ case 3:
+ return sizeof(RawCompressedBundleHeader::V3Header);
+ default:
+ llvm_unreachable("Unsupported version");
+ }
+}
+
+Expected<CompressedOffloadBundle::CompressedBundleHeader>
+CompressedOffloadBundle::CompressedBundleHeader::tryParse(StringRef Blob) {
+ assert(Blob.size() >= sizeof(RawCompressedBundleHeader::CommonFields));
+ assert(llvm::identify_magic(Blob) ==
+ llvm::file_magic::offload_bundle_compressed);
+
+ RawCompressedBundleHeader Header;
+ memcpy(&Header, Blob.data(), std::min(Blob.size(), sizeof(Header)));
+
+ CompressedBundleHeader Normalized;
+ Normalized.Version = Header.Common.Version;
+
+ size_t RequiredSize = getHeaderSize(Normalized.Version);
+ if (Blob.size() < RequiredSize)
+ return createStringError(inconvertibleErrorCode(),
+ "Compressed bundle header size too small");
+
+ switch (Normalized.Version) {
+ case 1:
+ Normalized.UncompressedFileSize = Header.V1.UncompressedFileSize;
+ Normalized.Hash = Header.V1.Hash;
+ break;
+ case 2:
+ Normalized.FileSize = Header.V2.FileSize;
+ Normalized.UncompressedFileSize = Header.V2.UncompressedFileSize;
+ Normalized.Hash = Header.V2.Hash;
+ break;
+ case 3:
+ Normalized.FileSize = Header.V3.FileSize;
+ Normalized.UncompressedFileSize = Header.V3.UncompressedFileSize;
+ Normalized.Hash = Header.V3.Hash;
+ break;
+ default:
+ return createStringError(inconvertibleErrorCode(),
+ "Unknown compressed bundle version");
+ }
+
+ // Determine compression format
+ switch (Header.Common.Method) {
+ case static_cast<uint16_t>(compression::Format::Zlib):
+ case static_cast<uint16_t>(compression::Format::Zstd):
+ Normalized.CompressionFormat =
+ static_cast<compression::Format>(Header.Common.Method);
+ break;
+ default:
+ return createStringError(inconvertibleErrorCode(),
+ "Unknown compressing method");
+ }
+
+ return Normalized;
+}
+
llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
bool Verbose) {
StringRef Blob = Input.getBuffer();
// Check minimum header size (using V1 as it's the smallest)
- if (Blob.size() < V1HeaderSize)
+ if (Blob.size() < sizeof(RawCompressedBundleHeader::CommonFields))
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
if (llvm::identify_magic(Blob) !=
@@ -1143,68 +1247,20 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
return llvm::MemoryBuffer::getMemBufferCopy(Blob);
}
- size_t CurrentOffset = MagicSize;
-
- // Read version
- uint16_t ThisVersion;
- memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
- CurrentOffset += VersionFieldSize;
-
- // Verify header size based on version
- if (ThisVersion >= 2 && ThisVersion <= 3) {
- size_t RequiredSize = (ThisVersion == 2) ? V2HeaderSize : V3HeaderSize;
- if (Blob.size() < RequiredSize)
- return createStringError(inconvertibleErrorCode(),
- "Compressed bundle header size too small");
- }
-
- // Read compression method
- uint16_t CompressionMethod;
- memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
- CurrentOffset += MethodFieldSize;
-
- // Read total file size (version 2+)
- uint64_t TotalFileSize = 0;
- if (ThisVersion >= 2) {
- if (ThisVersion == 2) {
- uint32_t TotalFileSize32;
- memcpy(&TotalFileSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
- TotalFileSize = TotalFileSize32;
- CurrentOffset += FileSizeFieldSizeV2;
- } else { // Version 3
- memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += FileSizeFieldSizeV3;
- }
- }
+ Expected<CompressedBundleHeader> HeaderOrErr =
+ CompressedBundleHeader::tryParse(Blob);
+ if (!HeaderOrErr)
+ return HeaderOrErr.takeError();
- // Read uncompressed size
- uint64_t UncompressedSize = 0;
- if (ThisVersion <= 2) {
- uint32_t UncompressedSize32;
- memcpy(&UncompressedSize32, Blob.data() + CurrentOffset, sizeof(uint32_t));
- UncompressedSize = UncompressedSize32;
- CurrentOffset += UncompressedSizeFieldSizeV2;
- } else { // Version 3
- memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += UncompressedSizeFieldSizeV3;
- }
+ const CompressedBundleHeader &Normalized = *HeaderOrErr;
+ unsigned ThisVersion = Normalized.Version;
+ size_t HeaderSize = getHeaderSize(ThisVersion);
- // Read hash
- uint64_t StoredHash;
- memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
- CurrentOffset += HashFieldSize;
+ llvm::compression::Format CompressionFormat = Normalized.CompressionFormat;
- // Determine compression format
- llvm::compression::Format CompressionFormat;
- if (CompressionMethod ==
- static_cast<uint16_t>(llvm::compression::Format::Zlib))
- CompressionFormat = llvm::compression::Format::Zlib;
- else if (CompressionMethod ==
- static_cast<uint16_t>(llvm::compression::Format::Zstd))
- CompressionFormat = llvm::compression::Format::Zstd;
- else
- return createStringError(inconvertibleErrorCode(),
- "Unknown compressing method");
+ size_t TotalFileSize = Normalized.FileSize.value_or(0);
+ size_t UncompressedSize = Normalized.UncompressedFileSize;
+ auto StoredHash = Normalized.Hash;
llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
*ClangOffloadBundlerTimerGroup);
@@ -1212,7 +1268,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
DecompressTimer.startTimer();
SmallVector<uint8_t, 0> DecompressedData;
- StringRef CompressedData = Blob.substr(CurrentOffset);
+ StringRef CompressedData = Blob.substr(HeaderSize);
if (llvm::Error DecompressionError = llvm::compression::decompress(
CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
DecompressedData, UncompressedSize))
More information about the cfe-commits
mailing list