[clang] 78dca4a - [ClangOffloadBundler] Add file size to header (#88827)

via cfe-commits cfe-commits at lists.llvm.org
Fri Apr 19 07:40:20 PDT 2024


Author: Yaxun (Sam) Liu
Date: 2024-04-19T10:40:16-04:00
New Revision: 78dca4af5a9fd77972f80e9f1ff33a00b95f669e

URL: https://github.com/llvm/llvm-project/commit/78dca4af5a9fd77972f80e9f1ff33a00b95f669e
DIFF: https://github.com/llvm/llvm-project/commit/78dca4af5a9fd77972f80e9f1ff33a00b95f669e.diff

LOG: [ClangOffloadBundler] Add file size to header (#88827)

__hipRegisterFatBinary only accepts one pointer argument. It is expected
to get the fat binary size from the header.

This patch adds a file size field to the header of the compressed
bundle.

Added: 
    

Modified: 
    clang/docs/ClangOffloadBundler.rst
    clang/include/clang/Driver/OffloadBundler.h
    clang/lib/Driver/OffloadBundler.cpp
    clang/test/Driver/clang-offload-bundler-zstd.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
index 1f8c85a08f8c79..515e6c00a3b800 100644
--- a/clang/docs/ClangOffloadBundler.rst
+++ b/clang/docs/ClangOffloadBundler.rst
@@ -518,11 +518,14 @@ The compressed offload bundle begins with a header followed by the compressed bi
     This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
 
 - **Version Number (16-bit unsigned int)**:
-    This denotes the version of the compressed offload bundle format. The current version is `1`.
+    This denotes the version of the compressed offload bundle format. The current version is `2`.
 
 - **Compression Method (16-bit unsigned int)**:
     This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
 
+- **Total File Size (32-bit unsigned int)**:
+    This is the total size (in bytes) of the file, including the header. Available in version 2 and above.
+
 - **Uncompressed Binary Size (32-bit unsigned int)**:
     This is the size (in bytes) of the binary data before it was compressed.
 

diff  --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index 65d33bfbd2825f..57ecbdcb7d040e 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -100,6 +100,7 @@ struct OffloadTargetInfo {
 // - Version (2 bytes)
 // - Compression Method (2 bytes) - Uses the values from
 // llvm::compression::Format.
+// - Total file size (4 bytes). Available in version 2 and above.
 // - Uncompressed Size (4 bytes).
 // - Truncated MD5 Hash (8 bytes).
 // - Compressed Data (variable length).
@@ -109,13 +110,17 @@ class CompressedOffloadBundle {
   static inline const size_t MagicSize = 4;
   static inline const size_t VersionFieldSize = sizeof(uint16_t);
   static inline const size_t MethodFieldSize = sizeof(uint16_t);
-  static inline const size_t SizeFieldSize = sizeof(uint32_t);
-  static inline const size_t HashFieldSize = 8;
-  static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
-                                          MethodFieldSize + SizeFieldSize +
-                                          HashFieldSize;
+  static inline const size_t FileSizeFieldSize = sizeof(uint32_t);
+  static inline const size_t UncompressedSizeFieldSize = sizeof(uint32_t);
+  static inline const size_t HashFieldSize = sizeof(uint64_t);
+  static inline const size_t V1HeaderSize =
+      MagicSize + VersionFieldSize + MethodFieldSize +
+      UncompressedSizeFieldSize + HashFieldSize;
+  static inline const size_t V2HeaderSize =
+      MagicSize + VersionFieldSize + FileSizeFieldSize + MethodFieldSize +
+      UncompressedSizeFieldSize + HashFieldSize;
   static inline const llvm::StringRef MagicNumber = "CCOB";
-  static inline const uint16_t Version = 1;
+  static inline const uint16_t Version = 2;
 
 public:
   static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>

diff  --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index 77c89356bc76bb..8cc82a0ee71687 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -1010,6 +1010,10 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
 
   uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
   uint32_t UncompressedSize = Input.getBuffer().size();
+  uint32_t TotalFileSize = MagicNumber.size() + sizeof(TotalFileSize) +
+                           sizeof(Version) + sizeof(CompressionMethod) +
+                           sizeof(UncompressedSize) + sizeof(TruncatedHash) +
+                           CompressedBuffer.size();
 
   SmallVector<char, 0> FinalBuffer;
   llvm::raw_svector_ostream OS(FinalBuffer);
@@ -1017,6 +1021,8 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
   OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
   OS.write(reinterpret_cast<const char *>(&CompressionMethod),
            sizeof(CompressionMethod));
+  OS.write(reinterpret_cast<const char *>(&TotalFileSize),
+           sizeof(TotalFileSize));
   OS.write(reinterpret_cast<const char *>(&UncompressedSize),
            sizeof(UncompressedSize));
   OS.write(reinterpret_cast<const char *>(&TruncatedHash),
@@ -1034,6 +1040,8 @@ CompressedOffloadBundle::compress(llvm::compression::Params P,
         (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
 
     llvm::errs() << "Compressed bundle format version: " << Version << "\n"
+                 << "Total file size (including headers): "
+                 << formatWithCommas(TotalFileSize) << " bytes\n"
                  << "Compression method used: " << MethodUsed << "\n"
                  << "Compression level: " << P.level << "\n"
                  << "Binary size before compression: "
@@ -1059,9 +1067,9 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
 
   StringRef Blob = Input.getBuffer();
 
-  if (Blob.size() < HeaderSize) {
+  if (Blob.size() < V1HeaderSize)
     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
-  }
+
   if (llvm::identify_magic(Blob) !=
       llvm::file_magic::offload_bundle_compressed) {
     if (Verbose)
@@ -1069,21 +1077,32 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     return llvm::MemoryBuffer::getMemBufferCopy(Blob);
   }
 
+  size_t CurrentOffset = MagicSize;
+
   uint16_t ThisVersion;
+  memcpy(&ThisVersion, Blob.data() + CurrentOffset, sizeof(uint16_t));
+  CurrentOffset += VersionFieldSize;
+
   uint16_t CompressionMethod;
+  memcpy(&CompressionMethod, Blob.data() + CurrentOffset, sizeof(uint16_t));
+  CurrentOffset += MethodFieldSize;
+
+  uint32_t TotalFileSize;
+  if (ThisVersion >= 2) {
+    if (Blob.size() < V2HeaderSize)
+      return createStringError(inconvertibleErrorCode(),
+                               "Compressed bundle header size too small");
+    memcpy(&TotalFileSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
+    CurrentOffset += FileSizeFieldSize;
+  }
+
   uint32_t UncompressedSize;
+  memcpy(&UncompressedSize, Blob.data() + CurrentOffset, sizeof(uint32_t));
+  CurrentOffset += UncompressedSizeFieldSize;
+
   uint64_t StoredHash;
-  memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
-         sizeof(uint16_t));
-  memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
-         sizeof(uint16_t));
-  memcpy(&UncompressedSize,
-         Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
-         sizeof(uint32_t));
-  memcpy(&StoredHash,
-         Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
-             SizeFieldSize,
-         sizeof(uint64_t));
+  memcpy(&StoredHash, Blob.data() + CurrentOffset, sizeof(uint64_t));
+  CurrentOffset += HashFieldSize;
 
   llvm::compression::Format CompressionFormat;
   if (CompressionMethod ==
@@ -1102,7 +1121,7 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     DecompressTimer.startTimer();
 
   SmallVector<uint8_t, 0> DecompressedData;
-  StringRef CompressedData = Blob.substr(HeaderSize);
+  StringRef CompressedData = Blob.substr(CurrentOffset);
   if (llvm::Error DecompressionError = llvm::compression::decompress(
           CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
           DecompressedData, UncompressedSize))
@@ -1135,8 +1154,11 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     double DecompressionSpeedMBs =
         (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
 
-    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
-                 << "Decompression method: "
+    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n";
+    if (ThisVersion >= 2)
+      llvm::errs() << "Total file size (from header): "
+                   << formatWithCommas(TotalFileSize) << " bytes\n";
+    llvm::errs() << "Decompression method: "
                  << (CompressionFormat == llvm::compression::Format::Zlib
                          ? "zlib"
                          : "zstd")

diff  --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c
index 4485e57309bbbc..a424981c69716f 100644
--- a/clang/test/Driver/clang-offload-bundler-zstd.c
+++ b/clang/test/Driver/clang-offload-bundler-zstd.c
@@ -22,19 +22,22 @@
 // Check compression/decompression of offload bundle.
 //
 // RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
-// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
-// RUN:   FileCheck -check-prefix=COMPRESS %s
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose >%t.1.txt 2>&1
 // RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
 // RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
-// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
-// RUN:   FileCheck -check-prefix=DECOMPRESS %s
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose >%t.2.txt 2>&1
+// RUN: cat %t.1.txt %t.2.txt | FileCheck %s
 // RUN: 
diff  %t.tgt1 %t.res.tgt1
 // RUN: 
diff  %t.tgt2 %t.res.tgt2
 //
-// COMPRESS: Compression method used: zstd
-// COMPRESS: Compression level: 3 
-// DECOMPRESS: Decompression method: zstd
-// DECOMPRESS: Hashes match: Yes
+// CHECK: Compressed bundle format version: 2
+// CHECK: Total file size (including headers): [[SIZE:[0-9]*]] bytes
+// CHECK: Compression method used: zstd
+// CHECK: Compression level: 3
+// CHECK: Compressed bundle format version: 2
+// CHECK: Total file size (from header): [[SIZE]] bytes
+// CHECK: Decompression method: zstd
+// CHECK: Hashes match: Yes
 // NOHOST-NOT: host-
 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906


        


More information about the cfe-commits mailing list