[clang] 7e28234 - Reland "[HIP] Support compressing device binary"

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 5 08:21:10 PDT 2023


Author: Yaxun (Sam) Liu
Date: 2023-10-05T11:20:46-04:00
New Revision: 7e2823438e920d25364ff92b62ad90020c31bb59

URL: https://github.com/llvm/llvm-project/commit/7e2823438e920d25364ff92b62ad90020c31bb59
DIFF: https://github.com/llvm/llvm-project/commit/7e2823438e920d25364ff92b62ad90020c31bb59.diff

LOG: Reland "[HIP] Support compressing device binary"

Original PR: https://github.com/llvm/llvm-project/pull/67162

The commit was reverted due to UB detected by santizer:

https://lab.llvm.org/buildbot/#/builders/238/builds/5955

clang/lib/Driver/OffloadBundler.cpp:1012:25: runtime error:
 load of misaligned address 0xaaaae2d90e7c for type
 'const uint64_t' (aka 'const unsigned long'), which
 requires 8 byte alignment

It was fixed by using memcpy instead of dereferencing int*
casted from unaligned char*.

Added: 
    clang/test/Driver/clang-offload-bundler-zlib.c
    clang/test/Driver/clang-offload-bundler-zstd.c
    clang/test/Driver/hip-offload-compress-zlib.hip
    clang/test/Driver/hip-offload-compress-zstd.hip

Modified: 
    clang/docs/ClangOffloadBundler.rst
    clang/include/clang/Driver/OffloadBundler.h
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/OffloadBundler.cpp
    clang/lib/Driver/ToolChains/Clang.cpp
    clang/lib/Driver/ToolChains/HIPUtility.cpp
    clang/tools/clang-offload-bundler/CMakeLists.txt
    clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
    llvm/include/llvm/BinaryFormat/Magic.h
    llvm/lib/BinaryFormat/Magic.cpp
    llvm/lib/Object/Binary.cpp
    llvm/lib/Object/ObjectFile.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
index d08bf4b97781fa4..1e21d3e7264d5c3 100644
--- a/clang/docs/ClangOffloadBundler.rst
+++ b/clang/docs/ClangOffloadBundler.rst
@@ -309,3 +309,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
   * Their offload kind are the same
   * Their target triple are the same
   * Their GPUArch are the same
+
+Compression and Decompression
+=============================
+
+``clang-offload-bundler`` provides features to compress and decompress the full
+bundle, leveraging inherent redundancies within the bundle entries. Use the
+`-compress` command-line option to enable this compression capability.
+
+The compressed offload bundle begins with a header followed by the compressed binary data:
+
+- **Magic Number (4 bytes)**:
+    This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
+
+- **Version Number (16-bit unsigned int)**:
+    This denotes the version of the compressed offload bundle format. The current version is `1`.
+
+- **Compression Method (16-bit unsigned int)**:
+    This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
+
+- **Uncompressed Binary Size (32-bit unsigned int)**:
+    This is the size (in bytes) of the binary data before it was compressed.
+
+- **Hash (64-bit unsigned int)**:
+    This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes.
+
+- **Compressed Data**:
+    The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.

diff  --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index 28473c53662de2c..17df31d31071d99 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -19,6 +19,7 @@
 
 #include "llvm/Support/Error.h"
 #include "llvm/TargetParser/Triple.h"
+#include <llvm/Support/MemoryBuffer.h>
 #include <string>
 #include <vector>
 
@@ -26,11 +27,15 @@ namespace clang {
 
 class OffloadBundlerConfig {
 public:
+  OffloadBundlerConfig();
+
   bool AllowNoHost = false;
   bool AllowMissingBundles = false;
   bool CheckInputArchive = false;
   bool PrintExternalCommands = false;
   bool HipOpenmpCompatible = false;
+  bool Compress = false;
+  bool Verbose = false;
 
   unsigned BundleAlignment = 1;
   unsigned HostInputIndex = ~0u;
@@ -84,6 +89,38 @@ struct OffloadTargetInfo {
   std::string str() const;
 };
 
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Uncompressed Size (4 bytes).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
+
+class CompressedOffloadBundle {
+private:
+  static inline const size_t MagicSize = 4;
+  static inline const size_t VersionFieldSize = sizeof(uint16_t);
+  static inline const size_t MethodFieldSize = sizeof(uint16_t);
+  static inline const size_t SizeFieldSize = sizeof(uint32_t);
+  static inline const size_t HashFieldSize = 8;
+  static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
+                                          MethodFieldSize + SizeFieldSize +
+                                          HashFieldSize;
+  static inline const llvm::StringRef MagicNumber = "CCOB";
+  static inline const uint16_t Version = 1;
+
+public:
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e7d8fa1c587d4e0..381126be176ac4f 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1183,6 +1183,10 @@ def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
 def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
   HelpText<"Enable sanitizer for supported offloading devices">;
 def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
+
+def offload_compress : Flag<["--"], "offload-compress">,
+  HelpText<"Compress offload device binaries (HIP only)">;
+def no_offload_compress : Flag<["--"], "no-offload-compress">;
 }
 
 // CUDA options

diff  --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index d11c41605bf39ee..0ad9de299603ba0 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -21,24 +21,29 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Host.h"
@@ -48,6 +53,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <forward_list>
+#include <llvm/Support/Process.h>
 #include <memory>
 #include <set>
 #include <string>
@@ -58,6 +64,10 @@ using namespace llvm;
 using namespace llvm::object;
 using namespace clang;
 
+static llvm::TimerGroup
+    ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
+                                  "Timer group for clang offload bundler");
+
 /// Magic string that marks the existence of offloading data.
 #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
 
@@ -224,20 +234,22 @@ class FileHandler {
 
   /// Write the header of the bundled file to \a OS based on the information
   /// gathered from \a Inputs.
-  virtual Error WriteHeader(raw_fd_ostream &OS,
+  virtual Error WriteHeader(raw_ostream &OS,
                             ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
 
   /// Write the marker that initiates a bundle for the triple \a TargetTriple to
   /// \a OS.
-  virtual Error WriteBundleStart(raw_fd_ostream &OS,
-                                 StringRef TargetTriple) = 0;
+  virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
 
   /// Write the marker that closes a bundle for the triple \a TargetTriple to \a
   /// OS.
-  virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
+  virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
 
   /// Write the bundle from \a Input into \a OS.
-  virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
+  virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
+
+  /// Finalize output file.
+  virtual Error finalizeOutputFile() { return Error::success(); }
 
   /// List bundle IDs in \a Input.
   virtual Error listBundleIDs(MemoryBuffer &Input) {
@@ -311,7 +323,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
 }
 
 /// Write 8-byte integers to a buffer in little-endian format.
-static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
+static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
   llvm::support::endian::write(OS, Val, llvm::support::little);
 }
 
@@ -359,8 +371,7 @@ class BinaryFileHandler final : public FileHandler {
       return Error::success();
 
     // Check if no magic was found.
-    StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
-    if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
+    if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
       return Error::success();
 
     // Read number of bundles.
@@ -435,7 +446,7 @@ class BinaryFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteHeader(raw_fd_ostream &OS,
+  Error WriteHeader(raw_ostream &OS,
                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
 
     // Compute size of the header.
@@ -472,19 +483,27 @@ class BinaryFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
     CurWriteBundleTarget = TargetTriple.str();
     return Error::success();
   }
 
-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
     return Error::success();
   }
 
-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
     auto BI = BundlesInfo[CurWriteBundleTarget];
-    OS.seek(BI.Offset);
+
+    // Pad with 0 to reach specified offset.
+    size_t CurrentPos = OS.tell();
+    size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
+    for (size_t I = 0; I < PaddingSize; ++I)
+      OS.write('\0');
+    assert(OS.tell() == BI.Offset);
+
     OS.write(Input.getBufferStart(), Input.getBufferSize());
+
     return Error::success();
   }
 };
@@ -541,7 +560,7 @@ class ObjectFileHandler final : public FileHandler {
       return NameOrErr.takeError();
 
     // If it does not start with the reserved suffix, just skip this section.
-    if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR))
+    if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
       return std::nullopt;
 
     // Return the triple that is right after the reserved prefix.
@@ -607,7 +626,7 @@ class ObjectFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteHeader(raw_fd_ostream &OS,
+  Error WriteHeader(raw_ostream &OS,
                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
     assert(BundlerConfig.HostInputIndex != ~0u &&
            "Host input index not defined.");
@@ -617,12 +636,16 @@ class ObjectFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
     ++NumberOfProcessedInputs;
     return Error::success();
   }
 
-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
+    return Error::success();
+  }
+
+  Error finalizeOutputFile() final {
     assert(NumberOfProcessedInputs <= NumberOfInputs &&
            "Processing more inputs that actually exist!");
     assert(BundlerConfig.HostInputIndex != ~0u &&
@@ -640,10 +663,6 @@ class ObjectFileHandler final : public FileHandler {
     assert(BundlerConfig.ObjcopyPath != "" &&
            "llvm-objcopy path not specified");
 
-    // We write to the output file directly. So, we close it and use the name
-    // to pass down to llvm-objcopy.
-    OS.close();
-
     // Temporary files that need to be removed.
     TempFileHandlerRAII TempFiles;
 
@@ -684,7 +703,7 @@ class ObjectFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
     return Error::success();
   }
 
@@ -781,22 +800,22 @@ class TextFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteHeader(raw_fd_ostream &OS,
+  Error WriteHeader(raw_ostream &OS,
                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
     return Error::success();
   }
 
-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
     OS << BundleStartString << TargetTriple << "\n";
     return Error::success();
   }
 
-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
     OS << BundleEndString << TargetTriple << "\n";
     return Error::success();
   }
 
-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
     OS << Input.getBuffer();
     return Error::success();
   }
@@ -881,6 +900,187 @@ CreateFileHandler(MemoryBuffer &FirstInput,
                            "'" + FilesType + "': invalid file type specified");
 }
 
+OffloadBundlerConfig::OffloadBundlerConfig() {
+  auto IgnoreEnvVarOpt =
+      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
+  if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
+    return;
+
+  auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
+  if (VerboseEnvVarOpt.has_value())
+    Verbose = VerboseEnvVarOpt.value() == "1";
+
+  auto CompressEnvVarOpt =
+      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
+  if (CompressEnvVarOpt.has_value())
+    Compress = CompressEnvVarOpt.value() == "1";
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
+                                  bool Verbose) {
+  llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
+                        ClangOffloadBundlerTimerGroup);
+  if (Verbose)
+    HashTimer.startTimer();
+  llvm::MD5 Hash;
+  llvm::MD5::MD5Result Result;
+  Hash.update(Input.getBuffer());
+  Hash.final(Result);
+  uint64_t TruncatedHash = Result.low();
+  if (Verbose)
+    HashTimer.stopTimer();
+
+  SmallVector<uint8_t, 0> CompressedBuffer;
+  auto BufferUint8 = llvm::ArrayRef<uint8_t>(
+      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
+      Input.getBuffer().size());
+
+  llvm::compression::Format CompressionFormat;
+
+  if (llvm::compression::zstd::isAvailable())
+    CompressionFormat = llvm::compression::Format::Zstd;
+  else if (llvm::compression::zlib::isAvailable())
+    CompressionFormat = llvm::compression::Format::Zlib;
+  else
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Compression not supported");
+
+  llvm::Timer CompressTimer("Compression Timer", "Compression time",
+                            ClangOffloadBundlerTimerGroup);
+  if (Verbose)
+    CompressTimer.startTimer();
+  llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
+  if (Verbose)
+    CompressTimer.stopTimer();
+
+  uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
+  uint32_t UncompressedSize = Input.getBuffer().size();
+
+  SmallVector<char, 0> FinalBuffer;
+  llvm::raw_svector_ostream OS(FinalBuffer);
+  OS << MagicNumber;
+  OS.write(reinterpret_cast<const char *>(&Version), sizeof(Version));
+  OS.write(reinterpret_cast<const char *>(&CompressionMethod),
+           sizeof(CompressionMethod));
+  OS.write(reinterpret_cast<const char *>(&UncompressedSize),
+           sizeof(UncompressedSize));
+  OS.write(reinterpret_cast<const char *>(&TruncatedHash),
+           sizeof(TruncatedHash));
+  OS.write(reinterpret_cast<const char *>(CompressedBuffer.data()),
+           CompressedBuffer.size());
+
+  if (Verbose) {
+    auto MethodUsed =
+        CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
+    llvm::errs() << "Compressed bundle format version: " << Version << "\n"
+                 << "Compression method used: " << MethodUsed << "\n"
+                 << "Binary size before compression: " << UncompressedSize
+                 << " bytes\n"
+                 << "Binary size after compression: " << CompressedBuffer.size()
+                 << " bytes\n"
+                 << "Truncated MD5 hash: "
+                 << llvm::format_hex(TruncatedHash, 16) << "\n";
+  }
+
+  return llvm::MemoryBuffer::getMemBufferCopy(
+      llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
+                                    bool Verbose) {
+
+  StringRef Blob = Input.getBuffer();
+
+  if (Blob.size() < HeaderSize) {
+    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+  }
+  if (llvm::identify_magic(Blob) !=
+      llvm::file_magic::offload_bundle_compressed) {
+    if (Verbose)
+      llvm::errs() << "Uncompressed bundle.\n";
+    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+  }
+
+  uint16_t ThisVersion;
+  uint16_t CompressionMethod;
+  uint32_t UncompressedSize;
+  uint64_t StoredHash;
+  memcpy(&ThisVersion, Input.getBuffer().data() + MagicNumber.size(),
+         sizeof(uint16_t));
+  memcpy(&CompressionMethod, Blob.data() + MagicSize + VersionFieldSize,
+         sizeof(uint16_t));
+  memcpy(&UncompressedSize,
+         Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize,
+         sizeof(uint32_t));
+  memcpy(&StoredHash,
+         Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
+             SizeFieldSize,
+         sizeof(uint64_t));
+
+  llvm::compression::Format CompressionFormat;
+  if (CompressionMethod ==
+      static_cast<uint16_t>(llvm::compression::Format::Zlib))
+    CompressionFormat = llvm::compression::Format::Zlib;
+  else if (CompressionMethod ==
+           static_cast<uint16_t>(llvm::compression::Format::Zstd))
+    CompressionFormat = llvm::compression::Format::Zstd;
+  else
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressing method");
+
+  llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
+                              ClangOffloadBundlerTimerGroup);
+  if (Verbose)
+    DecompressTimer.startTimer();
+
+  SmallVector<uint8_t, 0> DecompressedData;
+  StringRef CompressedData = Blob.substr(HeaderSize);
+  if (llvm::Error DecompressionError = llvm::compression::decompress(
+          CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
+          DecompressedData, UncompressedSize))
+    return createStringError(inconvertibleErrorCode(),
+                             "Could not decompress embedded file contents: " +
+                                 llvm::toString(std::move(DecompressionError)));
+
+  if (Verbose) {
+    DecompressTimer.stopTimer();
+
+    // Recalculate MD5 hash
+    llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
+                                "Hash recalculation time",
+                                ClangOffloadBundlerTimerGroup);
+    HashRecalcTimer.startTimer();
+    llvm::MD5 Hash;
+    llvm::MD5::MD5Result Result;
+    Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
+                                        DecompressedData.size()));
+    Hash.final(Result);
+    uint64_t RecalculatedHash = Result.low();
+    HashRecalcTimer.stopTimer();
+    bool HashMatch = (StoredHash == RecalculatedHash);
+
+    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
+                 << "Decompression method: "
+                 << (CompressionFormat == llvm::compression::Format::Zlib
+                         ? "zlib"
+                         : "zstd")
+                 << "\n"
+                 << "Size before decompression: " << CompressedData.size()
+                 << " bytes\n"
+                 << "Size after decompression: " << UncompressedSize
+                 << " bytes\n"
+                 << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
+                 << "Recalculated hash: "
+                 << llvm::format_hex(RecalculatedHash, 16) << "\n"
+                 << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
+  }
+
+  return llvm::MemoryBuffer::getMemBufferCopy(
+      llvm::toStringRef(DecompressedData));
+}
+
 // List bundle IDs. Return true if an error was found.
 Error OffloadBundler::ListBundleIDsInFile(
     StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
@@ -890,28 +1090,35 @@ Error OffloadBundler::ListBundleIDsInFile(
   if (std::error_code EC = CodeOrErr.getError())
     return createFileError(InputFileName, EC);
 
-  MemoryBuffer &Input = **CodeOrErr;
+  // Decompress the input if necessary.
+  Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+      CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
+  if (!DecompressedBufferOrErr)
+    return createStringError(
+        inconvertibleErrorCode(),
+        "Failed to decompress input: " +
+            llvm::toString(DecompressedBufferOrErr.takeError()));
+
+  MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
 
   // Select the right files handler.
   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
-      CreateFileHandler(Input, BundlerConfig);
+      CreateFileHandler(DecompressedInput, BundlerConfig);
   if (!FileHandlerOrErr)
     return FileHandlerOrErr.takeError();
 
   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
   assert(FH);
-  return FH->listBundleIDs(Input);
+  return FH->listBundleIDs(DecompressedInput);
 }
 
 /// Bundle the files. Return true if an error was found.
 Error OffloadBundler::BundleFiles() {
   std::error_code EC;
 
-  // Create output file.
-  raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
-                            sys::fs::OF_None);
-  if (EC)
-    return createFileError(BundlerConfig.OutputFileNames.front(), EC);
+  // Create a buffer to hold the content before compressing.
+  SmallVector<char, 0> Buffer;
+  llvm::raw_svector_ostream BufferStream(Buffer);
 
   // Open input files.
   SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
@@ -938,22 +1145,46 @@ Error OffloadBundler::BundleFiles() {
   assert(FH);
 
   // Write header.
-  if (Error Err = FH->WriteHeader(OutputFile, InputBuffers))
+  if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
     return Err;
 
   // Write all bundles along with the start/end markers. If an error was found
   // writing the end of the bundle component, abort the bundle writing.
   auto Input = InputBuffers.begin();
   for (auto &Triple : BundlerConfig.TargetNames) {
-    if (Error Err = FH->WriteBundleStart(OutputFile, Triple))
+    if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
       return Err;
-    if (Error Err = FH->WriteBundle(OutputFile, **Input))
+    if (Error Err = FH->WriteBundle(BufferStream, **Input))
       return Err;
-    if (Error Err = FH->WriteBundleEnd(OutputFile, Triple))
+    if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
       return Err;
     ++Input;
   }
-  return Error::success();
+
+  raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
+                            sys::fs::OF_None);
+  if (EC)
+    return createFileError(BundlerConfig.OutputFileNames.front(), EC);
+
+  SmallVector<char, 0> CompressedBuffer;
+  if (BundlerConfig.Compress) {
+    std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
+        llvm::MemoryBuffer::getMemBufferCopy(
+            llvm::StringRef(Buffer.data(), Buffer.size()));
+    auto CompressionResult =
+        CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
+    if (auto Error = CompressionResult.takeError())
+      return Error;
+
+    auto CompressedMemBuffer = std::move(CompressionResult.get());
+    CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
+                            CompressedMemBuffer->getBufferEnd());
+  } else
+    CompressedBuffer = Buffer;
+
+  OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
+
+  return FH->finalizeOutputFile();
 }
 
 // Unbundle the files. Return true if an error was found.
@@ -964,7 +1195,16 @@ Error OffloadBundler::UnbundleFiles() {
   if (std::error_code EC = CodeOrErr.getError())
     return createFileError(BundlerConfig.InputFileNames.front(), EC);
 
-  MemoryBuffer &Input = **CodeOrErr;
+  // Decompress the input if necessary.
+  Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+      CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
+  if (!DecompressedBufferOrErr)
+    return createStringError(
+        inconvertibleErrorCode(),
+        "Failed to decompress input: " +
+            llvm::toString(DecompressedBufferOrErr.takeError()));
+
+  MemoryBuffer &Input = **DecompressedBufferOrErr;
 
   // Select the right files handler.
   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
@@ -1169,11 +1409,23 @@ Error OffloadBundler::UnbundleArchive() {
     if (!CodeObjectBufferRefOrErr)
       return CodeObjectBufferRefOrErr.takeError();
 
-    auto CodeObjectBuffer =
+    auto TempCodeObjectBuffer =
         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
 
+    // Decompress the buffer if necessary.
+    Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+        CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
+                                            BundlerConfig.Verbose);
+    if (!DecompressedBufferOrErr)
+      return createStringError(
+          inconvertibleErrorCode(),
+          "Failed to decompress code object: " +
+              llvm::toString(DecompressedBufferOrErr.takeError()));
+
+    MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
+
     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
-        CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
+        CreateFileHandler(CodeObjectBuffer, BundlerConfig);
     if (!FileHandlerOrErr)
       return FileHandlerOrErr.takeError();
 
@@ -1181,11 +1433,11 @@ Error OffloadBundler::UnbundleArchive() {
     assert(FileHandler &&
            "FileHandle creation failed for file in the archive!");
 
-    if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer))
+    if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
       return ReadErr;
 
     Expected<std::optional<StringRef>> CurBundleIDOrErr =
-        FileHandler->ReadBundleStart(*CodeObjectBuffer);
+        FileHandler->ReadBundleStart(CodeObjectBuffer);
     if (!CurBundleIDOrErr)
       return CurBundleIDOrErr.takeError();
 
@@ -1206,7 +1458,7 @@ Error OffloadBundler::UnbundleArchive() {
                                              BundlerConfig)) {
         std::string BundleData;
         raw_string_ostream DataStream(BundleData);
-        if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer))
+        if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
           return Err;
 
         for (auto &CompatibleTarget : CompatibleTargets) {
@@ -1244,11 +1496,11 @@ Error OffloadBundler::UnbundleArchive() {
         }
       }
 
-      if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer))
+      if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
         return Err;
 
       Expected<std::optional<StringRef>> NextTripleOrErr =
-          FileHandler->ReadBundleStart(*CodeObjectBuffer);
+          FileHandler->ReadBundleStart(CodeObjectBuffer);
       if (!NextTripleOrErr)
         return NextTripleOrErr.takeError();
 

diff  --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 4af3ed2dd88ea65..c74f6ff447261dc 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8472,6 +8472,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
     }
     CmdArgs.push_back(TCArgs.MakeArgString(UB));
   }
+  if (TCArgs.hasFlag(options::OPT_offload_compress,
+                     options::OPT_no_offload_compress, false))
+    CmdArgs.push_back("-compress");
+  if (TCArgs.hasArg(options::OPT_v))
+    CmdArgs.push_back("-verbose");
   // All the inputs are encoded as commands.
   C.addCommand(std::make_unique<Command>(
       JA, *this, ResponseFileSupport::None(),
@@ -8555,6 +8560,8 @@ void OffloadBundler::ConstructJobMultipleOutputs(
   }
   CmdArgs.push_back("-unbundle");
   CmdArgs.push_back("-allow-missing-bundles");
+  if (TCArgs.hasArg(options::OPT_v))
+    CmdArgs.push_back("-verbose");
 
   // All the inputs are encoded as commands.
   C.addCommand(std::make_unique<Command>(

diff  --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index 8b9d8db90ffab78..04efdcba20ea740 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
       Args.MakeArgString(std::string("-output=").append(Output));
   BundlerArgs.push_back(BundlerOutputArg);
 
+  if (Args.hasFlag(options::OPT_offload_compress,
+                   options::OPT_no_offload_compress, false))
+    BundlerArgs.push_back("-compress");
+  if (Args.hasArg(options::OPT_v))
+    BundlerArgs.push_back("-verbose");
+
   const char *Bundler = Args.MakeArgString(
       T.getToolChain().GetProgramPath("clang-offload-bundler"));
   C.addCommand(std::make_unique<Command>(

diff  --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c
new file mode 100644
index 000000000000000..c46c32a4a0537b4
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-zlib.c
@@ -0,0 +1,75 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
+
+//
+// Generate the host binary to be bundled.
+//
+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
+
+//
+// Generate an empty file to help with the checks of empty files.
+//
+// RUN: touch %t.empty
+
+//
+// Generate device binaries to be bundled.
+//
+// RUN: echo 'Content of device file 1' > %t.tgt1
+// RUN: echo 'Content of device file 2' > %t.tgt2
+
+//
+// Check compression/decompression of offload bundle.
+//
+// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \
+// RUN:   clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
+// RUN:   FileCheck -check-prefix=COMPRESS %s
+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
+// RUN:   clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
+// RUN:   FileCheck -check-prefix=DECOMPRESS %s
+// RUN: 
diff  %t.tgt1 %t.res.tgt1
+// RUN: 
diff  %t.tgt2 %t.res.tgt2
+
+//
+// COMPRESS: Compression method used:
+// DECOMPRESS: Decompression method:
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+//
+
+//
+// Check -bundle-align option.
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
+// RUN: 
diff  %t.bc %t.res.bc
+// RUN: 
diff  %t.tgt1 %t.res.tgt1
+// RUN: 
diff  %t.tgt2 %t.res.tgt2
+
+//
+// Check unbundling archive.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
+
+// Some code so that we can create a binary out of this file.
+int A = 0;
+void test_func(void) {
+  ++A;
+}

diff  --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c
new file mode 100644
index 000000000000000..b2b588b72d4d6f2
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-zstd.c
@@ -0,0 +1,72 @@
+// REQUIRES: zstd
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
+
+//
+// Generate the host binary to be bundled.
+//
+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
+
+//
+// Generate an empty file to help with the checks of empty files.
+//
+// RUN: touch %t.empty
+
+//
+// Generate device binaries to be bundled.
+//
+// RUN: echo 'Content of device file 1' > %t.tgt1
+// RUN: echo 'Content of device file 2' > %t.tgt2
+
+//
+// Check compression/decompression of offload bundle.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
+// RUN:   FileCheck -check-prefix=COMPRESS %s
+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
+// RUN:   FileCheck -check-prefix=DECOMPRESS %s
+// RUN: 
diff  %t.tgt1 %t.res.tgt1
+// RUN: 
diff  %t.tgt2 %t.res.tgt2
+//
+// COMPRESS: Compression method used
+// DECOMPRESS: Decompression method
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+//
+
+//
+// Check -bundle-align option.
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
+// RUN: 
diff  %t.bc %t.res.bc
+// RUN: 
diff  %t.tgt1 %t.res.tgt1
+// RUN: 
diff  %t.tgt2 %t.res.tgt2
+
+//
+// Check unbundling archive.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
+
+// Some code so that we can create a binary out of this file.
+int A = 0;
+void test_func(void) {
+  ++A;
+}

diff  --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip
new file mode 100644
index 000000000000000..a29b6d037350d47
--- /dev/null
+++ b/clang/test/Driver/hip-offload-compress-zlib.hip
@@ -0,0 +1,45 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Test compress bundled bitcode.
+
+// RUN: rm -rf %T/a.bc
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN:   -o %T/a.bc \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// CHECK-SAME: -compress -verbose
+// CHECK: Compressed bundle format
+
+// Test uncompress of bundled bitcode.
+
+// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpulib \
+// RUN:   %T/a.bc --offload-device-only \
+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
+
+// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc"
+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// UNBUNDLE-SAME: -unbundle
+// UNBUNDLE-SAME: -verbose
+
+// Test compress bundled code objects.
+
+// RUN: %clang -c -### -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress \
+// RUN: 2>&1 | FileCheck -check-prefix=CO %s
+
+// CO: clang-offload-bundler{{.*}} "-type=o"
+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
+// CO-SAME: "-compress" "-verbose"

diff  --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip
new file mode 100644
index 000000000000000..688c2c85329c192
--- /dev/null
+++ b/clang/test/Driver/hip-offload-compress-zstd.hip
@@ -0,0 +1,45 @@
+// REQUIRES: zstd
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Test compress bundled bitcode.
+
+// RUN: rm -rf %T/a.bc
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN:   -o %T/a.bc \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// CHECK-SAME: -compress -verbose
+// CHECK: Compressed bundle format
+
+// Test uncompress of bundled bitcode.
+
+// RUN: %clang --hip-link -### -v --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpulib \
+// RUN:   %T/a.bc --offload-device-only \
+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
+
+// UNBUNDLE: clang-offload-bundler{{.*}} "-type=bc"
+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// UNBUNDLE-SAME: -unbundle
+// UNBUNDLE-SAME: -verbose
+
+// Test compress bundled code objects.
+
+// RUN: %clang -c -### -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress \
+// RUN: 2>&1 | FileCheck -check-prefix=CO %s
+
+// CO: clang-offload-bundler{{.*}} "-type=o"
+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
+// CO-SAME: "-compress" "-verbose"

diff  --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt
index dabd82382cdf08d..dec2881589a538b 100644
--- a/clang/tools/clang-offload-bundler/CMakeLists.txt
+++ b/clang/tools/clang-offload-bundler/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  BinaryFormat
   Object
   Support
   TargetParser

diff  --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
index f20157622d79f30..90c475e541f4c33 100644
--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -136,6 +136,11 @@ int main(int argc, const char **argv) {
     cl::desc("Treat hip and hipv4 offload kinds as "
              "compatible with openmp kind, and vice versa.\n"),
     cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+  cl::opt<bool> Compress("compress",
+                         cl::desc("Compress output file when bundling.\n"),
+                         cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+  cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
+                        cl::init(false), cl::cat(ClangOffloadBundlerCategory));
 
   // Process commandline options and report errors
   sys::PrintStackTraceOnErrorSignal(argv[0]);
@@ -163,6 +168,11 @@ int main(int argc, const char **argv) {
   BundlerConfig.BundleAlignment = BundleAlignment;
   BundlerConfig.FilesType = FilesType;
   BundlerConfig.ObjcopyPath = "";
+  // Do not override the default value Compress and Verbose in BundlerConfig.
+  if (Compress.getNumOccurrences() > 0)
+    BundlerConfig.Compress = Compress;
+  if (Verbose.getNumOccurrences() > 0)
+    BundlerConfig.Verbose = Verbose;
 
   BundlerConfig.TargetNames = TargetNames;
   BundlerConfig.InputFileNames = InputFileNames;

diff  --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h
index 329c96f5c14c404..a28710dcdfaf2c6 100644
--- a/llvm/include/llvm/BinaryFormat/Magic.h
+++ b/llvm/include/llvm/BinaryFormat/Magic.h
@@ -42,19 +42,21 @@ struct file_magic {
     macho_universal_binary,                   ///< Mach-O universal binary
     macho_file_set,                           ///< Mach-O file set binary
     minidump,                                 ///< Windows minidump file
-    coff_cl_gl_object,   ///< Microsoft cl.exe's intermediate code file
-    coff_object,         ///< COFF object file
-    coff_import_library, ///< COFF import library
-    pecoff_executable,   ///< PECOFF executable file
-    windows_resource,    ///< Windows compiled resource file (.res)
-    xcoff_object_32,     ///< 32-bit XCOFF object file
-    xcoff_object_64,     ///< 64-bit XCOFF object file
-    wasm_object,         ///< WebAssembly Object file
-    pdb,                 ///< Windows PDB debug info file
-    tapi_file,           ///< Text-based Dynamic Library Stub file
-    cuda_fatbinary,      ///< CUDA Fatbinary object file
-    offload_binary,      ///< LLVM offload object file
-    dxcontainer_object,  ///< DirectX container file
+    coff_cl_gl_object,         ///< Microsoft cl.exe's intermediate code file
+    coff_object,               ///< COFF object file
+    coff_import_library,       ///< COFF import library
+    pecoff_executable,         ///< PECOFF executable file
+    windows_resource,          ///< Windows compiled resource file (.res)
+    xcoff_object_32,           ///< 32-bit XCOFF object file
+    xcoff_object_64,           ///< 64-bit XCOFF object file
+    wasm_object,               ///< WebAssembly Object file
+    pdb,                       ///< Windows PDB debug info file
+    tapi_file,                 ///< Text-based Dynamic Library Stub file
+    cuda_fatbinary,            ///< CUDA Fatbinary object file
+    offload_binary,            ///< LLVM offload object file
+    dxcontainer_object,        ///< DirectX container file
+    offload_bundle,            ///< Clang offload bundle file
+    offload_bundle_compressed, ///< Compressed clang offload bundle file
   };
 
   bool is_object() const { return V != unknown; }

diff  --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index 025334f9f3f42b3..420224df57df418 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
     if (startswith(Magic, "BC\xC0\xDE"))
       return file_magic::bitcode;
     break;
+  case 'C':
+    if (startswith(Magic, "CCOB"))
+      return file_magic::offload_bundle_compressed;
+    break;
   case '!':
     if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
       return file_magic::archive;
@@ -251,6 +255,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
       return file_magic::coff_object;
     break;
 
+  case '_': {
+    const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
+    if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
+      return file_magic::offload_bundle;
+    break;
+  }
+
   default:
     break;
   }

diff  --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
index d18aed8b3b8c8d8..0ee9f7fac448a26 100644
--- a/llvm/lib/Object/Binary.cpp
+++ b/llvm/lib/Object/Binary.cpp
@@ -87,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
   case file_magic::cuda_fatbinary:
   case file_magic::coff_cl_gl_object:
   case file_magic::dxcontainer_object:
+  case file_magic::offload_bundle:
+  case file_magic::offload_bundle_compressed:
     // Unrecognized object file format.
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::offload_binary:

diff  --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 2a672467f422dc8..428166f58070d0b 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -158,6 +158,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, file_magic Type,
   case file_magic::cuda_fatbinary:
   case file_magic::offload_binary:
   case file_magic::dxcontainer_object:
+  case file_magic::offload_bundle:
+  case file_magic::offload_bundle_compressed:
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::tapi_file:
     return errorCodeToError(object_error::invalid_file_type);


        


More information about the cfe-commits mailing list