[clang] [HIP] Support compressing device binary (PR #67162)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 2 11:10:58 PDT 2023


https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/67162

>From b1c9550da1074414228a45594478fec7ecbf1779 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 21 Sep 2023 13:52:16 -0400
Subject: [PATCH] [HIP] Support compressing device binary

Add option -f[no-]offload-compress to clang to enable/disable
compression of device binary for HIP. By default it is disabled.

Add option -compress to clang-offload-bundler to enable
compression of offload bundle. By default it is disabled.

When enabled, zstd or zlib is used for compression when
available.

When disabled, it is NFC compared to previous behavior. The
same offload bundle format is used as before.

Clang-offload-bundler automatically detects whether the
input file to be unbundled is compressed and the compression
method and decompress if necessary.
---
 clang/docs/ClangOffloadBundler.rst            |  27 ++
 clang/include/clang/Driver/OffloadBundler.h   |  37 ++
 clang/include/clang/Driver/Options.td         |   4 +
 clang/lib/Driver/OffloadBundler.cpp           | 343 +++++++++++++++---
 clang/lib/Driver/ToolChains/Clang.cpp         |   7 +
 clang/lib/Driver/ToolChains/HIPUtility.cpp    |   6 +
 .../test/Driver/clang-offload-bundler-zlib.c  |  75 ++++
 .../test/Driver/clang-offload-bundler-zstd.c  |  72 ++++
 .../test/Driver/hip-offload-compress-zlib.hip |  47 +++
 .../test/Driver/hip-offload-compress-zstd.hip |  47 +++
 .../clang-offload-bundler/CMakeLists.txt      |   1 +
 .../ClangOffloadBundler.cpp                   |  10 +
 llvm/include/llvm/BinaryFormat/Magic.h        |  28 +-
 llvm/lib/BinaryFormat/Magic.cpp               |  11 +
 llvm/lib/Object/Binary.cpp                    |   2 +
 15 files changed, 657 insertions(+), 60 deletions(-)
 create mode 100644 clang/test/Driver/clang-offload-bundler-zlib.c
 create mode 100644 clang/test/Driver/clang-offload-bundler-zstd.c
 create mode 100644 clang/test/Driver/hip-offload-compress-zlib.hip
 create mode 100644 clang/test/Driver/hip-offload-compress-zstd.hip

diff --git a/clang/docs/ClangOffloadBundler.rst b/clang/docs/ClangOffloadBundler.rst
index d08bf4b97781fa4..1e21d3e7264d5c3 100644
--- a/clang/docs/ClangOffloadBundler.rst
+++ b/clang/docs/ClangOffloadBundler.rst
@@ -309,3 +309,30 @@ target by comparing bundle ID's. Two bundle ID's are considered compatible if:
   * Their offload kind are the same
   * Their target triple are the same
   * Their GPUArch are the same
+
+Compression and Decompression
+=============================
+
+``clang-offload-bundler`` provides features to compress and decompress the full
+bundle, leveraging inherent redundancies within the bundle entries. Use the
+`-compress` command-line option to enable this compression capability.
+
+The compressed offload bundle begins with a header followed by the compressed binary data:
+
+- **Magic Number (4 bytes)**:
+    This is a unique identifier to distinguish compressed offload bundles. The value is the string 'CCOB' (Compressed Clang Offload Bundle).
+
+- **Version Number (16-bit unsigned int)**:
+    This denotes the version of the compressed offload bundle format. The current version is `1`.
+
+- **Compression Method (16-bit unsigned int)**:
+    This field indicates the compression method used. The value corresponds to either `zlib` or `zstd`, represented as a 16-bit unsigned integer cast from the LLVM compression enumeration.
+
+- **Uncompressed Binary Size (32-bit unsigned int)**:
+    This is the size (in bytes) of the binary data before it was compressed.
+
+- **Hash (64-bit unsigned int)**:
+    This is a 64-bit truncated MD5 hash of the uncompressed binary data. It serves for verification and caching purposes.
+
+- **Compressed Data**:
+    The actual compressed binary data follows the header. Its size can be inferred from the total size of the file minus the header size.
diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index 28473c53662de2c..17df31d31071d99 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -19,6 +19,7 @@
 
 #include "llvm/Support/Error.h"
 #include "llvm/TargetParser/Triple.h"
+#include <llvm/Support/MemoryBuffer.h>
 #include <string>
 #include <vector>
 
@@ -26,11 +27,15 @@ namespace clang {
 
 class OffloadBundlerConfig {
 public:
+  OffloadBundlerConfig();
+
   bool AllowNoHost = false;
   bool AllowMissingBundles = false;
   bool CheckInputArchive = false;
   bool PrintExternalCommands = false;
   bool HipOpenmpCompatible = false;
+  bool Compress = false;
+  bool Verbose = false;
 
   unsigned BundleAlignment = 1;
   unsigned HostInputIndex = ~0u;
@@ -84,6 +89,38 @@ struct OffloadTargetInfo {
   std::string str() const;
 };
 
+// CompressedOffloadBundle represents the format for the compressed offload
+// bundles.
+//
+// The format is as follows:
+// - Magic Number (4 bytes) - A constant "CCOB".
+// - Version (2 bytes)
+// - Compression Method (2 bytes) - Uses the values from
+// llvm::compression::Format.
+// - Uncompressed Size (4 bytes).
+// - Truncated MD5 Hash (8 bytes).
+// - Compressed Data (variable length).
+
+class CompressedOffloadBundle {
+private:
+  static inline const size_t MagicSize = 4;
+  static inline const size_t VersionFieldSize = sizeof(uint16_t);
+  static inline const size_t MethodFieldSize = sizeof(uint16_t);
+  static inline const size_t SizeFieldSize = sizeof(uint32_t);
+  static inline const size_t HashFieldSize = 8;
+  static inline const size_t HeaderSize = MagicSize + VersionFieldSize +
+                                          MethodFieldSize + SizeFieldSize +
+                                          HashFieldSize;
+  static inline const llvm::StringRef MagicNumber = "CCOB";
+  static inline const uint16_t Version = 1;
+
+public:
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+  static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+};
+
 } // namespace clang
 
 #endif // LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index ee4e23f335e7875..625588f6359b4c3 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1183,6 +1183,10 @@ def fgpu_inline_threshold_EQ : Joined<["-"], "fgpu-inline-threshold=">,
 def fgpu_sanitize : Flag<["-"], "fgpu-sanitize">, Group<f_Group>,
   HelpText<"Enable sanitizer for supported offloading devices">;
 def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
+
+def offload_compress : Flag<["--"], "offload-compress">,
+  HelpText<"Compress offload device binaries (HIP only)">;
+def no_offload_compress : Flag<["--"], "no-offload-compress">;
 }
 
 // CUDA options
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index d11c41605bf39ee..87be9cee26faafd 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -21,24 +21,29 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ArchiveWriter.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/StringSaver.h"
+#include "llvm/Support/Timer.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TargetParser/Host.h"
@@ -48,6 +53,7 @@
 #include <cstddef>
 #include <cstdint>
 #include <forward_list>
+#include <llvm/Support/Process.h>
 #include <memory>
 #include <set>
 #include <string>
@@ -58,6 +64,10 @@ using namespace llvm;
 using namespace llvm::object;
 using namespace clang;
 
+static llvm::TimerGroup
+    ClangOffloadBundlerTimerGroup("Clang Offload Bundler Timer Group",
+                                  "Timer group for clang offload bundler");
+
 /// Magic string that marks the existence of offloading data.
 #define OFFLOAD_BUNDLER_MAGIC_STR "__CLANG_OFFLOAD_BUNDLE__"
 
@@ -224,20 +234,22 @@ class FileHandler {
 
   /// Write the header of the bundled file to \a OS based on the information
   /// gathered from \a Inputs.
-  virtual Error WriteHeader(raw_fd_ostream &OS,
+  virtual Error WriteHeader(raw_ostream &OS,
                             ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) = 0;
 
   /// Write the marker that initiates a bundle for the triple \a TargetTriple to
   /// \a OS.
-  virtual Error WriteBundleStart(raw_fd_ostream &OS,
-                                 StringRef TargetTriple) = 0;
+  virtual Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) = 0;
 
   /// Write the marker that closes a bundle for the triple \a TargetTriple to \a
   /// OS.
-  virtual Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) = 0;
+  virtual Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) = 0;
 
   /// Write the bundle from \a Input into \a OS.
-  virtual Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) = 0;
+  virtual Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) = 0;
+
+  /// Finalize output file.
+  virtual Error finalizeOutputFile() { return Error::success(); }
 
   /// List bundle IDs in \a Input.
   virtual Error listBundleIDs(MemoryBuffer &Input) {
@@ -311,7 +323,7 @@ static uint64_t Read8byteIntegerFromBuffer(StringRef Buffer, size_t pos) {
 }
 
 /// Write 8-byte integers to a buffer in little-endian format.
-static void Write8byteIntegerToBuffer(raw_fd_ostream &OS, uint64_t Val) {
+static void Write8byteIntegerToBuffer(raw_ostream &OS, uint64_t Val) {
   llvm::support::endian::write(OS, Val, llvm::support::little);
 }
 
@@ -359,8 +371,7 @@ class BinaryFileHandler final : public FileHandler {
       return Error::success();
 
     // Check if no magic was found.
-    StringRef Magic(FC.data(), sizeof(OFFLOAD_BUNDLER_MAGIC_STR) - 1);
-    if (!Magic.equals(OFFLOAD_BUNDLER_MAGIC_STR))
+    if (llvm::identify_magic(FC) != llvm::file_magic::offload_bundle)
       return Error::success();
 
     // Read number of bundles.
@@ -435,7 +446,7 @@ class BinaryFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteHeader(raw_fd_ostream &OS,
+  Error WriteHeader(raw_ostream &OS,
                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
 
     // Compute size of the header.
@@ -472,19 +483,27 @@ class BinaryFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
     CurWriteBundleTarget = TargetTriple.str();
     return Error::success();
   }
 
-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
     return Error::success();
   }
 
-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
     auto BI = BundlesInfo[CurWriteBundleTarget];
-    OS.seek(BI.Offset);
+
+    // Pad with 0 to reach specified offset.
+    size_t CurrentPos = OS.tell();
+    size_t PaddingSize = BI.Offset > CurrentPos ? BI.Offset - CurrentPos : 0;
+    for (size_t I = 0; I < PaddingSize; ++I)
+      OS.write('\0');
+    assert(OS.tell() == BI.Offset);
+
     OS.write(Input.getBufferStart(), Input.getBufferSize());
+
     return Error::success();
   }
 };
@@ -541,7 +560,7 @@ class ObjectFileHandler final : public FileHandler {
       return NameOrErr.takeError();
 
     // If it does not start with the reserved suffix, just skip this section.
-    if (!NameOrErr->startswith(OFFLOAD_BUNDLER_MAGIC_STR))
+    if (llvm::identify_magic(*NameOrErr) != llvm::file_magic::offload_bundle)
       return std::nullopt;
 
     // Return the triple that is right after the reserved prefix.
@@ -607,7 +626,7 @@ class ObjectFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteHeader(raw_fd_ostream &OS,
+  Error WriteHeader(raw_ostream &OS,
                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
     assert(BundlerConfig.HostInputIndex != ~0u &&
            "Host input index not defined.");
@@ -617,12 +636,16 @@ class ObjectFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
     ++NumberOfProcessedInputs;
     return Error::success();
   }
 
-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
+    return Error::success();
+  }
+
+  Error finalizeOutputFile() final {
     assert(NumberOfProcessedInputs <= NumberOfInputs &&
            "Processing more inputs that actually exist!");
     assert(BundlerConfig.HostInputIndex != ~0u &&
@@ -640,10 +663,6 @@ class ObjectFileHandler final : public FileHandler {
     assert(BundlerConfig.ObjcopyPath != "" &&
            "llvm-objcopy path not specified");
 
-    // We write to the output file directly. So, we close it and use the name
-    // to pass down to llvm-objcopy.
-    OS.close();
-
     // Temporary files that need to be removed.
     TempFileHandlerRAII TempFiles;
 
@@ -684,7 +703,7 @@ class ObjectFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
     return Error::success();
   }
 
@@ -781,22 +800,22 @@ class TextFileHandler final : public FileHandler {
     return Error::success();
   }
 
-  Error WriteHeader(raw_fd_ostream &OS,
+  Error WriteHeader(raw_ostream &OS,
                     ArrayRef<std::unique_ptr<MemoryBuffer>> Inputs) final {
     return Error::success();
   }
 
-  Error WriteBundleStart(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleStart(raw_ostream &OS, StringRef TargetTriple) final {
     OS << BundleStartString << TargetTriple << "\n";
     return Error::success();
   }
 
-  Error WriteBundleEnd(raw_fd_ostream &OS, StringRef TargetTriple) final {
+  Error WriteBundleEnd(raw_ostream &OS, StringRef TargetTriple) final {
     OS << BundleEndString << TargetTriple << "\n";
     return Error::success();
   }
 
-  Error WriteBundle(raw_fd_ostream &OS, MemoryBuffer &Input) final {
+  Error WriteBundle(raw_ostream &OS, MemoryBuffer &Input) final {
     OS << Input.getBuffer();
     return Error::success();
   }
@@ -881,6 +900,184 @@ CreateFileHandler(MemoryBuffer &FirstInput,
                            "'" + FilesType + "': invalid file type specified");
 }
 
+OffloadBundlerConfig::OffloadBundlerConfig() {
+  auto IgnoreEnvVarOpt =
+      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
+  if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
+    return;
+
+  auto VerboseEnvVarOpt = llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_VERBOSE");
+  if (VerboseEnvVarOpt.has_value())
+    Verbose = VerboseEnvVarOpt.value() == "1";
+
+  auto CompressEnvVarOpt =
+      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
+  if (CompressEnvVarOpt.has_value())
+    Compress = CompressEnvVarOpt.value() == "1";
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
+                                  bool Verbose) {
+  llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
+                        ClangOffloadBundlerTimerGroup);
+  if (Verbose)
+    HashTimer.startTimer();
+  llvm::MD5 Hash;
+  llvm::MD5::MD5Result Result;
+  Hash.update(Input.getBuffer());
+  Hash.final(Result);
+  uint64_t TruncatedHash = Result.low();
+  if (Verbose)
+    HashTimer.stopTimer();
+
+  SmallVector<uint8_t, 0> CompressedBuffer;
+  auto BufferUint8 = llvm::ArrayRef<uint8_t>(
+      reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
+      Input.getBuffer().size());
+
+  llvm::compression::Format CompressionFormat;
+
+  if (llvm::compression::zstd::isAvailable())
+    CompressionFormat = llvm::compression::Format::Zstd;
+  else if (llvm::compression::zlib::isAvailable())
+    CompressionFormat = llvm::compression::Format::Zlib;
+  else
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Compression not supported");
+
+  llvm::Timer CompressTimer("Compression Timer", "Compression time",
+                            ClangOffloadBundlerTimerGroup);
+  if (Verbose)
+    CompressTimer.startTimer();
+  llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
+  if (Verbose)
+    CompressTimer.stopTimer();
+
+  uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
+  uint32_t UncompressedSize = Input.getBuffer().size();
+
+  SmallVector<char, 0> FinalBuffer;
+  FinalBuffer.append(MagicNumber.begin(), MagicNumber.end());
+  FinalBuffer.append(reinterpret_cast<const char *>(&Version),
+                     reinterpret_cast<const char *>(&Version) +
+                         sizeof(Version));
+  FinalBuffer.append(reinterpret_cast<char *>(&CompressionMethod),
+                     reinterpret_cast<char *>(&CompressionMethod) +
+                         sizeof(CompressionMethod));
+  FinalBuffer.append(reinterpret_cast<char *>(&UncompressedSize),
+                     reinterpret_cast<char *>(&UncompressedSize) +
+                         sizeof(UncompressedSize));
+  FinalBuffer.append(reinterpret_cast<char *>(&TruncatedHash),
+                     reinterpret_cast<char *>(&TruncatedHash) +
+                         sizeof(TruncatedHash));
+  FinalBuffer.append(CompressedBuffer.begin(), CompressedBuffer.end());
+
+  if (Verbose) {
+    auto MethodUsed =
+        CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
+    llvm::errs() << "Compressed bundle format version: " << Version << "\n"
+                 << "Compression method used: " << MethodUsed << "\n"
+                 << "Binary size before compression: " << UncompressedSize
+                 << " bytes\n"
+                 << "Binary size after compression: " << CompressedBuffer.size()
+                 << " bytes\n"
+                 << "Truncated MD5 hash: "
+                 << llvm::format_hex(TruncatedHash, 16) << "\n";
+  }
+
+  return llvm::MemoryBuffer::getMemBufferCopy(
+      llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
+}
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
+                                    bool Verbose) {
+
+  StringRef Blob = Input.getBuffer();
+
+  if (Blob.size() < HeaderSize) {
+    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+  }
+  if (llvm::identify_magic(Blob) !=
+      llvm::file_magic::offload_bundle_compressed) {
+    if (Verbose)
+      llvm::errs() << "Uncompressed bundle.\n";
+    return llvm::MemoryBuffer::getMemBufferCopy(Blob);
+  }
+
+  uint16_t ThisVersion = *reinterpret_cast<const uint16_t *>(
+      Input.getBuffer().data() + MagicNumber.size());
+  uint16_t CompressionMethod = *reinterpret_cast<const uint16_t *>(
+      Blob.data() + MagicSize + VersionFieldSize);
+  uint32_t UncompressedSize = *reinterpret_cast<const uint32_t *>(
+      Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize);
+  uint64_t StoredHash = *reinterpret_cast<const uint64_t *>(
+      Blob.data() + MagicSize + VersionFieldSize + MethodFieldSize +
+      SizeFieldSize);
+
+  llvm::compression::Format CompressionFormat;
+  if (CompressionMethod ==
+      static_cast<uint16_t>(llvm::compression::Format::Zlib))
+    CompressionFormat = llvm::compression::Format::Zlib;
+  else if (CompressionMethod ==
+           static_cast<uint16_t>(llvm::compression::Format::Zstd))
+    CompressionFormat = llvm::compression::Format::Zstd;
+  else
+    return createStringError(inconvertibleErrorCode(),
+                             "Unknown compressing method");
+
+  llvm::Timer DecompressTimer("Decompression Timer", "Decompression time",
+                              ClangOffloadBundlerTimerGroup);
+  if (Verbose)
+    DecompressTimer.startTimer();
+
+  SmallVector<uint8_t, 0> DecompressedData;
+  StringRef CompressedData = Blob.substr(HeaderSize);
+  if (llvm::Error DecompressionError = llvm::compression::decompress(
+          CompressionFormat, llvm::arrayRefFromStringRef(CompressedData),
+          DecompressedData, UncompressedSize))
+    return createStringError(inconvertibleErrorCode(),
+                             "Could not decompress embedded file contents: " +
+                                 llvm::toString(std::move(DecompressionError)));
+
+  if (Verbose) {
+    DecompressTimer.stopTimer();
+
+    // Recalculate MD5 hash
+    llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
+                                "Hash recalculation time",
+                                ClangOffloadBundlerTimerGroup);
+    HashRecalcTimer.startTimer();
+    llvm::MD5 Hash;
+    llvm::MD5::MD5Result Result;
+    Hash.update(llvm::ArrayRef<uint8_t>(DecompressedData.data(),
+                                        DecompressedData.size()));
+    Hash.final(Result);
+    uint64_t RecalculatedHash = Result.low();
+    HashRecalcTimer.stopTimer();
+    bool HashMatch = (StoredHash == RecalculatedHash);
+
+    llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
+                 << "Decompression method: "
+                 << (CompressionFormat == llvm::compression::Format::Zlib
+                         ? "zlib"
+                         : "zstd")
+                 << "\n"
+                 << "Size before decompression: " << CompressedData.size()
+                 << " bytes\n"
+                 << "Size after decompression: " << UncompressedSize
+                 << " bytes\n"
+                 << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
+                 << "Recalculated hash: "
+                 << llvm::format_hex(RecalculatedHash, 16) << "\n"
+                 << "Hashes match: " << (HashMatch ? "Yes" : "No") << "\n";
+  }
+
+  return llvm::MemoryBuffer::getMemBufferCopy(
+      llvm::toStringRef(DecompressedData));
+}
+
 // List bundle IDs. Return true if an error was found.
 Error OffloadBundler::ListBundleIDsInFile(
     StringRef InputFileName, const OffloadBundlerConfig &BundlerConfig) {
@@ -890,28 +1087,35 @@ Error OffloadBundler::ListBundleIDsInFile(
   if (std::error_code EC = CodeOrErr.getError())
     return createFileError(InputFileName, EC);
 
-  MemoryBuffer &Input = **CodeOrErr;
+  // Decompress the input if necessary.
+  Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+      CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
+  if (!DecompressedBufferOrErr)
+    return createStringError(
+        inconvertibleErrorCode(),
+        "Failed to decompress input: " +
+            llvm::toString(DecompressedBufferOrErr.takeError()));
+
+  MemoryBuffer &DecompressedInput = **DecompressedBufferOrErr;
 
   // Select the right files handler.
   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
-      CreateFileHandler(Input, BundlerConfig);
+      CreateFileHandler(DecompressedInput, BundlerConfig);
   if (!FileHandlerOrErr)
     return FileHandlerOrErr.takeError();
 
   std::unique_ptr<FileHandler> &FH = *FileHandlerOrErr;
   assert(FH);
-  return FH->listBundleIDs(Input);
+  return FH->listBundleIDs(DecompressedInput);
 }
 
 /// Bundle the files. Return true if an error was found.
 Error OffloadBundler::BundleFiles() {
   std::error_code EC;
 
-  // Create output file.
-  raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
-                            sys::fs::OF_None);
-  if (EC)
-    return createFileError(BundlerConfig.OutputFileNames.front(), EC);
+  // Create a buffer to hold the content before compressing.
+  SmallVector<char, 0> Buffer;
+  llvm::raw_svector_ostream BufferStream(Buffer);
 
   // Open input files.
   SmallVector<std::unique_ptr<MemoryBuffer>, 8u> InputBuffers;
@@ -938,22 +1142,46 @@ Error OffloadBundler::BundleFiles() {
   assert(FH);
 
   // Write header.
-  if (Error Err = FH->WriteHeader(OutputFile, InputBuffers))
+  if (Error Err = FH->WriteHeader(BufferStream, InputBuffers))
     return Err;
 
   // Write all bundles along with the start/end markers. If an error was found
   // writing the end of the bundle component, abort the bundle writing.
   auto Input = InputBuffers.begin();
   for (auto &Triple : BundlerConfig.TargetNames) {
-    if (Error Err = FH->WriteBundleStart(OutputFile, Triple))
+    if (Error Err = FH->WriteBundleStart(BufferStream, Triple))
       return Err;
-    if (Error Err = FH->WriteBundle(OutputFile, **Input))
+    if (Error Err = FH->WriteBundle(BufferStream, **Input))
       return Err;
-    if (Error Err = FH->WriteBundleEnd(OutputFile, Triple))
+    if (Error Err = FH->WriteBundleEnd(BufferStream, Triple))
       return Err;
     ++Input;
   }
-  return Error::success();
+
+  raw_fd_ostream OutputFile(BundlerConfig.OutputFileNames.front(), EC,
+                            sys::fs::OF_None);
+  if (EC)
+    return createFileError(BundlerConfig.OutputFileNames.front(), EC);
+
+  SmallVector<char, 0> CompressedBuffer;
+  if (BundlerConfig.Compress) {
+    std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
+        llvm::MemoryBuffer::getMemBufferCopy(
+            llvm::StringRef(Buffer.data(), Buffer.size()));
+    auto CompressionResult =
+        CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
+    if (auto Error = CompressionResult.takeError())
+      return Error;
+
+    auto CompressedMemBuffer = std::move(CompressionResult.get());
+    CompressedBuffer.assign(CompressedMemBuffer->getBufferStart(),
+                            CompressedMemBuffer->getBufferEnd());
+  } else
+    CompressedBuffer = Buffer;
+
+  OutputFile.write(CompressedBuffer.data(), CompressedBuffer.size());
+
+  return FH->finalizeOutputFile();
 }
 
 // Unbundle the files. Return true if an error was found.
@@ -964,7 +1192,16 @@ Error OffloadBundler::UnbundleFiles() {
   if (std::error_code EC = CodeOrErr.getError())
     return createFileError(BundlerConfig.InputFileNames.front(), EC);
 
-  MemoryBuffer &Input = **CodeOrErr;
+  // Decompress the input if necessary.
+  Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+      CompressedOffloadBundle::decompress(**CodeOrErr, BundlerConfig.Verbose);
+  if (!DecompressedBufferOrErr)
+    return createStringError(
+        inconvertibleErrorCode(),
+        "Failed to decompress input: " +
+            llvm::toString(DecompressedBufferOrErr.takeError()));
+
+  MemoryBuffer &Input = **DecompressedBufferOrErr;
 
   // Select the right files handler.
   Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
@@ -1169,11 +1406,23 @@ Error OffloadBundler::UnbundleArchive() {
     if (!CodeObjectBufferRefOrErr)
       return CodeObjectBufferRefOrErr.takeError();
 
-    auto CodeObjectBuffer =
+    auto TempCodeObjectBuffer =
         MemoryBuffer::getMemBuffer(*CodeObjectBufferRefOrErr, false);
 
+    // Decompress the buffer if necessary.
+    Expected<std::unique_ptr<MemoryBuffer>> DecompressedBufferOrErr =
+        CompressedOffloadBundle::decompress(*TempCodeObjectBuffer,
+                                            BundlerConfig.Verbose);
+    if (!DecompressedBufferOrErr)
+      return createStringError(
+          inconvertibleErrorCode(),
+          "Failed to decompress code object: " +
+              llvm::toString(DecompressedBufferOrErr.takeError()));
+
+    MemoryBuffer &CodeObjectBuffer = **DecompressedBufferOrErr;
+
     Expected<std::unique_ptr<FileHandler>> FileHandlerOrErr =
-        CreateFileHandler(*CodeObjectBuffer, BundlerConfig);
+        CreateFileHandler(CodeObjectBuffer, BundlerConfig);
     if (!FileHandlerOrErr)
       return FileHandlerOrErr.takeError();
 
@@ -1181,11 +1430,11 @@ Error OffloadBundler::UnbundleArchive() {
     assert(FileHandler &&
            "FileHandle creation failed for file in the archive!");
 
-    if (Error ReadErr = FileHandler->ReadHeader(*CodeObjectBuffer))
+    if (Error ReadErr = FileHandler->ReadHeader(CodeObjectBuffer))
       return ReadErr;
 
     Expected<std::optional<StringRef>> CurBundleIDOrErr =
-        FileHandler->ReadBundleStart(*CodeObjectBuffer);
+        FileHandler->ReadBundleStart(CodeObjectBuffer);
     if (!CurBundleIDOrErr)
       return CurBundleIDOrErr.takeError();
 
@@ -1206,7 +1455,7 @@ Error OffloadBundler::UnbundleArchive() {
                                              BundlerConfig)) {
         std::string BundleData;
         raw_string_ostream DataStream(BundleData);
-        if (Error Err = FileHandler->ReadBundle(DataStream, *CodeObjectBuffer))
+        if (Error Err = FileHandler->ReadBundle(DataStream, CodeObjectBuffer))
           return Err;
 
         for (auto &CompatibleTarget : CompatibleTargets) {
@@ -1244,11 +1493,11 @@ Error OffloadBundler::UnbundleArchive() {
         }
       }
 
-      if (Error Err = FileHandler->ReadBundleEnd(*CodeObjectBuffer))
+      if (Error Err = FileHandler->ReadBundleEnd(CodeObjectBuffer))
         return Err;
 
       Expected<std::optional<StringRef>> NextTripleOrErr =
-          FileHandler->ReadBundleStart(*CodeObjectBuffer);
+          FileHandler->ReadBundleStart(CodeObjectBuffer);
       if (!NextTripleOrErr)
         return NextTripleOrErr.takeError();
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a694d00b569a590..0ee111c9b87cd41 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8468,6 +8468,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
     }
     CmdArgs.push_back(TCArgs.MakeArgString(UB));
   }
+  if (TCArgs.hasFlag(options::OPT_offload_compress,
+                     options::OPT_no_offload_compress, false))
+    CmdArgs.push_back("-compress");
+  if (TCArgs.hasArg(options::OPT_v))
+    CmdArgs.push_back("-verbose");
   // All the inputs are encoded as commands.
   C.addCommand(std::make_unique<Command>(
       JA, *this, ResponseFileSupport::None(),
@@ -8551,6 +8556,8 @@ void OffloadBundler::ConstructJobMultipleOutputs(
   }
   CmdArgs.push_back("-unbundle");
   CmdArgs.push_back("-allow-missing-bundles");
+  if (TCArgs.hasArg(options::OPT_v))
+    CmdArgs.push_back("-verbose");
 
   // All the inputs are encoded as commands.
   C.addCommand(std::make_unique<Command>(
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index 8b9d8db90ffab78..04efdcba20ea740 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -84,6 +84,12 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
       Args.MakeArgString(std::string("-output=").append(Output));
   BundlerArgs.push_back(BundlerOutputArg);
 
+  if (Args.hasFlag(options::OPT_offload_compress,
+                   options::OPT_no_offload_compress, false))
+    BundlerArgs.push_back("-compress");
+  if (Args.hasArg(options::OPT_v))
+    BundlerArgs.push_back("-verbose");
+
   const char *Bundler = Args.MakeArgString(
       T.getToolChain().GetProgramPath("clang-offload-bundler"));
   C.addCommand(std::make_unique<Command>(
diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c
new file mode 100644
index 000000000000000..c46c32a4a0537b4
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-zlib.c
@@ -0,0 +1,75 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
+
+//
+// Generate the host binary to be bundled.
+//
+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
+
+//
+// Generate an empty file to help with the checks of empty files.
+//
+// RUN: touch %t.empty
+
+//
+// Generate device binaries to be bundled.
+//
+// RUN: echo 'Content of device file 1' > %t.tgt1
+// RUN: echo 'Content of device file 2' > %t.tgt2
+
+//
+// Check compression/decompression of offload bundle.
+//
+// RUN: env OFFLOAD_BUNDLER_COMPRESS=1 OFFLOAD_BUNDLER_VERBOSE=1 \
+// RUN:   clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc 2>&1 | \
+// RUN:   FileCheck -check-prefix=COMPRESS %s
+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: env OFFLOAD_BUNDLER_VERBOSE=1 \
+// RUN:   clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle 2>&1 | \
+// RUN:   FileCheck -check-prefix=DECOMPRESS %s
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
+//
+// COMPRESS: Compression method used:
+// DECOMPRESS: Decompression method:
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+//
+
+//
+// Check -bundle-align option.
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
+// RUN: diff %t.bc %t.res.bc
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
+//
+// Check unbundling archive.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
+
+// Some code so that we can create a binary out of this file.
+int A = 0;
+void test_func(void) {
+  ++A;
+}
diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c
new file mode 100644
index 000000000000000..b2b588b72d4d6f2
--- /dev/null
+++ b/clang/test/Driver/clang-offload-bundler-zstd.c
@@ -0,0 +1,72 @@
+// REQUIRES: zstd
+// REQUIRES: x86-registered-target
+// UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
+
+//
+// Generate the host binary to be bundled.
+//
+// RUN: %clang -O0 -target %itanium_abi_triple %s -c -emit-llvm -o %t.bc
+
+//
+// Generate an empty file to help with the checks of empty files.
+//
+// RUN: touch %t.empty
+
+//
+// Generate device binaries to be bundled.
+//
+// RUN: echo 'Content of device file 1' > %t.tgt1
+// RUN: echo 'Content of device file 2' > %t.tgt2
+
+//
+// Check compression/decompression of offload bundle.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose 2>&1 | \
+// RUN:   FileCheck -check-prefix=COMPRESS %s
+// RUN: clang-offload-bundler -type=bc -list -input=%t.hip.bundle.bc | FileCheck -check-prefix=NOHOST %s
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle -verbose 2>&1 | \
+// RUN:   FileCheck -check-prefix=DECOMPRESS %s
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+//
+// COMPRESS: Compression method used
+// DECOMPRESS: Decompression method
+// NOHOST-NOT: host-
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
+// NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
+//
+
+//
+// Check -bundle-align option.
+//
+
+// RUN: clang-offload-bundler -bundle-align=4096 -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -input=%t.bc -input=%t.tgt1 -input=%t.tgt2 -output=%t.bundle3.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=host-%itanium_abi_triple,openmp-powerpc64le-ibm-linux-gnu,openmp-x86_64-pc-linux-gnu -output=%t.res.bc -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.bundle3.bc -unbundle
+// RUN: diff %t.bc %t.res.bc
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+
+//
+// Check unbundling archive.
+//
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle1.bc -compress
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%T/hip_bundle2.bc -compress
+// RUN: llvm-ar cr %T/hip_archive.a %T/hip_bundle1.bc %T/hip_bundle2.bc
+// RUN: clang-offload-bundler -unbundle -type=a -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%T/hip_900.a -output=%T/hip_906.a -input=%T/hip_archive.a
+// RUN: llvm-ar t %T/hip_900.a | FileCheck -check-prefix=HIP-AR-900 %s
+// RUN: llvm-ar t %T/hip_906.a | FileCheck -check-prefix=HIP-AR-906 %s
+// HIP-AR-900-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-900-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx900
+// HIP-AR-906-DAG: hip_bundle1-hip-amdgcn-amd-amdhsa--gfx906
+// HIP-AR-906-DAG: hip_bundle2-hip-amdgcn-amd-amdhsa--gfx906
+
+// Some code so that we can create a binary out of this file.
+int A = 0;
+void test_func(void) {
+  ++A;
+}
diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip
new file mode 100644
index 000000000000000..8bfaf376279efda
--- /dev/null
+++ b/clang/test/Driver/hip-offload-compress-zlib.hip
@@ -0,0 +1,47 @@
+// REQUIRES: zlib
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Test compress bundled bitcode.
+
+// RUN: rm -rf %T/a.bc
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN:   -o %T/a.bc \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// CHECK-SAME: -compress -verbose
+// CHECK: Compressed bundle format
+
+// Test uncompress of bundled bitcode.
+
+// RUN: %clang --hip-link -v --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpulib \
+// RUN:   %T/a.bc --offload-device-only \
+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
+
+// UNBUNDLE: clang-offload-bundler{{.*}} -type=bc
+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// UNBUNDLE-SAME: -unbundle
+// UNBUNDLE-SAME: -verbose
+// UNBUNDLE: Compressed bundle format
+
+// Test compress bundled code objects.
+
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress \
+// RUN: 2>&1 | FileCheck -check-prefix=CO %s
+
+// CO: clang-offload-bundler{{.*}} -type=o
+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
+// CO-SAME: -compress -verbose
+// CO: Compressed bundle format
diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip
new file mode 100644
index 000000000000000..6bd6eb0b9192d61
--- /dev/null
+++ b/clang/test/Driver/hip-offload-compress-zstd.hip
@@ -0,0 +1,47 @@
+// REQUIRES: zstd
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// Test compress bundled bitcode.
+
+// RUN: rm -rf %T/a.bc
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN:   -o %T/a.bc \
+// RUN: 2>&1 | FileCheck %s
+
+// CHECK: clang-offload-bundler{{.*}} -type=bc
+// CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// CHECK-SAME: -compress -verbose
+// CHECK: Compressed bundle format
+
+// Test uncompress of bundled bitcode.
+
+// RUN: %clang --hip-link -v --target=x86_64-linux-gnu \
+// RUN:   --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -fgpu-rdc -nogpulib \
+// RUN:   %T/a.bc --offload-device-only \
+// RUN: 2>&1 | FileCheck -check-prefix=UNBUNDLE %s
+
+// UNBUNDLE: clang-offload-bundler{{.*}} -type=bc
+// UNBUNDLE-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
+// UNBUNDLE-SAME: -unbundle
+// UNBUNDLE-SAME: -verbose
+// UNBUNDLE: Compressed bundle format
+
+// Test compress bundled code objects.
+
+// RUN: %clang -c -v --target=x86_64-linux-gnu \
+// RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
+// RUN:   -nogpuinc -nogpulib \
+// RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
+// RUN:   --offload-compress \
+// RUN: 2>&1 | FileCheck -check-prefix=CO %s
+
+// CO: clang-offload-bundler{{.*}} -type=o
+// CO-SAME: -targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx1100,hipv4-amdgcn-amd-amdhsa--gfx1101
+// CO-SAME: -compress -verbose
+// CO: Compressed bundle format
diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt
index dabd82382cdf08d..dec2881589a538b 100644
--- a/clang/tools/clang-offload-bundler/CMakeLists.txt
+++ b/clang/tools/clang-offload-bundler/CMakeLists.txt
@@ -1,4 +1,5 @@
 set(LLVM_LINK_COMPONENTS
+  BinaryFormat
   Object
   Support
   TargetParser
diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
index f20157622d79f30..90c475e541f4c33 100644
--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -136,6 +136,11 @@ int main(int argc, const char **argv) {
     cl::desc("Treat hip and hipv4 offload kinds as "
              "compatible with openmp kind, and vice versa.\n"),
     cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+  cl::opt<bool> Compress("compress",
+                         cl::desc("Compress output file when bundling.\n"),
+                         cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+  cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
+                        cl::init(false), cl::cat(ClangOffloadBundlerCategory));
 
   // Process commandline options and report errors
   sys::PrintStackTraceOnErrorSignal(argv[0]);
@@ -163,6 +168,11 @@ int main(int argc, const char **argv) {
   BundlerConfig.BundleAlignment = BundleAlignment;
   BundlerConfig.FilesType = FilesType;
   BundlerConfig.ObjcopyPath = "";
+  // Do not override the default value Compress and Verbose in BundlerConfig.
+  if (Compress.getNumOccurrences() > 0)
+    BundlerConfig.Compress = Compress;
+  if (Verbose.getNumOccurrences() > 0)
+    BundlerConfig.Verbose = Verbose;
 
   BundlerConfig.TargetNames = TargetNames;
   BundlerConfig.InputFileNames = InputFileNames;
diff --git a/llvm/include/llvm/BinaryFormat/Magic.h b/llvm/include/llvm/BinaryFormat/Magic.h
index 329c96f5c14c404..a28710dcdfaf2c6 100644
--- a/llvm/include/llvm/BinaryFormat/Magic.h
+++ b/llvm/include/llvm/BinaryFormat/Magic.h
@@ -42,19 +42,21 @@ struct file_magic {
     macho_universal_binary,                   ///< Mach-O universal binary
     macho_file_set,                           ///< Mach-O file set binary
     minidump,                                 ///< Windows minidump file
-    coff_cl_gl_object,   ///< Microsoft cl.exe's intermediate code file
-    coff_object,         ///< COFF object file
-    coff_import_library, ///< COFF import library
-    pecoff_executable,   ///< PECOFF executable file
-    windows_resource,    ///< Windows compiled resource file (.res)
-    xcoff_object_32,     ///< 32-bit XCOFF object file
-    xcoff_object_64,     ///< 64-bit XCOFF object file
-    wasm_object,         ///< WebAssembly Object file
-    pdb,                 ///< Windows PDB debug info file
-    tapi_file,           ///< Text-based Dynamic Library Stub file
-    cuda_fatbinary,      ///< CUDA Fatbinary object file
-    offload_binary,      ///< LLVM offload object file
-    dxcontainer_object,  ///< DirectX container file
+    coff_cl_gl_object,         ///< Microsoft cl.exe's intermediate code file
+    coff_object,               ///< COFF object file
+    coff_import_library,       ///< COFF import library
+    pecoff_executable,         ///< PECOFF executable file
+    windows_resource,          ///< Windows compiled resource file (.res)
+    xcoff_object_32,           ///< 32-bit XCOFF object file
+    xcoff_object_64,           ///< 64-bit XCOFF object file
+    wasm_object,               ///< WebAssembly Object file
+    pdb,                       ///< Windows PDB debug info file
+    tapi_file,                 ///< Text-based Dynamic Library Stub file
+    cuda_fatbinary,            ///< CUDA Fatbinary object file
+    offload_binary,            ///< LLVM offload object file
+    dxcontainer_object,        ///< DirectX container file
+    offload_bundle,            ///< Clang offload bundle file
+    offload_bundle_compressed, ///< Compressed clang offload bundle file
   };
 
   bool is_object() const { return V != unknown; }
diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index 025334f9f3f42b3..420224df57df418 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -87,6 +87,10 @@ file_magic llvm::identify_magic(StringRef Magic) {
     if (startswith(Magic, "BC\xC0\xDE"))
       return file_magic::bitcode;
     break;
+  case 'C':
+    if (startswith(Magic, "CCOB"))
+      return file_magic::offload_bundle_compressed;
+    break;
   case '!':
     if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
       return file_magic::archive;
@@ -251,6 +255,13 @@ file_magic llvm::identify_magic(StringRef Magic) {
       return file_magic::coff_object;
     break;
 
+  case '_': {
+    const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
+    if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
+      return file_magic::offload_bundle;
+    break;
+  }
+
   default:
     break;
   }
diff --git a/llvm/lib/Object/Binary.cpp b/llvm/lib/Object/Binary.cpp
index d18aed8b3b8c8d8..0ee9f7fac448a26 100644
--- a/llvm/lib/Object/Binary.cpp
+++ b/llvm/lib/Object/Binary.cpp
@@ -87,6 +87,8 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
   case file_magic::cuda_fatbinary:
   case file_magic::coff_cl_gl_object:
   case file_magic::dxcontainer_object:
+  case file_magic::offload_bundle:
+  case file_magic::offload_bundle_compressed:
     // Unrecognized object file format.
     return errorCodeToError(object_error::invalid_file_type);
   case file_magic::offload_binary:



More information about the cfe-commits mailing list