[clang] [llvm] [HIP] add --offload-compression-level= option (PR #83605)

Yaxun Liu via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 8 07:55:39 PST 2024


https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/83605

>From 16796bc8eb3b32436903db4b689d4cb9cfc348d8 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Fri, 1 Mar 2024 13:16:45 -0500
Subject: [PATCH] [HIP] add --offload-compression-level= option

Added --offload-compression-level= option to clang and -compression-level=
option to clang-offload-bundler for controlling compression level.

Added support for long distance matching (LDM) to llvm::compression::zstd.
It is off by default in the library API, but clang-offload-bundler enables
it by default since it generally improves the compression rate.

Changed the default zstd compression level for clang-offload-bundler to 3,
since it works well for bundle entry sizes from 1KB to 32MB, which should
cover most clang-offload-bundler usage. Users can still specify the
compression level with the -compression-level= option if necessary.
---
 clang/include/clang/Driver/OffloadBundler.h   |   6 +-
 clang/include/clang/Driver/Options.td         |   4 +
 clang/lib/Driver/OffloadBundler.cpp           | 113 ++++++++++++++----
 clang/lib/Driver/ToolChains/Clang.cpp         |  20 +++-
 clang/lib/Driver/ToolChains/Clang.h           |   2 +
 clang/lib/Driver/ToolChains/HIPUtility.cpp    |   7 +-
 .../test/Driver/clang-offload-bundler-zlib.c  |  21 +++-
 .../test/Driver/clang-offload-bundler-zstd.c  |  19 ++-
 .../test/Driver/hip-offload-compress-zlib.hip |   7 +-
 .../test/Driver/hip-offload-compress-zstd.hip |   5 +-
 .../ClangOffloadBundler.cpp                   |   5 +
 llvm/include/llvm/Support/Compression.h       |   5 +-
 llvm/lib/Support/Compression.cpp              |  40 +++++--
 13 files changed, 197 insertions(+), 57 deletions(-)

diff --git a/clang/include/clang/Driver/OffloadBundler.h b/clang/include/clang/Driver/OffloadBundler.h
index 84349abe185fa4..65d33bfbd2825f 100644
--- a/clang/include/clang/Driver/OffloadBundler.h
+++ b/clang/include/clang/Driver/OffloadBundler.h
@@ -17,6 +17,7 @@
 #ifndef LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
 #define LLVM_CLANG_DRIVER_OFFLOADBUNDLER_H
 
+#include "llvm/Support/Compression.h"
 #include "llvm/Support/Error.h"
 #include "llvm/TargetParser/Triple.h"
 #include <llvm/Support/MemoryBuffer.h>
@@ -36,6 +37,8 @@ class OffloadBundlerConfig {
   bool HipOpenmpCompatible = false;
   bool Compress = false;
   bool Verbose = false;
+  llvm::compression::Format CompressionFormat;
+  int CompressionLevel;
 
   unsigned BundleAlignment = 1;
   unsigned HostInputIndex = ~0u;
@@ -116,7 +119,8 @@ class CompressedOffloadBundle {
 
 public:
   static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-  compress(const llvm::MemoryBuffer &Input, bool Verbose = false);
+  compress(llvm::compression::Params P, const llvm::MemoryBuffer &Input,
+           bool Verbose = false);
   static llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
   decompress(const llvm::MemoryBuffer &Input, bool Verbose = false);
 };
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5b3d366dbcf91b..2d26a7983f397b 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1264,6 +1264,10 @@ def fno_gpu_sanitize : Flag<["-"], "fno-gpu-sanitize">, Group<f_Group>;
 def offload_compress : Flag<["--"], "offload-compress">,
   HelpText<"Compress offload device binaries (HIP only)">;
 def no_offload_compress : Flag<["--"], "no-offload-compress">;
+
+def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
+  Flags<[HelpHidden]>,
+  HelpText<"Compression level for offload device binaries (HIP only)">;
 }
 
 // CUDA options
diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp
index f9eadfaec88dec..a077a9648b0e9b 100644
--- a/clang/lib/Driver/OffloadBundler.cpp
+++ b/clang/lib/Driver/OffloadBundler.cpp
@@ -924,6 +924,17 @@ CreateFileHandler(MemoryBuffer &FirstInput,
 }
 
 OffloadBundlerConfig::OffloadBundlerConfig() {
+  if (llvm::compression::zstd::isAvailable()) {
+    CompressionFormat = llvm::compression::Format::Zstd;
+    // Compression level 3 is usually sufficient for zstd since long distance
+    // matching is enabled.
+    CompressionLevel = 3;
+  } else if (llvm::compression::zlib::isAvailable()) {
+    CompressionFormat = llvm::compression::Format::Zlib;
+    // Use the default level for zlib since higher levels do not bring a
+    // significant improvement.
+    CompressionLevel = llvm::compression::zlib::DefaultCompression;
+  }
   auto IgnoreEnvVarOpt =
       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_IGNORE_ENV_VAR");
   if (IgnoreEnvVarOpt.has_value() && IgnoreEnvVarOpt.value() == "1")
@@ -937,11 +948,41 @@ OffloadBundlerConfig::OffloadBundlerConfig() {
       llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESS");
   if (CompressEnvVarOpt.has_value())
     Compress = CompressEnvVarOpt.value() == "1";
+
+  auto CompressionLevelEnvVarOpt =
+      llvm::sys::Process::GetEnv("OFFLOAD_BUNDLER_COMPRESSION_LEVEL");
+  if (CompressionLevelEnvVarOpt.has_value()) {
+    llvm::StringRef CompressionLevelStr = CompressionLevelEnvVarOpt.value();
+    int Level;
+    if (!CompressionLevelStr.getAsInteger(10, Level))
+      CompressionLevel = Level;
+    else
+      llvm::errs()
+          << "Warning: Invalid value for OFFLOAD_BUNDLER_COMPRESSION_LEVEL: "
+          << CompressionLevelStr.str() << ". Ignoring it.\n";
+  }
+}
+
+// Utility function to format numbers with commas
+static std::string formatWithCommas(unsigned long long Value) {
+  std::string Num = std::to_string(Value);
+  int InsertPosition = Num.length() - 3;
+  while (InsertPosition > 0) {
+    Num.insert(InsertPosition, ",");
+    InsertPosition -= 3;
+  }
+  return Num;
 }
 
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
-CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
+CompressedOffloadBundle::compress(llvm::compression::Params P,
+                                  const llvm::MemoryBuffer &Input,
                                   bool Verbose) {
+  if (!llvm::compression::zstd::isAvailable() &&
+      !llvm::compression::zlib::isAvailable())
+    return createStringError(llvm::inconvertibleErrorCode(),
+                             "Compression not supported");
+
   llvm::Timer HashTimer("Hash Calculation Timer", "Hash calculation time",
                         ClangOffloadBundlerTimerGroup);
   if (Verbose)
@@ -959,25 +1000,15 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
       reinterpret_cast<const uint8_t *>(Input.getBuffer().data()),
       Input.getBuffer().size());
 
-  llvm::compression::Format CompressionFormat;
-
-  if (llvm::compression::zstd::isAvailable())
-    CompressionFormat = llvm::compression::Format::Zstd;
-  else if (llvm::compression::zlib::isAvailable())
-    CompressionFormat = llvm::compression::Format::Zlib;
-  else
-    return createStringError(llvm::inconvertibleErrorCode(),
-                             "Compression not supported");
-
   llvm::Timer CompressTimer("Compression Timer", "Compression time",
                             ClangOffloadBundlerTimerGroup);
   if (Verbose)
     CompressTimer.startTimer();
-  llvm::compression::compress(CompressionFormat, BufferUint8, CompressedBuffer);
+  llvm::compression::compress(P, BufferUint8, CompressedBuffer);
   if (Verbose)
     CompressTimer.stopTimer();
 
-  uint16_t CompressionMethod = static_cast<uint16_t>(CompressionFormat);
+  uint16_t CompressionMethod = static_cast<uint16_t>(P.format);
   uint32_t UncompressedSize = Input.getBuffer().size();
 
   SmallVector<char, 0> FinalBuffer;
@@ -995,17 +1026,31 @@ CompressedOffloadBundle::compress(const llvm::MemoryBuffer &Input,
 
   if (Verbose) {
     auto MethodUsed =
-        CompressionFormat == llvm::compression::Format::Zstd ? "zstd" : "zlib";
+        P.format == llvm::compression::Format::Zstd ? "zstd" : "zlib";
+    double CompressionRate =
+        static_cast<double>(UncompressedSize) / CompressedBuffer.size();
+    double CompressionTimeSeconds =
+        CompressTimer.getTotalTime()
+            .getWallTime();
+    double CompressionSpeedMBs =
+        (UncompressedSize / (1024.0 * 1024.0)) / CompressionTimeSeconds;
+
     llvm::errs() << "Compressed bundle format version: " << Version << "\n"
                  << "Compression method used: " << MethodUsed << "\n"
-                 << "Binary size before compression: " << UncompressedSize
-                 << " bytes\n"
-                 << "Binary size after compression: " << CompressedBuffer.size()
-                 << " bytes\n"
+                 << "Compression level: " << P.level << "\n"
+                 << "Binary size before compression: "
+                 << formatWithCommas(UncompressedSize) << " bytes\n"
+                 << "Binary size after compression: "
+                 << formatWithCommas(CompressedBuffer.size()) << " bytes\n"
+                 << "Compression rate: "
+                 << llvm::format("%.2lf", CompressionRate) << "\n"
+                 << "Compression ratio: "
+                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
+                 << "Compression speed: "
+                 << llvm::format("%.2lf MB/s", CompressionSpeedMBs) << "\n"
                  << "Truncated MD5 hash: "
                  << llvm::format_hex(TruncatedHash, 16) << "\n";
   }
-
   return llvm::MemoryBuffer::getMemBufferCopy(
       llvm::StringRef(FinalBuffer.data(), FinalBuffer.size()));
 }
@@ -1070,7 +1115,10 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
   if (Verbose) {
     DecompressTimer.stopTimer();
 
-    // Recalculate MD5 hash
+    double DecompressionTimeSeconds =
+        DecompressTimer.getTotalTime().getWallTime();
+
+    // Recalculate MD5 hash for integrity check
     llvm::Timer HashRecalcTimer("Hash Recalculation Timer",
                                 "Hash recalculation time",
                                 ClangOffloadBundlerTimerGroup);
@@ -1084,16 +1132,27 @@ CompressedOffloadBundle::decompress(const llvm::MemoryBuffer &Input,
     HashRecalcTimer.stopTimer();
     bool HashMatch = (StoredHash == RecalculatedHash);
 
+    double CompressionRate =
+        static_cast<double>(UncompressedSize) / CompressedData.size();
+    double DecompressionSpeedMBs =
+        (UncompressedSize / (1024.0 * 1024.0)) / DecompressionTimeSeconds;
+
     llvm::errs() << "Compressed bundle format version: " << ThisVersion << "\n"
                  << "Decompression method: "
                  << (CompressionFormat == llvm::compression::Format::Zlib
                          ? "zlib"
                          : "zstd")
                  << "\n"
-                 << "Size before decompression: " << CompressedData.size()
-                 << " bytes\n"
-                 << "Size after decompression: " << UncompressedSize
-                 << " bytes\n"
+                 << "Size before decompression: "
+                 << formatWithCommas(CompressedData.size()) << " bytes\n"
+                 << "Size after decompression: "
+                 << formatWithCommas(UncompressedSize) << " bytes\n"
+                 << "Compression rate: "
+                 << llvm::format("%.2lf", CompressionRate) << "\n"
+                 << "Compression ratio: "
+                 << llvm::format("%.2lf%%", 100.0 / CompressionRate) << "\n"
+                 << "Decompression speed: "
+                 << llvm::format("%.2lf MB/s", DecompressionSpeedMBs) << "\n"
                  << "Stored hash: " << llvm::format_hex(StoredHash, 16) << "\n"
                  << "Recalculated hash: "
                  << llvm::format_hex(RecalculatedHash, 16) << "\n"
@@ -1287,8 +1346,10 @@ Error OffloadBundler::BundleFiles() {
     std::unique_ptr<llvm::MemoryBuffer> BufferMemory =
         llvm::MemoryBuffer::getMemBufferCopy(
             llvm::StringRef(Buffer.data(), Buffer.size()));
-    auto CompressionResult =
-        CompressedOffloadBundle::compress(*BufferMemory, BundlerConfig.Verbose);
+    auto CompressionResult = CompressedOffloadBundle::compress(
+        {BundlerConfig.CompressionFormat, BundlerConfig.CompressionLevel,
+         /*zstdEnableLdm=*/true},
+        *BufferMemory, BundlerConfig.Verbose);
     if (auto Error = CompressionResult.takeError())
       return Error;
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fa17f6295d6ea7..003a7d7f0a07ad 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -8524,6 +8524,20 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA,
 }
 
 // Begin OffloadBundler
+void OffloadBundler::addCommonArgs(const llvm::opt::ArgList &TCArgs,
+                                   llvm::opt::ArgStringList &CmdArgs) {
+  if (TCArgs.hasFlag(options::OPT_offload_compress,
+                     options::OPT_no_offload_compress, false))
+    CmdArgs.push_back("-compress");
+  if (TCArgs.hasArg(options::OPT_v))
+    CmdArgs.push_back("-verbose");
+  if (auto *Arg =
+          TCArgs.getLastArg(options::OPT_offload_compression_level_EQ)) {
+    std::string CompressionLevelArg =
+        std::string("-compression-level=") + Arg->getValue();
+    CmdArgs.push_back(TCArgs.MakeArgString(CompressionLevelArg));
+  }
+}
 
 void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
                                   const InputInfo &Output,
@@ -8622,11 +8636,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
     }
     CmdArgs.push_back(TCArgs.MakeArgString(UB));
   }
-  if (TCArgs.hasFlag(options::OPT_offload_compress,
-                     options::OPT_no_offload_compress, false))
-    CmdArgs.push_back("-compress");
-  if (TCArgs.hasArg(options::OPT_v))
-    CmdArgs.push_back("-verbose");
+  addCommonArgs(TCArgs, CmdArgs);
   // All the inputs are encoded as commands.
   C.addCommand(std::make_unique<Command>(
       JA, *this, ResponseFileSupport::None(),
diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h
index 0f503c4bd1c4fe..9930a773150396 100644
--- a/clang/lib/Driver/ToolChains/Clang.h
+++ b/clang/lib/Driver/ToolChains/Clang.h
@@ -157,6 +157,8 @@ class LLVM_LIBRARY_VISIBILITY OffloadBundler final : public Tool {
                                    const InputInfoList &Inputs,
                                    const llvm::opt::ArgList &TCArgs,
                                    const char *LinkingOutput) const override;
+  static void addCommonArgs(const llvm::opt::ArgList &TCArgs,
+                            llvm::opt::ArgStringList &CmdArgs);
 };
 
 /// Offload binary tool.
diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp
index fcecf2e1313bb5..3cc733d30caec0 100644
--- a/clang/lib/Driver/ToolChains/HIPUtility.cpp
+++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "HIPUtility.h"
+#include "Clang.h"
 #include "CommonArgs.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/Options.h"
@@ -258,11 +259,7 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
       Args.MakeArgString(std::string("-output=").append(Output));
   BundlerArgs.push_back(BundlerOutputArg);
 
-  if (Args.hasFlag(options::OPT_offload_compress,
-                   options::OPT_no_offload_compress, false))
-    BundlerArgs.push_back("-compress");
-  if (Args.hasArg(options::OPT_v))
-    BundlerArgs.push_back("-verbose");
+  OffloadBundler::addCommonArgs(Args, BundlerArgs);
 
   const char *Bundler = Args.MakeArgString(
       T.getToolChain().GetProgramPath("clang-offload-bundler"));
diff --git a/clang/test/Driver/clang-offload-bundler-zlib.c b/clang/test/Driver/clang-offload-bundler-zlib.c
index a57ee6da9a86a6..15b60341a8dbde 100644
--- a/clang/test/Driver/clang-offload-bundler-zlib.c
+++ b/clang/test/Driver/clang-offload-bundler-zlib.c
@@ -1,4 +1,4 @@
-// REQUIRES: zlib
+// REQUIRES: zlib && !zstd
 // REQUIRES: x86-registered-target
 // UNSUPPORTED: target={{.*}}-darwin{{.*}}, target={{.*}}-aix{{.*}}
 
@@ -34,13 +34,28 @@
 // RUN: diff %t.tgt2 %t.res.tgt2
 
 //
-// COMPRESS: Compression method used:
-// DECOMPRESS: Decompression method:
+// COMPRESS: Compression method used: zlib
+// COMPRESS: Compression level: 6
+// DECOMPRESS: Decompression method: zlib
+// DECOMPRESS: Hashes match: Yes
 // NOHOST-NOT: host-
 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
 //
 
+// Check -compression-level= option
+
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
+// RUN:   FileCheck -check-prefix=LEVEL %s
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+//
+// LEVEL: Compression method used: zlib
+// LEVEL: Compression level: 9
+
 //
 // Check -bundle-align option.
 //
diff --git a/clang/test/Driver/clang-offload-bundler-zstd.c b/clang/test/Driver/clang-offload-bundler-zstd.c
index 3b577d4d166a3f..c1eb3f6e7ebd07 100644
--- a/clang/test/Driver/clang-offload-bundler-zstd.c
+++ b/clang/test/Driver/clang-offload-bundler-zstd.c
@@ -31,13 +31,28 @@
 // RUN: diff %t.tgt1 %t.res.tgt1
 // RUN: diff %t.tgt2 %t.res.tgt2
 //
-// COMPRESS: Compression method used
-// DECOMPRESS: Decompression method
+// COMPRESS: Compression method used: zstd
+// COMPRESS: Compression level: 3
+// DECOMPRESS: Decompression method: zstd
+// DECOMPRESS: Hashes match: Yes
 // NOHOST-NOT: host-
 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx900
 // NOHOST-DAG: hip-amdgcn-amd-amdhsa--gfx906
 //
 
+// Check -compression-level= option
+
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -input=%t.tgt1 -input=%t.tgt2 -output=%t.hip.bundle.bc -compress -verbose -compression-level=9 2>&1 | \
+// RUN:   FileCheck -check-prefix=LEVEL %s
+// RUN: clang-offload-bundler -type=bc -targets=hip-amdgcn-amd-amdhsa--gfx900,hip-amdgcn-amd-amdhsa--gfx906 \
+// RUN:   -output=%t.res.tgt1 -output=%t.res.tgt2 -input=%t.hip.bundle.bc -unbundle
+// RUN: diff %t.tgt1 %t.res.tgt1
+// RUN: diff %t.tgt2 %t.res.tgt2
+//
+// LEVEL: Compression method used: zstd
+// LEVEL: Compression level: 9
+
 //
 // Check -bundle-align option.
 //
diff --git a/clang/test/Driver/hip-offload-compress-zlib.hip b/clang/test/Driver/hip-offload-compress-zlib.hip
index 7a269c566bb93c..c1566c5f192c2d 100644
--- a/clang/test/Driver/hip-offload-compress-zlib.hip
+++ b/clang/test/Driver/hip-offload-compress-zlib.hip
@@ -1,4 +1,4 @@
-// REQUIRES: zlib
+// REQUIRES: zlib && !zstd
 // REQUIRES: x86-registered-target
 // REQUIRES: amdgpu-registered-target
 
@@ -9,13 +9,14 @@
 // RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
 // RUN:   --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN:   --offload-compress --offload-compression-level=9 \
+// RUN:   --offload-device-only --gpu-bundle-output \
 // RUN:   -o %t.bc \
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK: clang-offload-bundler{{.*}} -type=bc
 // CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
-// CHECK-SAME: -compress -verbose
+// CHECK-SAME: -compress -verbose -compression-level=9
 // CHECK: Compressed bundle format
 
 // Test uncompress of bundled bitcode.
diff --git a/clang/test/Driver/hip-offload-compress-zstd.hip b/clang/test/Driver/hip-offload-compress-zstd.hip
index fa7fb3b6d5b5cb..ede7d59f113c86 100644
--- a/clang/test/Driver/hip-offload-compress-zstd.hip
+++ b/clang/test/Driver/hip-offload-compress-zstd.hip
@@ -9,13 +9,14 @@
 // RUN:   -x hip --offload-arch=gfx1100 --offload-arch=gfx1101 \
 // RUN:   --no-offload-new-driver -fgpu-rdc -nogpuinc -nogpulib \
 // RUN:   %S/Inputs/hip_multiple_inputs/a.cu \
-// RUN:   --offload-compress --offload-device-only --gpu-bundle-output \
+// RUN:   --offload-compress --offload-compression-level=9 \
+// RUN:   --offload-device-only --gpu-bundle-output \
 // RUN:   -o %t.bc \
 // RUN: 2>&1 | FileCheck %s
 
 // CHECK: clang-offload-bundler{{.*}} -type=bc
 // CHECK-SAME: -targets={{.*}}hip-amdgcn-amd-amdhsa-gfx1100,hip-amdgcn-amd-amdhsa-gfx1101
-// CHECK-SAME: -compress -verbose
+// CHECK-SAME: -compress -verbose -compression-level=9
 // CHECK: Compressed bundle format
 
 // Test uncompress of bundled bitcode.
diff --git a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
index ec67e24552e9c9..e336417586f70b 100644
--- a/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
+++ b/clang/tools/clang-offload-bundler/ClangOffloadBundler.cpp
@@ -145,6 +145,9 @@ int main(int argc, const char **argv) {
                          cl::init(false), cl::cat(ClangOffloadBundlerCategory));
   cl::opt<bool> Verbose("verbose", cl::desc("Print debug information.\n"),
                         cl::init(false), cl::cat(ClangOffloadBundlerCategory));
+  cl::opt<int> CompressionLevel(
+      "compression-level", cl::desc("Specify the compression level (integer)"),
+      cl::value_desc("n"), cl::Optional, cl::cat(ClangOffloadBundlerCategory));
 
   // Process commandline options and report errors
   sys::PrintStackTraceOnErrorSignal(argv[0]);
@@ -178,6 +181,8 @@ int main(int argc, const char **argv) {
     BundlerConfig.Compress = Compress;
   if (Verbose.getNumOccurrences() > 0)
     BundlerConfig.Verbose = Verbose;
+  if (CompressionLevel.getNumOccurrences() > 0)
+    BundlerConfig.CompressionLevel = CompressionLevel;
 
   BundlerConfig.TargetNames = TargetNames;
   BundlerConfig.InputFileNames = InputFileNames;
diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index c3ba3274d6ed87..2a8da9e96d356f 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -63,7 +63,7 @@ bool isAvailable();
 
 void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
-              int Level = DefaultCompression);
+              int Level = DefaultCompression, bool EnableLdm = false);
 
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
@@ -94,10 +94,13 @@ struct Params {
   constexpr Params(Format F)
       : format(F), level(F == Format::Zlib ? zlib::DefaultCompression
                                            : zstd::DefaultCompression) {}
+  constexpr Params(Format F, int L, bool Ldm = false)
+      : format(F), level(L), zstdEnableLdm(Ldm) {}
   Params(DebugCompressionType Type) : Params(formatFor(Type)) {}
 
   Format format;
   int level;
+  bool zstdEnableLdm = false; // Enable zstd long distance matching
   // This may support multi-threading for zstd in the future. Note that
   // different threads may produce different output, so be careful if certain
   // output determinism is desired.
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index 8e57ba798f5207..1294366e761807 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -50,7 +50,7 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
     zlib::compress(Input, Output, P.level);
     break;
   case compression::Format::Zstd:
-    zstd::compress(Input, Output, P.level);
+    zstd::compress(Input, Output, P.level, P.zstdEnableLdm);
     break;
   }
 }
@@ -163,17 +163,39 @@ Error zlib::decompress(ArrayRef<uint8_t> Input,
 
 bool zstd::isAvailable() { return true; }
 
+#include <zstd.h> // Ensure ZSTD library is included
+
 void zstd::compress(ArrayRef<uint8_t> Input,
-                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
-  unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size());
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level,
+                    bool EnableLdm) {
+  ZSTD_CCtx *Cctx = ZSTD_createCCtx();
+  if (!Cctx)
+    report_bad_alloc_error("Failed to create ZSTD_CCtx");
+
+  if (ZSTD_isError(ZSTD_CCtx_setParameter(
+          Cctx, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
+    ZSTD_freeCCtx(Cctx);
+    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
+  }
+
+  if (ZSTD_isError(
+          ZSTD_CCtx_setParameter(Cctx, ZSTD_c_compressionLevel, Level))) {
+    ZSTD_freeCCtx(Cctx);
+    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
+  }
+
+  unsigned long CompressedBufferSize = ZSTD_compressBound(Input.size());
   CompressedBuffer.resize_for_overwrite(CompressedBufferSize);
-  unsigned long CompressedSize =
-      ::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize,
-                      (const char *)Input.data(), Input.size(), Level);
+
+  size_t const CompressedSize =
+      ZSTD_compress2(Cctx, CompressedBuffer.data(), CompressedBufferSize,
+                     Input.data(), Input.size());
+
+  ZSTD_freeCCtx(Cctx);
+
   if (ZSTD_isError(CompressedSize))
-    report_bad_alloc_error("Allocation failed");
-  // Tell MemorySanitizer that zstd output buffer is fully initialized.
-  // This avoids a false report when running LLVM with uninstrumented ZLib.
+    report_bad_alloc_error("Compression failed");
+
   __msan_unpoison(CompressedBuffer.data(), CompressedSize);
   if (CompressedSize < CompressedBuffer.size())
     CompressedBuffer.truncate(CompressedSize);



More information about the cfe-commits mailing list