[llvm] [HIP] Support compressing bundle by LZMA (PR #83297)

Yaxun Liu via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 28 09:34:13 PST 2024


https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/83297

>From 86637f5ceb3f941ec415431a5ab5abc085ef77bf Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Wed, 28 Feb 2024 12:30:51 -0500
Subject: [PATCH] Add LZMA for compression/decompression

This patch adds liblzma as an alternative compression/decompression
method to zlib/zstd.
---
 llvm/CMakeLists.txt                          |  2 +
 llvm/cmake/config-ix.cmake                   | 25 +++++
 llvm/cmake/modules/LLVMConfig.cmake.in       |  5 +
 llvm/docs/CMake.rst                          |  5 +
 llvm/include/llvm/Config/llvm-config.h.cmake |  3 +
 llvm/include/llvm/Support/Compression.h      | 26 +++++-
 llvm/lib/Support/CMakeLists.txt              | 17 ++++
 llvm/lib/Support/Compression.cpp             | 98 ++++++++++++++++++++
 llvm/test/CMakeLists.txt                     |  1 +
 llvm/test/lit.site.cfg.py.in                 |  1 +
 llvm/utils/lit/lit/llvm/config.py            |  3 +
 11 files changed, 184 insertions(+), 2 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index f5f7d3f3253fd3..be500d51d22a7a 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -552,6 +552,8 @@ set(LLVM_ENABLE_ZLIB "ON" CACHE STRING "Use zlib for compression/decompression i
 
 set(LLVM_ENABLE_ZSTD "ON" CACHE STRING "Use zstd for compression/decompression if available. Can be ON, OFF, or FORCE_ON")
 
+set(LLVM_ENABLE_LZMA "ON" CACHE STRING "Use lzma for compression/decompression if available. Can be ON, OFF, or FORCE_ON")
+
 set(LLVM_USE_STATIC_ZSTD FALSE CACHE BOOL "Use static version of zstd. Can be TRUE, FALSE")
 
 set(LLVM_ENABLE_CURL "OFF" CACHE STRING "Use libcurl for the HTTP client if available. Can be ON, OFF, or FORCE_ON")
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index bf1b110245bb2f..4ac1e58cf565b1 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -162,6 +162,31 @@ if(LLVM_ENABLE_ZSTD)
 endif()
 set(LLVM_ENABLE_ZSTD ${zstd_FOUND})
 
+set(llvm_lzma_usable 0)
+if(LLVM_ENABLE_LZMA)
+  if(LLVM_ENABLE_LZMA STREQUAL FORCE_ON)
+    # REQUIRED already aborts configuration when liblzma cannot be located.
+    find_package(LibLZMA REQUIRED)
+  else()
+    find_package(LibLZMA QUIET)
+  endif()
+  if(LIBLZMA_FOUND)
+    # Check if lzma we found is usable; for example, we may have found a 32-bit
+    # library on a 64-bit system which would result in a link-time failure.
+    cmake_push_check_state()
+    list(APPEND CMAKE_REQUIRED_INCLUDES ${LIBLZMA_INCLUDE_DIRS})
+    list(APPEND CMAKE_REQUIRED_LIBRARIES ${LIBLZMA_LIBRARIES})
+    check_symbol_exists(lzma_lzma_preset lzma.h HAVE_LZMA)
+    cmake_pop_check_state()
+    if(HAVE_LZMA)
+      set(llvm_lzma_usable 1)
+    elseif(LLVM_ENABLE_LZMA STREQUAL FORCE_ON)
+      message(FATAL_ERROR "Failed to configure lzma, but LLVM_ENABLE_LZMA is FORCE_ON")
+    endif()
+  endif()
+endif()
+set(LLVM_ENABLE_LZMA ${llvm_lzma_usable})
+
 if(LLVM_ENABLE_LIBXML2)
   if(LLVM_ENABLE_LIBXML2 STREQUAL FORCE_ON)
     find_package(LibXml2 REQUIRED)
diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in
index 770a9caea322e6..660e056f113859 100644
--- a/llvm/cmake/modules/LLVMConfig.cmake.in
+++ b/llvm/cmake/modules/LLVMConfig.cmake.in
@@ -80,6 +80,11 @@ if(LLVM_ENABLE_ZSTD)
   find_package(zstd)
 endif()
 
+set(LLVM_ENABLE_LZMA @LLVM_ENABLE_LZMA@)
+if(LLVM_ENABLE_LZMA)
+  find_package(LibLZMA)
+endif()
+
 set(LLVM_ENABLE_LIBXML2 @LLVM_ENABLE_LIBXML2@)
 if(LLVM_ENABLE_LIBXML2)
   find_package(LibXml2)
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.rst
index abef4f8103140f..d7f86caa71202b 100644
--- a/llvm/docs/CMake.rst
+++ b/llvm/docs/CMake.rst
@@ -629,6 +629,11 @@ enabled sub-projects. Nearly all of these variable names begin with
   zstd. Allowed values are ``OFF``, ``ON`` (default, enable if zstd is found),
   and ``FORCE_ON`` (error if zstd is not found).
 
+**LLVM_ENABLE_LZMA**:STRING
+  Used to decide if LLVM tools should support compression/decompression with
+  lzma. Allowed values are ``OFF``, ``ON`` (default, enable if lzma is found),
+  and ``FORCE_ON`` (error if lzma is not found).
+
 **LLVM_EXPERIMENTAL_TARGETS_TO_BUILD**:STRING
   Semicolon-separated list of experimental targets to build and linked into
   llvm. This will build the experimental target without needing it to add to the
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
index 6605ea60df99e1..47e53f8b4ee7bc 100644
--- a/llvm/include/llvm/Config/llvm-config.h.cmake
+++ b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -173,6 +173,9 @@
 /* Define if zstd compression is available */
 #cmakedefine01 LLVM_ENABLE_ZSTD
 
+/* Define if lzma compression is available */
+#cmakedefine01 LLVM_ENABLE_LZMA
+
 /* Define if LLVM is using tflite */
 #cmakedefine LLVM_HAVE_TFLITE
 
diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index c3ba3274d6ed87..6dc7b162772d90 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -73,9 +73,31 @@ Error decompress(ArrayRef<uint8_t> Input, SmallVectorImpl<uint8_t> &Output,
 
 } // End of namespace zstd
 
+namespace lzma {
+
+constexpr int NoCompression = 0;
+constexpr int BestSpeedCompression = 1;
+constexpr int DefaultCompression = 6;
+constexpr int BestSizeCompression = 9;
+
+bool isAvailable();
+
+void compress(ArrayRef<uint8_t> Input,
+              SmallVectorImpl<uint8_t> &CompressedBuffer,
+              int Level = DefaultCompression);
+
+Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                 size_t &UncompressedSize);
+
+Error decompress(ArrayRef<uint8_t> Input, SmallVectorImpl<uint8_t> &Output,
+                 size_t UncompressedSize);
+
+} // End of namespace lzma
+
 enum class Format {
   Zlib,
   Zstd,
+  Lzma,
 };
 
 inline Format formatFor(DebugCompressionType Type) {
@@ -104,8 +126,8 @@ struct Params {
 };
 
 // Return nullptr if LLVM was built with support (LLVM_ENABLE_ZLIB,
-// LLVM_ENABLE_ZSTD) for the specified compression format; otherwise
-// return a string literal describing the reason.
+// LLVM_ENABLE_ZSTD, LLVM_ENABLE_LZMA) for the specified compression format;
+// otherwise return a string literal describing the reason.
 const char *getReasonIfUnsupported(Format F);
 
 // Compress Input with the specified format P.Format. If Level is -1, use
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index 1f2d82427552f7..1ed0dcd435ecf8 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -37,6 +37,10 @@ if(LLVM_ENABLE_ZSTD)
   list(APPEND imported_libs ${zstd_target})
 endif()
 
+if(LLVM_ENABLE_LZMA)
+  list(APPEND imported_libs LibLZMA::LibLZMA)
+endif()
+
 if( MSVC OR MINGW )
   # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc.
   # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc.
@@ -323,6 +327,19 @@ if(LLVM_ENABLE_ZSTD)
   set(llvm_system_libs ${llvm_system_libs} "${zstd_library}")
 endif()
 
+if(LLVM_ENABLE_LZMA)
+  # Prefer the per-configuration location; CMAKE_BUILD_TYPE is empty for
+  # multi-configuration generators, in which case fall back to LOCATION.
+  if(CMAKE_BUILD_TYPE)
+    string(TOUPPER "${CMAKE_BUILD_TYPE}" build_type)
+    get_property(lzma_library TARGET LibLZMA::LibLZMA PROPERTY LOCATION_${build_type})
+  endif()
+  if(NOT lzma_library)
+    get_property(lzma_library TARGET LibLZMA::LibLZMA PROPERTY LOCATION)
+  endif()
+  get_library_name(${lzma_library} lzma_library)
+  list(APPEND llvm_system_libs "${lzma_library}")
+endif()
+
 if(LLVM_ENABLE_TERMINFO)
   if(NOT terminfo_library)
     get_property(terminfo_library TARGET Terminfo::terminfo PROPERTY LOCATION)
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index 8e57ba798f5207..f88560e58e8135 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -23,6 +23,9 @@
 #if LLVM_ENABLE_ZSTD
 #include <zstd.h>
 #endif
+#if LLVM_ENABLE_LZMA
+#include <lzma.h>
+#endif
 
 using namespace llvm;
 using namespace llvm::compression;
@@ -39,6 +42,11 @@ const char *compression::getReasonIfUnsupported(compression::Format F) {
       return nullptr;
     return "LLVM was not built with LLVM_ENABLE_ZSTD or did not find zstd at "
            "build time";
+  case compression::Format::Lzma:
+    if (lzma::isAvailable())
+      return nullptr;
+    return "LLVM was not built with LLVM_ENABLE_LZMA or did not find lzma at "
+           "build time";
   }
   llvm_unreachable("");
 }
@@ -52,6 +60,9 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
   case compression::Format::Zstd:
     zstd::compress(Input, Output, P.level);
     break;
+  case compression::Format::Lzma:
+    lzma::compress(Input, Output, P.level);
+    break;
   }
 }
 
@@ -62,6 +73,8 @@ Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
     return zlib::decompress(Input, Output, UncompressedSize);
   case compression::Format::Zstd:
     return zstd::decompress(Input, Output, UncompressedSize);
+  case compression::Format::Lzma:
+    break;
   }
   llvm_unreachable("");
 }
@@ -74,6 +87,8 @@ Error compression::decompress(compression::Format F, ArrayRef<uint8_t> Input,
     return zlib::decompress(Input, Output, UncompressedSize);
   case compression::Format::Zstd:
     return zstd::decompress(Input, Output, UncompressedSize);
+  case compression::Format::Lzma:
+    return lzma::decompress(Input, Output, UncompressedSize);
   }
   llvm_unreachable("");
 }
@@ -218,3 +233,86 @@ Error zstd::decompress(ArrayRef<uint8_t> Input,
   llvm_unreachable("zstd::decompress is unavailable");
 }
 #endif
+#if LLVM_ENABLE_LZMA
+
+bool lzma::isAvailable() { return true; }
+
+// Compress Input into CompressedBuffer as an .xz stream using the LZMA2
+// filter with the liblzma preset selected by Level.
+void lzma::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+  lzma_options_lzma Opt;
+  // lzma_lzma_preset returns an lzma_bool (true on failure), not an lzma_ret.
+  if (lzma_lzma_preset(&Opt, Level))
+    report_bad_alloc_error("lzma::compress failed: preset error");
+
+  lzma_filter Filters[] = {{LZMA_FILTER_LZMA2, &Opt},
+                           {LZMA_VLI_UNKNOWN, nullptr}};
+
+  size_t MaxOutSize = lzma_stream_buffer_bound(Input.size());
+  CompressedBuffer.resize_for_overwrite(MaxOutSize);
+
+  size_t OutPos = 0;
+  lzma_ret Ret = lzma_stream_buffer_encode(
+      Filters, LZMA_CHECK_CRC64, nullptr, Input.data(), Input.size(),
+      CompressedBuffer.data(), &OutPos, MaxOutSize);
+  if (Ret != LZMA_OK)
+    report_bad_alloc_error("lzma::compress failed");
+  CompressedBuffer.resize(OutPos);
+}
+
+// Decompress Input into Output. UncompressedSize is the capacity of Output on
+// entry and, on success, the number of bytes actually written.
+Error lzma::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                       size_t &UncompressedSize) {
+  // Memory usage limit, in bytes, handed to lzma_auto_decoder.
+  const size_t DecoderMemoryLimit = 100 * 1024 * 1024;
+  lzma_stream Strm = LZMA_STREAM_INIT;
+
+  lzma_ret Ret = lzma_auto_decoder(&Strm, DecoderMemoryLimit, 0);
+  if (Ret != LZMA_OK)
+    return make_error<StringError>("Failed to initialize LZMA decoder",
+                                   inconvertibleErrorCode());
+
+  Strm.next_in = Input.data();
+  Strm.avail_in = Input.size();
+  Strm.next_out = Output;
+  Strm.avail_out = UncompressedSize;
+
+  // Decode in a single call; anything but LZMA_STREAM_END is a failure.
+  Ret = lzma_code(&Strm, LZMA_FINISH);
+  const size_t TotalOut = Strm.total_out;
+  lzma_end(&Strm);
+  if (Ret != LZMA_STREAM_END)
+    return make_error<StringError>("LZMA decompression failed",
+                                   inconvertibleErrorCode());
+  UncompressedSize = TotalOut;
+  return Error::success();
+}
+
+Error lzma::decompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &Output,
+                       size_t UncompressedSize) {
+  Output.resize_for_overwrite(UncompressedSize);
+  Error E = lzma::decompress(Input, Output.data(), UncompressedSize);
+  if (UncompressedSize < Output.size())
+    Output.truncate(UncompressedSize);
+  return E;
+}
+
+#else
+bool lzma::isAvailable() { return false; }
+void lzma::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+  llvm_unreachable("lzma::compress is unavailable");
+}
+Error lzma::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
+                       size_t &UncompressedSize) {
+  llvm_unreachable("lzma::decompress is unavailable");
+}
+Error lzma::decompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &Output,
+                       size_t UncompressedSize) {
+  llvm_unreachable("lzma::decompress is unavailable");
+}
+#endif
diff --git a/llvm/test/CMakeLists.txt b/llvm/test/CMakeLists.txt
index 6127b76db06b7f..777a54784203a4 100644
--- a/llvm/test/CMakeLists.txt
+++ b/llvm/test/CMakeLists.txt
@@ -8,6 +8,7 @@ llvm_canonicalize_cmake_booleans(
   LLVM_ENABLE_HTTPLIB
   LLVM_ENABLE_ZLIB
   LLVM_ENABLE_ZSTD
+  LLVM_ENABLE_LZMA
   LLVM_ENABLE_LIBXML2
   LLVM_LINK_LLVM_DYLIB
   LLVM_TOOL_LTO_BUILD
diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index b6f255d472d16f..7cdca4083295f5 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -35,6 +35,7 @@ config.llvm_use_intel_jitevents = @LLVM_USE_INTEL_JITEVENTS@
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.have_zlib = @LLVM_ENABLE_ZLIB@
 config.have_zstd = @LLVM_ENABLE_ZSTD@
+config.have_lzma = @LLVM_ENABLE_LZMA@
 config.have_libxml2 = @LLVM_ENABLE_LIBXML2@
 config.have_curl = @LLVM_ENABLE_CURL@
 config.have_httplib = @LLVM_ENABLE_HTTPLIB@
diff --git a/llvm/utils/lit/lit/llvm/config.py b/llvm/utils/lit/lit/llvm/config.py
index 96b4f7bc86772d..6e307da7354118 100644
--- a/llvm/utils/lit/lit/llvm/config.py
+++ b/llvm/utils/lit/lit/llvm/config.py
@@ -131,6 +131,9 @@ def __init__(self, lit_config, config):
         have_zstd = getattr(config, "have_zstd", None)
         if have_zstd:
             features.add("zstd")
+        have_lzma = getattr(config, "have_lzma", None)
+        if have_lzma:
+            features.add("lzma")
 
         if getattr(config, "reverse_iteration", None):
             features.add("reverse_iteration")



More information about the llvm-commits mailing list