[llvm] e939bf6 - [llvm] add zstd to `llvm::compression` namespace

Cole Kissane via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 10:54:48 PDT 2022


Author: Cole Kissane
Date: 2022-07-19T10:54:36-07:00
New Revision: e939bf67e34037970192fa90ad22ab7628f5de33

URL: https://github.com/llvm/llvm-project/commit/e939bf67e34037970192fa90ad22ab7628f5de33
DIFF: https://github.com/llvm/llvm-project/commit/e939bf67e34037970192fa90ad22ab7628f5de33.diff

LOG: [llvm] add zstd to `llvm::compression` namespace

- add zstd to `llvm::compression` namespace
- add a CMake option `LLVM_ENABLE_ZSTD` with behavior mirroring that of `LLVM_ENABLE_ZLIB`
- add tests for zstd to `llvm/unittests/Support/CompressionTest.cpp`
- Debian/Ubuntu users building with `LLVM_ENABLE_ZSTD=FORCE_ON` should install libzstd from source because of this bug in the distribution package: https://bugs.launchpad.net/ubuntu/+source/libzstd/+bug/1941956

Reviewed By: leonardchan, MaskRay

Differential Revision: https://reviews.llvm.org/D128465

Added: 
    

Modified: 
    llvm/CMakeLists.txt
    llvm/cmake/config-ix.cmake
    llvm/cmake/modules/LLVMConfig.cmake.in
    llvm/include/llvm/Config/llvm-config.h.cmake
    llvm/include/llvm/Support/Compression.h
    llvm/lib/Support/CMakeLists.txt
    llvm/lib/Support/Compression.cpp
    llvm/test/lit.site.cfg.py.in
    llvm/unittests/Support/CompressionTest.cpp
    utils/bazel/llvm_configs/llvm-config.h.cmake

Removed: 
    


################################################################################
diff  --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 53c4158866edc..45399dc0537e1 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -438,6 +438,8 @@ endif()
 
 set(LLVM_ENABLE_ZLIB "ON" CACHE STRING "Use zlib for compression/decompression if available. Can be ON, OFF, or FORCE_ON")
 
+set(LLVM_ENABLE_ZSTD "ON" CACHE STRING "Use zstd for compression/decompression if available. Can be ON, OFF, or FORCE_ON")
+
 set(LLVM_ENABLE_CURL "OFF" CACHE STRING "Use libcurl for the HTTP client if available. Can be ON, OFF, or FORCE_ON")
 
 set(LLVM_ENABLE_HTTPLIB "OFF" CACHE STRING "Use cpp-httplib HTTP server library if available. Can be ON, OFF, or FORCE_ON")

diff  --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 1d6743d9f4603..d2c3d57a6be55 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -138,6 +138,19 @@ else()
   set(LLVM_ENABLE_ZLIB 0)
 endif()
 
+set(zstd_FOUND 0)
+if(LLVM_ENABLE_ZSTD)
+  if(LLVM_ENABLE_ZSTD STREQUAL FORCE_ON)
+    find_package(zstd REQUIRED)
+    if(NOT zstd_FOUND)
+      message(FATAL_ERROR "Failed to configure zstd, but LLVM_ENABLE_ZSTD is FORCE_ON")
+    endif()
+  elseif(NOT LLVM_USE_SANITIZER MATCHES "Memory.*")
+    find_package(zstd)
+  endif()
+endif()
+set(LLVM_ENABLE_ZSTD ${zstd_FOUND})
+
 if(LLVM_ENABLE_LIBXML2)
   if(LLVM_ENABLE_LIBXML2 STREQUAL FORCE_ON)
     find_package(LibXml2 REQUIRED)

diff  --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in
index d95fa919ef512..71a2a856def6b 100644
--- a/llvm/cmake/modules/LLVMConfig.cmake.in
+++ b/llvm/cmake/modules/LLVMConfig.cmake.in
@@ -73,6 +73,12 @@ if(LLVM_ENABLE_ZLIB)
   find_package(ZLIB)
 endif()
 
+set(LLVM_ENABLE_ZSTD @LLVM_ENABLE_ZSTD@)
+if(LLVM_ENABLE_ZSTD)
+  set(zstd_ROOT @zstd_ROOT@)
+  find_package(zstd)
+endif()
+
 set(LLVM_ENABLE_LIBXML2 @LLVM_ENABLE_LIBXML2@)
 if(LLVM_ENABLE_LIBXML2)
   find_package(LibXml2)

diff  --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
index 3355f12337203..96fbe6f771d2a 100644
--- a/llvm/include/llvm/Config/llvm-config.h.cmake
+++ b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -95,6 +95,9 @@
 /* Define if zlib compression is available */
 #cmakedefine01 LLVM_ENABLE_ZLIB
 
+/* Define if zstd compression is available */
+#cmakedefine01 LLVM_ENABLE_ZSTD
+
 /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */
 #cmakedefine LLVM_HAVE_TF_API
 

diff  --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index cde9fa22cf4c8..8500396d88a0b 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -43,6 +43,28 @@ Error uncompress(ArrayRef<uint8_t> Input,
 
 } // End of namespace zlib
 
+namespace zstd {
+
+constexpr int NoCompression = -5;
+constexpr int BestSpeedCompression = 1;
+constexpr int DefaultCompression = 5;
+constexpr int BestSizeCompression = 12;
+
+bool isAvailable();
+
+void compress(ArrayRef<uint8_t> Input,
+              SmallVectorImpl<uint8_t> &CompressedBuffer,
+              int Level = DefaultCompression);
+
+Error uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
+                 size_t &UncompressedSize);
+
+Error uncompress(ArrayRef<uint8_t> Input,
+                 SmallVectorImpl<uint8_t> &UncompressedBuffer,
+                 size_t UncompressedSize);
+
+} // End of namespace zstd
+
 } // End of namespace compression
 
 } // End of namespace llvm

diff  --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
index e1045e459d706..b2447cd168652 100644
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -25,6 +25,17 @@ if(LLVM_ENABLE_ZLIB)
   set(imported_libs ZLIB::ZLIB)
 endif()
 
+if(LLVM_ENABLE_ZSTD)
+  # A zstd installation may provide only the static library, in which case the
+  # zstd::libzstd_shared target does not exist; fall back to the static target.
+  if(TARGET zstd::libzstd_shared)
+    set(zstd_target zstd::libzstd_shared)
+  else()
+    set(zstd_target zstd::libzstd_static)
+  endif()
+  list(APPEND imported_libs ${zstd_target})
+endif()
+
 if( MSVC OR MINGW )
   # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc.
   # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc.
@@ -289,6 +300,18 @@ if(LLVM_ENABLE_ZLIB)
   set(llvm_system_libs ${llvm_system_libs} "${zlib_library}")
 endif()
 
+if(LLVM_ENABLE_ZSTD)
+  # CMAKE_BUILD_TYPE is only meaningful to single-configuration generators.
+  if(CMAKE_BUILD_TYPE)
+    string(TOUPPER ${CMAKE_BUILD_TYPE} build_type)
+    get_property(zstd_library TARGET ${zstd_target} PROPERTY LOCATION_${build_type})
+  endif()
+  if(NOT zstd_library)
+    get_property(zstd_library TARGET ${zstd_target} PROPERTY LOCATION)
+  endif()
+  set(llvm_system_libs ${llvm_system_libs} "${zstd_library}")
+endif()
+
 if(LLVM_ENABLE_TERMINFO)
   if(NOT terminfo_library)
     get_property(terminfo_library TARGET Terminfo::terminfo PROPERTY LOCATION)

diff  --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index 21191972fb8b3..e8fb715aa770e 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -20,6 +20,9 @@
 #if LLVM_ENABLE_ZLIB
 #include <zlib.h>
 #endif
+#if LLVM_ENABLE_ZSTD
+#include <zstd.h>
+#endif
 
 using namespace llvm;
 using namespace llvm::compression;
@@ -100,3 +103,78 @@ Error zlib::uncompress(ArrayRef<uint8_t> Input,
   llvm_unreachable("zlib::uncompress is unavailable");
 }
 #endif
+
+#if LLVM_ENABLE_ZSTD
+
+bool zstd::isAvailable() { return true; }
+
+// Compresses Input into CompressedBuffer at the given zstd level, leaving the
+// buffer sized to exactly the compressed payload.
+void zstd::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+  // Use size_t throughout: zstd reports sizes and error codes as size_t, and
+  // unsigned long is only 32 bits wide on LLP64 targets (64-bit Windows).
+  const size_t CompressedBufferSize = ::ZSTD_compressBound(Input.size());
+  CompressedBuffer.resize_for_overwrite(CompressedBufferSize);
+  const size_t CompressedSize =
+      ::ZSTD_compress(CompressedBuffer.data(), CompressedBufferSize,
+                      Input.data(), Input.size(), Level);
+  if (ZSTD_isError(CompressedSize))
+    report_bad_alloc_error("Allocation failed");
+  // Tell MemorySanitizer that zstd output buffer is fully initialized.
+  // This avoids a false report when running LLVM with uninstrumented zstd.
+  __msan_unpoison(CompressedBuffer.data(), CompressedSize);
+  if (CompressedSize < CompressedBuffer.size())
+    CompressedBuffer.truncate(CompressedSize);
+}
+
+// Decompresses Input into UncompressedBuffer. On entry UncompressedSize is the
+// capacity of the buffer; it is overwritten with zstd's return value, which is
+// the decompressed size on success.
+Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
+                       size_t &UncompressedSize) {
+  const size_t Res = ::ZSTD_decompress(UncompressedBuffer, UncompressedSize,
+                                       Input.data(), Input.size());
+  UncompressedSize = Res;
+  // On failure Res is an error code, not a byte count, so it must not be used
+  // as the length of the region handed to MemorySanitizer below.
+  if (ZSTD_isError(Res))
+    return make_error<StringError>(ZSTD_getErrorName(Res),
+                                   inconvertibleErrorCode());
+  // Tell MemorySanitizer that zstd output buffer is fully initialized.
+  // This avoids a false report when running LLVM with uninstrumented zstd.
+  __msan_unpoison(UncompressedBuffer, UncompressedSize);
+  return Error::success();
+}
+
+// Convenience overload: sizes UncompressedBuffer to UncompressedSize bytes,
+// decompresses into it, and shrinks it if fewer bytes were produced.
+Error zstd::uncompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &UncompressedBuffer,
+                       size_t UncompressedSize) {
+  UncompressedBuffer.resize_for_overwrite(UncompressedSize);
+  Error E =
+      zstd::uncompress(Input, UncompressedBuffer.data(), UncompressedSize);
+  if (UncompressedSize < UncompressedBuffer.size())
+    UncompressedBuffer.truncate(UncompressedSize);
+  return E;
+}
+
+#else
+// zstd support was not compiled in: isAvailable() reports false and the
+// remaining entry points are unreachable.
+bool zstd::isAvailable() { return false; }
+void zstd::compress(ArrayRef<uint8_t> Input,
+                    SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
+  llvm_unreachable("zstd::compress is unavailable");
+}
+Error zstd::uncompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
+                       size_t &UncompressedSize) {
+  llvm_unreachable("zstd::uncompress is unavailable");
+}
+Error zstd::uncompress(ArrayRef<uint8_t> Input,
+                       SmallVectorImpl<uint8_t> &UncompressedBuffer,
+                       size_t UncompressedSize) {
+  llvm_unreachable("zstd::uncompress is unavailable");
+}
+#endif

diff  --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in
index 520a54bc108f5..09210e2e56d4c 100644
--- a/llvm/test/lit.site.cfg.py.in
+++ b/llvm/test/lit.site.cfg.py.in
@@ -37,6 +37,7 @@ config.host_ldflags = '@HOST_LDFLAGS@'
 config.llvm_use_intel_jitevents = @LLVM_USE_INTEL_JITEVENTS@
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.have_zlib = @LLVM_ENABLE_ZLIB@
+config.have_zstd = @LLVM_ENABLE_ZSTD@
 config.have_libxar = @LLVM_HAVE_LIBXAR@
 config.have_libxml2 = @LLVM_ENABLE_LIBXML2@
 config.have_curl = @LLVM_ENABLE_CURL@

diff  --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 8e6189ebe2248..a89dadf5f9ae8 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -61,4 +61,49 @@ TEST(CompressionTest, Zlib) {
 }
 #endif
 
+#if LLVM_ENABLE_ZSTD
+// Round-trips Input through zstd at the given Level and checks both the
+// successful path and the too-small-destination failure path.
+static void testZstdCompression(StringRef Input, int Level) {
+  SmallVector<uint8_t, 0> Compressed;
+  SmallVector<uint8_t, 0> Uncompressed;
+  zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
+
+  // Decompression with the exact original length must succeed. Report the
+  // error instead of silently consuming it so that a failure is diagnosable.
+  if (Error E = zstd::uncompress(Compressed, Uncompressed, Input.size())) {
+    ADD_FAILURE() << "zstd::uncompress failed: "
+                  << llvm::toString(std::move(E));
+    return;
+  }
+
+  // Check that uncompressed buffer is the same as original.
+  EXPECT_EQ(Input, toStringRef(Uncompressed));
+  if (Input.size() > 0) {
+    // Uncompression fails if expected length is too short.
+    Error E = zstd::uncompress(Compressed, Uncompressed, Input.size() - 1);
+    EXPECT_EQ("Destination buffer is too small", llvm::toString(std::move(E)));
+  }
+}
+
+TEST(CompressionTest, Zstd) {
+  testZstdCompression("", zstd::DefaultCompression);
+
+  testZstdCompression("hello, world!", zstd::NoCompression);
+  testZstdCompression("hello, world!", zstd::BestSizeCompression);
+  testZstdCompression("hello, world!", zstd::BestSpeedCompression);
+  testZstdCompression("hello, world!", zstd::DefaultCompression);
+
+  const size_t kSize = 1024;
+  char BinaryData[kSize];
+  for (size_t i = 0; i < kSize; ++i)
+    BinaryData[i] = i & 255;
+  StringRef BinaryDataStr(BinaryData, kSize);
+
+  testZstdCompression(BinaryDataStr, zstd::NoCompression);
+  testZstdCompression(BinaryDataStr, zstd::BestSizeCompression);
+  testZstdCompression(BinaryDataStr, zstd::BestSpeedCompression);
+  testZstdCompression(BinaryDataStr, zstd::DefaultCompression);
+}
+#endif
 }

diff  --git a/utils/bazel/llvm_configs/llvm-config.h.cmake b/utils/bazel/llvm_configs/llvm-config.h.cmake
index 3355f12337203..96fbe6f771d2a 100644
--- a/utils/bazel/llvm_configs/llvm-config.h.cmake
+++ b/utils/bazel/llvm_configs/llvm-config.h.cmake
@@ -95,6 +95,9 @@
 /* Define if zlib compression is available */
 #cmakedefine01 LLVM_ENABLE_ZLIB
 
+/* Define if zstd compression is available */
+#cmakedefine01 LLVM_ENABLE_ZSTD
+
 /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */
 #cmakedefine LLVM_HAVE_TF_API
 


        


More information about the llvm-commits mailing list