[llvm] [llvm] Add API to get decompressed size of a zstd compressed buffer. (PR #107020)

Udit Agarwal via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 2 14:00:15 PDT 2024


https://github.com/uditagarwal97 created https://github.com/llvm/llvm-project/pull/107020

Unlike `zlib` (AFAIK), `zstd` stores the size of the original, uncompressed buffer in the header and provides the following API to retrieve it:
```
unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
```

This PR adds a wrapper around this API in the `llvm::compression::zstd` namespace so that users don't have to keep track of the original, uncompressed size when passing around zstd-compressed buffers.

>From 988da17b66a8861acdba31cd9feeae1651c9e03c Mon Sep 17 00:00:00 2001
From: "Agarwal, Udit" <udit.agarwal at intel.com>
Date: Mon, 2 Sep 2024 13:51:45 -0700
Subject: [PATCH] [llvm] Add API to get decompressed size of a zstd compressed
 buffer.

---
 llvm/include/llvm/Support/Compression.h    |  2 ++
 llvm/lib/Support/Compression.cpp           | 18 ++++++++++++++++++
 llvm/unittests/Support/CompressionTest.cpp |  9 ++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356f..da7b6f88d955b0 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -71,6 +71,8 @@ Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
 Error decompress(ArrayRef<uint8_t> Input, SmallVectorImpl<uint8_t> &Output,
                  size_t UncompressedSize);
 
+// Read the decompressed size stored in the zstd frame header of Input.
+Error getDecompressedSize(ArrayRef<uint8_t> Input, size_t &UncompressedSize);
 } // End of namespace zstd
 
 enum class Format {
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index badaf68ab59cd0..ab012dfb5612b1 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -224,6 +224,20 @@ Error zstd::decompress(ArrayRef<uint8_t> Input,
   return E;
 }
 
+Error zstd::getDecompressedSize(ArrayRef<uint8_t> Input,
+                                size_t &UncompressedSize) {
+  // The ZSTD_CONTENTSIZE_* sentinels are not ZSTD error codes, so test them
+  // directly instead of via ZSTD_isError; also reject sizes that overflow
+  // size_t. UncompressedSize is set to 0 on failure.
+  unsigned long long Res = ZSTD_getFrameContentSize(Input.data(), Input.size());
+  bool Bad = Res == ZSTD_CONTENTSIZE_ERROR || Res == ZSTD_CONTENTSIZE_UNKNOWN ||
+             Res != static_cast<size_t>(Res);
+  UncompressedSize = Bad ? 0 : static_cast<size_t>(Res);
+  return Bad ? make_error<StringError>("cannot determine zstd content size",
+                                       inconvertibleErrorCode())
+             : Error::success();
+}
+
 #else
 bool zstd::isAvailable() { return false; }
 void zstd::compress(ArrayRef<uint8_t> Input,
@@ -240,4 +254,8 @@ Error zstd::decompress(ArrayRef<uint8_t> Input,
                        size_t UncompressedSize) {
   llvm_unreachable("zstd::decompress is unavailable");
 }
+Error zstd::getDecompressedSize(ArrayRef<uint8_t> Input,
+                                size_t &UncompressedSize) {
+  llvm_unreachable("zstd::getDecompressedSize is unavailable");
+} // Stub for the #else branch, where zstd::isAvailable() returns false.
 #endif
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 5d326cafbe3a1c..058be206d28b1f 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -73,8 +73,15 @@ static void testZstdCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that getDecompressedSize returns the size of the original buffer.
+  size_t DecompressedSize;
+  Error E = zstd::getDecompressedSize(Compressed, DecompressedSize);
+  EXPECT_FALSE(std::move(E));
+
+  EXPECT_EQ(DecompressedSize, Input.size());
+
   // Check that uncompressed buffer is the same as original.
-  Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
+  E = zstd::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));
   EXPECT_EQ(Input, toStringRef(Uncompressed));
 



More information about the llvm-commits mailing list