[llvm] [llvm] Add API to get decompressed size of a zstd compressed buffer. (PR #107020)
Udit Agarwal via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 2 14:00:15 PDT 2024
https://github.com/uditagarwal97 created https://github.com/llvm/llvm-project/pull/107020
Unlike `zlib` (AFAIK), `zstd` stores the size of the original, uncompressed buffer in the header and provides the following API to retrieve it:
```
unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize);
```
This PR adds a wrapper around this API in the `llvm::compression::zstd` namespace so that users do not have to keep track of the original, uncompressed size when passing around zstd-compressed buffers.
>From 988da17b66a8861acdba31cd9feeae1651c9e03c Mon Sep 17 00:00:00 2001
From: "Agarwal, Udit" <udit.agarwal at intel.com>
Date: Mon, 2 Sep 2024 13:51:45 -0700
Subject: [PATCH] [llvm] Add API to get decompressed size of a zstd compressed
buffer.
---
llvm/include/llvm/Support/Compression.h | 2 ++
llvm/lib/Support/Compression.cpp | 18 ++++++++++++++++++
llvm/unittests/Support/CompressionTest.cpp | 9 ++++++++-
3 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356f..da7b6f88d955b0 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -71,6 +71,8 @@ Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
Error decompress(ArrayRef<uint8_t> Input, SmallVectorImpl<uint8_t> &Output,
size_t UncompressedSize);
+// Get the size of the decompressed data: queries zstd for the content size of
+// the compressed buffer Input and stores it in UncompressedSize. Returns an
+// Error if zstd reports a failure, Error::success() otherwise.
+Error getDecompressedSize(ArrayRef<uint8_t> Input, size_t &UncompressedSize);
} // End of namespace zstd
enum class Format {
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index badaf68ab59cd0..ab012dfb5612b1 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -224,6 +224,20 @@ Error zstd::decompress(ArrayRef<uint8_t> Input,
return E;
}
+/// Reads the decompressed size recorded in the zstd frame header of \p Input.
+///
+/// \param Input zstd-compressed buffer to inspect.
+/// \param UncompressedSize receives the recorded content size on success; it
+///        is left untouched when an error is returned.
+/// \returns Error::success() if the size is recorded and representable as
+///          size_t, otherwise a StringError describing the failure.
+Error zstd::getDecompressedSize(ArrayRef<uint8_t> Input,
+                                size_t &UncompressedSize) {
+  const unsigned long long Res =
+      ZSTD_getFrameContentSize(Input.data(), Input.size());
+
+  // ZSTD_getFrameContentSize does not return a regular zstd error code: it
+  // signals failure through two dedicated sentinel values, so ZSTD_isError /
+  // ZSTD_getErrorName must not be applied to its result.
+  if (Res == ZSTD_CONTENTSIZE_ERROR)
+    return make_error<StringError>(
+        "zstd: unable to read frame content size from input",
+        inconvertibleErrorCode());
+  if (Res == ZSTD_CONTENTSIZE_UNKNOWN)
+    return make_error<StringError>(
+        "zstd: frame header does not record the decompressed size",
+        inconvertibleErrorCode());
+
+  // The result is an unsigned long long; reject values that would be
+  // truncated by size_t (possible on 32-bit hosts) instead of narrowing
+  // silently.
+  const size_t Narrowed = static_cast<size_t>(Res);
+  if (static_cast<unsigned long long>(Narrowed) != Res)
+    return make_error<StringError>(
+        "zstd: decompressed size does not fit in size_t",
+        inconvertibleErrorCode());
+
+  UncompressedSize = Narrowed;
+  return Error::success();
+}
+
#else
bool zstd::isAvailable() { return false; }
void zstd::compress(ArrayRef<uint8_t> Input,
@@ -240,4 +254,8 @@ Error zstd::decompress(ArrayRef<uint8_t> Input,
size_t UncompressedSize) {
llvm_unreachable("zstd::decompress is unavailable");
}
+// Fallback definition in the #else branch, alongside zstd::isAvailable()
+// returning false: this function is not expected to be called here, so
+// reaching it is reported through llvm_unreachable.
+Error zstd::getDecompressedSize(ArrayRef<uint8_t> Input,
+ size_t &UncompressedSize) {
+ llvm_unreachable("zstd::getDecompressedSize is unavailable");
+}
#endif
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 5d326cafbe3a1c..058be206d28b1f 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -73,8 +73,15 @@ static void testZstdCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
+ // Check that getDecompressedSize returns the size of the original buffer.
+ size_t DecompressedSize;
+ Error E = zstd::getDecompressedSize(Compressed, DecompressedSize);
+ EXPECT_FALSE(std::move(E));
+
+ EXPECT_EQ(DecompressedSize, Input.size());
+
// Check that uncompressed buffer is the same as original.
- Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
+ E = zstd::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
EXPECT_EQ(Input, toStringRef(Uncompressed));
More information about the llvm-commits
mailing list