[llvm] Implement streaming compression for compressed ELF sections. (PR #87211)
Owen Anderson via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 31 18:23:02 PDT 2024
https://github.com/resistor created https://github.com/llvm/llvm-project/pull/87211
None
>From 6f74a9649955d4b7dc9bd525bbd88b6831e58bd4 Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor at mac.com>
Date: Fri, 22 Mar 2024 22:49:30 -0500
Subject: [PATCH] Implement streaming compression for compressed ELF sections.
---
llvm/include/llvm/Support/Compression.h | 11 +++
llvm/lib/MC/ELFObjectWriter.cpp | 22 ++---
llvm/lib/Support/Compression.cpp | 110 +++++++++++++++++++++
llvm/unittests/Support/CompressionTest.cpp | 18 ++++
4 files changed, 147 insertions(+), 14 deletions(-)
diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356f..09457df95250c0 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -19,6 +19,7 @@
namespace llvm {
template <typename T> class SmallVectorImpl;
class Error;
+class raw_ostream;
// None indicates no compression. The other members are a subset of
// compression::Format, which is used for compressed debug sections in some
@@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer,
int Level = DefaultCompression);
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+ int Level = DefaultCompression);
+
Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize);
@@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer,
int Level = DefaultCompression, bool EnableLdm = false);
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+ int Level = DefaultCompression, bool EnableLdm = false);
+
Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize);
@@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
void compress(Params P, ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &Output);
+// Compress Input into a raw_ostream, without buffering the entire compressed
+// output. Compression parameters are the same as for `compress`.
+void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);
+
// Decompress Input. The uncompressed size must be available.
Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
uint8_t *Output, size_t UncompressedSize);
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index f4c6cbc8dd4442..4b345f1cde1f1a 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -145,9 +145,7 @@ struct ELFWriter {
uint64_t align(Align Alignment);
- bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
- SmallVectorImpl<uint8_t> &CompressedContents,
- Align Alignment);
+ bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);
public:
ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
@@ -848,12 +846,9 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
}
// Include the debug info compression header.
-bool ELFWriter::maybeWriteCompression(
- uint32_t ChType, uint64_t Size,
- SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
- uint64_t HdrSize =
- is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
- if (Size <= HdrSize + CompressedContents.size())
+bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
+ Align Alignment) {
+ if (Size <= 128)
return false;
// Platform specific header is followed by compressed data.
if (is64Bit()) {
@@ -904,10 +899,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
ChType = ELF::ELFCOMPRESS_ZSTD;
break;
}
- compression::compress(compression::Params(CompressionType), Uncompressed,
- Compressed);
- if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
- Sec.getAlign())) {
+
+ if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
W.OS << UncompressedData;
return;
}
@@ -916,7 +909,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
// Alignment field should reflect the requirements of
// the compressed section header.
Section.setAlignment(is64Bit() ? Align(8) : Align(4));
- W.OS << toStringRef(Compressed);
+ compression::compressToStream(compression::Params(CompressionType),
+ Uncompressed, W.OS);
}
void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index badaf68ab59cd0..2c5380bd4e22c2 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Compression.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/config.h"
@@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
}
}
+// Streaming counterpart of compression::compress: compresses Input using the
+// format recorded in P and writes the result to OS. Level (and, for zstd, the
+// long-distance-matching flag) are forwarded from P unchanged.
+void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
+                                   raw_ostream &OS) {
+  switch (P.format) {
+  case compression::Format::Zstd: {
+    zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
+    return;
+  }
+  case compression::Format::Zlib: {
+    zlib::compressToStream(Input, OS, P.level);
+    return;
+  }
+  }
+}
+
Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
uint8_t *Output, size_t UncompressedSize) {
switch (formatFor(T)) {
@@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
CompressedBuffer.truncate(CompressedSize);
}
+// Compress Input with zlib's deflate at the requested Level and write the
+// compressed bytes to OS as they are produced. Only a fixed-size scratch
+// buffer is kept in memory, so the complete compressed output is never
+// materialized. Failure to set up or run the deflate stream is reported as a
+// fatal error.
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) {
+  // Allocate a fixed size buffer to hold the output.
+  constexpr size_t OutBufferSize = 4096;
+  auto OutBuffer = std::make_unique<Bytef[]>(OutBufferSize);
+
+  z_stream ZStream;
+  ZStream.zalloc = Z_NULL;
+  ZStream.zfree = Z_NULL;
+  ZStream.opaque = Z_NULL;
+
+  int ZErr = deflateInit(&ZStream, Level);
+  if (ZErr != Z_OK)
+    report_bad_alloc_error("Failed to create ZStream");
+
+  // Ensure that the z_stream is cleaned up on all exit paths.
+  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });
+
+  // z_stream::avail_in is a uInt, which is narrower than size_t on 64-bit
+  // hosts. Assigning Input.size() to it directly would silently truncate very
+  // large inputs, so the input is handed to zlib in slices of at most
+  // MaxInChunk bytes instead.
+  constexpr size_t MaxInChunk = static_cast<uInt>(-1);
+  const uint8_t *NextIn = Input.data();
+  size_t Remaining = Input.size();
+  ZStream.next_in = reinterpret_cast<Bytef *>(const_cast<uint8_t *>(NextIn));
+  ZStream.avail_in = 0;
+
+  // Repeatedly deflate into the output buffer and flush it into the
+  // output stream. Repeat until we have drained the entire compression
+  // state.
+  while (ZErr != Z_STREAM_END) {
+    // Refill the input window once zlib has consumed the previous slice.
+    if (ZStream.avail_in == 0 && Remaining != 0) {
+      const size_t Chunk = Remaining < MaxInChunk ? Remaining : MaxInChunk;
+      ZStream.next_in =
+          reinterpret_cast<Bytef *>(const_cast<uint8_t *>(NextIn));
+      ZStream.avail_in = static_cast<uInt>(Chunk);
+      NextIn += Chunk;
+      Remaining -= Chunk;
+    }
+
+    ZStream.next_out = OutBuffer.get();
+    ZStream.avail_out = OutBufferSize;
+
+    // Ask zlib to terminate the stream only after the final slice has been
+    // handed over. An input that fits in one slice therefore sees exactly the
+    // same sequence of Z_FINISH calls as before.
+    ZErr = deflate(&ZStream, Remaining == 0 ? Z_FINISH : Z_NO_FLUSH);
+    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
+      report_fatal_error(convertZlibCodeToString(ZErr));
+
+    const size_t Produced = OutBufferSize - ZStream.avail_out;
+
+    // Tell MemorySanitizer that zlib output buffer is fully initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZLib.
+    __msan_unpoison(OutBuffer.get(), Produced);
+
+    if (Produced > 0)
+      OS.write(reinterpret_cast<char *>(OutBuffer.get()), Produced);
+  }
+}
+
Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
@@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
llvm_unreachable("zlib::compress is unavailable");
}
+// Placeholder definition for configurations in which zlib compression is
+// unavailable; it must never be called. The default for Level is supplied by
+// the declaration in Compression.h and must not be repeated here, because a
+// default argument may not be redefined by a later declaration.
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) {
+  llvm_unreachable("zlib::compressToStream is unavailable");
+}
Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
size_t &UncompressedSize) {
llvm_unreachable("zlib::decompress is unavailable");
@@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
CompressedBuffer.truncate(CompressedSize);
}
+// Compress Input with zstd at the requested Level (optionally enabling
+// long-distance matching) and write the compressed bytes to OS as they are
+// produced. The scratch buffer is sized by ZSTD_CStreamOutSize(), so the
+// complete compressed output is never materialized. Failure to create or
+// configure the stream, or a compression error, is reported as a fatal error.
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
+                            bool EnableLdm) {
+  // Allocate a buffer to hold the output.
+  const size_t OutBufferSize = ZSTD_CStreamOutSize();
+  auto OutBuffer = std::make_unique<char[]>(OutBufferSize);
+
+  ZSTD_CStream *CStream = ZSTD_createCStream();
+  if (!CStream)
+    report_bad_alloc_error("Failed to create ZSTD_CCtx");
+
+  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
+  auto FreeCStreamOnExit =
+      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });
+
+  if (ZSTD_isError(ZSTD_CCtx_setParameter(
+          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
+  }
+
+  if (ZSTD_isError(
+          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
+  }
+
+  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};
+
+  // Repeatedly compress into the output buffer and flush it into the
+  // output stream. ZSTD_compressStream2 with ZSTD_e_end returns 0 once the
+  // frame is complete and fully flushed, so loop until then.
+  size_t ZRet;
+  do {
+    // A fresh descriptor each round restarts writing at the buffer's start.
+    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
+    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
+    if (ZSTD_isError(ZRet))
+      report_fatal_error(ZSTD_getErrorName(ZRet));
+
+    // Tell MemorySanitizer that zstd output buffer is fully initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZStd.
+    __msan_unpoison(OutBuffer.get(), ZOutput.pos);
+
+    if (ZOutput.pos > 0)
+      OS.write(OutBuffer.get(), ZOutput.pos);
+  } while (ZRet != 0);
+}
+
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
const size_t Res = ::ZSTD_decompress(
@@ -231,6 +336,11 @@ void zstd::compress(ArrayRef<uint8_t> Input,
bool EnableLdm) {
llvm_unreachable("zstd::compress is unavailable");
}
+// Placeholder definition for configurations in which zstd compression is
+// unavailable; it must never be called. The defaults for Level and EnableLdm
+// are supplied by the declaration in Compression.h and must not be repeated
+// here, because a default argument may not be redefined by a later
+// declaration.
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level, bool EnableLdm) {
+  llvm_unreachable("zstd::compressToStream is unavailable");
+}
Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
size_t &UncompressedSize) {
llvm_unreachable("zstd::decompress is unavailable");
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 5d326cafbe3a1c..ead1007404a5f3 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);
+ // Check that stream compression results are the same as bulk compression.
+ SmallVector<char, 0> StreamCompressed;
+ raw_svector_ostream Stream(StreamCompressed);
+ zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+ EXPECT_EQ(StreamCompressed.size(), Compressed.size());
+ for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+ EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+ }
+
// Check that uncompressed buffer is the same as original.
Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
@@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
SmallVector<uint8_t, 0> Uncompressed;
zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
+ // Check that stream compression results are the same as bulk compression.
+ SmallVector<char, 0> StreamCompressed;
+ raw_svector_ostream Stream(StreamCompressed);
+ zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+ EXPECT_EQ(StreamCompressed.size(), Compressed.size());
+ for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+ EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+ }
+
// Check that uncompressed buffer is the same as original.
Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
EXPECT_FALSE(std::move(E));
More information about the llvm-commits
mailing list