[llvm] Implement streaming compression for compressed ELF sections. (PR #87211)

Owen Anderson via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 6 07:44:54 PDT 2024


https://github.com/resistor updated https://github.com/llvm/llvm-project/pull/87211

>From f64de315a4d46467ea08ac8e079c5bcfdab7298d Mon Sep 17 00:00:00 2001
From: Owen Anderson <resistor at mac.com>
Date: Fri, 22 Mar 2024 22:49:30 -0500
Subject: [PATCH] Implement streaming compression for compressed ELF sections.

---
 llvm/include/llvm/Support/Compression.h    |  11 +++
 llvm/lib/MC/ELFObjectWriter.cpp            |  22 ++---
 llvm/lib/Support/Compression.cpp           | 110 +++++++++++++++++++++
 llvm/unittests/Support/CompressionTest.cpp |  18 ++++
 4 files changed, 147 insertions(+), 14 deletions(-)

diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h
index 2a8da9e96d356f..09457df95250c0 100644
--- a/llvm/include/llvm/Support/Compression.h
+++ b/llvm/include/llvm/Support/Compression.h
@@ -19,6 +19,7 @@
 namespace llvm {
 template <typename T> class SmallVectorImpl;
 class Error;
+class raw_ostream;
 
 // None indicates no compression. The other members are a subset of
 // compression::Format, which is used for compressed debug sections in some
@@ -44,6 +45,9 @@ void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
               int Level = DefaultCompression);
 
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                      int Level = DefaultCompression);
+
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
 
@@ -65,6 +69,9 @@ void compress(ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &CompressedBuffer,
               int Level = DefaultCompression, bool EnableLdm = false);
 
+void compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                      int Level = DefaultCompression, bool EnableLdm = false);
+
 Error decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                  size_t &UncompressedSize);
 
@@ -116,6 +123,10 @@ const char *getReasonIfUnsupported(Format F);
 void compress(Params P, ArrayRef<uint8_t> Input,
               SmallVectorImpl<uint8_t> &Output);
 
+// Compress Input into a raw_ostream, without buffering the entire compressed
+// output. Compression parameters are the same as for `compress`.
+void compressToStream(Params P, ArrayRef<uint8_t> Input, raw_ostream &OS);
+
 // Decompress Input. The uncompressed size must be available.
 Error decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
                  uint8_t *Output, size_t UncompressedSize);
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 005521bad6e014..bb0b2b4e0cfa78 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -144,9 +144,7 @@ struct ELFWriter {
 
   uint64_t align(Align Alignment);
 
-  bool maybeWriteCompression(uint32_t ChType, uint64_t Size,
-                             SmallVectorImpl<uint8_t> &CompressedContents,
-                             Align Alignment);
+  bool maybeWriteCompression(uint32_t ChType, uint64_t Size, Align Alignment);
 
 public:
   ELFWriter(ELFObjectWriter &OWriter, raw_pwrite_stream &OS,
@@ -841,12 +839,9 @@ MCSectionELF *ELFWriter::createRelocationSection(MCContext &Ctx,
 }
 
 // Include the debug info compression header.
-bool ELFWriter::maybeWriteCompression(
-    uint32_t ChType, uint64_t Size,
-    SmallVectorImpl<uint8_t> &CompressedContents, Align Alignment) {
-  uint64_t HdrSize =
-      is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr);
-  if (Size <= HdrSize + CompressedContents.size())
+bool ELFWriter::maybeWriteCompression(uint32_t ChType, uint64_t Size,
+                                      Align Alignment) {
+  if (Size <= 128)
     return false;
   // Platform specific header is followed by compressed data.
   if (is64Bit()) {
@@ -897,10 +892,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
     ChType = ELF::ELFCOMPRESS_ZSTD;
     break;
   }
-  compression::compress(compression::Params(CompressionType), Uncompressed,
-                        Compressed);
-  if (!maybeWriteCompression(ChType, UncompressedData.size(), Compressed,
-                             Sec.getAlign())) {
+
+  if (!maybeWriteCompression(ChType, UncompressedData.size(), Sec.getAlign())) {
     W.OS << UncompressedData;
     return;
   }
@@ -909,7 +902,8 @@ void ELFWriter::writeSectionData(const MCAssembler &Asm, MCSection &Sec,
   // Alignment field should reflect the requirements of
   // the compressed section header.
   Section.setAlignment(is64Bit() ? Align(8) : Align(4));
-  W.OS << toStringRef(Compressed);
+  compression::compressToStream(compression::Params(CompressionType),
+                                Uncompressed, W.OS);
 }
 
 void ELFWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp
index badaf68ab59cd0..2c5380bd4e22c2 100644
--- a/llvm/lib/Support/Compression.cpp
+++ b/llvm/lib/Support/Compression.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Compression.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
@@ -55,6 +56,18 @@ void compression::compress(Params P, ArrayRef<uint8_t> Input,
   }
 }
 
+// Streaming counterpart of compress(): route Input to the zlib or zstd
+// stream compressor selected by P.format, writing the result to OS.
+void compression::compressToStream(Params P, ArrayRef<uint8_t> Input,
+                                   raw_ostream &OS) {
+  switch (P.format) {
+  case compression::Format::Zlib:
+    return zlib::compressToStream(Input, OS, P.level);
+  case compression::Format::Zstd:
+    return zstd::compressToStream(Input, OS, P.level, P.zstdEnableLdm);
+  }
+}
+
 Error compression::decompress(DebugCompressionType T, ArrayRef<uint8_t> Input,
                               uint8_t *Output, size_t UncompressedSize) {
   switch (formatFor(T)) {
@@ -120,6 +133,49 @@ void zlib::compress(ArrayRef<uint8_t> Input,
     CompressedBuffer.truncate(CompressedSize);
 }
 
+// Deflate Input at compression level Level and write the result to OS in
+// fixed-size chunks, so the whole compressed output is never buffered.
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) {
+  constexpr size_t OutBufferSize = 4096;
+  auto OutBuffer = std::make_unique<Bytef[]>(OutBufferSize);
+
+  z_stream ZStream;
+  ZStream.zalloc = Z_NULL;
+  ZStream.zfree = Z_NULL;
+  ZStream.opaque = Z_NULL;
+
+  int ZErr = deflateInit(&ZStream, Level);
+  if (ZErr != Z_OK)
+    report_bad_alloc_error("Failed to create ZStream");
+
+  // Ensure that the z_stream is cleaned up on all exit paths.
+  auto DeflateEndOnExit = make_scope_exit([&]() { deflateEnd(&ZStream); });
+
+  ZStream.next_in =
+      reinterpret_cast<Bytef *>(const_cast<uint8_t *>(Input.data()));
+  ZStream.avail_in = Input.size();
+
+  // Deflate into the staging buffer and flush it to OS, repeating until
+  // zlib reports Z_STREAM_END, i.e. all pending output has been drained.
+  while (ZErr != Z_STREAM_END) {
+    ZStream.next_out = OutBuffer.get();
+    ZStream.avail_out = OutBufferSize;
+
+    ZErr = deflate(&ZStream, Z_FINISH);
+    if (ZErr == Z_STREAM_ERROR || ZErr == Z_BUF_ERROR)
+      report_fatal_error(convertZlibCodeToString(ZErr));
+
+    // Tell MemorySanitizer that the bytes zlib just produced are initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZLib.
+    size_t Produced = OutBufferSize - ZStream.avail_out;
+    __msan_unpoison(OutBuffer.get(), Produced);
+
+    if (Produced > 0)
+      OS.write(reinterpret_cast<char *>(OutBuffer.get()), Produced);
+  }
+}
+
 Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   int Res = ::uncompress((Bytef *)Output, (uLongf *)&UncompressedSize,
@@ -148,6 +204,10 @@ void zlib::compress(ArrayRef<uint8_t> Input,
                     SmallVectorImpl<uint8_t> &CompressedBuffer, int Level) {
   llvm_unreachable("zlib::compress is unavailable");
 }
+void zlib::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS,
+                            int Level) { // Default lives on the declaration.
+  llvm_unreachable("zlib::compressToStream is unavailable");
+}
 Error zlib::decompress(ArrayRef<uint8_t> Input, uint8_t *UncompressedBuffer,
                        size_t &UncompressedSize) {
   llvm_unreachable("zlib::decompress is unavailable");
@@ -201,6 +261,51 @@ void zstd::compress(ArrayRef<uint8_t> Input,
     CompressedBuffer.truncate(CompressedSize);
 }
 
+// Compress Input with zstd at the given Level (optionally with long-distance
+// matching) and write the result to OS one output buffer at a time.
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
+                            bool EnableLdm) {
+  size_t OutBufferSize = ZSTD_CStreamOutSize();
+  auto OutBuffer = std::make_unique<char[]>(OutBufferSize);
+
+  ZSTD_CStream *CStream = ZSTD_createCStream();
+  if (!CStream)
+    report_bad_alloc_error("Failed to create ZSTD_CCtx");
+
+  // Ensure that the ZSTD_CStream is cleaned up on all exit paths.
+  auto FreeCStreamOnExit =
+      make_scope_exit([=]() { ZSTD_freeCStream(CStream); });
+
+  if (ZSTD_isError(ZSTD_CCtx_setParameter(
+          CStream, ZSTD_c_enableLongDistanceMatching, EnableLdm ? 1 : 0))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_enableLongDistanceMatching");
+  }
+
+  if (ZSTD_isError(
+          ZSTD_CCtx_setParameter(CStream, ZSTD_c_compressionLevel, Level))) {
+    report_bad_alloc_error("Failed to set ZSTD_c_compressionLevel");
+  }
+
+  ZSTD_inBuffer ZInput = {Input.data(), Input.size(), 0};
+
+  // Compress into the staging buffer and flush it to OS, repeating until
+  // ZSTD_compressStream2 returns 0, i.e. the frame is completely flushed.
+  size_t ZRet;
+  do {
+    ZSTD_outBuffer ZOutput = {OutBuffer.get(), OutBufferSize, 0};
+    ZRet = ZSTD_compressStream2(CStream, &ZOutput, &ZInput, ZSTD_e_end);
+    if (ZSTD_isError(ZRet))
+      report_fatal_error(ZSTD_getErrorName(ZRet));
+
+    // Tell MemorySanitizer that zstd output buffer is fully initialized.
+    // This avoids a false report when running LLVM with uninstrumented ZStd.
+    __msan_unpoison(OutBuffer.get(), ZOutput.pos);
+
+    if (ZOutput.pos > 0)
+      OS.write(OutBuffer.get(), ZOutput.pos);
+  } while (ZRet != 0);
+}
+
 Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   const size_t Res = ::ZSTD_decompress(
@@ -231,6 +336,11 @@ void zstd::compress(ArrayRef<uint8_t> Input,
                     bool EnableLdm) {
   llvm_unreachable("zstd::compress is unavailable");
 }
+// Stub used when zstd is unavailable. Defaults come from the declaration.
+void zstd::compressToStream(ArrayRef<uint8_t> Input, raw_ostream &OS, int Level,
+                            bool EnableLdm) {
+  llvm_unreachable("zstd::compressToStream is unavailable");
+}
 Error zstd::decompress(ArrayRef<uint8_t> Input, uint8_t *Output,
                        size_t &UncompressedSize) {
   llvm_unreachable("zstd::decompress is unavailable");
diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp
index 5d326cafbe3a1c..ead1007404a5f3 100644
--- a/llvm/unittests/Support/CompressionTest.cpp
+++ b/llvm/unittests/Support/CompressionTest.cpp
@@ -28,6 +28,15 @@ static void testZlibCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zlib::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that stream compression results are the same as bulk compression.
+  SmallVector<char, 0> StreamCompressed;
+  raw_svector_ostream Stream(StreamCompressed);
+  zlib::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+  EXPECT_EQ(StreamCompressed.size(), Compressed.size());
+  for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+    EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+  }
+
   // Check that uncompressed buffer is the same as original.
   Error E = zlib::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));
@@ -73,6 +82,15 @@ static void testZstdCompression(StringRef Input, int Level) {
   SmallVector<uint8_t, 0> Uncompressed;
   zstd::compress(arrayRefFromStringRef(Input), Compressed, Level);
 
+  // Check that stream compression results are the same as bulk compression.
+  SmallVector<char, 0> StreamCompressed;
+  raw_svector_ostream Stream(StreamCompressed);
+  zstd::compressToStream(arrayRefFromStringRef(Input), Stream, Level);
+  EXPECT_EQ(StreamCompressed.size(), Compressed.size());
+  for (size_t i = 0, e = StreamCompressed.size(); i != e; ++i) {
+    EXPECT_EQ(llvm::bit_cast<uint8_t>(StreamCompressed[i]), Compressed[i]);
+  }
+
   // Check that uncompressed buffer is the same as original.
   Error E = zstd::decompress(Compressed, Uncompressed, Input.size());
   EXPECT_FALSE(std::move(E));



More information about the llvm-commits mailing list