[lld] 6d44a1e - [ELF] Adjust --compress-sections to support compression level

via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 11:40:50 PDT 2024


Author: Fangrui Song
Date: 2024-05-01T11:40:46-07:00
New Revision: 6d44a1ef55b559e59d725b07ffe1da988b4e5f1c

URL: https://github.com/llvm/llvm-project/commit/6d44a1ef55b559e59d725b07ffe1da988b4e5f1c
DIFF: https://github.com/llvm/llvm-project/commit/6d44a1ef55b559e59d725b07ffe1da988b4e5f1c.diff

LOG: [ELF] Adjust --compress-sections to support compression level

zstd excels at scaling from low-ratio-very-fast to
high-ratio-pretty-slow. Some users prioritize speed and prefer disk read
speed, while others focus on achieving the highest compression ratio
possible, similar to traditional high-ratio codecs like LZMA.

Add an optional `level` to `--compress-sections` (#84855) to cater to
these diverse needs. While we initially aimed for a one-size-fits-all
approach, this no longer seems to work.
(https://richg42.blogspot.com/2015/11/the-lossless-decompression-pareto.html)

When --compress-debug-sections and --compress-sections are used together,
make --compress-sections take precedence, since --compress-sections is
usually more specific.

Remove the level distinction between -O/-O1 and -O2 for
--compress-debug-sections=zlib for a more consistent user experience.

Pull Request: https://github.com/llvm/llvm-project/pull/90567

Added: 
    

Modified: 
    lld/ELF/Config.h
    lld/ELF/Driver.cpp
    lld/ELF/Options.td
    lld/ELF/OutputSections.cpp
    lld/docs/ReleaseNotes.rst
    lld/docs/ld.lld.1
    lld/test/ELF/compress-sections.s
    lld/test/ELF/compressed-debug-level.test

Removed: 
    


################################################################################
diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h
index 33bfa42b0fcbf0..c55b547a733c77 100644
--- a/lld/ELF/Config.h
+++ b/lld/ELF/Config.h
@@ -224,7 +224,8 @@ struct Config {
   bool checkSections;
   bool checkDynamicRelocs;
   std::optional<llvm::DebugCompressionType> compressDebugSections;
-  llvm::SmallVector<std::pair<llvm::GlobPattern, llvm::DebugCompressionType>, 0>
+  llvm::SmallVector<
+      std::tuple<llvm::GlobPattern, llvm::DebugCompressionType, unsigned>, 0>
       compressSections;
   bool cref;
   llvm::SmallVector<std::pair<llvm::GlobPattern, uint64_t>, 0>

diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index a5b47f020f8726..b29e1e1a67f139 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -1533,9 +1533,17 @@ static void readConfigs(opt::InputArgList &args) {
             ": parse error, not 'section-glob=[none|zlib|zstd]'");
       continue;
     }
-    auto type = getCompressionType(fields[1], arg->getSpelling());
+    auto [typeStr, levelStr] = fields[1].split(':');
+    auto type = getCompressionType(typeStr, arg->getSpelling());
+    unsigned level = 0;
+    if (fields[1].size() != typeStr.size() &&
+        !llvm::to_integer(levelStr, level)) {
+      error(arg->getSpelling() +
+            ": expected a non-negative integer compression level, but got '" +
+            levelStr + "'");
+    }
     if (Expected<GlobPattern> pat = GlobPattern::create(fields[0])) {
-      config->compressSections.emplace_back(std::move(*pat), type);
+      config->compressSections.emplace_back(std::move(*pat), type, level);
     } else {
       error(arg->getSpelling() + ": " + toString(pat.takeError()));
       continue;

diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td
index 72eaf157a181cf..73a4f9662a561f 100644
--- a/lld/ELF/Options.td
+++ b/lld/ELF/Options.td
@@ -68,8 +68,9 @@ defm compress_debug_sections:
   MetaVarName<"[none,zlib,zstd]">;
 
 defm compress_sections: EEq<"compress-sections",
-  "Compress non-SHF_ALLOC output sections matching <section-glob>">,
-  MetaVarName<"<section-glob>=[none|zlib|zstd]">;
+  "Compress output sections that match the glob and do not have the SHF_ALLOC flag."
+  "The compression level is <level> (if specified) or a default speed-focused level">,
+  MetaVarName<"<section-glob>={none,zlib,zstd}[:level]">;
 
 defm defsym: Eq<"defsym", "Define a symbol alias">, MetaVarName<"<symbol>=<value>">;
 

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 1b09e5b0a55742..2dbbff06a89087 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -339,12 +339,13 @@ template <class ELFT> void OutputSection::maybeCompress() {
   (void)sizeof(Elf_Chdr);
 
   DebugCompressionType ctype = DebugCompressionType::None;
-  for (auto &[glob, t] : config->compressSections)
-    if (glob.match(name))
-      ctype = t;
+  unsigned level = 0; // default compression level
   if (!(flags & SHF_ALLOC) && config->compressDebugSections &&
       name.starts_with(".debug_") && size)
     ctype = *config->compressDebugSections;
+  for (auto &[glob, t, l] : config->compressSections)
+    if (glob.match(name))
+      std::tie(ctype, level) = {t, l};
   if (ctype == DebugCompressionType::None)
     return;
   if (flags & SHF_ALLOC) {
@@ -376,13 +377,14 @@ template <class ELFT> void OutputSection::maybeCompress() {
   auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
 
 #if LLVM_ENABLE_ZSTD
-  // Use ZSTD's streaming compression API which permits parallel workers working
-  // on the stream. See http://facebook.github.io/zstd/zstd_manual.html
-  // "Streaming compression - HowTo".
+  // Use ZSTD's streaming compression API. See
+  // http://facebook.github.io/zstd/zstd_manual.html "Streaming compression -
+  // HowTo".
   if (ctype == DebugCompressionType::Zstd) {
     parallelFor(0, numShards, [&](size_t i) {
       SmallVector<uint8_t, 0> out;
       ZSTD_CCtx *cctx = ZSTD_createCCtx();
+      ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
       ZSTD_inBuffer zib = {shardsIn[i].data(), shardsIn[i].size(), 0};
       ZSTD_outBuffer zob = {nullptr, 0, 0};
       size_t size;
@@ -410,12 +412,10 @@ template <class ELFT> void OutputSection::maybeCompress() {
 
 #if LLVM_ENABLE_ZLIB
   // We chose 1 (Z_BEST_SPEED) as the default compression level because it is
-  // the fastest. If -O2 is given, we use level 6 to compress debug info more by
-  // ~15%. We found that level 7 to 9 doesn't make much difference (~1% more
-  // compression) while they take significant amount of time (~2x), so level 6
-  // seems enough.
+  // fast and provides decent compression ratios.
   if (ctype == DebugCompressionType::Zlib) {
-    const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
+    if (!level)
+      level = Z_BEST_SPEED;
 
     // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all
     // shards but the last to flush the output to a byte boundary to be

diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index a7ed49726fd99a..f8fdebfeaecf26 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -26,9 +26,12 @@ Non-comprehensive list of changes in this release
 ELF Improvements
 ----------------
 
-* ``--compress-sections <section-glib>=[none|zlib|zstd]`` is added to compress
+* ``--compress-sections <section-glob>={none,zlib,zstd}[:level]`` is added to compress
   matched output sections without the ``SHF_ALLOC`` flag.
   (`#84855 <https://github.com/llvm/llvm-project/pull/84855>`_)
+  (`#90567 <https://github.com/llvm/llvm-project/pull/90567>`_)
+* The default compression level for zlib is now independent of linker
+  optimization level (``Z_BEST_SPEED``).
 * ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` notes, ``R_AARCH64_AUTH_ABS64`` and
   ``R_AARCH64_AUTH_RELATIVE`` relocations are now supported.
   (`#72714 <https://github.com/llvm/llvm-project/pull/72714>`_)

diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1
index 3861120915e8bc..9ea1a9c52f2a13 100644
--- a/lld/docs/ld.lld.1
+++ b/lld/docs/ld.lld.1
@@ -156,16 +156,16 @@ may be
 No compression.
 .It Cm zlib
 The default compression level is 1 (fastest) as the debug info usually
-compresses well at that level. If you want to compress it more,
-you can specify
-.Fl O2
-to set the compression level to 6.
+compresses well at that level.
 .It Cm zstd
-The compression level is 5.
+Use the default compression level in zstd.
 .El
 .Pp
-.It Fl -compress-sections Ns = Ns Ar section-glob=[none|zlib|zstd]
+.It Fl -compress-sections Ns = Ns Ar section-glob={none,zlib,zstd}[:level]
 Compress output sections that match the glob and do not have the SHF_ALLOC flag.
+The compression level is
+.Cm level
+(if specified) or a default speed-focused level.
 This is like a generalized
 .Cm --compress-debug-sections.
 .It Fl -cref

diff --git a/lld/test/ELF/compress-sections.s b/lld/test/ELF/compress-sections.s
index 59b5408c9624ac..aa30c7a90474f6 100644
--- a/lld/test/ELF/compress-sections.s
+++ b/lld/test/ELF/compress-sections.s
@@ -16,7 +16,7 @@
 # CHECK1: 0000000000000010  0 NOTYPE  LOCAL  DEFAULT   [[#]] (nonalloc0) sym0
 # CHECK1: 0000000000000008  0 NOTYPE  LOCAL  DEFAULT   [[#]] (nonalloc1) sym1
 
-# RUN: ld.lld -pie a.o --compress-sections '*c0=zlib' --compress-sections .debug_str=zstd -o out2
+# RUN: ld.lld -pie a.o --compress-sections '*c0=zlib' --compress-sections .debug_str=zstd:3 -o out2
 # RUN: llvm-readelf -SrsX -x nonalloc0 -x .debug_str out2 | FileCheck %s --check-prefix=CHECK2
 
 # CHECK2:      Name       Type          Address     Off      Size     ES Flg Lk Inf Al
@@ -39,11 +39,11 @@
 # CHECK2-NEXT: 02000000 00000000 38000000 00000000
 # CHECK2-NEXT: 01000000 00000000 {{.*}}
 
-## --compress-debug-sections=none takes precedence.
-# RUN: ld.lld a.o --compress-debug-sections=none --compress-sections .debug_str=zstd -o out3
+## --compress-sections takes precedence.
+# RUN: ld.lld a.o --compress-sections .debug_str=zstd --compress-debug-sections=none -o out3
 # RUN: llvm-readelf -S out3 | FileCheck %s --check-prefix=CHECK3
 
-# CHECK3:      .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS   0   0  1
+# CHECK3:      .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MSC  0   0  1
 
 # RUN: not ld.lld a.o --compress-sections '*0=zlib' 2>&1 | \
 # RUN:   FileCheck %s --check-prefix=ERR-ALLOC --implicit-check-not=error:
@@ -62,6 +62,16 @@
 # ERR3:      unknown --compress-sections value: zlib-gabi
 # ERR3-NEXT: --compress-sections: parse error, not 'section-glob=[none|zlib|zstd]'
 
+# RUN: not ld.lld a.o --compress-sections='a=zlib:' --compress-sections='a=zlib:-1' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR4 --implicit-check-not=error:
+# ERR4: error: --compress-sections: expected a non-negative integer compression level, but got ''
+# ERR4: error: --compress-sections: expected a non-negative integer compression level, but got '-1'
+
+## Invalid compression level for zlib.
+# RUN: not ld.lld a.o --compress-sections='.debug*=zlib:99' 2>&1 | \
+# RUN:   FileCheck %s --check-prefix=ERR6 --implicit-check-not=error:
+# ERR6: error: --compress-sections: deflateInit2 returned -2
+
 .globl _start
 _start:
   ret

diff --git a/lld/test/ELF/compressed-debug-level.test b/lld/test/ELF/compressed-debug-level.test
index ee95f126799722..ce3a194bd7c2b9 100644
--- a/lld/test/ELF/compressed-debug-level.test
+++ b/lld/test/ELF/compressed-debug-level.test
@@ -2,22 +2,20 @@
 
 # RUN: yaml2obj %s -o %t.o
 
+## LLD uses zlib compression of level 1 by default. Unlike previous versions,
+## -O does not change the level.
 # RUN: ld.lld %t.o -o %t.default --compress-debug-sections=zlib
 # RUN: llvm-readelf --sections %t.default | FileCheck -check-prefixes=HEADER,LEVEL1 %s
 
 # RUN: ld.lld -O0 %t.o -o %t.O0 --compress-debug-sections=zlib
-# RUN: llvm-readelf --sections %t.O0 | FileCheck -check-prefixes=HEADER,LEVEL1 %s
 # RUN: cmp %t.default %t.O0
 
-# RUN: ld.lld -O1 %t.o -o %t.O1 --compress-debug-sections=zlib
-# RUN: llvm-readelf --sections %t.O1 | FileCheck -check-prefixes=HEADER,LEVEL1 %s
-# RUN: cmp %t.default %t.O1
-
 # RUN: ld.lld -O2 %t.o -o %t.O2 --compress-debug-sections=zlib
-# RUN: llvm-readelf --sections %t.O2 | FileCheck -check-prefixes=HEADER,LEVEL6 %s
+# RUN: cmp %t.default %t.O2
 
-## LLD uses zlib compression of level 1 when -O0, -O1 and level 6 when -O2.
-## Here we check how -O flag affects the size of compressed sections produced.
+## --compress-sections specifies the level.
+# RUN: ld.lld %t.o -o %t.6 --compress-sections=.debug_info=zlib:6
+# RUN: llvm-readelf --sections %t.6 | FileCheck -check-prefixes=HEADER,LEVEL6 %s
 
 # HEADER: [Nr] Name        Type     Address  Off    Size
 # LEVEL1: [ 1] .debug_info PROGBITS 00000000 000094 00001{{[bc]}}


        


More information about the llvm-commits mailing list