[llvm] [llvm-readobj,ELF] Support --decompress/-z (PR #82594)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 21 23:22:33 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-binary-utilities

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

When a section has the SHF_COMPRESSED flag, -p/-x dump the compressed
content by default. In GNU readelf, if --decompress/-z is specified,
-p/-x will dump the decompressed content. This patch implements the
option.

Close #<!-- -->82507


---
Full diff: https://github.com/llvm/llvm-project/pull/82594.diff


8 Files Affected:

- (modified) llvm/docs/CommandGuide/llvm-readelf.rst (+6-1) 
- (modified) llvm/docs/CommandGuide/llvm-readobj.rst (+5) 
- (added) llvm/test/tools/llvm-readobj/ELF/decompress-unsupported-zlib.test (+32) 
- (added) llvm/test/tools/llvm-readobj/ELF/decompress.test (+67) 
- (modified) llvm/tools/llvm-readobj/ObjDumper.cpp (+24-2) 
- (modified) llvm/tools/llvm-readobj/ObjDumper.h (+2-2) 
- (modified) llvm/tools/llvm-readobj/Opts.td (+2) 
- (modified) llvm/tools/llvm-readobj/llvm-readobj.cpp (+4-2) 


``````````diff
diff --git a/llvm/docs/CommandGuide/llvm-readelf.rst b/llvm/docs/CommandGuide/llvm-readelf.rst
index 6ee4a5dfb15917..c302a6316ccc9b 100644
--- a/llvm/docs/CommandGuide/llvm-readelf.rst
+++ b/llvm/docs/CommandGuide/llvm-readelf.rst
@@ -38,6 +38,11 @@ OPTIONS
  Display the contents of the basic block address map section(s), which contain the
  address of each function, along with the relative offset of each basic block.
 
+.. option:: --decompress, -z
+
+  Dump decompressed section content when used with ``-x`` or ``-p``.
+  If the section(s) are not compressed, they are displayed as is.
+
 .. option:: --demangle, -C
 
  Display demangled symbol names in the output.
@@ -72,7 +77,7 @@ OPTIONS
 
 .. option:: --elf-output-style=<value>
 
- Format ELF information in the specified style. Valid options are ``LLVM``,
+ Format ELF information in the specified style. Valid options are ``LLVM``,
  ``GNU``, and ``JSON``. ``LLVM`` output is an expanded and structured format.
  ``GNU`` (the default) output mimics the equivalent GNU :program:`readelf`
  output. ``JSON`` is JSON formatted output intended for machine consumption.
diff --git a/llvm/docs/CommandGuide/llvm-readobj.rst b/llvm/docs/CommandGuide/llvm-readobj.rst
index cb9232ef5e560a..6d78a038723445 100644
--- a/llvm/docs/CommandGuide/llvm-readobj.rst
+++ b/llvm/docs/CommandGuide/llvm-readobj.rst
@@ -56,6 +56,11 @@ file formats.
 
  Display the address-significance table.
 
+.. option:: --decompress, -z
+
+  Dump decompressed section content when used with ``-x`` or ``-p``.
+  If the section(s) are not compressed, they are displayed as is.
+
 .. option:: --expand-relocs
 
  When used with :option:`--relocs`, display each relocation in an expanded
diff --git a/llvm/test/tools/llvm-readobj/ELF/decompress-unsupported-zlib.test b/llvm/test/tools/llvm-readobj/ELF/decompress-unsupported-zlib.test
new file mode 100644
index 00000000000000..f4c73de7ca6c9d
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/decompress-unsupported-zlib.test
@@ -0,0 +1,32 @@
+# UNSUPPORTED: zlib
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-readobj -z -p .a -x .b %t 2>&1 | FileCheck %s -DFILE=%t
+
+# CHECK:      String dump of section '.a':
+# CHECK-NEXT: warning: '[[FILE]]': LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at build time
+# CHECK-NEXT: [     0] .
+# CHECK-NEXT: [     8] .
+# CHECK-NEXT: [    10] .
+# CHECK-NEXT: [    18] x.c.
+# CHECK-NEXT: [    1e] .
+# CHECK-NEXT: [    20] .
+# CHECK-NEXT: Hex dump of section '.b':
+# CHECK-NEXT: warning: '[[FILE]]': LLVM was not built with LLVM_ENABLE_ZLIB or did not find zlib at build time
+# CHECK-NEXT: 0x00000000 01000000 00000000 01000000 00000000 ................
+# CHECK-NEXT: 0x00000010 01000000 00000000 789c6304 00000200 ........x.c.....
+# CHECK-NEXT: 0x00000020 02                                  .
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_REL
+Sections:
+  - Name: .a
+    Type: SHT_PROGBITS
+    Flags: [SHF_COMPRESSED]
+    Content: 010000000000000001000000000000000100000000000000789c63040000020002
+  - Name: .b
+    Type: SHT_PROGBITS
+    Flags: [SHF_COMPRESSED]
+    Content: 010000000000000001000000000000000100000000000000789c63040000020002
diff --git a/llvm/test/tools/llvm-readobj/ELF/decompress.test b/llvm/test/tools/llvm-readobj/ELF/decompress.test
new file mode 100644
index 00000000000000..638b5cc37d3a89
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/decompress.test
@@ -0,0 +1,67 @@
+# REQUIRES: zlib
+## Test --decompress/-z
+
+# RUN: yaml2obj %s -o %t
+
+# RUN: llvm-readelf -z -x .strings -x .not_null_terminated %t | FileCheck %s --check-prefix=HEX
+# RUN: llvm-readobj --decompress -p .strings -p .not_null_terminated %t | FileCheck %s --check-prefix=STR
+
+# HEX:      Hex dump of section '.strings':
+# HEX-NEXT: 0x00000000 68657265 00617265 00736f6d 65007374 here.are.some.st
+# HEX-NEXT: 0x00000010 72696e67 7300                       rings.
+# HEX:      Hex dump of section '.not_null_terminated':
+# HEX-NEXT: 0x00000000 6e6f006e 756c6c                     no.null
+
+# RUN: llvm-readobj -x .strings -p .not_null_terminated %t | FileCheck %s --check-prefix=COMPRESSED
+
+# COMPRESSED:      String dump of section '.not_null_terminated':
+# COMPRESSED-NEXT: [     0] no
+# COMPRESSED-NEXT: [     3] null
+# COMPRESSED-NEXT: Hex dump of section '.strings':
+# COMPRESSED-NEXT: 0x00000000 01000000 00000000 16000000 00000000 ................
+# COMPRESSED-NEXT: 0x00000010 00000000 00000000 789ccb48 2d4a6548 ........x..H-JeH
+# COMPRESSED-NEXT: 0x00000020 04e2e2fc 5c205152 9499975e cc000058 ....\ QR...^...X
+# COMPRESSED-NEXT: 0x00000030 2e079b                              ...
+
+# STR:      String dump of section '.strings':
+# STR-NEXT: [ 0] here
+# STR-NEXT: [ 5] are
+# STR-NEXT: [ 9] some
+# STR-NEXT: [ e] strings
+# STR-EMPTY:
+# STR-NEXT: String dump of section '.not_null_terminated':
+# STR-NEXT: [ 0] no
+# STR-NEXT: [ 3] null{{$}}
+# STR-NOT:  {{.}}
+
+# RUN: llvm-readelf -z -p .invalid1 -x .invalid2 %t 2>&1 | FileCheck %s -DFILE=%t --check-prefix=INVALID
+
+# INVALID:      String dump of section '.invalid1':
+# INVALID-NEXT: warning: '[[FILE]]': corrupted compressed section header
+# INVALID-NEXT: [     0] .
+# INVALID-NEXT: Hex dump of section '.invalid2':
+# INVALID-NEXT: warning: '[[FILE]]': zlib error: Z_DATA_ERROR
+# INVALID-NEXT: 0x00000000 01000000 00000000 16000000 00000000 ................
+# INVALID-NEXT: 0x00000010 00000000 00000000 78                ........x
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data:  ELFDATA2LSB
+  Type:  ET_REL
+Sections:
+  - Name: .strings
+    Type: SHT_PROGBITS
+    Flags: [SHF_COMPRESSED]
+    Content: 010000000000000016000000000000000000000000000000789ccb482d4a654804e2e2fc5c2051529499975ecc0000582e079b
+  - Name: .not_null_terminated
+    Type: SHT_PROGBITS
+    Content: 6e6f006e756c6c
+  - Name: .invalid1
+    Type: SHT_PROGBITS
+    Flags: [SHF_COMPRESSED]
+    Content: 01
+  - Name: .invalid2
+    Type: SHT_PROGBITS
+    Flags: [SHF_COMPRESSED]
+    Content: 01000000000000001600000000000000000000000000000078
diff --git a/llvm/tools/llvm-readobj/ObjDumper.cpp b/llvm/tools/llvm-readobj/ObjDumper.cpp
index 59060ac217e32f..0d3fea71aafd42 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.cpp
+++ b/llvm/tools/llvm-readobj/ObjDumper.cpp
@@ -14,6 +14,7 @@
 #include "ObjDumper.h"
 #include "llvm-readobj.h"
 #include "llvm/Object/Archive.h"
+#include "llvm/Object/Decompressor.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -142,8 +143,23 @@ getSectionRefsByNameOrIndex(const object::ObjectFile &Obj,
   return Ret;
 }
 
+static void maybeDecompress(const object::ObjectFile &Obj,
+                            StringRef SectionName, StringRef &SectionContent,
+                            SmallString<0> &Out) {
+  Expected<object::Decompressor> Decompressor = object::Decompressor::create(
+      SectionName, SectionContent, Obj.isLittleEndian(), Obj.is64Bit());
+  if (!Decompressor)
+    reportWarning(Decompressor.takeError(), Obj.getFileName());
+  else if (auto Err = Decompressor->resizeAndDecompress(Out))
+    reportWarning(std::move(Err), Obj.getFileName());
+  else
+    SectionContent = Out;
+}
+
 void ObjDumper::printSectionsAsString(const object::ObjectFile &Obj,
-                                      ArrayRef<std::string> Sections) {
+                                      ArrayRef<std::string> Sections,
+                                      bool Decompress) {
+  SmallString<0> Out;
   bool First = true;
   for (object::SectionRef Section :
        getSectionRefsByNameOrIndex(Obj, Sections)) {
@@ -156,12 +172,16 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile &Obj,
 
     StringRef SectionContent =
         unwrapOrError(Obj.getFileName(), Section.getContents());
+    if (Decompress && Section.isCompressed())
+      maybeDecompress(Obj, SectionName, SectionContent, Out);
     printAsStringList(SectionContent);
   }
 }
 
 void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj,
-                                   ArrayRef<std::string> Sections) {
+                                   ArrayRef<std::string> Sections,
+                                   bool Decompress) {
+  SmallString<0> Out;
   bool First = true;
   for (object::SectionRef Section :
        getSectionRefsByNameOrIndex(Obj, Sections)) {
@@ -174,6 +194,8 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj,
 
     StringRef SectionContent =
         unwrapOrError(Obj.getFileName(), Section.getContents());
+    if (Decompress && Section.isCompressed())
+      maybeDecompress(Obj, SectionName, SectionContent, Out);
     const uint8_t *SecContent = SectionContent.bytes_begin();
     const uint8_t *SecEnd = SecContent + SectionContent.size();
 
diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h
index 1d679453581bc8..3958dd3a333332 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/llvm/tools/llvm-readobj/ObjDumper.h
@@ -175,9 +175,9 @@ class ObjDumper {
   void printAsStringList(StringRef StringContent, size_t StringDataOffset = 0);
 
   void printSectionsAsString(const object::ObjectFile &Obj,
-                             ArrayRef<std::string> Sections);
+                             ArrayRef<std::string> Sections, bool Decompress);
   void printSectionsAsHex(const object::ObjectFile &Obj,
-                          ArrayRef<std::string> Sections);
+                          ArrayRef<std::string> Sections, bool Decompress);
 
   std::function<Error(const Twine &Msg)> WarningHandler;
   void reportUniqueWarning(Error Err) const;
diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td
index e2d93c6ec229e9..018facc278e891 100644
--- a/llvm/tools/llvm-readobj/Opts.td
+++ b/llvm/tools/llvm-readobj/Opts.td
@@ -20,6 +20,7 @@ def all : FF<"all", "Equivalent to setting: --file-header, --program-headers, --
 def arch_specific : FF<"arch-specific", "Display architecture-specific information">;
 def bb_addr_map : FF<"bb-addr-map", "Display the BB address map section">;
 def cg_profile : FF<"cg-profile", "Display call graph profile section">;
+def decompress : FF<"decompress", "Dump decompressed section content when used with -x or -p">;
 defm demangle : BB<"demangle", "Demangle symbol names", "Do not demangle symbol names (default)">;
 def dependent_libraries : FF<"dependent-libraries", "Display the dependent libraries section">;
 def dyn_relocations : FF<"dyn-relocations", "Display the dynamic relocation entries in the file">;
@@ -139,3 +140,4 @@ def : F<"u", "Alias for --unwind">, Alias<unwind>;
 def : F<"X", "Alias for --extra-sym-info">, Alias<extra_sym_info>, Group<grp_elf>;
 def : F<"V", "Alias for --version-info">, Alias<version_info>, Group<grp_elf>;
 def : JoinedOrSeparate<["-"], "x">, Alias<hex_dump_EQ>, HelpText<"Alias for --hex-dump">, MetaVarName<"<name or index>">;
+def : F<"z", "Alias for --decompress">, Alias<decompress>;
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index f9d605d35244bf..979433d69011c3 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -97,6 +97,7 @@ static bool ArchSpecificInfo;
 static bool BBAddrMap;
 bool ExpandRelocs;
 static bool CGProfile;
+static bool Decompress;
 bool Demangle;
 static bool DependentLibraries;
 static bool DynRelocs;
@@ -212,6 +213,7 @@ static void parseOptions(const opt::InputArgList &Args) {
   opts::ArchSpecificInfo = Args.hasArg(OPT_arch_specific);
   opts::BBAddrMap = Args.hasArg(OPT_bb_addr_map);
   opts::CGProfile = Args.hasArg(OPT_cg_profile);
+  opts::Decompress = Args.hasArg(OPT_decompress);
   opts::Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, false);
   opts::DependentLibraries = Args.hasArg(OPT_dependent_libraries);
   opts::DynRelocs = Args.hasArg(OPT_dyn_relocations);
@@ -439,9 +441,9 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
     Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols,
                          opts::ExtraSymInfo, SymComp);
   if (!opts::StringDump.empty())
-    Dumper->printSectionsAsString(Obj, opts::StringDump);
+    Dumper->printSectionsAsString(Obj, opts::StringDump, opts::Decompress);
   if (!opts::HexDump.empty())
-    Dumper->printSectionsAsHex(Obj, opts::HexDump);
+    Dumper->printSectionsAsHex(Obj, opts::HexDump, opts::Decompress);
   if (opts::HashTable)
     Dumper->printHashTable();
   if (opts::GnuHashTable)

``````````

</details>


https://github.com/llvm/llvm-project/pull/82594


More information about the llvm-commits mailing list