[llvm] [llvm-readobj, ELF] Support reading binaries that have more than PN_XNUM segments (PR #162648)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 05:46:50 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-binary-utilities

Author: None (aokblast)

<details>
<summary>Changes</summary>

Some binaries, such as FreeBSD core dumps, use program headers to store mmap
information. It is possible for such a file to contain more than
PN_XNUM program headers. Therefore, we implement support for the PN_XNUM
extension in readelf and objcopy.

---
Full diff: https://github.com/llvm/llvm-project/pull/162648.diff


7 Files Affected:

- (modified) llvm/include/llvm/BinaryFormat/ELF.h (+2) 
- (modified) llvm/include/llvm/Object/ELF.h (+41-19) 
- (modified) llvm/include/llvm/Object/ELFTypes.h (+5) 
- (modified) llvm/test/tools/llvm-objcopy/ELF/many-sections.test (+2-2) 
- (added) llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz () 
- (added) llvm/test/tools/llvm-readobj/ELF/many-segments.test (+79) 
- (modified) llvm/tools/llvm-readobj/ELFDumper.cpp (+32-16) 


``````````diff
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index e619b186dfe3d..136f8cfbde818 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1123,6 +1123,8 @@ struct Elf64_Shdr {
   Elf64_Xword sh_entsize;
 };
 
+enum { PN_XNUM = 0xffff };
+
 // Special section indices.
 enum {
   SHN_UNDEF = 0,          // Undefined, missing, irrelevant, or meaningless
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 59f63eb6b5bb6..3b96c0e2b9d1f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -278,9 +278,16 @@ class ELFFile {
   std::vector<Elf_Shdr> FakeSections;
   SmallString<0> FakeSectionStrings;
 
+  Elf_Word RealPhNum;
+  Elf_Word RealShNum;
+  Elf_Word RealShStrNdx;
+
   ELFFile(StringRef Object);
 
 public:
+  Elf_Word getPhNum() const { return RealPhNum; }
+  Elf_Word getShNum() const { return RealShNum; }
+  Elf_Word getShStrNdx() const { return RealShStrNdx; }
   const Elf_Ehdr &getHeader() const {
     return *reinterpret_cast<const Elf_Ehdr *>(base());
   }
@@ -379,22 +386,21 @@ class ELFFile {
 
   /// Iterate over program header table.
   Expected<Elf_Phdr_Range> program_headers() const {
-    if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr))
+    if (RealPhNum && getHeader().e_phentsize != sizeof(Elf_Phdr))
       return createError("invalid e_phentsize: " +
                          Twine(getHeader().e_phentsize));
 
-    uint64_t HeadersSize =
-        (uint64_t)getHeader().e_phnum * getHeader().e_phentsize;
+    uint64_t HeadersSize = (uint64_t)RealPhNum * getHeader().e_phentsize;
     uint64_t PhOff = getHeader().e_phoff;
     if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())
       return createError("program headers are longer than binary of size " +
                          Twine(getBufSize()) + ": e_phoff = 0x" +
                          Twine::utohexstr(getHeader().e_phoff) +
-                         ", e_phnum = " + Twine(getHeader().e_phnum) +
+                         ", e_phnum = " + Twine(RealPhNum) +
                          ", e_phentsize = " + Twine(getHeader().e_phentsize));
 
     auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff);
-    return ArrayRef(Begin, Begin + getHeader().e_phnum);
+    return ArrayRef(Begin, Begin + RealPhNum);
   }
 
   /// Get an iterator over notes in a program header.
@@ -772,18 +778,10 @@ template <class ELFT>
 Expected<StringRef>
 ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
                                      WarningHandler WarnHandler) const {
-  uint32_t Index = getHeader().e_shstrndx;
-  if (Index == ELF::SHN_XINDEX) {
-    // If the section name string table section index is greater than
-    // or equal to SHN_LORESERVE, then the actual index of the section name
-    // string table section is contained in the sh_link field of the section
-    // header at index 0.
-    if (Sections.empty())
-      return createError(
-          "e_shstrndx == SHN_XINDEX, but the section header table is empty");
-
-    Index = Sections[0].sh_link;
-  }
+  uint32_t Index = RealShStrNdx;
+  if (Index == ELF::SHN_XINDEX)
+    return createError(
+        "e_shstrndx == SHN_XINDEX, but the section header table is empty");
 
   // There is no section name string table. Return FakeSectionStrings which
   // is non-empty if we have created fake sections.
@@ -889,7 +887,31 @@ Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
   return 0;
 }
 
-template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
+template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {
+  const Elf_Ehdr &Header = getHeader();
+  RealPhNum = Header.e_phnum;
+  RealShNum = Header.e_shnum;
+  RealShStrNdx = Header.e_shstrndx;
+  if (!Header.hasPhdrNumExtension())
+    return;
+
+  // An ELF binary may report `hasExtendedHeader` as true but not actually
+  // include an extended header. For example, a core dump can contain 65,535
+  // segments but no sections at all. We defer reporting an error until section
+  // 0 is accessed. Consumers should handle and emit the error themselves when
+  // they attempt to access it.
+  auto SecOrErr = getSection(0);
+  if (!SecOrErr) {
+    consumeError(SecOrErr.takeError());
+    return;
+  }
+  if (RealPhNum == 0xFFFF)
+    RealPhNum = (*SecOrErr)->sh_info;
+  if (RealShNum == ELF::SHN_UNDEF)
+    RealShNum = (*SecOrErr)->sh_size;
+  if (RealShStrNdx == ELF::SHN_XINDEX)
+    RealShStrNdx = (*SecOrErr)->sh_link;
+}
 
 template <class ELFT>
 Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
@@ -956,7 +978,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
   const Elf_Shdr *First =
       reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
 
-  uintX_t NumSections = getHeader().e_shnum;
+  uintX_t NumSections = RealShNum;
   if (NumSections == 0)
     NumSections = First->sh_size;
 
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 5a26e2fc31458..b791f7486fe97 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -529,6 +529,11 @@ struct Elf_Ehdr_Impl {
 
   unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
   unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+  bool hasPhdrNumExtension() const {
+    return (e_phnum == ELF::PN_XNUM || e_shnum == ELF::SHN_UNDEF ||
+            e_shstrndx == ELF::SHN_XINDEX) &&
+           e_shoff != 0;
+  }
 };
 
 template <endianness Endianness>
diff --git a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
index 6622db237026f..4c618acb5c951 100644
--- a/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
+++ b/llvm/test/tools/llvm-objcopy/ELF/many-sections.test
@@ -6,8 +6,8 @@ RUN: llvm-readobj --file-headers --sections --symbols %t2 | FileCheck %s
 RUN: llvm-readelf --symbols %t2 | FileCheck --check-prefix=SYMS %s
 
 ## The ELF header should have e_shnum == 0 and e_shstrndx == SHN_XINDEX.
-# CHECK:        SectionHeaderCount: 0
-# CHECK-NEXT:   StringTableSectionIndex: 65535
+# CHECK:        SectionHeaderCount: 0 (65540)
+# CHECK-NEXT:   StringTableSectionIndex: 65535 (65539)
 
 ## The first section header should store the real section header count and
 ## shstrndx in its fields.
diff --git a/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz b/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz
new file mode 100644
index 0000000000000..0709ed1d6389e
Binary files /dev/null and b/llvm/test/tools/llvm-readobj/ELF/Inputs/many-segments.o.gz differ
diff --git a/llvm/test/tools/llvm-readobj/ELF/many-segments.test b/llvm/test/tools/llvm-readobj/ELF/many-segments.test
new file mode 100644
index 0000000000000..20c31e97c8aca
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/many-segments.test
@@ -0,0 +1,79 @@
+## Show that llvm-readelf can handle an input file with many segments.
+
+RUN: %python %p/../../llvm-objcopy/Inputs/ungzip.py %p/Inputs/many-segments.o.gz > %t
+RUN: llvm-readobj --file-headers --sections --segments %t2 | FileCheck %s
+RUN: llvm-readelf --segments %t2 | FileCheck --check-prefix=SYMS %s
+
+## The ELF header should have e_phnum == PN_XNUM
+# CHECK:        ProgramHeaderCount: 65535 (66549)
+## The first section header should store the real program header count in its fields.
+# CHECK:      Section {
+# CHECK-NEXT:   Index: 0
+# CHECK-NEXT:   Name:
+# CHECK-NEXT:   Type: SHT_NULL
+# CHECK-NEXT:   Flags [
+# CHECK-NEXT:   ]
+# CHECK-NEXT:   Address:
+# CHECK-NEXT:   Offset:
+# CHECK-NEXT:   Size:
+# CHECK-NEXT:   Link:
+# CHECK-NEXT:   Info: 66549
+
+## Show that the symbols with segments indexes around the reserved range still
+## have the right segment indexes afterwards.
+# 65535th segment
+# CHECK:         Offset: 0x1183B000
+# CHECK-NEXT:	 VirtualAddress: 0x349139F3000
+# CHECK:		 }
+# CHECK-NEXT  ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x1183C000
+# CHECK-NEXT    VirtualAddress: 0x349139F4000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 4096
+# CHECK-NEXT    MemSize: 4096
+# CHECK-NEXT    Flags [ (0x4)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+# CHECK-NEXT  ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x1183D000
+# CHECK-NEXT    VirtualAddress: 0x349139F5000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 4096
+# CHECK-NEXT    MemSize: 4096
+# CHECK-NEXT    Flags [ (0x6)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT      PF_W (0x2)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+# CHECK-NEXT  ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x1183E000
+# CHECK-NEXT    VirtualAddress: 0x349139F6000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 4096
+# CHECK-NEXT    MemSize: 4096
+# CHECK-NEXT    Flags [ (0x4)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+# CHECK        ProgramHeader {
+# CHECK-NEXT    Type: PT_LOAD (0x1)
+# CHECK-NEXT    Offset: 0x11C31000
+# CHECK-NEXT    VirtualAddress: 0x30D8E7868000
+# CHECK-NEXT    PhysicalAddress: 0x0
+# CHECK-NEXT    FileSize: 8192
+# CHECK-NEXT    MemSize: 8192
+# CHECK-NEXT    Flags [ (0x6)
+# CHECK-NEXT      PF_R (0x4)
+# CHECK-NEXT      PF_W (0x2)
+# CHECK-NEXT    ]
+# CHECK-NEXT    Alignment: 4096
+# CHECK-NEXT  }
+
+# SYMS: There are 66549 program headers, starting at offset 64
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ab93316907cc6..53d3a439f1e62 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -3572,12 +3572,30 @@ static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
   OS.flush();
 }
 
+template <class ELFT>
+static std::string getProgramHeadersNumString(const ELFFile<ELFT> &Obj,
+                                              StringRef FileName) {
+  if (Obj.getHeader().e_phnum != ELF::PN_XNUM)
+    return to_string(Obj.getHeader().e_phnum);
+
+  Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
+  if (!ArrOrErr) {
+    // In this case we can ignore an error, because we have already reported a
+    // warning about the broken section header table earlier.
+    consumeError(ArrOrErr.takeError());
+    return "<?>";
+  }
+
+  if (Obj.getHeader().e_phnum == Obj.getPhNum())
+    return "65535";
+  return "65535 (" + to_string(Obj.getPhNum()) + ")";
+}
+
 template <class ELFT>
 static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
                                               StringRef FileName) {
-  const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
-  if (ElfHeader.e_shnum != 0)
-    return to_string(ElfHeader.e_shnum);
+  if (Obj.getHeader().e_shnum != 0)
+    return to_string(Obj.getHeader().e_shnum);
 
   Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
   if (!ArrOrErr) {
@@ -3587,17 +3605,16 @@ static std::string getSectionHeadersNumString(const ELFFile<ELFT> &Obj,
     return "<?>";
   }
 
-  if (ArrOrErr->empty())
+  if (Obj.getHeader().e_shnum == Obj.getShNum())
     return "0";
-  return "0 (" + to_string((*ArrOrErr)[0].sh_size) + ")";
+  return "0 (" + to_string(Obj.getShNum()) + ")";
 }
 
 template <class ELFT>
 static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
                                                     StringRef FileName) {
-  const typename ELFT::Ehdr &ElfHeader = Obj.getHeader();
-  if (ElfHeader.e_shstrndx != SHN_XINDEX)
-    return to_string(ElfHeader.e_shstrndx);
+  if (Obj.getHeader().e_shstrndx != SHN_XINDEX)
+    return to_string(Obj.getHeader().e_shstrndx);
 
   Expected<ArrayRef<typename ELFT::Shdr>> ArrOrErr = Obj.sections();
   if (!ArrOrErr) {
@@ -3607,10 +3624,9 @@ static std::string getSectionHeaderTableIndexString(const ELFFile<ELFT> &Obj,
     return "<?>";
   }
 
-  if (ArrOrErr->empty())
+  if (Obj.getHeader().e_shstrndx == Obj.getShStrNdx())
     return "65535 (corrupt: out of range)";
-  return to_string(ElfHeader.e_shstrndx) + " (" +
-         to_string((*ArrOrErr)[0].sh_link) + ")";
+  return "65535 (" + to_string(Obj.getShStrNdx()) + ")";
 }
 
 static const EnumEntry<unsigned> *getObjectFileEnumEntry(unsigned Type) {
@@ -3765,7 +3781,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
   printFields(OS, "Size of this header:", Str);
   Str = to_string(e.e_phentsize) + " (bytes)";
   printFields(OS, "Size of program headers:", Str);
-  Str = to_string(e.e_phnum);
+  Str = getProgramHeadersNumString(this->Obj, this->FileName);
   printFields(OS, "Number of program headers:", Str);
   Str = to_string(e.e_shentsize) + " (bytes)";
   printFields(OS, "Size of section headers:", Str);
@@ -4778,8 +4794,7 @@ void GNUELFDumper<ELFT>::printProgramHeaders(
     return;
 
   if (PrintProgramHeaders) {
-    const Elf_Ehdr &Header = this->Obj.getHeader();
-    if (Header.e_phnum == 0) {
+    if (this->Obj.getPhNum() == 0) {
       OS << "\nThere are no program headers in this file.\n";
     } else {
       printProgramHeaders();
@@ -4798,7 +4813,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printProgramHeaders() {
   OS << "\nElf file type is "
      << enumToString(Header.e_type, ArrayRef(ElfObjectFileType)) << "\n"
      << "Entry point " << format_hex(Header.e_entry, 3) << "\n"
-     << "There are " << Header.e_phnum << " program headers,"
+     << "There are " << this->Obj.getPhNum() << " program headers,"
      << " starting at offset " << Header.e_phoff << "\n\n"
      << "Program Headers:\n";
   if (ELFT::Is64Bits)
@@ -7470,7 +7485,8 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
       W.printFlags("Flags", E.e_flags);
     W.printNumber("HeaderSize", E.e_ehsize);
     W.printNumber("ProgramHeaderEntrySize", E.e_phentsize);
-    W.printNumber("ProgramHeaderCount", E.e_phnum);
+    W.printString("ProgramHeaderCount",
+                  getProgramHeadersNumString(this->Obj, this->FileName));
     W.printNumber("SectionHeaderEntrySize", E.e_shentsize);
     W.printString("SectionHeaderCount",
                   getSectionHeadersNumString(this->Obj, this->FileName));

``````````

</details>


https://github.com/llvm/llvm-project/pull/162648


More information about the llvm-commits mailing list