[llvm] [Object][ELF] Support extended header for Object Parser in ELF (PR #162288)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 9 05:30:06 PDT 2025


https://github.com/aokblast updated https://github.com/llvm/llvm-project/pull/162288

>From d2740bcf5c84d4f5d71e4058b25169034ec8f5e5 Mon Sep 17 00:00:00 2001
From: ShengYi Hung <aokblast at FreeBSD.org>
Date: Tue, 7 Oct 2025 21:46:52 +0800
Subject: [PATCH] [Object, ELF] Implement PN_XNUM extension for program headers

In an ELF file, there may be an extended header for values of phnum, shnum,
and shstrndx that are larger than the 16-bit maximum. This extended header
uses section 0 to record these fields in 32 bits. We implement this
feature so that programs that rely on ELFFile::program_headers() can get the
correct number of segments. Also, consumers don't have to check
section 0 themselves; instead, they can use getPhNum() as an
alternative.
---
 llvm/include/llvm/BinaryFormat/ELF.h |  2 +
 llvm/include/llvm/Object/ELF.h       | 60 +++++++++++++++++++---------
 llvm/include/llvm/Object/ELFTypes.h  |  5 +++
 3 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index e619b186dfe3d..136f8cfbde818 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -1123,6 +1123,8 @@ struct Elf64_Shdr {
   Elf64_Xword sh_entsize;
 };
 
+enum { PN_XNUM = 0xffff };
+
 // Special section indices.
 enum {
   SHN_UNDEF = 0,          // Undefined, missing, irrelevant, or meaningless
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 59f63eb6b5bb6..3b96c0e2b9d1f 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -278,9 +278,16 @@ class ELFFile {
   std::vector<Elf_Shdr> FakeSections;
   SmallString<0> FakeSectionStrings;
 
+  Elf_Word RealPhNum;
+  Elf_Word RealShNum;
+  Elf_Word RealShStrNdx;
+
   ELFFile(StringRef Object);
 
 public:
+  Elf_Word getPhNum() const { return RealPhNum; }
+  Elf_Word getShNum() const { return RealShNum; }
+  Elf_Word getShStrNdx() const { return RealShStrNdx; }
   const Elf_Ehdr &getHeader() const {
     return *reinterpret_cast<const Elf_Ehdr *>(base());
   }
@@ -379,22 +386,21 @@ class ELFFile {
 
   /// Iterate over program header table.
   Expected<Elf_Phdr_Range> program_headers() const {
-    if (getHeader().e_phnum && getHeader().e_phentsize != sizeof(Elf_Phdr))
+    if (RealPhNum && getHeader().e_phentsize != sizeof(Elf_Phdr))
       return createError("invalid e_phentsize: " +
                          Twine(getHeader().e_phentsize));
 
-    uint64_t HeadersSize =
-        (uint64_t)getHeader().e_phnum * getHeader().e_phentsize;
+    uint64_t HeadersSize = (uint64_t)RealPhNum * getHeader().e_phentsize;
     uint64_t PhOff = getHeader().e_phoff;
     if (PhOff + HeadersSize < PhOff || PhOff + HeadersSize > getBufSize())
       return createError("program headers are longer than binary of size " +
                          Twine(getBufSize()) + ": e_phoff = 0x" +
                          Twine::utohexstr(getHeader().e_phoff) +
-                         ", e_phnum = " + Twine(getHeader().e_phnum) +
+                         ", e_phnum = " + Twine(RealPhNum) +
                          ", e_phentsize = " + Twine(getHeader().e_phentsize));
 
     auto *Begin = reinterpret_cast<const Elf_Phdr *>(base() + PhOff);
-    return ArrayRef(Begin, Begin + getHeader().e_phnum);
+    return ArrayRef(Begin, Begin + RealPhNum);
   }
 
   /// Get an iterator over notes in a program header.
@@ -772,18 +778,10 @@ template <class ELFT>
 Expected<StringRef>
 ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections,
                                      WarningHandler WarnHandler) const {
-  uint32_t Index = getHeader().e_shstrndx;
-  if (Index == ELF::SHN_XINDEX) {
-    // If the section name string table section index is greater than
-    // or equal to SHN_LORESERVE, then the actual index of the section name
-    // string table section is contained in the sh_link field of the section
-    // header at index 0.
-    if (Sections.empty())
-      return createError(
-          "e_shstrndx == SHN_XINDEX, but the section header table is empty");
-
-    Index = Sections[0].sh_link;
-  }
+  uint32_t Index = RealShStrNdx;
+  if (Index == ELF::SHN_XINDEX)
+    return createError(
+        "e_shstrndx == SHN_XINDEX, but the section header table is empty");
 
   // There is no section name string table. Return FakeSectionStrings which
   // is non-empty if we have created fake sections.
@@ -889,7 +887,31 @@ Expected<uint64_t> ELFFile<ELFT>::getDynSymtabSize() const {
   return 0;
 }
 
-template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {}
+template <class ELFT> ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {
+  const Elf_Ehdr &Header = getHeader();
+  RealPhNum = Header.e_phnum;
+  RealShNum = Header.e_shnum;
+  RealShStrNdx = Header.e_shstrndx;
+  if (!Header.hasPhdrNumExtension())
+    return;
+
+  // hasPhdrNumExtension() may return true for an ELF binary that does not
+  // actually include an extended header. For example, a core dump can contain
+  // 65,535 segments but no sections at all. We defer reporting an error until
+  // section 0 is accessed. Consumers should handle and emit the error
+  // themselves when they attempt to access it.
+  auto SecOrErr = getSection(0);
+  if (!SecOrErr) {
+    consumeError(SecOrErr.takeError());
+    return;
+  }
+  if (RealPhNum == 0xFFFF)
+    RealPhNum = (*SecOrErr)->sh_info;
+  if (RealShNum == ELF::SHN_UNDEF)
+    RealShNum = (*SecOrErr)->sh_size;
+  if (RealShStrNdx == ELF::SHN_XINDEX)
+    RealShStrNdx = (*SecOrErr)->sh_link;
+}
 
 template <class ELFT>
 Expected<ELFFile<ELFT>> ELFFile<ELFT>::create(StringRef Object) {
@@ -956,7 +978,7 @@ Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
   const Elf_Shdr *First =
       reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
 
-  uintX_t NumSections = getHeader().e_shnum;
+  uintX_t NumSections = RealShNum;
   if (NumSections == 0)
     NumSections = First->sh_size;
 
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 5a26e2fc31458..b791f7486fe97 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -529,6 +529,11 @@ struct Elf_Ehdr_Impl {
 
   unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; }
   unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; }
+  bool hasPhdrNumExtension() const {
+    return (e_phnum == ELF::PN_XNUM || e_shnum == ELF::SHN_UNDEF ||
+            e_shstrndx == ELF::SHN_XINDEX) &&
+           e_shoff != 0;
+  }
 };
 
 template <endianness Endianness>



More information about the llvm-commits mailing list