[Lldb-commits] [lldb] Add support for reading the dynamic symbol table from PT_DYNAMIC (PR #116689)

Greg Clayton via lldb-commits lldb-commits at lists.llvm.org
Mon Nov 18 12:15:17 PST 2024


https://github.com/clayborg updated https://github.com/llvm/llvm-project/pull/116689

>From 4dbb6fe631926bdcdcb89d981a457dd7599bffad Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 18 Nov 2024 12:11:56 -0800
Subject: [PATCH 1/2] Add support for reading the dynamic symbol table from
 PT_DYNAMIC (with buildbot fixes)

Allow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory.
---
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  | 182 ++++++++++++++++--
 .../Plugins/ObjectFile/ELF/ObjectFileELF.h    |  41 ++++
 2 files changed, 202 insertions(+), 21 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 9c7dff8127f473..8df226817326dd 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -44,6 +44,7 @@
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/MipsABIFlags.h"
+#include "lldb/Target/Process.h"
 
 #define CASE_AND_STREAM(s, def, width)                                         \
   case def:                                                                    \
@@ -3007,9 +3008,10 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
   // section, nomatter if .symtab was already parsed or not. This is because
   // minidebuginfo normally removes the .symtab symbols which have their
   // matching .dynsym counterparts.
+  Section *dynsym = nullptr;
   if (!symtab ||
       GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
-    Section *dynsym =
+    dynsym =
         section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
             .get();
     if (dynsym) {
@@ -3019,6 +3021,20 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
       m_address_class_map.merge(address_class_map);
     }
   }
+  if (!dynsym) {
+    // Try and read the dynamic symbol table from the .dynamic section.
+    uint32_t num_symbols = 0;
+    std::optional<DataExtractor> symtab_data =
+        GetDynsymDataFromDynamic(num_symbols);
+    std::optional<DataExtractor> strtab_data = GetDynstrData();
+    if (symtab_data && strtab_data) {
+      auto [num_symbols_parsed, address_class_map] =
+          ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
+                        symtab_data.value(), strtab_data.value());
+      symbol_id += num_symbols_parsed;
+      m_address_class_map.merge(address_class_map);
+    }
+  }
 
   // DT_JMPREL
   //      If present, this entry's d_ptr member holds the address of
@@ -3828,6 +3844,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
                                                          Offset);
 }
 
+std::optional<DataExtractor>
+ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
+                                   uint64_t offset) {
+  // The "d_ptr" member of an ELFDynamic is a load address when the ELF image
+  // was read from process memory and a file address when it was read from an
+  // ELF file on disk. Fetch the bytes it points to in whichever way applies,
+  // or return std::nullopt if the data isn't available.
+  const lldb::addr_t data_addr = dyn->d_ptr + offset;
+  if (ProcessSP process_sp = m_process_wp.lock()) {
+    DataBufferSP buffer_sp = ReadMemory(process_sp, data_addr, length);
+    if (!buffer_sp)
+      return std::nullopt;
+    return DataExtractor(buffer_sp, GetByteOrder(), GetAddressByteSize());
+  }
+  // No live process: treat the address as a file address and look for the
+  // section that contains it.
+  Address resolved_addr;
+  if (!resolved_addr.ResolveAddressUsingFileSections(data_addr,
+                                                     GetSectionList()))
+    return std::nullopt;
+  SectionSP section_sp = resolved_addr.GetSection();
+  DataExtractor section_data;
+  section_sp->GetSectionData(section_data);
+  return DataExtractor(section_data, data_addr - section_sp->GetFileAddress(),
+                       length);
+}
+
 std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
   if (SectionList *section_list = GetSectionList()) {
     // Find the SHT_DYNAMIC section.
@@ -3855,31 +3898,15 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
   // and represent the dynamic symbol tables's string table. These are needed
   // by the dynamic loader and we can read them from a process' address space.
   //
-  // When loading and ELF file from memory, only the program headers end up
-  // being mapped into memory, and we can find these values in the PT_DYNAMIC
-  // segment.
+  // When loading an ELF file from memory, only the program headers are
+  // guaranteed to end up being mapped into memory, and we can find these
+  // values in the PT_DYNAMIC segment.
   const ELFDynamic *strtab = FindDynamicSymbol(DT_STRTAB);
   const ELFDynamic *strsz = FindDynamicSymbol(DT_STRSZ);
   if (strtab == nullptr || strsz == nullptr)
     return std::nullopt;
 
-  if (ProcessSP process_sp = m_process_wp.lock()) {
-    if (DataBufferSP data_sp =
-            ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
-      return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
-  } else {
-    // We have an ELF file with no section headers or we didn't find the
-    // .dynamic section. Try and find the .dynstr section.
-    Address addr;
-    if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
-      DataExtractor data;
-      addr.GetSection()->GetSectionData(data);
-      return DataExtractor(data,
-                           strtab->d_ptr - addr.GetSection()->GetFileAddress(),
-                           strsz->d_val);
-    }
-  }
-  return std::nullopt;
+  return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
 }
 
 std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
@@ -3912,3 +3939,116 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
   }
   return std::nullopt;
 }
+
+std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicHash() {
+  const ELFDynamic *dt_hash = FindDynamicSymbol(DT_HASH);
+  if (!dt_hash)
+    return std::nullopt;
+
+  // The DT_HASH data begins with two 32 bit values:
+  struct DtHashHeader {
+    uint32_t nbucket;
+    uint32_t nchain;
+  };
+  auto header_data = ReadDataFromDynamic(dt_hash, sizeof(DtHashHeader));
+  if (!header_data)
+    return std::nullopt;
+
+  // Skip over "nbucket" as the number of buckets isn't needed, "nchain" is
+  // the value that contains the number of symbols.
+  offset_t nchain_offset = offsetof(DtHashHeader, nchain);
+  return header_data->GetU32(&nchain_offset);
+}
+
+std::optional<uint32_t> ObjectFileELF::GetNumSymbolsFromDynamicGnuHash() {
+  const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
+  if (gnu_hash == nullptr)
+    return std::nullopt;
+
+  // The DT_GNU_HASH header, which is followed by the bloom filter words, the
+  // buckets and the chains: https://flapenguin.me/elf-dt-gnu-hash
+  struct DtGnuHashHeader {
+    uint32_t nbuckets = 0;
+    uint32_t symoffset = 0;
+    uint32_t bloom_size = 0;
+    uint32_t bloom_shift = 0;
+  };
+  // Read enough data for the DT_GNU_HASH header so we can extract the values.
+  auto data = ReadDataFromDynamic(gnu_hash, sizeof(DtGnuHashHeader));
+  if (!data)
+    return std::nullopt;
+  offset_t offset = 0;
+  DtGnuHashHeader header;
+  header.nbuckets = data->GetU32(&offset);
+  header.symoffset = data->GetU32(&offset);
+  header.bloom_size = data->GetU32(&offset);
+  header.bloom_shift = data->GetU32(&offset);
+  // Use 64 bit math for all offsets so large header values can't wrap around.
+  const uint64_t entry_size = sizeof(uint32_t);
+  const uint64_t addr_size = GetAddressByteSize();
+  const uint64_t buckets_offset =
+      sizeof(DtGnuHashHeader) + addr_size * header.bloom_size;
+  const uint64_t buckets_size = entry_size * header.nbuckets;
+  auto bucket_data =
+      ReadDataFromDynamic(gnu_hash, buckets_size, buckets_offset);
+  if (!bucket_data)
+    return std::nullopt;
+  // Locate the chain that handles the largest index bucket.
+  offset = 0;
+  uint32_t last_symbol = 0;
+  for (uint32_t i = 0; i < header.nbuckets; ++i)
+    last_symbol = std::max(bucket_data->GetU32(&offset), last_symbol);
+  // No bucket value reaches "symoffset", so "symoffset" is the symbol count.
+  if (last_symbol < header.symoffset)
+    return header.symoffset;
+  // Walk the bucket's chain to add the chain length to the total.
+  const uint64_t chains_offset = buckets_offset + buckets_size;
+  for (;;) {
+    const uint64_t chain_idx = last_symbol - header.symoffset;
+    auto chain_entry_data = ReadDataFromDynamic(
+        gnu_hash, entry_size, chains_offset + chain_idx * entry_size);
+    if (!chain_entry_data)
+      break;
+    offset = 0;
+    const uint32_t chain_entry = chain_entry_data->GetU32(&offset);
+    ++last_symbol;
+    // If the low bit is set, this entry is the end of the chain.
+    if (chain_entry & 1)
+      break;
+  }
+  if (last_symbol > 0)
+    return last_symbol;
+  return std::nullopt;
+}
+
+std::optional<DataExtractor>
+ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
+  // Every ELF file which represents an executable or shared library has
+  // mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
+  // symbol table, and DT_SYMENT contains the size of a symbol table entry.
+  // We then can use either the DT_HASH or DT_GNU_HASH to find the number of
+  // symbols in the symbol table as the symbol count is not stored in the
+  // .dynamic section as a key/value pair.
+  //
+  // When loading an ELF file from memory, only the program headers are
+  // guaranteed to end up being mapped into memory, and we can find these
+  // values in the PT_DYNAMIC segment.
+  //
+  // "num_symbols" should only be used when symbol table data is returned.
+  num_symbols = 0;
+  const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
+  const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
+  // DT_SYMTAB and DT_SYMENT are mandatory.
+  if (symtab == nullptr || syment == nullptr)
+    return std::nullopt;
+
+  // Prefer DT_HASH and fall back to DT_GNU_HASH when DT_HASH is missing or
+  // doesn't report any symbols.
+  std::optional<uint32_t> syms = GetNumSymbolsFromDynamicHash();
+  if (syms.value_or(0) == 0)
+    syms = GetNumSymbolsFromDynamicGnuHash();
+  if (syms.value_or(0) == 0)
+    return std::nullopt;
+  num_symbols = *syms;
+  return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
+}
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index aba3a5bfcbf5b6..16c216eb81e729 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -435,6 +435,47 @@ class ObjectFileELF : public lldb_private::ObjectFile {
   /// \return The bytes that represent the string table data or \c std::nullopt
   ///         if an error occured.
   std::optional<lldb_private::DataExtractor> GetDynstrData();
+
+  /// Read the bytes pointed to by the \a dyn dynamic entry.
+  ///
+  /// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
+  /// from a file on disk, or they contain load addresses if they were read
+  /// from memory. This function will correctly extract the data in both cases
+  /// if it is available.
+  ///
+  /// \param[in] dyn The dynamic entry to use to fetch the data from.
+  ///
+  /// \param[in] length The number of bytes to read.
+  ///
+  /// \param[in] offset The number of bytes to skip after the d_ptr value
+  ///                   before reading data.
+  ///
+  /// \return The bytes that represent the dynamic entry's data or
+  ///         \c std::nullopt if an error occurred or the data is not available.
+  std::optional<lldb_private::DataExtractor>
+  ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
+                      uint64_t offset = 0);
+
+  /// Get the bytes that represent the dynamic symbol table from the .dynamic
+  /// section from process memory.
+  ///
+  /// This function uses the DT_SYMTAB value from the .dynamic section to read
+  /// the symbols table data from process memory. The number of symbols in the
+  /// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
+  /// values as the symbol count isn't stored in the .dynamic section.
+  ///
+  /// \return The bytes that represent the symbol table data from the .dynamic
+  ///         section or section headers or \c std::nullopt if an error
+  ///         occurred or if there is no dynamic symbol data available.
+  std::optional<lldb_private::DataExtractor>
+  GetDynsymDataFromDynamic(uint32_t &num_symbols);
+
+  /// Get the number of symbols from the DT_HASH dynamic entry.
+  std::optional<uint32_t> GetNumSymbolsFromDynamicHash();
+
+  /// Get the number of symbols from the DT_GNU_HASH dynamic entry.
+  std::optional<uint32_t> GetNumSymbolsFromDynamicGnuHash();
+
 };
 
 #endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H

>From 1bcfd2a9a1e7ea4ee984f7198ae4ba4148c4861f Mon Sep 17 00:00:00 2001
From: Greg Clayton <clayborg at gmail.com>
Date: Mon, 18 Nov 2024 12:14:36 -0800
Subject: [PATCH 2/2] Fix issues with buildbots.

Extra symbols were being added to the symbol table via the new dynamic symtab loading code. I moved this functionality so we only do it if we need to and avoid the extra symbols.
---
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  | 31 +++++++++----------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 8df226817326dd..6fb30e3c9dd7af 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -3008,10 +3008,9 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
   // section, nomatter if .symtab was already parsed or not. This is because
   // minidebuginfo normally removes the .symtab symbols which have their
   // matching .dynsym counterparts.
-  Section *dynsym = nullptr;
   if (!symtab ||
       GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
-    dynsym =
+    Section *dynsym =
         section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
             .get();
     if (dynsym) {
@@ -3019,20 +3018,20 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
           ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
       symbol_id += num_symbols;
       m_address_class_map.merge(address_class_map);
-    }
-  }
-  if (!dynsym) {
-    // Try and read the dynamic symbol table from the .dynamic section.
-    uint32_t num_symbols = 0;
-    std::optional<DataExtractor> symtab_data =
-        GetDynsymDataFromDynamic(num_symbols);
-    std::optional<DataExtractor> strtab_data = GetDynstrData();
-    if (symtab_data && strtab_data) {
-      auto [num_symbols_parsed, address_class_map] =
-          ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
-                        symtab_data.value(), strtab_data.value());
-      symbol_id += num_symbols_parsed;
-      m_address_class_map.merge(address_class_map);
+    } else {
+      // Try and read the dynamic symbol table from the .dynamic section.
+      uint32_t dynamic_num_symbols = 0;
+      std::optional<DataExtractor> symtab_data =
+          GetDynsymDataFromDynamic(dynamic_num_symbols);
+      std::optional<DataExtractor> strtab_data = GetDynstrData();
+      if (symtab_data && strtab_data) {
+        auto [num_symbols_parsed, address_class_map] =
+            ParseSymbols(&lldb_symtab, symbol_id, section_list,
+                         dynamic_num_symbols, symtab_data.value(),
+                         strtab_data.value());
+        symbol_id += num_symbols_parsed;
+        m_address_class_map.merge(address_class_map);
+      }
     }
   }
 



More information about the lldb-commits mailing list