[Lldb-commits] [lldb] Add support for reading the dynamic symbol table from PT_DYNAMIC (PR #112596)
via lldb-commits
lldb-commits at lists.llvm.org
Wed Oct 16 11:43:13 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: Greg Clayton (clayborg)
<details>
<summary>Changes</summary>
Allow LLDB to parse the dynamic symbol table from an ELF file or memory image in an ELF file that has no section headers. This patch uses the ability to parse the PT_DYNAMIC segment and find the DT_SYMTAB, DT_SYMENT, DT_HASH or DT_GNU_HASH to find and parse the dynamic symbol table if the section headers are not present. It also adds a helper function to read data from a .dynamic key/value pair entry correctly from the file or from memory.
---
Patch is 33.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112596.diff
3 Files Affected:
- (modified) lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp (+146-17)
- (modified) lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h (+34)
- (added) lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml (+631)
``````````diff
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 10d09662c0a47a..7374ac10a1e27a 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -44,6 +44,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/MipsABIFlags.h"
+#include "lldb/Target/Process.h"
+
#define CASE_AND_STREAM(s, def, width) \
case def: \
@@ -2990,18 +2992,34 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
// section, nomatter if .symtab was already parsed or not. This is because
// minidebuginfo normally removes the .symtab symbols which have their
// matching .dynsym counterparts.
+ bool found_dynsym = false;
if (!symtab ||
GetSectionList()->FindSectionByName(ConstString(".gnu_debugdata"))) {
Section *dynsym =
section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
.get();
if (dynsym) {
+ found_dynsym = true;
auto [num_symbols, address_class_map] =
ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
symbol_id += num_symbols;
m_address_class_map.merge(address_class_map);
}
}
+ if (!found_dynsym) {
+ // Try and read the dynamic symbol table from the .dynamic section.
+ uint32_t num_symbols = 0;
+ std::optional<DataExtractor> symtab_data =
+ GetDynsymDataFromDynamic(num_symbols);
+ std::optional<DataExtractor> strtab_data = GetDynstrData();
+ if (symtab_data && strtab_data) {
+ auto [num_symbols_parsed, address_class_map] =
+ ParseSymbols(&lldb_symtab, symbol_id, section_list, num_symbols,
+ symtab_data.value(), strtab_data.value());
+ symbol_id += num_symbols_parsed;
+ m_address_class_map.merge(address_class_map);
+ }
+ }
// DT_JMPREL
// If present, this entry's d_ptr member holds the address of
@@ -3811,6 +3829,33 @@ ObjectFileELF::MapFileDataWritable(const FileSpec &file, uint64_t Size,
Offset);
}
+std::optional<DataExtractor>
+ObjectFileELF::ReadDataFromDynamic(const ELFDynamic *dyn, uint64_t length,
+ uint64_t offset) {
+ // ELFDynamic values contain a "d_ptr" member that will be a load address if
+ // we have an ELF file read from memory, or it will be a file address if it
+ // was read from an ELF file. This function will correctly fetch data pointed
+ // to by the ELFDynamic::d_ptr, or return std::nullopt if the data isn't
+ // available.
+ const lldb::addr_t d_ptr_addr = dyn->d_ptr + offset;
+ if (ProcessSP process_sp = m_process_wp.lock()) {
+ if (DataBufferSP data_sp = ReadMemory(process_sp, d_ptr_addr, length))
+ return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
+ } else {
+ // We have an ELF file with no section headers or we didn't find the
+ // .dynamic section. Try and find the .dynstr section.
+ Address addr;
+ if (!addr.ResolveAddressUsingFileSections(d_ptr_addr, GetSectionList()))
+ return std::nullopt;
+ DataExtractor data;
+ addr.GetSection()->GetSectionData(data);
+ return DataExtractor(data,
+ d_ptr_addr - addr.GetSection()->GetFileAddress(),
+ length);
+ }
+ return std::nullopt;
+}
+
std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
if (SectionList *section_list = GetSectionList()) {
// Find the SHT_DYNAMIC section.
@@ -3846,23 +3891,7 @@ std::optional<DataExtractor> ObjectFileELF::GetDynstrData() {
if (strtab == nullptr || strsz == nullptr)
return std::nullopt;
- if (ProcessSP process_sp = m_process_wp.lock()) {
- if (DataBufferSP data_sp =
- ReadMemory(process_sp, strtab->d_ptr, strsz->d_val))
- return DataExtractor(data_sp, GetByteOrder(), GetAddressByteSize());
- } else {
- // We have an ELF file with no section headers or we didn't find the
- // .dynamic section. Try and find the .dynstr section.
- Address addr;
- if (addr.ResolveAddressUsingFileSections(strtab->d_ptr, GetSectionList())) {
- DataExtractor data;
- addr.GetSection()->GetSectionData(data);
- return DataExtractor(data,
- strtab->d_ptr - addr.GetSection()->GetFileAddress(),
- strsz->d_val);
- }
- }
- return std::nullopt;
+ return ReadDataFromDynamic(strtab, strsz->d_val, /*offset=*/0);
}
std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
@@ -3895,3 +3924,103 @@ std::optional<lldb_private::DataExtractor> ObjectFileELF::GetDynamicData() {
}
return std::nullopt;
}
+
+
+std::optional<DataExtractor>
+ObjectFileELF::GetDynsymDataFromDynamic(uint32_t &num_symbols) {
+ // Every ELF file which represents an executable or shared library has
+ // mandatory .dynamic entries. The DT_SYMTAB value contains a pointer to the
+ // symbol table, and DT_SYMENT contains the size of a symbol table entry.
+ // We then can use either the DT_HASH or DT_GNU_HASH to find the number of
+ // symbols in the symbol table as the symbol count is not stored in the
+ // .dynamic section as a key/value pair.
+ //
+ // When loading an ELF file from memory, only the program headers end up
+ // being mapped into memory, and we can find these values in the PT_DYNAMIC
+ // segment.
+ num_symbols = 0;
+ // Get the process in case this is an in memory ELF file.
+ ProcessSP process_sp(m_process_wp.lock());
+ const ELFDynamic *symtab = FindDynamicSymbol(DT_SYMTAB);
+ const ELFDynamic *syment = FindDynamicSymbol(DT_SYMENT);
+ const ELFDynamic *hash = FindDynamicSymbol(DT_HASH);
+ const ELFDynamic *gnu_hash = FindDynamicSymbol(DT_GNU_HASH);
+ // DT_SYMTAB and DT_SYMENT are mandatory.
+ if (symtab == nullptr || syment == nullptr)
+ return std::nullopt;
+ // We must have either a DT_HASH or a DT_GNU_HASH.
+ if (hash == nullptr && gnu_hash == nullptr)
+ return std::nullopt;
+ // The number of symbols in the symbol table is the number of entries in the
+ // symbol table divided by the size of each symbol table entry.
+ // We must figure out the number of symbols in the symbol table using the
+ // DT_HASH or the DT_GNU_HASH as the number of symbols isn't stored anywhere
+ // in the .dynamic section.
+
+ lldb::offset_t offset;
+ if (hash) {
+ // The DT_HASH header contains the number of symbols in the "nchain"
+ // member. The header looks like this:
+ // struct DT_HASH_HEADER {
+ // uint32_t nbucket;
+ // uint32_t nchain;
+ // };
+ if (auto data = ReadDataFromDynamic(hash, 8)) {
+ offset = 4;
+ num_symbols = data->GetU32(&offset);
+ }
+ }
+ if (num_symbols == 0 && gnu_hash) {
+ struct DT_GNU_HASH_HEADER {
+ uint32_t nbuckets = 0;
+ uint32_t symoffset = 0;
+ uint32_t bloom_size = 0;
+ uint32_t bloom_shift = 0;
+ };
+ if (auto data = ReadDataFromDynamic(gnu_hash, sizeof(DT_GNU_HASH_HEADER))) {
+ offset = 0;
+ DT_GNU_HASH_HEADER header;
+ header.nbuckets = data->GetU32(&offset);
+ header.symoffset = data->GetU32(&offset);
+ header.bloom_size = data->GetU32(&offset);
+ header.bloom_shift = data->GetU32(&offset);
+ const size_t addr_size = GetAddressByteSize();
+ const addr_t buckets_offset =
+ sizeof(DT_GNU_HASH_HEADER) + addr_size * header.bloom_size;
+ std::vector<uint32_t> buckets;
+ if (auto bucket_data = ReadDataFromDynamic(gnu_hash, header.nbuckets * 4, buckets_offset)) {
+ offset = 0;
+ for (uint32_t i = 0; i < header.nbuckets; ++i)
+ buckets.push_back(bucket_data->GetU32(&offset));
+ // Locate the chain that handles the largest index bucket.
+ uint32_t last_symbol = 0;
+ for (uint32_t bucket_value : buckets)
+ last_symbol = std::max(bucket_value, last_symbol);
+ if (last_symbol < header.symoffset) {
+ num_symbols = header.symoffset;
+ } else {
+ // Walk the bucket's chain to add the chain length to the total.
+ const addr_t chains_base_offset = buckets_offset + header.nbuckets * 4;
+ for (;;) {
+ if (auto chain_entry_data = ReadDataFromDynamic(gnu_hash, 4, chains_base_offset + (last_symbol - header.symoffset) * 4)) {
+ offset = 0;
+ uint32_t chain_entry = chain_entry_data->GetU32(&offset);
+ ++last_symbol;
+ // If the low bit is set, this entry is the end of the chain.
+ if (chain_entry & 1)
+ break;
+ } else {
+ break;
+ }
+ }
+ num_symbols = last_symbol;
+ }
+ }
+ }
+ if (num_symbols > 0)
+ ++num_symbols; // First symbol is always all zeros
+ }
+ if (num_symbols == 0)
+ return std::nullopt;
+ return ReadDataFromDynamic(symtab, syment->d_val * num_symbols);
+}
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index aba3a5bfcbf5b6..34d9ae74fbb23f 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -435,6 +435,40 @@ class ObjectFileELF : public lldb_private::ObjectFile {
/// \return The bytes that represent the string table data or \c std::nullopt
/// if an error occured.
std::optional<lldb_private::DataExtractor> GetDynstrData();
+
+ /// Read the bytes pointed to by the \a dyn dynamic entry.
+ ///
+ /// ELFDynamic::d_ptr values contain file addresses if we load the ELF file
+ /// from a file on disk, or they contain load addresses if they were read
+ /// from memory. This function will correctly extract the data in both cases
+ /// if it is available.
+ ///
+ /// \param[in] dyn The dynamic entry to use to fetch the data from.
+ ///
+ /// \param[in] length The number of bytes to read.
+ ///
+ /// \param[in] offset The number of bytes to skip after the d_ptr value
+ /// before reading data.
+ ///
+ /// \return The bytes that represent the dynamic entry's data or
+ /// \c std::nullopt if an error occurred or the data is not available.
+ std::optional<lldb_private::DataExtractor>
+ ReadDataFromDynamic(const elf::ELFDynamic *dyn, uint64_t length,
+ uint64_t offset = 0);
+
+ /// Get the bytes that represent the dynamic symbol table from the .dynamic
+ /// section from process memory.
+ ///
+ /// This function uses the DT_SYMTAB value from the .dynamic section to read
+ /// the symbols table data from process memory. The number of symbols in the
+ /// symbol table is calculated by looking at the DT_HASH or DT_GNU_HASH
+ /// values as the symbol count isn't stored in the .dynamic section.
+ ///
+ /// \return The bytes that represent the symbol table data from the .dynamic
+ /// section or section headers or \c std::nullopt if an error
+ /// occurred or if there is no dynamic symbol data available.
+ std::optional<lldb_private::DataExtractor>
+ GetDynsymDataFromDynamic(uint32_t &num_symbols);
};
#endif // LLDB_SOURCE_PLUGINS_OBJECTFILE_ELF_OBJECTFILEELF_H
diff --git a/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml b/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml
new file mode 100644
index 00000000000000..2763aac1df4893
--- /dev/null
+++ b/lldb/test/Shell/ObjectFile/ELF/elf-dynsym.yaml
@@ -0,0 +1,631 @@
+## This test verifies that loading an ELF file that has no section headers can
+## load the dynamic symbol table using the DT_SYMTAB, DT_SYMENT, DT_HASH or
+## the DT_GNU_HASH .dynamic key/value pairs that are loaded via the PT_DYNAMIC
+## segment.
+##
+## This test will convert a shared library from yaml, strip its section headers,
+## and verify that LLDB can load the dynamic symbol table. We must manually
+## strip the section headers from a full shared library because our ELF YAML
+## support in obj2yaml/yaml2obj doesn't support ELF files with program headers
+## only, they must have sections or the file doesn't get recreated correctly.
+
+# RUN: yaml2obj %s -o %t
+# RUN: llvm-strip --strip-sections %t -o %t.noshdrs
+
+# RUN: %lldb -b \
+# RUN: -o "target create -d '%t.noshdrs'" \
+# RUN: -o "image dump objfile" \
+# RUN: | FileCheck %s --dump-input=always
+# CHECK: (lldb) image dump objfile
+# CHECK: Dumping headers for 1 module(s).
+# CHECK: ObjectFileELF, file =
+# CHECK: ELF Header
+# Make sure there are no section headers
+# CHECK: e_shnum = 0x00000000
+# Make sure we were able to load the symbols
+# CHECK: elf-dynsym.yaml.tmp.noshdrs, num_symbols = 9:
+# CHECK: [ 0] 1 Undefined 0x0000000000000000 0x0000000000000000 0x00000022 __cxa_finalize
+# CHECK: [ 1] 2 X Undefined 0x0000000000000000 0x0000000000000000 0x00000012 puts
+# CHECK: [ 2] 3 Undefined 0x0000000000000000 0x0000000000000000 0x00000020 _ITM_deregisterTMCloneTable
+# CHECK: [ 3] 4 Undefined 0x0000000000000000 0x0000000000000000 0x00000020 __gmon_start__
+# CHECK: [ 4] 5 Undefined 0x0000000000000000 0x0000000000000000 0x00000020 _ITM_registerTMCloneTable
+# CHECK: [ 5] 6 X Code 0x0000000000001135 0x0000000000000016 0x00000012 baz()
+# CHECK: [ 6] 7 X Code 0x000000000000111f 0x0000000000000016 0x00000012 bar()
+# CHECK: [ 7] 8 X Code 0x000000000000114b 0x0000000000000016 0x00000012 biz()
+# CHECK: [ 8] 9 X Code 0x0000000000001109 0x0000000000000016 0x00000012 foo()
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ Type: ET_DYN
+ Machine: EM_X86_64
+ Entry: 0x1050
+ProgramHeaders:
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .note.gnu.build-id
+ LastSec: .rela.plt
+ Align: 0x1000
+ Offset: 0x0
+ - Type: PT_LOAD
+ Flags: [ PF_X, PF_R ]
+ FirstSec: .init
+ LastSec: .fini
+ VAddr: 0x1000
+ Align: 0x1000
+ Offset: 0x1000
+ - Type: PT_LOAD
+ Flags: [ PF_R ]
+ FirstSec: .rodata
+ LastSec: .eh_frame
+ VAddr: 0x2000
+ Align: 0x1000
+ Offset: 0x2000
+ - Type: PT_LOAD
+ Flags: [ PF_W, PF_R ]
+ FirstSec: .init_array
+ LastSec: .bss
+ VAddr: 0x3DC8
+ Align: 0x1000
+ Offset: 0x2DC8
+ - Type: PT_DYNAMIC
+ Flags: [ PF_W, PF_R ]
+ FirstSec: .dynamic
+ LastSec: .dynamic
+ VAddr: 0x3DE0
+ Align: 0x8
+ Offset: 0x2DE0
+ - Type: PT_NOTE
+ Flags: [ PF_R ]
+ FirstSec: .note.gnu.build-id
+ LastSec: .note.gnu.build-id
+ VAddr: 0x238
+ Align: 0x4
+ Offset: 0x238
+ - Type: PT_GNU_EH_FRAME
+ Flags: [ PF_R ]
+ FirstSec: .eh_frame_hdr
+ LastSec: .eh_frame_hdr
+ VAddr: 0x202C
+ Align: 0x4
+ Offset: 0x202C
+ - Type: PT_GNU_STACK
+ Flags: [ PF_W, PF_R ]
+ Align: 0x10
+ Offset: 0x0
+ - Type: PT_GNU_RELRO
+ Flags: [ PF_R ]
+ FirstSec: .init_array
+ LastSec: .got
+ VAddr: 0x3DC8
+ Offset: 0x2DC8
+Sections:
+ - Name: .note.gnu.build-id
+ Type: SHT_NOTE
+ Flags: [ SHF_ALLOC ]
+ Address: 0x238
+ AddressAlign: 0x4
+ Notes:
+ - Name: GNU
+ Desc: E98A07D11FFBEC0C57492B71EEE529C65D183408
+ Type: NT_PRPSINFO
+ - Name: .gnu.hash
+ Type: SHT_GNU_HASH
+ Flags: [ SHF_ALLOC ]
+ Address: 0x260
+ Link: .dynsym
+ AddressAlign: 0x8
+ Header:
+ SymNdx: 0x6
+ Shift2: 0x6
+ BloomFilter: [ 0x3021080800001010 ]
+ HashBuckets: [ 0x0, 0x6, 0x0 ]
+ HashValues: [ 0x6A5EBD44, 0x6A5EBC3C, 0x6A5EDF4C, 0x6A6128EB ]
+ - Name: .dynsym
+ Type: SHT_DYNSYM
+ Flags: [ SHF_ALLOC ]
+ Address: 0x298
+ Link: .dynstr
+ AddressAlign: 0x8
+ - Name: .dynstr
+ Type: SHT_STRTAB
+ Flags: [ SHF_ALLOC ]
+ Address: 0x388
+ AddressAlign: 0x1
+ - Name: .gnu.version
+ Type: SHT_GNU_versym
+ Flags: [ SHF_ALLOC ]
+ Address: 0x44E
+ Link: .dynsym
+ AddressAlign: 0x2
+ Entries: [ 0, 2, 2, 0, 0, 0, 1, 1, 1, 1 ]
+ - Name: .gnu.version_r
+ Type: SHT_GNU_verneed
+ Flags: [ SHF_ALLOC ]
+ Address: 0x468
+ Link: .dynstr
+ AddressAlign: 0x8
+ Dependencies:
+ - Version: 1
+ File: libc.so.6
+ Entries:
+ - Name: GLIBC_2.2.5
+ Hash: 157882997
+ Flags: 0
+ Other: 2
+ - Name: .rela.dyn
+ Type: SHT_RELA
+ Flags: [ SHF_ALLOC ]
+ Address: 0x488
+ Link: .dynsym
+ AddressAlign: 0x8
+ Relocations:
+ - Offset: 0x3DC8
+ Type: R_X86_64_RELATIVE
+ Addend: 4352
+ - Offset: 0x3DD0
+ Type: R_X86_64_RELATIVE
+ Addend: 4288
+ - Offset: 0x3DD8
+ Type: R_X86_64_RELATIVE
+ Addend: 15832
+ - Offset: 0x3FE0
+ Symbol: __cxa_finalize
+ Type: R_X86_64_GLOB_DAT
+ - Offset: 0x3FE8
+ Symbol: _ITM_deregisterTMCloneTable
+ Type: R_X86_64_GLOB_DAT
+ - Offset: 0x3FF0
+ Symbol: __gmon_start__
+ Type: R_X86_64_GLOB_DAT
+ - Offset: 0x3FF8
+ Symbol: _ITM_registerTMCloneTable
+ Type: R_X86_64_GLOB_DAT
+ - Name: .rela.plt
+ Type: SHT_RELA
+ Flags: [ SHF_ALLOC, SHF_INFO_LINK ]
+ Address: 0x530
+ Link: .dynsym
+ AddressAlign: 0x8
+ Info: .got.plt
+ Relocations:
+ - Offset: 0x4018
+ Symbol: __cxa_finalize
+ Type: R_X86_64_JUMP_SLOT
+ - Offset: 0x4020
+ Symbol: puts
+ Type: R_X86_64_JUMP_SLOT
+ - Name: .init
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1000
+ AddressAlign: 0x4
+ Offset: 0x1000
+ Content: F30F1EFA4883EC08488B05E12F00004885C07402FFD04883C408C3
+ - Name: .plt
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1020
+ AddressAlign: 0x10
+ EntSize: 0x10
+ Content: FF35E22F0000FF25E42F00000F1F4000FF25E22F00006800000000E9E0FFFFFFFF25DA2F00006801000000E9D0FFFFFF
+ - Name: .text
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1050
+ AddressAlign: 0x10
+ Content: 488D3DD12F0000488D05CA2F00004839F87415488B057E2F00004885C07409FFE00F1F8000000000C30F1F8000000000488D3DA12F0000488D359A2F00004829FE4889F048C1EE3F48C1F8034801C648D1FE7414488B054D2F00004885C07408FFE0660F1F440000C30F1F8000000000F30F1EFA803D5D2F000000752B5548833D0A2F0000004889E5740C488D3DF62C0000E849FFFFFFE864FFFFFFC605352F0000015DC30F1F00C30F1F8000000000F30F1EFAE977FFFFFF554889E5488D05EC0E00004889C7E824FFFFFF905DC3554889E5488D05E10E00004889C7E80EFFFFFF905DC3554889E5488D05D60E00004889C7E8F8FEFFFF905DC3554889E5488D05CB0E00004889C7E8E2FEFFFF905DC3
+ - Name: .fini
+ Type: SHT_PROGBITS
+ Flags: [ SHF_ALLOC, SHF_EXECINSTR ]
+ Address: 0x1164
+ AddressAlign: 0x4
+ Content: F30F1EFA4883EC084883C408C3
+ - Name: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/112596
More information about the lldb-commits
mailing list