[Lldb-commits] [lldb] [lldb][ELF] Return address class map from symbol table parsing methods (PR #91585)

David Spickett via lldb-commits lldb-commits at lists.llvm.org
Thu May 9 05:13:45 PDT 2024


https://github.com/DavidSpickett created https://github.com/llvm/llvm-project/pull/91585

Instead of updating the member of the ObjectFileELF instance. This means that if one object file asks another to parse the symbol table, that first object's address class map is not left empty, which would break Arm and MIPS targets that need this information.

This will fix the code added in
https://github.com/llvm/llvm-project/pull/90622 which broke the test `Expr/TestStringLiteralExpr.test` on 32 bit Arm Linux.

This happened because we had the program file, then asked for a better object file, which returned the same program file again. This creates a second ObjectFileELF for the same file, so when we tell the second instance to parse the symbol table it actually calls into the first instance, leaving the address class map of the second instance empty.

Which caused us to put an Arm breakpoint instuction at a Thumb return address and broke the ability to call mmap.

>From 34c5ac8292b34d96039c6c69326f3fceb5cc3499 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett at linaro.org>
Date: Thu, 9 May 2024 11:23:32 +0000
Subject: [PATCH] [lldb][ELF] Return address class map from symbol table
 parsing methods

Instead of updating the member of the ObjectFileELF instance.
This means that if one object file asks another to parse the symbol table,
that first object's address class map is not left empty, which would
break Arm and MIPS targets that need this information.

This will fix the code added in
https://github.com/llvm/llvm-project/pull/90622 which broke
the test `Expr/TestStringLiteralExpr.test` on 32 bit Arm Linux.

This happened because we had the program file, then asked for a better
object file, which returned the same program file again. This creates
a second ObjectFileELF for the same file, so when we tell the second
instance to parse the symbol table it actually calls into the first
instance, leaving the address class map of the second instance empty.

Which caused us to put an Arm breakpoint instuction at a Thumb
return address and broke the ability to call mmap.
---
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  | 68 +++++++++++--------
 .../Plugins/ObjectFile/ELF/ObjectFileELF.h    | 21 +++---
 2 files changed, 51 insertions(+), 38 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 1646ee9aa34a6..c216cc2d054c8 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -2060,13 +2060,14 @@ static char FindArmAarch64MappingSymbol(const char *symbol_name) {
 #define IS_MICROMIPS(ST_OTHER) (((ST_OTHER)&STO_MIPS_ISA) == STO_MICROMIPS)
 
 // private
-unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
-                                     SectionList *section_list,
-                                     const size_t num_symbols,
-                                     const DataExtractor &symtab_data,
-                                     const DataExtractor &strtab_data) {
+std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap>
+ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
+                            SectionList *section_list, const size_t num_symbols,
+                            const DataExtractor &symtab_data,
+                            const DataExtractor &strtab_data) {
   ELFSymbol symbol;
   lldb::offset_t offset = 0;
+  FileAddressToAddressClassMap address_class_map;
 
   static ConstString text_section_name(".text");
   static ConstString init_section_name(".init");
@@ -2213,18 +2214,18 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
             switch (mapping_symbol) {
             case 'a':
               // $a[.<any>]* - marks an ARM instruction sequence
-              m_address_class_map[symbol.st_value] = AddressClass::eCode;
+              address_class_map[symbol.st_value] = AddressClass::eCode;
               break;
             case 'b':
             case 't':
               // $b[.<any>]* - marks a THUMB BL instruction sequence
               // $t[.<any>]* - marks a THUMB instruction sequence
-              m_address_class_map[symbol.st_value] =
+              address_class_map[symbol.st_value] =
                   AddressClass::eCodeAlternateISA;
               break;
             case 'd':
               // $d[.<any>]* - marks a data item sequence (e.g. lit pool)
-              m_address_class_map[symbol.st_value] = AddressClass::eData;
+              address_class_map[symbol.st_value] = AddressClass::eData;
               break;
             }
           }
@@ -2238,11 +2239,11 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
             switch (mapping_symbol) {
             case 'x':
               // $x[.<any>]* - marks an A64 instruction sequence
-              m_address_class_map[symbol.st_value] = AddressClass::eCode;
+              address_class_map[symbol.st_value] = AddressClass::eCode;
               break;
             case 'd':
               // $d[.<any>]* - marks a data item sequence (e.g. lit pool)
-              m_address_class_map[symbol.st_value] = AddressClass::eData;
+              address_class_map[symbol.st_value] = AddressClass::eData;
               break;
             }
           }
@@ -2260,11 +2261,11 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
             // conjunction with symbol.st_value to produce the final
             // symbol_value that we store in the symtab.
             symbol_value_offset = -1;
-            m_address_class_map[symbol.st_value ^ 1] =
+            address_class_map[symbol.st_value ^ 1] =
                 AddressClass::eCodeAlternateISA;
           } else {
             // This address is ARM
-            m_address_class_map[symbol.st_value] = AddressClass::eCode;
+            address_class_map[symbol.st_value] = AddressClass::eCode;
           }
         }
       }
@@ -2285,17 +2286,17 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
       */
       if (arch.IsMIPS()) {
         if (IS_MICROMIPS(symbol.st_other))
-          m_address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
+          address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
         else if ((symbol.st_value & 1) && (symbol_type == eSymbolTypeCode)) {
           symbol.st_value = symbol.st_value & (~1ull);
-          m_address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
+          address_class_map[symbol.st_value] = AddressClass::eCodeAlternateISA;
         } else {
           if (symbol_type == eSymbolTypeCode)
-            m_address_class_map[symbol.st_value] = AddressClass::eCode;
+            address_class_map[symbol.st_value] = AddressClass::eCode;
           else if (symbol_type == eSymbolTypeData)
-            m_address_class_map[symbol.st_value] = AddressClass::eData;
+            address_class_map[symbol.st_value] = AddressClass::eData;
           else
-            m_address_class_map[symbol.st_value] = AddressClass::eUnknown;
+            address_class_map[symbol.st_value] = AddressClass::eUnknown;
         }
       }
     }
@@ -2392,24 +2393,27 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
       dc_symbol.SetIsWeak(true);
     symtab->AddSymbol(dc_symbol);
   }
-  return i;
+  return {i, address_class_map};
 }
 
-unsigned ObjectFileELF::ParseSymbolTable(Symtab *symbol_table,
-                                         user_id_t start_id,
-                                         lldb_private::Section *symtab) {
+std::pair<unsigned, ObjectFileELF::FileAddressToAddressClassMap>
+ObjectFileELF::ParseSymbolTable(Symtab *symbol_table, user_id_t start_id,
+                                lldb_private::Section *symtab) {
   if (symtab->GetObjectFile() != this) {
     // If the symbol table section is owned by a different object file, have it
     // do the parsing.
     ObjectFileELF *obj_file_elf =
         static_cast<ObjectFileELF *>(symtab->GetObjectFile());
-    return obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
+    auto [num_symbols, address_class_map] =
+        obj_file_elf->ParseSymbolTable(symbol_table, start_id, symtab);
+    m_address_class_map.merge(address_class_map);
+    return {num_symbols, address_class_map};
   }
 
   // Get section list for this object file.
   SectionList *section_list = m_sections_up.get();
   if (!section_list)
-    return 0;
+    return {};
 
   user_id_t symtab_id = symtab->GetID();
   const ELFSectionHeaderInfo *symtab_hdr = GetSectionHeaderByIndex(symtab_id);
@@ -2435,7 +2439,7 @@ unsigned ObjectFileELF::ParseSymbolTable(Symtab *symbol_table,
     }
   }
 
-  return 0;
+  return {0, {}};
 }
 
 size_t ObjectFileELF::ParseDynamicSymbols() {
@@ -2972,8 +2976,12 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
   // while the reverse is not necessarily true.
   Section *symtab =
       section_list->FindSectionByType(eSectionTypeELFSymbolTable, true).get();
-  if (symtab)
-    symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, symtab);
+  if (symtab) {
+    auto [num_symbols, address_class_map] =
+        ParseSymbolTable(&lldb_symtab, symbol_id, symtab);
+    m_address_class_map.merge(address_class_map);
+    symbol_id += num_symbols;
+  }
 
   // The symtab section is non-allocable and can be stripped, while the
   // .dynsym section which should always be always be there. To support the
@@ -2986,8 +2994,12 @@ void ObjectFileELF::ParseSymtab(Symtab &lldb_symtab) {
     Section *dynsym =
         section_list->FindSectionByType(eSectionTypeELFDynamicSymbols, true)
             .get();
-    if (dynsym)
-      symbol_id += ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
+    if (dynsym) {
+      auto [num_symbols, address_class_map] =
+          ParseSymbolTable(&lldb_symtab, symbol_id, dynsym);
+      symbol_id += num_symbols;
+      m_address_class_map.merge(address_class_map);
+    }
   }
 
   // DT_JMPREL
diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
index bc8e34981a9d8..716bbe01638f3 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.h
@@ -285,18 +285,19 @@ class ObjectFileELF : public lldb_private::ObjectFile {
 
   /// Populates the symbol table with all non-dynamic linker symbols.  This
   /// method will parse the symbols only once.  Returns the number of symbols
-  /// parsed.
-  unsigned ParseSymbolTable(lldb_private::Symtab *symbol_table,
-                            lldb::user_id_t start_id,
-                            lldb_private::Section *symtab);
+  /// parsed and a map of address types (used by targets like Arm that have
+  /// an alternative ISA mode like Thumb).
+  std::pair<unsigned, FileAddressToAddressClassMap>
+  ParseSymbolTable(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id,
+                   lldb_private::Section *symtab);
 
   /// Helper routine for ParseSymbolTable().
-  unsigned ParseSymbols(lldb_private::Symtab *symbol_table,
-                        lldb::user_id_t start_id,
-                        lldb_private::SectionList *section_list,
-                        const size_t num_symbols,
-                        const lldb_private::DataExtractor &symtab_data,
-                        const lldb_private::DataExtractor &strtab_data);
+  std::pair<unsigned, FileAddressToAddressClassMap>
+  ParseSymbols(lldb_private::Symtab *symbol_table, lldb::user_id_t start_id,
+               lldb_private::SectionList *section_list,
+               const size_t num_symbols,
+               const lldb_private::DataExtractor &symtab_data,
+               const lldb_private::DataExtractor &strtab_data);
 
   /// Scans the relocation entries and adds a set of artificial symbols to the
   /// given symbol table for each PLT slot.  Returns the number of symbols



More information about the lldb-commits mailing list