[Lldb-commits] [lldb] [LLDB][ELF] Fix section unification to not just use names. (PR #90099)

Alastair Houghton via lldb-commits lldb-commits at lists.llvm.org
Tue Apr 30 07:31:30 PDT 2024


https://github.com/al45tair updated https://github.com/llvm/llvm-project/pull/90099

>From ce54a7fb339a00029da266c9f518e344aac5d19e Mon Sep 17 00:00:00 2001
From: Alastair Houghton <ahoughton at apple.com>
Date: Thu, 25 Apr 2024 11:35:55 +0100
Subject: [PATCH 1/5] [LLDB][ELF] Fix section unification to not just use
 names.

Section unification cannot just use names, because it's valid for ELF
binaries to have multiple sections with the same name.  We should check
other section properties too.

rdar://124467787
---
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  | 64 +++++++++++++++----
 1 file changed, 53 insertions(+), 11 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 0d95a1c12bde35..60fc68c96bcc1c 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1854,6 +1854,49 @@ class VMAddressProvider {
 };
 }
 
+namespace {
+  // We have to do this because ELF doesn't have section IDs, and also
+  // doesn't require section names to be unique.  (We use the section index
+  // for section IDs, but that isn't guaranteed to be the same in separate
+  // debug images.)
+  SectionSP FindMatchingSection(const SectionList &section_list,
+                                SectionSP section) {
+    SectionSP sect_sp;
+
+    addr_t vm_addr = section->GetFileAddress();
+    ConstString name = section->GetName();
+    offset_t file_size = section->GetFileSize();
+    offset_t byte_size = section->GetByteSize();
+    SectionType type = section->GetType();
+    bool thread_specific = section->IsThreadSpecific();
+    uint32_t permissions = section->GetPermissions();
+    uint32_t alignment = section->GetLog2Align();
+
+    for (auto sect_iter = section_list.begin();
+         sect_iter != section_list.end();
+         ++sect_iter) {
+      if ((*sect_iter)->GetName() == name
+          && (*sect_iter)->GetType() == type
+          && (*sect_iter)->IsThreadSpecific() == thread_specific
+          && (*sect_iter)->GetPermissions() == permissions
+          && (*sect_iter)->GetFileSize() == file_size
+          && (*sect_iter)->GetByteSize() == byte_size
+          && (*sect_iter)->GetFileAddress() == vm_addr
+          && (*sect_iter)->GetLog2Align() == alignment) {
+        sect_sp = *sect_iter;
+        break;
+      } else {
+        sect_sp = FindMatchingSection((*sect_iter)->GetChildren(),
+                                      section);
+        if (sect_sp)
+          break;
+      }
+    }
+
+    return sect_sp;
+  }
+}
+
 void ObjectFileELF::CreateSections(SectionList &unified_section_list) {
   if (m_sections_up)
     return;
@@ -2067,10 +2110,8 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
   SectionList *module_section_list =
       module_sp ? module_sp->GetSectionList() : nullptr;
 
-  // Local cache to avoid doing a FindSectionByName for each symbol. The "const
-  // char*" key must came from a ConstString object so they can be compared by
-  // pointer
-  std::unordered_map<const char *, lldb::SectionSP> section_name_to_section;
+  // Cache the section mapping
+  std::unordered_map<lldb::SectionSP, lldb::SectionSP> section_map;
 
   unsigned i;
   for (i = 0; i < num_symbols; ++i) {
@@ -2275,14 +2316,15 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
 
     if (symbol_section_sp && module_section_list &&
         module_section_list != section_list) {
-      ConstString sect_name = symbol_section_sp->GetName();
-      auto section_it = section_name_to_section.find(sect_name.GetCString());
-      if (section_it == section_name_to_section.end())
+      auto section_it = section_map.find(symbol_section_sp);
+      if (section_it == section_map.end()) {
         section_it =
-            section_name_to_section
-                .emplace(sect_name.GetCString(),
-                         module_section_list->FindSectionByName(sect_name))
-                .first;
+          section_map
+              .emplace(symbol_section_sp,
+                       FindMatchingSection(*module_section_list,
+                                           symbol_section_sp))
+              .first;
+      }
       if (section_it->second)
         symbol_section_sp = section_it->second;
     }

>From 9653351729b4ef2d98faba936b8fa6fb51a9a47c Mon Sep 17 00:00:00 2001
From: Alastair Houghton <ahoughton at apple.com>
Date: Fri, 26 Apr 2024 14:53:20 +0100
Subject: [PATCH 2/5] [LLDB][ELF] Address review feedback, add test.

Fixed a couple of nits from review, and fixed up formatting.

Also added a test.

rdar://124467787
---
 .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp  |  86 +++++++++---------
 .../ELF/Inputs/two-text-sections.elf          | Bin 0 -> 4976 bytes
 .../ObjectFile/ELF/two-text-sections.test     |   8 ++
 3 files changed, 49 insertions(+), 45 deletions(-)
 create mode 100644 lldb/test/Shell/ObjectFile/ELF/Inputs/two-text-sections.elf
 create mode 100644 lldb/test/Shell/ObjectFile/ELF/two-text-sections.test

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 60fc68c96bcc1c..153aa0938f6942 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1854,47 +1854,40 @@ class VMAddressProvider {
 };
 }
 
-namespace {
-  // We have to do this because ELF doesn't have section IDs, and also
-  // doesn't require section names to be unique.  (We use the section index
-  // for section IDs, but that isn't guaranteed to be the same in separate
-  // debug images.)
-  SectionSP FindMatchingSection(const SectionList &section_list,
-                                SectionSP section) {
-    SectionSP sect_sp;
-
-    addr_t vm_addr = section->GetFileAddress();
-    ConstString name = section->GetName();
-    offset_t file_size = section->GetFileSize();
-    offset_t byte_size = section->GetByteSize();
-    SectionType type = section->GetType();
-    bool thread_specific = section->IsThreadSpecific();
-    uint32_t permissions = section->GetPermissions();
-    uint32_t alignment = section->GetLog2Align();
-
-    for (auto sect_iter = section_list.begin();
-         sect_iter != section_list.end();
-         ++sect_iter) {
-      if ((*sect_iter)->GetName() == name
-          && (*sect_iter)->GetType() == type
-          && (*sect_iter)->IsThreadSpecific() == thread_specific
-          && (*sect_iter)->GetPermissions() == permissions
-          && (*sect_iter)->GetFileSize() == file_size
-          && (*sect_iter)->GetByteSize() == byte_size
-          && (*sect_iter)->GetFileAddress() == vm_addr
-          && (*sect_iter)->GetLog2Align() == alignment) {
-        sect_sp = *sect_iter;
+// We have to do this because ELF doesn't have section IDs, and also
+// doesn't require section names to be unique.  (We use the section index
+// for section IDs, but that isn't guaranteed to be the same in separate
+// debug images.)
+static SectionSP FindMatchingSection(const SectionList &section_list,
+                                     SectionSP section) {
+  SectionSP sect_sp;
+
+  addr_t vm_addr = section->GetFileAddress();
+  ConstString name = section->GetName();
+  offset_t file_size = section->GetFileSize();
+  offset_t byte_size = section->GetByteSize();
+  SectionType type = section->GetType();
+  bool thread_specific = section->IsThreadSpecific();
+  uint32_t permissions = section->GetPermissions();
+  uint32_t alignment = section->GetLog2Align();
+
+  for (auto sect : section_list) {
+    if (sect->GetName() == name && sect->GetType() == type &&
+        sect->IsThreadSpecific() == thread_specific &&
+        sect->GetPermissions() == permissions &&
+        sect->GetFileSize() == file_size && sect->GetByteSize() == byte_size &&
+        sect->GetFileAddress() == vm_addr &&
+        sect->GetLog2Align() == alignment) {
+      sect_sp = sect;
+      break;
+    } else {
+      sect_sp = FindMatchingSection(sect->GetChildren(), section);
+      if (sect_sp)
         break;
-      } else {
-        sect_sp = FindMatchingSection((*sect_iter)->GetChildren(),
-                                      section);
-        if (sect_sp)
-          break;
-      }
     }
-
-    return sect_sp;
   }
+
+  return sect_sp;
 }
 
 void ObjectFileELF::CreateSections(SectionList &unified_section_list) {
@@ -2110,7 +2103,11 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
   SectionList *module_section_list =
       module_sp ? module_sp->GetSectionList() : nullptr;
 
-  // Cache the section mapping
+  // We might have debug information in a separate object, in which case
+  // we need to map the sections from that object to the sections in the
+  // main object during symbol lookup.  If we had to compare the sections
+  // for every single symbol, that would be expensive, so this map is
+  // used to accelerate the process.
   std::unordered_map<lldb::SectionSP, lldb::SectionSP> section_map;
 
   unsigned i;
@@ -2318,12 +2315,11 @@ unsigned ObjectFileELF::ParseSymbols(Symtab *symtab, user_id_t start_id,
         module_section_list != section_list) {
       auto section_it = section_map.find(symbol_section_sp);
       if (section_it == section_map.end()) {
-        section_it =
-          section_map
-              .emplace(symbol_section_sp,
-                       FindMatchingSection(*module_section_list,
-                                           symbol_section_sp))
-              .first;
+        section_it = section_map
+                         .emplace(symbol_section_sp,
+                                  FindMatchingSection(*module_section_list,
+                                                      symbol_section_sp))
+                         .first;
       }
       if (section_it->second)
         symbol_section_sp = section_it->second;
diff --git a/lldb/test/Shell/ObjectFile/ELF/Inputs/two-text-sections.elf b/lldb/test/Shell/ObjectFile/ELF/Inputs/two-text-sections.elf
new file mode 100644
index 0000000000000000000000000000000000000000..82cdde8fa9b5fdaa10b29dc085bf1d61b467c19d
GIT binary patch
literal 4976
zcmeHLOH0E*5T3?Y3kvn-L9n2B>(U;4T$F(LIEc3%C8RdCf=wXViamMu=1=KQDELRb
zsDHzm-PtB2hN7N?9Z0^JZyvk*wU_ME>E)SIsemyDjv<WzTu!vsO$Bj>;NZIkRaLu`
zrqFXa()huVL8xnj)>tN&W2n0nVeBd>ytuA&@%(=MTF90XKdmnvWD`~atAJI&Dqt0`
z3RnfK0#*U5fK|XMU={es3M}H at YxoeJUv;1#-S--8(cYhPCVfXxfywN9UpK5NaNsS+
zZy{fYg~Ip!P6^*C;bA!TZb#vbyo*BeBRL4-l<|VF2cFkW5-*W{EWrzUzVrcv3?3y2
zOn?X at 8Hj#35_H(+Ll7r4OeBLu#?tSiXK*}Ju^ypL_SYBbHoN;k-{?2t!Rk&VvxvDK
zF;v>$P}G!lo^ru1qk(+?5hiE`{u0{EeM`QO(^Q+a6%4BQ{I-7;duc|&c>T>>g8r9T
x+rz-g66`m)|Ak{(gZ4;!CEL&dO~l#WnIo8R|3QW$H-G+Z<i6|w<o=U6{a-LMN00yj

literal 0
HcmV?d00001

diff --git a/lldb/test/Shell/ObjectFile/ELF/two-text-sections.test b/lldb/test/Shell/ObjectFile/ELF/two-text-sections.test
new file mode 100644
index 00000000000000..4afc8c04e6548a
--- /dev/null
+++ b/lldb/test/Shell/ObjectFile/ELF/two-text-sections.test
@@ -0,0 +1,8 @@
+# Test symbol lookup when we have duplicate section names.
+#
+# (See https://github.com/llvm/llvm-project/issues/88001)
+
+# RUN: lldb-test symbols %S/Inputs/two-text-sections.elf
+
+# CHECK: 0x00000000004000b0 {{.*}} my_function
+# CHECK: 0x00000000004000e0 {{.*}} my_other_function

>From 9cbd30493cb32898f6f27ab70c0660aa5be20aed Mon Sep 17 00:00:00 2001
From: Alastair Houghton <ahoughton at apple.com>
Date: Fri, 26 Apr 2024 17:00:07 +0100
Subject: [PATCH 3/5] [LLDB][ELF] Don't match on file size.

As pointed out by @labath, if you use `objcopy --only-keep-debug`, only
placeholder sections will be present so the file sizes won't match.

We already ignore file offsets when doing the comparison.

rdar://124467787
---
 lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index 153aa0938f6942..f7928704d33a4d 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1864,7 +1864,6 @@ static SectionSP FindMatchingSection(const SectionList &section_list,
 
   addr_t vm_addr = section->GetFileAddress();
   ConstString name = section->GetName();
-  offset_t file_size = section->GetFileSize();
   offset_t byte_size = section->GetByteSize();
   SectionType type = section->GetType();
   bool thread_specific = section->IsThreadSpecific();
@@ -1875,8 +1874,7 @@ static SectionSP FindMatchingSection(const SectionList &section_list,
     if (sect->GetName() == name && sect->GetType() == type &&
         sect->IsThreadSpecific() == thread_specific &&
         sect->GetPermissions() == permissions &&
-        sect->GetFileSize() == file_size && sect->GetByteSize() == byte_size &&
-        sect->GetFileAddress() == vm_addr &&
+        sect->GetByteSize() == byte_size && sect->GetFileAddress() == vm_addr &&
         sect->GetLog2Align() == alignment) {
       sect_sp = sect;
       break;

>From 39c6fce1b2da887be89f193158adc7157df59447 Mon Sep 17 00:00:00 2001
From: Alastair Houghton <ahoughton at apple.com>
Date: Mon, 29 Apr 2024 12:07:12 +0100
Subject: [PATCH 4/5] [LLDB][ELF] Use `yaml2obj` for the test.

Rather than including a binary, use `yaml2obj` for the duplicate section
name test.

rdar://124467787
---
 .../ELF/Inputs/two-text-sections.elf          | Bin 4976 -> 0 bytes
 .../ObjectFile/ELF/two-text-sections.test     |   8 ---
 .../ObjectFile/ELF/two-text-sections.yaml     |  48 ++++++++++++++++++
 3 files changed, 48 insertions(+), 8 deletions(-)
 delete mode 100644 lldb/test/Shell/ObjectFile/ELF/Inputs/two-text-sections.elf
 delete mode 100644 lldb/test/Shell/ObjectFile/ELF/two-text-sections.test
 create mode 100644 lldb/test/Shell/ObjectFile/ELF/two-text-sections.yaml

diff --git a/lldb/test/Shell/ObjectFile/ELF/Inputs/two-text-sections.elf b/lldb/test/Shell/ObjectFile/ELF/Inputs/two-text-sections.elf
deleted file mode 100644
index 82cdde8fa9b5fdaa10b29dc085bf1d61b467c19d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 4976
zcmeHLOH0E*5T3?Y3kvn-L9n2B>(U;4T$F(LIEc3%C8RdCf=wXViamMu=1=KQDELRb
zsDHzm-PtB2hN7N?9Z0^JZyvk*wU_ME>E)SIsemyDjv<WzTu!vsO$Bj>;NZIkRaLu`
zrqFXa()huVL8xnj)>tN&W2n0nVeBd>ytuA&@%(=MTF90XKdmnvWD`~atAJI&Dqt0`
z3RnfK0#*U5fK|XMU={es3M}H at YxoeJUv;1#-S--8(cYhPCVfXxfywN9UpK5NaNsS+
zZy{fYg~Ip!P6^*C;bA!TZb#vbyo*BeBRL4-l<|VF2cFkW5-*W{EWrzUzVrcv3?3y2
zOn?X at 8Hj#35_H(+Ll7r4OeBLu#?tSiXK*}Ju^ypL_SYBbHoN;k-{?2t!Rk&VvxvDK
zF;v>$P}G!lo^ru1qk(+?5hiE`{u0{EeM`QO(^Q+a6%4BQ{I-7;duc|&c>T>>g8r9T
x+rz-g66`m)|Ak{(gZ4;!CEL&dO~l#WnIo8R|3QW$H-G+Z<i6|w<o=U6{a-LMN00yj

diff --git a/lldb/test/Shell/ObjectFile/ELF/two-text-sections.test b/lldb/test/Shell/ObjectFile/ELF/two-text-sections.test
deleted file mode 100644
index 4afc8c04e6548a..00000000000000
--- a/lldb/test/Shell/ObjectFile/ELF/two-text-sections.test
+++ /dev/null
@@ -1,8 +0,0 @@
-# Test symbol lookup when we have duplicate section names.
-#
-# (See https://github.com/llvm/llvm-project/issues/88001)
-
-# RUN: lldb-test symbols %S/Inputs/two-text-sections.elf
-
-# CHECK: 0x00000000004000b0 {{.*}} my_function
-# CHECK: 0x00000000004000e0 {{.*}} my_other_function
diff --git a/lldb/test/Shell/ObjectFile/ELF/two-text-sections.yaml b/lldb/test/Shell/ObjectFile/ELF/two-text-sections.yaml
new file mode 100644
index 00000000000000..8b2fd47df1a1fa
--- /dev/null
+++ b/lldb/test/Shell/ObjectFile/ELF/two-text-sections.yaml
@@ -0,0 +1,48 @@
+# Test handling of object files that have duplicate sections.  This is legal,
+# according to the System V ABI (Edition 4.1); see 4-20 where it says:
+#
+#   Section names with a dot (.) prefix are reserved for the system,
+#   although applications may use these sections if their existing
+#   meanings are satisfactory. ... **An object file may have more than
+#   one section with the same name.**
+#
+# (See https://github.com/llvm/llvm-project/issues/88001)
+
+# RUN: yaml2obj %s -o %t
+# RUN: lldb-test symbols %t | FileCheck %s
+
+# CHECK: 0x0000000000400010 {{.*}} my_function
+# CHECK: 0x0000000000401020 {{.*}} my_other_function
+
+!ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+ProgramHeaders:
+  - Type:            PT_LOAD
+    Flags:           [ PF_X, PF_R ]
+    FirstSec:        .text
+    LastSec:         '.text (1)'
+    VAddr:           0x400000
+    Align:           0x1000
+    Offset:          0x0
+Sections:
+  - Name:            .text
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
+    Address:         0x400010
+    AddressAlign:    0x10
+  - Name:            '.text (1)'
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC, SHF_EXECINSTR, SHF_GNU_RETAIN ]
+    Address:         0x401000
+    AddressAlign:    0x10
+Symbols:
+  - Name:            my_function
+    Section:         .text
+    Value:           0x400010
+  - Name:            my_other_function
+    Section:         '.text (1)'
+    Value:           0x401020

>From 5e287416370d0673030cffc8f723e7eaadd191cc Mon Sep 17 00:00:00 2001
From: Alastair Houghton <ahoughton at apple.com>
Date: Mon, 29 Apr 2024 15:32:27 +0100
Subject: [PATCH 5/5] [LLDB][ELF] Also ignore section type when matching.

The section type can change when using `objcopy --only-keep-debug`,
apparently.

rdar://124467787
---
 lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
index f7928704d33a4d..16f6d2e884b577 100644
--- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
+++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp
@@ -1865,13 +1865,12 @@ static SectionSP FindMatchingSection(const SectionList &section_list,
   addr_t vm_addr = section->GetFileAddress();
   ConstString name = section->GetName();
   offset_t byte_size = section->GetByteSize();
-  SectionType type = section->GetType();
   bool thread_specific = section->IsThreadSpecific();
   uint32_t permissions = section->GetPermissions();
   uint32_t alignment = section->GetLog2Align();
 
   for (auto sect : section_list) {
-    if (sect->GetName() == name && sect->GetType() == type &&
+    if (sect->GetName() == name &&
         sect->IsThreadSpecific() == thread_specific &&
         sect->GetPermissions() == permissions &&
         sect->GetByteSize() == byte_size && sect->GetFileAddress() == vm_addr &&



More information about the lldb-commits mailing list