[Lldb-commits] [lldb] [LLDB][NativePDB] Estimate symbol sizes (PR #165727)

via lldb-commits lldb-commits at lists.llvm.org
Thu Oct 30 09:33:58 PDT 2025


https://github.com/Nerixyz updated https://github.com/llvm/llvm-project/pull/165727

>From 5a71656da61510bda407327bbf8a5b6585006c0e Mon Sep 17 00:00:00 2001
From: Nerixyz <nerixdev at outlook.de>
Date: Wed, 29 Oct 2025 21:58:44 +0100
Subject: [PATCH 1/2] [LLDB][NativePDB] Estimate symbol sizes

---
 .../NativePDB/SymbolFileNativePDB.cpp         | 67 ++++++++++++++-----
 .../multiple-slides/TestMultipleSlides.py     |  7 +-
 .../Shell/SymbolFile/NativePDB/symtab.cpp     | 30 ++++-----
 3 files changed, 69 insertions(+), 35 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index e76b7a3cf274a..be5cb1a619803 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -1130,7 +1130,35 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
   if (!section_list)
     return;
 
-  for (auto pid : m_index->publics().getPublicsTable()) {
+  PublicSym32 last_sym;
+  size_t last_sym_idx = 0;
+  lldb::SectionSP section_sp;
+
+  // To estimate the size of a symbol, we use the difference to the next symbol.
+  // If there's no next symbol or the section/segment changed, the symbol will
+  // take the remaining space. The estimate can be too high in case there's
+  // padding between symbols. This is similar to the algorithm used by the DIA
+  // SDK.
+  auto finish_last_symbol = [&](const PublicSym32 *next) {
+    if (!section_sp)
+      return;
+    Symbol *last = symtab.SymbolAtIndex(last_sym_idx);
+    if (!last)
+      return;
+
+    if (next && last_sym.Segment == next->Segment) {
+      assert(last_sym.Offset <= next->Offset);
+      last->SetByteSize(next->Offset - last_sym.Offset);
+    } else {
+      // The last symbol was the last in its section.
+      assert(section_sp->GetByteSize() >= last_sym.Offset);
+      assert(!next || next->Segment > last_sym.Segment);
+      last->SetByteSize(section_sp->GetByteSize() - last_sym.Offset);
+    }
+  };
+
+  // the address map is sorted by the address of a symbol
+  for (auto pid : m_index->publics().getAddressMap()) {
     PdbGlobalSymId global{pid, true};
     CVSymbol sym = m_index->ReadSymbolRecord(global);
     auto kind = sym.kind();
@@ -1138,8 +1166,11 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
       continue;
     PublicSym32 pub =
         llvm::cantFail(SymbolDeserializer::deserializeAs<PublicSym32>(sym));
+    finish_last_symbol(&pub);
+
+    if (!section_sp || last_sym.Segment != pub.Segment)
+      section_sp = section_list->FindSectionByID(pub.Segment);
 
-    auto section_sp = section_list->FindSectionByID(pub.Segment);
     if (!section_sp)
       continue;
 
@@ -1148,20 +1179,24 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
         (pub.Flags & PublicSymFlags::Code) != PublicSymFlags::None)
       type = eSymbolTypeCode;
 
-    symtab.AddSymbol(Symbol(/*symID=*/pid,
-                            /*name=*/pub.Name,
-                            /*type=*/type,
-                            /*external=*/true,
-                            /*is_debug=*/true,
-                            /*is_trampoline=*/false,
-                            /*is_artificial=*/false,
-                            /*section_sp=*/section_sp,
-                            /*value=*/pub.Offset,
-                            /*size=*/0,
-                            /*size_is_valid=*/false,
-                            /*contains_linker_annotations=*/false,
-                            /*flags=*/0));
-  }
+    last_sym_idx =
+        symtab.AddSymbol(Symbol(/*symID=*/pid,
+                                /*name=*/pub.Name,
+                                /*type=*/type,
+                                /*external=*/true,
+                                /*is_debug=*/true,
+                                /*is_trampoline=*/false,
+                                /*is_artificial=*/false,
+                                /*section_sp=*/section_sp,
+                                /*value=*/pub.Offset,
+                                /*size=*/0,
+                                /*size_is_valid=*/false,
+                                /*contains_linker_annotations=*/false,
+                                /*flags=*/0));
+    last_sym = pub;
+  }
+
+  finish_last_symbol(nullptr);
 }
 
 size_t SymbolFileNativePDB::ParseFunctions(CompileUnit &comp_unit) {
diff --git a/lldb/test/API/functionalities/multiple-slides/TestMultipleSlides.py b/lldb/test/API/functionalities/multiple-slides/TestMultipleSlides.py
index 7fd2ff4229004..5fd2b767a6237 100644
--- a/lldb/test/API/functionalities/multiple-slides/TestMultipleSlides.py
+++ b/lldb/test/API/functionalities/multiple-slides/TestMultipleSlides.py
@@ -12,10 +12,6 @@
 class MultipleSlidesTestCase(TestBase):
     NO_DEBUG_INFO_TESTCASE = True
 
-    # The intermediate object main.o is compiled without debug info, but
-    # a.out is linked with `-gdwarf` on Windows. This creates a PDB.
-    # However, in the native PDB plugin, the symbols don't have a size.
-    @expectedFailureWindows
     def test_mulitple_slides(self):
         """Test that a binary can be slid multiple times correctly."""
         self.build()
@@ -33,10 +29,13 @@ def test_mulitple_slides(self):
             first_sym.GetEndAddress().GetOffset()
             - first_sym.GetStartAddress().GetOffset()
         )
+        int_size = target.FindFirstType("int").GetByteSize()
+        self.assertGreaterEqual(first_size, 2048 * int_size)
         second_size = (
             second_sym.GetEndAddress().GetOffset()
             - second_sym.GetStartAddress().GetOffset()
         )
+        self.assertGreaterEqual(second_size, 2048 * int_size)
 
         # View the first element of `first` and `second` while
         # they have no load address set.
diff --git a/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp b/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp
index beb5ae2f90256..75c59c560fad9 100644
--- a/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp
+++ b/lldb/test/Shell/SymbolFile/NativePDB/symtab.cpp
@@ -42,18 +42,18 @@ int main(int argc, char **argv) {
   return ns::a_function() + b.b_func();
 }
 
-// CHECK-DAG: Code {{.*}} main
-// CHECK-DAG: Code {{.*}} ?b_func@?$B@F@ns@@QEBAHXZ
-// CHECK-DAG: Code {{.*}} ?something@A@@QEAAXXZ
-// CHECK-DAG: Code {{.*}} ??_GDyn@ns@@UEAAPEAXI@Z
-// CHECK-DAG: Code {{.*}} ??2@YAPEAX_K@Z
-// CHECK-DAG: Code {{.*}} ??3@YAXPEAX_K@Z
-// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@H@ns@@SAHXZ
-// CHECK-DAG: Code {{.*}} ?a_function@ns@@YAHXZ
-// CHECK-DAG: Code {{.*}} ?static_fn@C@?$B@_N@ns@@SAHXZ
-// CHECK-DAG: Code {{.*}} ??1Dyn@ns@@UEAA@XZ
-// CHECK-DAG: Code {{.*}} ??0Dyn@ns@@QEAA@XZ
-// CHECK-DAG: Data {{.*}} ?global_int@@3HA
-// CHECK-DAG: Data {{.*}} ??_7Dyn@ns@@6B@
-// CHECK-DAG: Data {{.*}} ?global_a@@3UA@@A
-// CHECK-DAG: Data {{.*}} ?global_c@@3UC@?$B@_J@ns@@A
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 main
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?b_func@?$B@F@ns@@QEBAHXZ
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?something@A@@QEAAXXZ
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??_GDyn@ns@@UEAAPEAXI@Z
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??2@YAPEAX_K@Z
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??3@YAXPEAX_K@Z
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?static_fn@C@?$B@H@ns@@SAHXZ
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?a_function@ns@@YAHXZ
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?static_fn@C@?$B@_N@ns@@SAHXZ
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??1Dyn@ns@@UEAA@XZ
+// CHECK-DAG: Code 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??0Dyn@ns@@QEAA@XZ
+// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_int@@3HA
+// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ??_7Dyn@ns@@6B@
+// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_a@@3UA@@A
+// CHECK-DAG: Data 0x{{[0-9a-f]+}} 0x{{0*[1-9a-f][0-9a-f]*}} 0x00000000 ?global_c@@3UC@?$B@_J@ns@@A

>From c23bf1ffb5bc1af7930011ee1dbff96c710c41c6 Mon Sep 17 00:00:00 2001
From: nerix <nero.9 at hotmail.de>
Date: Thu, 30 Oct 2025 17:33:49 +0100
Subject: [PATCH 2/2] Update
 lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp

Co-authored-by: Jonas Devlieghere <jonas at devlieghere.com>
---
 .../source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index be5cb1a619803..aaec1600dacff 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -1157,7 +1157,7 @@ void SymbolFileNativePDB::AddSymbols(Symtab &symtab) {
     }
   };
 
-  // the address map is sorted by the address of a symbol
+  // The address map is sorted by the address of a symbol.
   for (auto pid : m_index->publics().getAddressMap()) {
     PdbGlobalSymId global{pid, true};
     CVSymbol sym = m_index->ReadSymbolRecord(global);



More information about the lldb-commits mailing list