[Lldb-commits] [lldb] [lldb] Use the first address range as the function address (PR #122440)

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Fri Jan 10 03:36:07 PST 2025


https://github.com/labath updated https://github.com/llvm/llvm-project/pull/122440

>From be424b1e32f0bc69d01bd582e1de51b66b920b25 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Fri, 10 Jan 2025 10:44:53 +0100
Subject: [PATCH 1/2] [lldb] Use the first address range as the function
 address

This is the behavior expected by DWARF. It also requires some fixups to
algorithms which were storing the addresses of some objects (Blocks and
Variables) relative to the beginning of the function.

There are plenty of things that still don't work in this setups, but
this change is sufficient for the expression evaluator to correctly
recognize the entry point of a function in this case.
---
 lldb/include/lldb/Symbol/Function.h           |  2 +-
 .../Breakpad/SymbolFileBreakpad.cpp           |  8 +++---
 .../Plugins/SymbolFile/CTF/SymbolFileCTF.cpp  |  2 +-
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 21 +++++++++++++++-
 .../SymbolFile/DWARF/DWARFDebugInfoEntry.cpp  |  7 +++---
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      |  6 ++---
 .../NativePDB/SymbolFileNativePDB.cpp         | 12 ++++-----
 .../Plugins/SymbolFile/PDB/SymbolFilePDB.cpp  |  9 +++----
 .../SymbolFile/Symtab/SymbolFileSymtab.cpp    | 16 ++++++------
 lldb/source/Symbol/Function.cpp               |  4 +--
 .../DWARF/x86/discontinuous-function.s        | 25 +++++++++++--------
 11 files changed, 65 insertions(+), 47 deletions(-)

diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h
index 157c007bdf0e84..858e2b15eb41c1 100644
--- a/lldb/include/lldb/Symbol/Function.h
+++ b/lldb/include/lldb/Symbol/Function.h
@@ -428,7 +428,7 @@ class Function : public UserID, public SymbolContextScope {
   ///     The section offset based address for this function.
   Function(CompileUnit *comp_unit, lldb::user_id_t func_uid,
            lldb::user_id_t func_type_uid, const Mangled &mangled,
-           Type *func_type, AddressRanges ranges);
+           Type *func_type, Address address, AddressRanges ranges);
 
   /// Destructor.
   ~Function() override;
diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
index 45c8f121db2bc4..e82a853f7f24e6 100644
--- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
+++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
@@ -251,11 +251,11 @@ FunctionSP SymbolFileBreakpad::GetOrCreateFunction(CompileUnit &comp_unit) {
     addr_t address = record->Address + base;
     SectionSP section_sp = list->FindSectionContainingFileAddress(address);
     if (section_sp) {
-      AddressRange func_range(
-          section_sp, address - section_sp->GetFileAddress(), record->Size);
+      Address func_addr(section_sp, address-section_sp->GetFileAddress());
       // Use the CU's id because every CU has only one function inside.
-      func_sp = std::make_shared<Function>(&comp_unit, id, 0, func_name,
-                                           nullptr, AddressRanges{func_range});
+      func_sp = std::make_shared<Function>(
+          &comp_unit, id, 0, func_name, nullptr, func_addr,
+          AddressRanges{AddressRange(func_addr, record->Size)});
       comp_unit.AddFunction(func_sp);
     }
   }
diff --git a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp
index 15e8d38e7f334b..0feb927c5c9488 100644
--- a/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp
+++ b/lldb/source/Plugins/SymbolFile/CTF/SymbolFileCTF.cpp
@@ -829,7 +829,7 @@ size_t SymbolFileCTF::ParseFunctions(CompileUnit &cu) {
       lldb::user_id_t func_uid = m_functions.size();
       FunctionSP function_sp = std::make_shared<Function>(
           &cu, func_uid, function_type_uid, symbol->GetMangled(), type_sp.get(),
-          AddressRanges{func_range});
+          symbol->GetAddress(), AddressRanges{func_range});
       m_functions.emplace_back(function_sp);
       cu.AddFunction(function_sp);
     }
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index e2f76e88dd6f0f..5d4b3d9f509024 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2393,10 +2393,29 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(
     assert(func_type == nullptr || func_type != DIE_IS_BEING_PARSED);
 
     const user_id_t func_user_id = die.GetID();
+
+    // The base address of the scope for any of the debugging information
+    // entries listed above is given by either the DW_AT_low_pc attribute or the
+    // first address in the first range entry in the list of ranges given by the
+    // DW_AT_ranges attribute.
+    //   -- DWARFv5, Section 2.17 Code Addresses, Ranges and Base Addresses
+    //
+    // If no DW_AT_entry_pc attribute is present, then the entry address is
+    // assumed to be the same as the base address of the containing scope.
+    //   -- DWARFv5, Section 2.18 Entry Address
+    //
+    // We currently don't support Debug Info Entries with
+    // DW_AT_low_pc/DW_AT_entry_pc and DW_AT_ranges attributes (the latter
+    // attributes are ignored even though they should be used for the address of
+    // the function), but compilers also don't emit that kind of information. If
+    // this becomes a problem we need to plumb these attributes separately.
+    Address func_addr = func_ranges[0].GetBaseAddress();
+
     func_sp = std::make_shared<Function>(
         &comp_unit,
         func_user_id, // UserID is the DIE offset
-        func_user_id, func_name, func_type, std::move(func_ranges));
+        func_user_id, func_name, func_type, std::move(func_addr),
+        std::move(func_ranges));
 
     if (func_sp.get() != nullptr) {
       if (frame_base.IsValid())
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
index 6d073411de876c..c9887060848a2f 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp
@@ -263,10 +263,9 @@ bool DWARFDebugInfoEntry::GetDIENamesAndRanges(
   }
 
   if (set_frame_base_loclist_addr && !ranges.empty()) {
-    // TODO: Use the first range instead.
-    dw_addr_t lowest_range_pc = llvm::min_element(ranges)->LowPC;
-    assert(lowest_range_pc >= cu->GetBaseAddress());
-    frame_base->SetFuncFileAddress(lowest_range_pc);
+    dw_addr_t file_addr = ranges.begin()->LowPC;
+    assert(file_addr >= cu->GetBaseAddress());
+    frame_base->SetFuncFileAddress(file_addr);
   }
 
   if (ranges.empty() || name == nullptr || mangled == nullptr) {
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 360dbaa1beb5eb..a0ba054e15f7ac 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -3183,8 +3183,7 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(Function &func) {
                 /*check_hi_lo_pc=*/true)) {
       if (ranges->empty())
         return 0;
-      // TODO: Use the first range instead.
-      dw_addr_t function_file_addr = llvm::min_element(*ranges)->LowPC;
+      dw_addr_t function_file_addr = ranges->begin()->LowPC;
       if (function_file_addr != LLDB_INVALID_ADDRESS)
         ParseBlocksRecursive(*comp_unit, &func.GetBlock(false),
                              function_die.GetFirstChild(), function_file_addr);
@@ -3223,9 +3222,8 @@ size_t SymbolFileDWARF::ParseVariablesForContext(const SymbolContext &sc) {
       if (llvm::Expected<llvm::DWARFAddressRangesVector> ranges =
               function_die.GetDIE()->GetAttributeAddressRanges(
                   function_die.GetCU(), /*check_hi_lo_pc=*/true)) {
-        // TODO: Use the first range element instead.
         if (!ranges->empty())
-          func_lo_pc = llvm::min_element(*ranges)->LowPC;
+          func_lo_pc = ranges->begin()->LowPC;
       } else {
         LLDB_LOG_ERROR(GetLog(DWARFLog::DebugInfo), ranges.takeError(),
                        "DIE({1:x}): {0}", function_die.GetID());
diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
index 53c4c126658ba3..6338f12402b73a 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/SymbolFileNativePDB.cpp
@@ -483,9 +483,8 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id,
   if (file_vm_addr == LLDB_INVALID_ADDRESS || file_vm_addr == 0)
     return nullptr;
 
-  AddressRange func_range(file_vm_addr, sol.length,
-                          comp_unit.GetModule()->GetSectionList());
-  if (!func_range.GetBaseAddress().IsValid())
+  Address func_addr(file_vm_addr, comp_unit.GetModule()->GetSectionList());
+  if (!func_addr.IsValid())
     return nullptr;
 
   ProcSym proc(static_cast<SymbolRecordKind>(sym_record.kind()));
@@ -500,7 +499,8 @@ lldb::FunctionSP SymbolFileNativePDB::CreateFunction(PdbCompilandSymId func_id,
   Mangled mangled(proc.Name);
   FunctionSP func_sp = std::make_shared<Function>(
       &comp_unit, toOpaqueUid(func_id), toOpaqueUid(sig_id), mangled,
-      func_type.get(), AddressRanges{func_range});
+      func_type.get(), func_addr,
+      AddressRanges{AddressRange(func_addr, sol.length)});
 
   comp_unit.AddFunction(func_sp);
 
@@ -1279,9 +1279,7 @@ bool SymbolFileNativePDB::ParseLineTable(CompileUnit &comp_unit) {
     if (file_vm_addr == LLDB_INVALID_ADDRESS)
       continue;
 
-    AddressRange func_range(file_vm_addr, sol.length,
-                            comp_unit.GetModule()->GetSectionList());
-    Address func_base = func_range.GetBaseAddress();
+    Address func_base(file_vm_addr, comp_unit.GetModule()->GetSectionList());
     PdbCompilandSymId func_id{modi, record_offset};
 
     // Iterate all S_INLINESITEs in the function.
diff --git a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
index b7854c05d345a8..293be12ee6333e 100644
--- a/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
+++ b/lldb/source/Plugins/SymbolFile/PDB/SymbolFilePDB.cpp
@@ -296,10 +296,9 @@ SymbolFilePDB::ParseCompileUnitFunctionForPDBFunc(const PDBSymbolFunc &pdb_func,
     return nullptr;
 
   auto func_length = pdb_func.getLength();
-  AddressRange func_range =
-      AddressRange(file_vm_addr, func_length,
-                   GetObjectFile()->GetModule()->GetSectionList());
-  if (!func_range.GetBaseAddress().IsValid())
+  Address func_addr(file_vm_addr,
+                    GetObjectFile()->GetModule()->GetSectionList());
+  if (!func_addr.IsValid())
     return nullptr;
 
   lldb_private::Type *func_type = ResolveTypeUID(pdb_func.getSymIndexId());
@@ -312,7 +311,7 @@ SymbolFilePDB::ParseCompileUnitFunctionForPDBFunc(const PDBSymbolFunc &pdb_func,
 
   FunctionSP func_sp = std::make_shared<Function>(
       &comp_unit, pdb_func.getSymIndexId(), func_type_uid, mangled, func_type,
-      AddressRanges{func_range});
+      func_addr, AddressRanges{AddressRange(func_addr, func_length)});
 
   comp_unit.AddFunction(func_sp);
 
diff --git a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
index e1e11d274d6e40..4cd46440b10bd2 100644
--- a/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
+++ b/lldb/source/Plugins/SymbolFile/Symtab/SymbolFileSymtab.cpp
@@ -179,14 +179,14 @@ size_t SymbolFileSymtab::ParseFunctions(CompileUnit &comp_unit) {
               }
             }
 
-            FunctionSP func_sp(
-                new Function(&comp_unit,
-                             symbol_idx,       // UserID is the DIE offset
-                             LLDB_INVALID_UID, // We don't have any type info
-                                               // for this function
-                             curr_symbol->GetMangled(), // Linker/mangled name
-                             nullptr, // no return type for a code symbol...
-                             AddressRanges{func_range}));
+            FunctionSP func_sp(new Function(
+                &comp_unit,
+                symbol_idx,                // UserID is the DIE offset
+                LLDB_INVALID_UID,          // We don't have any type info
+                                           // for this function
+                curr_symbol->GetMangled(), // Linker/mangled name
+                nullptr, // no return type for a code symbol...
+                curr_symbol->GetAddress(), AddressRanges{func_range}));
 
             if (func_sp.get() != nullptr) {
               comp_unit.AddFunction(func_sp);
diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp
index c9523281dc5659..23817cb41af9e7 100644
--- a/lldb/source/Symbol/Function.cpp
+++ b/lldb/source/Symbol/Function.cpp
@@ -276,10 +276,10 @@ AddressRange CollapseRanges(llvm::ArrayRef<AddressRange> ranges) {
 //
 Function::Function(CompileUnit *comp_unit, lldb::user_id_t func_uid,
                    lldb::user_id_t type_uid, const Mangled &mangled, Type *type,
-                   AddressRanges ranges)
+                   Address address, AddressRanges ranges)
     : UserID(func_uid), m_comp_unit(comp_unit), m_type_uid(type_uid),
       m_type(type), m_mangled(mangled), m_block(*this, func_uid),
-      m_range(CollapseRanges(ranges)), m_address(m_range.GetBaseAddress()),
+      m_range(CollapseRanges(ranges)), m_address(std::move(address)),
       m_prologue_byte_size(0) {
   assert(comp_unit != nullptr);
   lldb::addr_t base_file_addr = m_address.GetFileAddress();
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s
index b03d5d12ad2a1d..93ea9f33e762df 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/discontinuous-function.s
@@ -3,17 +3,30 @@
 # int baz();
 # int bar() { return 47; }
 # int foo(int flag) { return flag ? bar() : baz(); }
-# The function bar has been placed "in the middle" of foo.
+# The function bar has been placed "in the middle" of foo, and the function
+# entry point is deliberately not its lowest address.
 
 # RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %s -o %t
-# RUN: %lldb %t -o "image lookup -v -n foo" -o exit | FileCheck %s
+# RUN: %lldb %t -o "image lookup -v -n foo" -o "expr -- &foo" -o exit | FileCheck %s
 
+# CHECK-LABEL: image lookup
 # CHECK: 1 match found in {{.*}}
 # CHECK: Summary: {{.*}}`foo
 # CHECK: Function: id = {{.*}}, name = "foo", ranges = [0x0000000000000000-0x000000000000000e)[0x0000000000000014-0x000000000000001c)
 
+# CHECK-LABEL: expr -- &foo
+# CHECK: (void (*)()) $0 = 0x0000000000000007
+
         .text
 
+foo.__part.1:
+        .cfi_startproc
+        callq   bar
+        jmp     foo.__part.3
+.Lfoo.__part.1_end:
+        .size   foo.__part.1, .Lfoo.__part.1_end-foo.__part.1
+        .cfi_endproc
+
         .type   foo, at function
 foo:
         .cfi_startproc
@@ -24,14 +37,6 @@ foo:
 .Lfoo_end:
         .size   foo, .Lfoo_end-foo
 
-foo.__part.1:
-        .cfi_startproc
-        callq   bar
-        jmp     foo.__part.3
-.Lfoo.__part.1_end:
-        .size   foo.__part.1, .Lfoo.__part.1_end-foo.__part.1
-        .cfi_endproc
-
 bar:
         .cfi_startproc
         movl    $47, %eax

>From 15ae9745b8bd74f65558812650ac94bb61233ca9 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Fri, 10 Jan 2025 12:35:52 +0100
Subject: [PATCH 2/2] format

---
 lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
index e82a853f7f24e6..c7229568e1a0cd 100644
--- a/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
+++ b/lldb/source/Plugins/SymbolFile/Breakpad/SymbolFileBreakpad.cpp
@@ -251,7 +251,7 @@ FunctionSP SymbolFileBreakpad::GetOrCreateFunction(CompileUnit &comp_unit) {
     addr_t address = record->Address + base;
     SectionSP section_sp = list->FindSectionContainingFileAddress(address);
     if (section_sp) {
-      Address func_addr(section_sp, address-section_sp->GetFileAddress());
+      Address func_addr(section_sp, address - section_sp->GetFileAddress());
       // Use the CU's id because every CU has only one function inside.
       func_sp = std::make_shared<Function>(
           &comp_unit, id, 0, func_name, nullptr, func_addr,



More information about the lldb-commits mailing list