[Lldb-commits] [lldb] [lldb] Teach FuncUnwinders about discontinuous functions (PR #133072)

via lldb-commits lldb-commits at lists.llvm.org
Wed Mar 26 05:56:24 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-lldb

Author: Pavel Labath (labath)

<details>
<summary>Changes</summary>

The main change here is that we're now able to correctly look up plans for these functions. Previously, due to caching, we could end up with one entry covering most of the address space (because one part of the function was at the beginning and another at the end). Now, we can correctly recognise that the part in between does not belong to that function, and we can create a different FuncUnwinders instance for it. It doesn't help the discontinuous function much (its plan will still be garbled), but we can at least properly unwind out of the simple functions in between.

Fixing the unwind plans for discontinuous functions requires handling each unwind source specially, and this setup allows us to make the transition incrementally.

---

Patch is 26.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/133072.diff


8 Files Affected:

- (modified) lldb/include/lldb/Symbol/FuncUnwinders.h (+16-4) 
- (modified) lldb/include/lldb/Symbol/UnwindTable.h (+1-2) 
- (modified) lldb/source/Symbol/FuncUnwinders.cpp (+24-12) 
- (modified) lldb/source/Symbol/UnwindTable.cpp (+24-19) 
- (added) lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s (+256) 
- (added) lldb/test/Shell/Unwind/Inputs/linux-x86_64.yaml (+26) 
- (added) lldb/test/Shell/Unwind/basic-block-sections-with-dwarf-static.test (+39) 
- (added) lldb/test/Shell/Unwind/basic-block-sections-with-dwarf.test (+23) 


``````````diff
diff --git a/lldb/include/lldb/Symbol/FuncUnwinders.h b/lldb/include/lldb/Symbol/FuncUnwinders.h
index 2e841b3b34bd6..1d4c28324e90f 100644
--- a/lldb/include/lldb/Symbol/FuncUnwinders.h
+++ b/lldb/include/lldb/Symbol/FuncUnwinders.h
@@ -13,9 +13,9 @@ class UnwindTable;
 class FuncUnwinders {
 public:
   // FuncUnwinders objects are used to track UnwindPlans for a function (named
-  // or not - really just an address range)
+  // or not - really just a set of address ranges)
 
-  // We'll record four different UnwindPlans for each address range:
+  // We'll record four different UnwindPlans for each function:
   //
   //   1. Unwinding from a call site (a valid exception throw location)
   //      This is often sourced from the eh_frame exception handling info
@@ -31,7 +31,8 @@ class FuncUnwinders {
   // instructions are finished for migrating breakpoints past the stack frame
   // setup instructions when we don't have line table information.
 
-  FuncUnwinders(lldb_private::UnwindTable &unwind_table, AddressRange range);
+  FuncUnwinders(lldb_private::UnwindTable &unwind_table, Address addr,
+                AddressRanges ranges);
 
   ~FuncUnwinders();
 
@@ -54,7 +55,9 @@ class FuncUnwinders {
   const Address &GetFunctionStartAddress() const;
 
   bool ContainsAddress(const Address &addr) const {
-    return m_range.ContainsFileAddress(addr);
+    return llvm::any_of(m_ranges, [&](const AddressRange range) {
+      return range.ContainsFileAddress(addr);
+    });
   }
 
   // A function may have a Language Specific Data Area specified -- a block of
@@ -113,6 +116,15 @@ class FuncUnwinders {
       Thread &thread, const lldb::UnwindPlanSP &a, const lldb::UnwindPlanSP &b);
 
   UnwindTable &m_unwind_table;
+
+  /// Start address of the function described by this object.
+  Address m_addr;
+
+  /// The address ranges of the function.
+  AddressRanges m_ranges;
+
+  /// The smallest address range covering the entire function.
+  /// DEPRECATED: Use m_ranges instead.
   AddressRange m_range;
 
   std::recursive_mutex m_mutex;
diff --git a/lldb/include/lldb/Symbol/UnwindTable.h b/lldb/include/lldb/Symbol/UnwindTable.h
index 3166fdec6ebaa..1cc718efb28d6 100644
--- a/lldb/include/lldb/Symbol/UnwindTable.h
+++ b/lldb/include/lldb/Symbol/UnwindTable.h
@@ -66,8 +66,7 @@ class UnwindTable {
   void Dump(Stream &s);
 
   void Initialize();
-  std::optional<AddressRange> GetAddressRange(const Address &addr,
-                                              const SymbolContext &sc);
+  AddressRanges GetAddressRanges(const Address &addr, const SymbolContext &sc);
 
   typedef std::map<lldb::addr_t, lldb::FuncUnwindersSP> collection;
   typedef collection::iterator iterator;
diff --git a/lldb/source/Symbol/FuncUnwinders.cpp b/lldb/source/Symbol/FuncUnwinders.cpp
index 2449c82812696..a5ca7b094c949 100644
--- a/lldb/source/Symbol/FuncUnwinders.cpp
+++ b/lldb/source/Symbol/FuncUnwinders.cpp
@@ -31,15 +31,29 @@
 using namespace lldb;
 using namespace lldb_private;
 
-/// constructor
-
-FuncUnwinders::FuncUnwinders(UnwindTable &unwind_table, AddressRange range)
-    : m_unwind_table(unwind_table), m_range(range), m_mutex(),
-      m_unwind_plan_assembly_sp(), m_unwind_plan_eh_frame_sp(),
-      m_unwind_plan_eh_frame_augmented_sp(), m_unwind_plan_compact_unwind(),
-      m_unwind_plan_arm_unwind_sp(), m_unwind_plan_fast_sp(),
-      m_unwind_plan_arch_default_sp(),
-      m_unwind_plan_arch_default_at_func_entry_sp(),
+static AddressRange CollapseRanges(llvm::ArrayRef<AddressRange> ranges) {
+  if (ranges.empty())
+    return AddressRange();
+  if (ranges.size() == 1)
+    return ranges[0];
+
+  Address lowest_addr = ranges[0].GetBaseAddress();
+  addr_t highest_addr = lowest_addr.GetFileAddress() + ranges[0].GetByteSize();
+  for (const AddressRange &range : ranges.drop_front()) {
+    Address range_begin = range.GetBaseAddress();
+    addr_t range_end = range_begin.GetFileAddress() + range.GetByteSize();
+    if (range_begin.GetFileAddress() < lowest_addr.GetFileAddress())
+      lowest_addr = range_begin;
+    if (range_end > highest_addr)
+      highest_addr = range_end;
+  }
+  return AddressRange(lowest_addr, highest_addr - lowest_addr.GetFileAddress());
+}
+
+FuncUnwinders::FuncUnwinders(UnwindTable &unwind_table, Address addr,
+                             AddressRanges ranges)
+    : m_unwind_table(unwind_table), m_addr(std::move(addr)),
+      m_ranges(std::move(ranges)), m_range(CollapseRanges(m_ranges)),
       m_tried_unwind_plan_assembly(false), m_tried_unwind_plan_eh_frame(false),
       m_tried_unwind_plan_object_file(false),
       m_tried_unwind_plan_debug_frame(false),
@@ -511,9 +525,7 @@ Address &FuncUnwinders::GetFirstNonPrologueInsn(Target &target) {
   return m_first_non_prologue_insn;
 }
 
-const Address &FuncUnwinders::GetFunctionStartAddress() const {
-  return m_range.GetBaseAddress();
-}
+const Address &FuncUnwinders::GetFunctionStartAddress() const { return m_addr; }
 
 lldb::UnwindAssemblySP
 FuncUnwinders::GetUnwindAssemblyProfiler(Target &target) {
diff --git a/lldb/source/Symbol/UnwindTable.cpp b/lldb/source/Symbol/UnwindTable.cpp
index 61d51192bf3d1..e62ea7b571906 100644
--- a/lldb/source/Symbol/UnwindTable.cpp
+++ b/lldb/source/Symbol/UnwindTable.cpp
@@ -91,30 +91,35 @@ void UnwindTable::ModuleWasUpdated() {
 
 UnwindTable::~UnwindTable() = default;
 
-std::optional<AddressRange>
-UnwindTable::GetAddressRange(const Address &addr, const SymbolContext &sc) {
+AddressRanges UnwindTable::GetAddressRanges(const Address &addr,
+                                            const SymbolContext &sc) {
   AddressRange range;
 
   // First check the unwind info from the object file plugin
   if (m_object_file_unwind_up &&
       m_object_file_unwind_up->GetAddressRange(addr, range))
-    return range;
+    return {range};
 
   // Check the symbol context
-  if (sc.GetAddressRange(eSymbolContextFunction | eSymbolContextSymbol, 0,
-                         false, range) &&
-      range.GetBaseAddress().IsValid())
-    return range;
+  AddressRanges result;
+  for (size_t idx = 0;
+       sc.GetAddressRange(eSymbolContextFunction | eSymbolContextSymbol, idx,
+                          false, range) &&
+       range.GetBaseAddress().IsValid();
+       ++idx)
+    result.push_back(range);
+  if (!result.empty())
+    return result;
 
   // Does the eh_frame unwind info has a function bounds for this addr?
   if (m_eh_frame_up && m_eh_frame_up->GetAddressRange(addr, range))
-    return range;
+    return {range};
 
   // Try debug_frame as well
   if (m_debug_frame_up && m_debug_frame_up->GetAddressRange(addr, range))
-    return range;
+    return {range};
 
-  return std::nullopt;
+  return {};
 }
 
 FuncUnwindersSP
@@ -140,14 +145,14 @@ UnwindTable::GetFuncUnwindersContainingAddress(const Address &addr,
       return pos->second;
   }
 
-  auto range_or = GetAddressRange(addr, sc);
-  if (!range_or)
+  AddressRanges ranges = GetAddressRanges(addr, sc);
+  if (ranges.empty())
     return nullptr;
 
-  FuncUnwindersSP func_unwinder_sp(new FuncUnwinders(*this, *range_or));
-  m_unwinds.insert(insert_pos,
-                   std::make_pair(range_or->GetBaseAddress().GetFileAddress(),
-                                  func_unwinder_sp));
+  auto func_unwinder_sp = std::make_shared<FuncUnwinders>(*this, addr, ranges);
+  for (const AddressRange &range: ranges)
+    m_unwinds.emplace_hint(insert_pos, range.GetBaseAddress().GetFileAddress(),
+                           func_unwinder_sp);
   return func_unwinder_sp;
 }
 
@@ -159,11 +164,11 @@ FuncUnwindersSP UnwindTable::GetUncachedFuncUnwindersContainingAddress(
     const Address &addr, const SymbolContext &sc) {
   Initialize();
 
-  auto range_or = GetAddressRange(addr, sc);
-  if (!range_or)
+  AddressRanges ranges = GetAddressRanges(addr, sc);
+  if (ranges.empty())
     return nullptr;
 
-  return std::make_shared<FuncUnwinders>(*this, *range_or);
+  return std::make_shared<FuncUnwinders>(*this, addr, std::move(ranges));
 }
 
 void UnwindTable::Dump(Stream &s) {
diff --git a/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s b/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s
new file mode 100644
index 0000000000000..c405e51c227cb
--- /dev/null
+++ b/lldb/test/Shell/Unwind/Inputs/basic-block-sections-with-dwarf.s
@@ -0,0 +1,256 @@
+# An example of a function which has been split into two parts. Roughly
+# corresponds to this C code.
+# int baz() { return 47; }
+# int bar() { return foo(0); }
+# int foo(int flag) { return flag ? bar() : baz(); }
+# int main() { return foo(1); }
+# The function bar has been placed "in the middle" of foo.
+
+        .text
+
+        .type   baz, at function
+baz:
+        .cfi_startproc
+        movl    $47, %eax
+        retq
+        .cfi_endproc
+.Lbaz_end:
+        .size   baz, .Lbaz_end-baz
+
+        .type   foo, at function
+foo:
+        .cfi_startproc
+        pushq   %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset %rbp, -16
+        movq    %rsp, %rbp
+        .cfi_def_cfa_register %rbp
+        subq    $16, %rsp
+        movl    %edi, -8(%rbp)
+        cmpl    $0, -8(%rbp)
+        je      foo.__part.2
+        jmp     foo.__part.1
+        .cfi_endproc
+.Lfoo_end:
+        .size   foo, .Lfoo_end-foo
+
+foo.__part.1:
+        .cfi_startproc
+        .cfi_def_cfa %rbp, 16
+        .cfi_offset %rbp, -16
+        callq   bar
+        movl    %eax, -4(%rbp)
+        jmp     foo.__part.3
+.Lfoo.__part.1_end:
+        .size   foo.__part.1, .Lfoo.__part.1_end-foo.__part.1
+        .cfi_endproc
+
+bar:
+        .cfi_startproc
+# NB: Decrease the stack pointer to make the unwind info for this function
+# different from the surrounding foo function.
+        subq    $24, %rsp
+        .cfi_def_cfa_offset 32
+        xorl    %edi, %edi
+        callq   foo
+        addq    $24, %rsp
+        .cfi_def_cfa %rsp, 8
+        retq
+        .cfi_endproc
+.Lbar_end:
+        .size   bar, .Lbar_end-bar
+
+foo.__part.2:
+        .cfi_startproc
+        .cfi_def_cfa %rbp, 16
+        .cfi_offset %rbp, -16
+        callq   baz
+        movl    %eax, -4(%rbp)
+        jmp     foo.__part.3
+.Lfoo.__part.2_end:
+        .size   foo.__part.2, .Lfoo.__part.2_end-foo.__part.2
+        .cfi_endproc
+
+foo.__part.3:
+        .cfi_startproc
+        .cfi_def_cfa %rbp, 16
+        .cfi_offset %rbp, -16
+        movl    -4(%rbp), %eax
+        addq    $16, %rsp
+        popq    %rbp
+        .cfi_def_cfa %rsp, 8
+        retq
+.Lfoo.__part.3_end:
+        .size   foo.__part.3, .Lfoo.__part.3_end-foo.__part.3
+        .cfi_endproc
+
+
+        .globl  main
+        .type   main, at function
+main:
+        .cfi_startproc
+        movl    $1, %edi
+        callq   foo
+        retq
+        .cfi_endproc
+.Lmain_end:
+        .size   main, .Lmain_end-main
+
+        .section        .debug_abbrev,"", at progbits
+        .byte   1                               # Abbreviation Code
+        .byte   17                              # DW_TAG_compile_unit
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   37                              # DW_AT_producer
+        .byte   8                               # DW_FORM_string
+        .byte   19                              # DW_AT_language
+        .byte   5                               # DW_FORM_data2
+        .byte   17                              # DW_AT_low_pc
+        .byte   1                               # DW_FORM_addr
+        .byte   85                              # DW_AT_ranges
+        .byte   35                              # DW_FORM_rnglistx
+        .byte   116                             # DW_AT_rnglists_base
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   2                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   0                               # DW_CHILDREN_no
+        .byte   17                              # DW_AT_low_pc
+        .byte   1                               # DW_FORM_addr
+        .byte   18                              # DW_AT_high_pc
+        .byte   1                               # DW_FORM_addr
+        .byte   3                               # DW_AT_name
+        .byte   8                               # DW_FORM_string
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   3                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   85                              # DW_AT_ranges
+        .byte   35                              # DW_FORM_rnglistx
+        .byte   64                              # DW_AT_frame_base
+        .byte   24                              # DW_FORM_exprloc
+        .byte   3                               # DW_AT_name
+        .byte   8                               # DW_FORM_string
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   4                               # Abbreviation Code
+        .byte   5                               # DW_TAG_formal_parameter
+        .byte   0                               # DW_CHILDREN_no
+        .byte   2                               # DW_AT_location
+        .byte   24                              # DW_FORM_exprloc
+        .byte   3                               # DW_AT_name
+        .byte   8                               # DW_FORM_string
+        .byte   73                              # DW_AT_type
+        .byte   19                              # DW_FORM_ref4
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   5                               # Abbreviation Code
+        .byte   36                              # DW_TAG_base_type
+        .byte   0                               # DW_CHILDREN_no
+        .byte   3                               # DW_AT_name
+        .byte   8                               # DW_FORM_string
+        .byte   62                              # DW_AT_encoding
+        .byte   11                              # DW_FORM_data1
+        .byte   11                              # DW_AT_byte_size
+        .byte   11                              # DW_FORM_data1
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   0                               # EOM(3)
+
+        .section        .debug_info,"", at progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                               # DWARF version number
+        .byte   1                               # DWARF Unit Type
+        .byte   8                               # Address Size (in bytes)
+        .long   .debug_abbrev                   # Offset Into Abbrev. Section
+        .byte   1                               # Abbrev [1] DW_TAG_compile_unit
+        .asciz  "Hand-written DWARF"            # DW_AT_producer
+        .short  29                              # DW_AT_language
+        .quad   0                               # DW_AT_low_pc
+        .byte   1                               # DW_AT_ranges
+        .long   .Lrnglists_table_base0          # DW_AT_rnglists_base
+        .byte   2                               # Abbrev [2] DW_TAG_subprogram
+        .quad   baz                             # DW_AT_low_pc
+        .quad   .Lbaz_end                       # DW_AT_high_pc
+        .asciz  "baz"                           # DW_AT_name
+        .byte   2                               # Abbrev [2] DW_TAG_subprogram
+        .quad   bar                             # DW_AT_low_pc
+        .quad   .Lbar_end                       # DW_AT_high_pc
+        .asciz  "bar"                           # DW_AT_name
+        .byte   3                               # Abbrev [3] DW_TAG_subprogram
+        .byte   0                               # DW_AT_ranges
+        .byte   1                               # DW_AT_frame_base
+        .byte   86
+        .asciz  "foo"                           # DW_AT_name
+        .byte   4                               # Abbrev [4] DW_TAG_formal_parameter
+        .byte   2                               # DW_AT_location
+        .byte   145
+        .byte   120
+        .asciz  "flag"                          # DW_AT_name
+        .long   .Lint-.Lcu_begin0               # DW_AT_type
+        .byte   0                               # End Of Children Mark
+        .byte   2                               # Abbrev [2] DW_TAG_subprogram
+        .quad   main                            # DW_AT_low_pc
+        .quad   .Lmain_end                      # DW_AT_high_pc
+        .asciz  "main"                          # DW_AT_name
+.Lint:
+        .byte   5                               # Abbrev [5] DW_TAG_base_type
+        .asciz  "int"                           # DW_AT_name
+        .byte   5                               # DW_AT_encoding
+        .byte   4                               # DW_AT_byte_size
+        .byte   0                               # End Of Children Mark
+.Ldebug_info_end0:
+
+        .section        .debug_rnglists,"", at progbits
+        .long   .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length
+.Ldebug_list_header_start0:
+        .short  5                               # Version
+        .byte   8                               # Address size
+        .byte   0                               # Segment selector size
+        .long   2                               # Offset entry count
+.Lrnglists_table_base0:
+        .long   .Ldebug_ranges0-.Lrnglists_table_base0
+        .long   .Ldebug_ranges1-.Lrnglists_table_base0
+.Ldebug_ranges0:
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo
+        .quad   .Lfoo_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.1
+        .quad   .Lfoo.__part.1_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.2
+        .quad   .Lfoo.__part.2_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.3
+        .quad   .Lfoo.__part.3_end
+        .byte   0                               # DW_RLE_end_of_list
+.Ldebug_ranges1:
+        .byte   6                               # DW_RLE_start_end
+        .quad   baz
+        .quad   .Lbaz_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   bar
+        .quad   .Lbar_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.1
+        .quad   .Lfoo.__part.1_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.2
+        .quad   .Lfoo.__part.2_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.3
+        .quad   .Lfoo.__part.3_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo
+        .quad   .Lfoo_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   main
+        .quad   .Lmain_end
+        .byte   0                               # DW_RLE_end_of_list
+.Ldebug_list_header_end0:
+
+        .section        ".note.GNU-stack","", at progbits
diff --git a/lldb/test/Shell/Unwind/Inputs/linux-x86_64.yaml b/lldb/test/Shell/Unwind/Inputs/linux-x86_64.yaml
new file mode 100644
index 0000000000000..987462c2a0efc
--- /dev/null
+++ b/lldb/test/Shell/Unwind/Inputs/linux-x86_64.yaml
@@ -0,0 +1,26 @@
+--- !minidump
+Streams:
+  - Type:            ThreadList
+    Threads:
+      - Thread Id:       0x000074DD
+        Context:         0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000B0010000000000033000000000000000000000002020100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040109600000000000100000000000000000000000000000068E7D0C8FF7F000068E7D0C8FF7F000097E6D0C8FF7F00001010...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/133072


More information about the lldb-commits mailing list