[Lldb-commits] [lldb] [lldb] Expose discontinuous functions through SBFunction::GetRanges (PR #117532)

Pavel Labath via lldb-commits lldb-commits at lists.llvm.org
Mon Nov 25 01:15:15 PST 2024


https://github.com/labath created https://github.com/llvm/llvm-project/pull/117532

SBFunction::GetEndAddress doesn't really make sense for discontinuous functions, so I'm declaring it deprecated. GetStartAddress sort of makes sense, if one uses it to find the functions entry point, so I'm keeping that undeprecated.

I've made the test a Shell tests because these make it easier to create discontinuous functions regardless of the host os and architecture. They do make testing the python API harder, but I think I've managed to come up with something not entirely unreasonable.

>From 03385da68038c3fd40fc085c8fb1914529116837 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Tue, 12 Nov 2024 11:30:39 +0100
Subject: [PATCH] [lldb] Expose discontinuous functions through
 SBFunction::GetRanges

SBFunction::GetEndAddress doesn't really make sense for discontinuous
functions, so I'm declaring it deprecated. GetStartAddress sort of makes
sense, if one uses it to find the functions entry point, so I'm keeping
that undeprecated.

I've made the test a Shell tests because these make it easier to create
discontinuous functions regardless of the host os and architecture. They
do make testing the python API harder, but I think I've managed to come
up with something not entirely unreasonable.
---
 lldb/include/lldb/API/SBAddressRangeList.h    |   1 +
 lldb/include/lldb/API/SBFunction.h            |   2 +
 lldb/include/lldb/Core/AddressRangeListImpl.h |   5 +-
 lldb/include/lldb/Symbol/Function.h           |   3 +
 lldb/source/API/SBFunction.cpp                |  17 +-
 lldb/source/Core/AddressRangeListImpl.cpp     |   8 -
 .../Python/sb_function_ranges.s               | 182 ++++++++++++++++++
 7 files changed, 198 insertions(+), 20 deletions(-)
 create mode 100644 lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s

diff --git a/lldb/include/lldb/API/SBAddressRangeList.h b/lldb/include/lldb/API/SBAddressRangeList.h
index 5a4eeecf37dc96..41085b1edf8d7f 100644
--- a/lldb/include/lldb/API/SBAddressRangeList.h
+++ b/lldb/include/lldb/API/SBAddressRangeList.h
@@ -45,6 +45,7 @@ class LLDB_API SBAddressRangeList {
 private:
   friend class SBBlock;
   friend class SBProcess;
+  friend class SBFunction;
 
   lldb_private::AddressRangeListImpl &ref() const;
 
diff --git a/lldb/include/lldb/API/SBFunction.h b/lldb/include/lldb/API/SBFunction.h
index df607fdc7ebf59..0a8aeeff1ea5ad 100644
--- a/lldb/include/lldb/API/SBFunction.h
+++ b/lldb/include/lldb/API/SBFunction.h
@@ -43,6 +43,8 @@ class LLDB_API SBFunction {
 
   lldb::SBAddress GetStartAddress();
 
+  LLDB_DEPRECATED_FIXME("Not compatible with discontinuous functions.",
+                        "GetRanges()")
   lldb::SBAddress GetEndAddress();
 
   lldb::SBAddressRangeList GetRanges();
diff --git a/lldb/include/lldb/Core/AddressRangeListImpl.h b/lldb/include/lldb/Core/AddressRangeListImpl.h
index 6742e6ead87de0..6b88f9b1ac1795 100644
--- a/lldb/include/lldb/Core/AddressRangeListImpl.h
+++ b/lldb/include/lldb/Core/AddressRangeListImpl.h
@@ -24,9 +24,8 @@ class AddressRangeListImpl {
 public:
   AddressRangeListImpl();
 
-  AddressRangeListImpl(const AddressRangeListImpl &rhs) = default;
-
-  AddressRangeListImpl &operator=(const AddressRangeListImpl &rhs);
+  explicit AddressRangeListImpl(AddressRanges ranges)
+      : m_ranges(std::move(ranges)) {}
 
   size_t GetSize() const;
 
diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h
index 70f51a846f8d96..11921398ac7651 100644
--- a/lldb/include/lldb/Symbol/Function.h
+++ b/lldb/include/lldb/Symbol/Function.h
@@ -444,8 +444,11 @@ class Function : public UserID, public SymbolContextScope {
 
   Function *CalculateSymbolContextFunction() override;
 
+  // DEPRECATED: Use GetAddressRanges instead.
   const AddressRange &GetAddressRange() { return m_range; }
 
+  const AddressRanges &GetAddressRanges() const { return m_ranges; }
+
   lldb::LanguageType GetLanguage() const;
   /// Find the file and line number of the source location of the start of the
   /// function.  This will use the declaration if present and fall back on the
diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp
index ac61220ec8736a..2ef62eea4d1993 100644
--- a/lldb/source/API/SBFunction.cpp
+++ b/lldb/source/API/SBFunction.cpp
@@ -10,6 +10,7 @@
 #include "lldb/API/SBAddressRange.h"
 #include "lldb/API/SBProcess.h"
 #include "lldb/API/SBStream.h"
+#include "lldb/Core/AddressRangeListImpl.h"
 #include "lldb/Core/Disassembler.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/CompileUnit.h"
@@ -153,10 +154,11 @@ SBAddress SBFunction::GetEndAddress() {
 
   SBAddress addr;
   if (m_opaque_ptr) {
-    addr_t byte_size = m_opaque_ptr->GetAddressRange().GetByteSize();
-    if (byte_size > 0) {
-      addr.SetAddress(m_opaque_ptr->GetAddressRange().GetBaseAddress());
-      addr->Slide(byte_size);
+    llvm::ArrayRef<AddressRange> ranges = m_opaque_ptr->GetAddressRanges();
+    if (!ranges.empty()) {
+      // Return the end of the first range, use GetRanges to get all ranges.
+      addr.SetAddress(ranges.front().GetBaseAddress());
+      addr->Slide(ranges.front().GetByteSize());
     }
   }
   return addr;
@@ -166,11 +168,8 @@ lldb::SBAddressRangeList SBFunction::GetRanges() {
   LLDB_INSTRUMENT_VA(this);
 
   lldb::SBAddressRangeList ranges;
-  if (m_opaque_ptr) {
-    lldb::SBAddressRange range;
-    (*range.m_opaque_up) = m_opaque_ptr->GetAddressRange();
-    ranges.Append(std::move(range));
-  }
+  if (m_opaque_ptr)
+    ranges.ref() = AddressRangeListImpl(m_opaque_ptr->GetAddressRanges());
 
   return ranges;
 }
diff --git a/lldb/source/Core/AddressRangeListImpl.cpp b/lldb/source/Core/AddressRangeListImpl.cpp
index d405cf0fa3ec35..257824a0551e1b 100644
--- a/lldb/source/Core/AddressRangeListImpl.cpp
+++ b/lldb/source/Core/AddressRangeListImpl.cpp
@@ -13,14 +13,6 @@ using namespace lldb_private;
 
 AddressRangeListImpl::AddressRangeListImpl() : m_ranges() {}
 
-AddressRangeListImpl &
-AddressRangeListImpl::operator=(const AddressRangeListImpl &rhs) {
-  if (this == &rhs)
-    return *this;
-  m_ranges = rhs.m_ranges;
-  return *this;
-}
-
 size_t AddressRangeListImpl::GetSize() const { return m_ranges.size(); }
 
 void AddressRangeListImpl::Reserve(size_t capacity) {
diff --git a/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s
new file mode 100644
index 00000000000000..09b41148c7068d
--- /dev/null
+++ b/lldb/test/Shell/ScriptInterpreter/Python/sb_function_ranges.s
@@ -0,0 +1,182 @@
+# REQUIRES: x86
+
+# RUN: split-file %s %t
+# RUN: llvm-mc -triple x86_64-pc-linux -filetype=obj %t/input.s -o %t/input.o
+# RUN: %lldb %t/input.o -o "command script import %t/script.py" -o exit | FileCheck %s
+
+# CHECK: Found 1 function(s).
+# CHECK: foo: [input.o[0x0-0x7), input.o[0x7-0xe), input.o[0x14-0x1b), input.o[0x1b-0x1c)]
+
+#--- script.py
+import lldb
+
+def __lldb_init_module(debugger, internal_dict):
+  target = debugger.GetSelectedTarget()
+  sym_ctxs = target.FindFunctions("foo")
+  print(f"Found {len(sym_ctxs)} function(s).")
+  for ctx in sym_ctxs:
+    fn = ctx.function
+    print(f"{fn.name}: {fn.GetRanges()}")
+
+#--- input.s
+# An example of a function which has been split into two parts. Roughly
+# corresponds to this C code.
+# int baz();
+# int bar() { return 47; }
+# int foo(int flag) { return flag ? bar() : baz(); }
+# The function bar has been placed "in the middle" of foo.
+
+        .text
+
+        .type   foo, at function
+foo:
+        .cfi_startproc
+        cmpl    $0, %edi
+        je      foo.__part.2
+        jmp     foo.__part.1
+        .cfi_endproc
+.Lfoo_end:
+        .size   foo, .Lfoo_end-foo
+
+foo.__part.1:
+        .cfi_startproc
+        callq   bar
+        jmp     foo.__part.3
+.Lfoo.__part.1_end:
+        .size   foo.__part.1, .Lfoo.__part.1_end-foo.__part.1
+        .cfi_endproc
+
+bar:
+        .cfi_startproc
+        movl    $47, %eax
+        retq
+        .cfi_endproc
+.Lbar_end:
+        .size   bar, .Lbar_end-bar
+
+foo.__part.2:
+        .cfi_startproc
+        callq   baz
+        jmp     foo.__part.3
+.Lfoo.__part.2_end:
+        .size   foo.__part.2, .Lfoo.__part.2_end-foo.__part.2
+        .cfi_endproc
+
+foo.__part.3:
+        .cfi_startproc
+        retq
+.Lfoo.__part.3_end:
+        .size   foo.__part.3, .Lfoo.__part.3_end-foo.__part.3
+        .cfi_endproc
+
+
+        .section        .debug_abbrev,"", at progbits
+        .byte   1                               # Abbreviation Code
+        .byte   17                              # DW_TAG_compile_unit
+        .byte   1                               # DW_CHILDREN_yes
+        .byte   37                              # DW_AT_producer
+        .byte   8                               # DW_FORM_string
+        .byte   19                              # DW_AT_language
+        .byte   5                               # DW_FORM_data2
+        .byte   17                              # DW_AT_low_pc
+        .byte   1                               # DW_FORM_addr
+        .byte   85                              # DW_AT_ranges
+        .byte   35                              # DW_FORM_rnglistx
+        .byte   116                             # DW_AT_rnglists_base
+        .byte   23                              # DW_FORM_sec_offset
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   2                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   0                               # DW_CHILDREN_no
+        .byte   17                              # DW_AT_low_pc
+        .byte   1                               # DW_FORM_addr
+        .byte   18                              # DW_AT_high_pc
+        .byte   1                               # DW_FORM_addr
+        .byte   3                               # DW_AT_name
+        .byte   8                               # DW_FORM_string
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   3                               # Abbreviation Code
+        .byte   46                              # DW_TAG_subprogram
+        .byte   0                               # DW_CHILDREN_no
+        .byte   85                              # DW_AT_ranges
+        .byte   35                              # DW_FORM_rnglistx
+        .byte   64                              # DW_AT_frame_base
+        .byte   24                              # DW_FORM_exprloc
+        .byte   3                               # DW_AT_name
+        .byte   8                               # DW_FORM_string
+        .byte   0                               # EOM(1)
+        .byte   0                               # EOM(2)
+        .byte   0                               # EOM(3)
+
+        .section        .debug_info,"", at progbits
+.Lcu_begin0:
+        .long   .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short  5                               # DWARF version number
+        .byte   1                               # DWARF Unit Type
+        .byte   8                               # Address Size (in bytes)
+        .long   .debug_abbrev                   # Offset Into Abbrev. Section
+        .byte   1                               # Abbrev [1] DW_TAG_compile_unit
+        .asciz  "Hand-written DWARF"            # DW_AT_producer
+        .short  29                              # DW_AT_language
+        .quad   0                               # DW_AT_low_pc
+        .byte   1                               # DW_AT_ranges
+        .long   .Lrnglists_table_base0          # DW_AT_rnglists_base
+        .byte   2                               # Abbrev [2] DW_TAG_subprogram
+        .quad   bar                             # DW_AT_low_pc
+        .quad   .Lbar_end                       # DW_AT_high_pc
+        .asciz  "bar"                           # DW_AT_name
+        .byte   3                               # Abbrev [3] DW_TAG_subprogram
+        .byte   0                               # DW_AT_ranges
+        .byte   1                               # DW_AT_frame_base
+        .byte   86
+        .asciz  "foo"                           # DW_AT_name
+        .byte   0                               # End Of Children Mark
+.Ldebug_info_end0:
+
+        .section        .debug_rnglists,"", at progbits
+        .long   .Ldebug_list_header_end0-.Ldebug_list_header_start0 # Length
+.Ldebug_list_header_start0:
+        .short  5                               # Version
+        .byte   8                               # Address size
+        .byte   0                               # Segment selector size
+        .long   2                               # Offset entry count
+.Lrnglists_table_base0:
+        .long   .Ldebug_ranges0-.Lrnglists_table_base0
+        .long   .Ldebug_ranges1-.Lrnglists_table_base0
+.Ldebug_ranges0:
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo
+        .quad   .Lfoo_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.1
+        .quad   .Lfoo.__part.1_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.2
+        .quad   .Lfoo.__part.2_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.3
+        .quad   .Lfoo.__part.3_end
+        .byte   0                               # DW_RLE_end_of_list
+.Ldebug_ranges1:
+        .byte   6                               # DW_RLE_start_end
+        .quad   bar
+        .quad   .Lbar_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.1
+        .quad   .Lfoo.__part.1_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.2
+        .quad   .Lfoo.__part.2_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo.__part.3
+        .quad   .Lfoo.__part.3_end
+        .byte   6                               # DW_RLE_start_end
+        .quad   foo
+        .quad   .Lfoo_end
+        .byte   0                               # DW_RLE_end_of_list
+.Ldebug_list_header_end0:
+
+        .section        ".note.GNU-stack","", at progbits



More information about the lldb-commits mailing list