[Lldb-commits] [lldb] [lldb] Correctly resolve (discontinuous) function offsets when disassembling (PR #126925)

Wed Feb 12 07:35:32 PST 2025

https://github.com/labath created https://github.com/llvm/llvm-project/pull/126925

We need to iterate through the all symbol context ranges returned by (since #126505) SymbolContext::GetAddressRange. This also includes a fix to print the function offsets as signed values.

I've also wanted to check that the addresses which are in the middle of the function do *not* resolve to the function, but that's not entirely the case right now. This appears to be a separate issue though, so I've just left a TODO for now.

>From 7d01821bbf930005e7bf026f3c436616178718a2 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Wed, 12 Feb 2025 16:07:50 +0100
Subject: [PATCH] [lldb] Correctly resolve (discontinuous) function offsets
 when disassembling

We need to iterate through the all symbol context ranges returned by
(since #126505) SymbolContext::GetAddressRange. This also includes a fix
to print the function offsets as signed values.

I've also wanted to check that the addresses which are in the middle of
the function do *not* resolve to the function, but that's not entirely
the case right now. This appears to be a separate issue though, so I've
just left a TODO for now.
---
 .../Disassembler/LLVMC/DisassemblerLLVMC.cpp       | 12 ++++++++----
 lldb/source/Symbol/SymbolContext.cpp               | 14 +++++++++-----
 lldb/test/Shell/Commands/command-disassemble.s     | 13 ++++++++-----
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
index 76f2db086476f..a77155f6bf41e 100644
--- a/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
+++ b/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
@@ -31,6 +31,7 @@
 
 #include "lldb/Core/Address.h"
 #include "lldb/Core/Module.h"
+#include "lldb/Symbol/Function.h"
 #include "lldb/Symbol/SymbolContext.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Process.h"
@@ -1806,10 +1807,13 @@ const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
         bool format_omitting_current_func_name = false;
         if (sym_ctx.symbol || sym_ctx.function) {
           AddressRange range;
-          if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
-              range.GetBaseAddress().IsValid() &&
-              range.ContainsLoadAddress(value_so_addr, target)) {
-            format_omitting_current_func_name = true;
+          for (uint32_t idx = 0;
+               sym_ctx.GetAddressRange(resolve_scope, idx, false, range);
+               ++idx) {
+            if (range.ContainsLoadAddress(value_so_addr, target)) {
+              format_omitting_current_func_name = true;
+              break;
+            }
           }
         }
 
diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp
index 4725df52ff559..183947a694363 100644
--- a/lldb/source/Symbol/SymbolContext.cpp
+++ b/lldb/source/Symbol/SymbolContext.cpp
@@ -104,15 +104,19 @@ bool SymbolContext::DumpStopContext(
 
     if (addr_t file_addr = addr.GetFileAddress();
         file_addr != LLDB_INVALID_ADDRESS) {
-      const addr_t function_offset =
-          file_addr - function->GetAddress().GetFileAddress();
+      // Avoiding signed arithmetic due to UB in -INT_MAX.
+      const char sign =
+          file_addr >= function->GetAddress().GetFileAddress() ? '+' : '-';
+      addr_t offset = file_addr - function->GetAddress().GetFileAddress();
+      if (sign == '-')
+        offset = -offset;
       if (!show_function_name) {
         // Print +offset even if offset is 0
         dumped_something = true;
-        s->Printf("+%" PRIu64 ">", function_offset);
-      } else if (function_offset) {
+        s->Format("{0}{1}>", sign, offset);
+      } else if (offset) {
         dumped_something = true;
-        s->Printf(" + %" PRIu64, function_offset);
+        s->Format(" {0} {1}", sign, offset);
       }
     }
 
diff --git a/lldb/test/Shell/Commands/command-disassemble.s b/lldb/test/Shell/Commands/command-disassemble.s
index 951d96cefd4b9..eb84a9ce39d4a 100644
--- a/lldb/test/Shell/Commands/command-disassemble.s
+++ b/lldb/test/Shell/Commands/command-disassemble.s
@@ -84,7 +84,7 @@
 # CHECK-NEXT: command-disassemble.s.tmp[0x2044] <+0>: int    $0x32
 # CHECK-NEXT: warning: Not disassembling a function because it is very large [0x0000000000002046-0x0000000000004046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
 # CHECK-NEXT: (lldb) disassemble --name case3
-# CHECK-NEXT: error: Not disassembling a function because it is very large [0x0000000000006046-0x0000000000007046)[0x0000000000009046-0x000000000000a046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
+# CHECK-NEXT: error: Not disassembling a function because it is very large [0x0000000000006046-0x0000000000007046)[0x0000000000009046-0x000000000000a050). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
 # CHECK-NEXT: Not disassembling a function because it is very large [0x0000000000004046-0x0000000000006046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
 # CHECK-NEXT: (lldb) disassemble --name case3 --count 3
 # CHECK-NEXT: command-disassemble.s.tmp`n2::case3:
@@ -93,9 +93,10 @@
 # CHECK-NEXT: command-disassemble.s.tmp[0x604a] <-12284>: int    $0x2a
 # CHECK-EMPTY:
 # CHECK-NEXT: command-disassemble.s.tmp`n2::case3:
-# CHECK-NEXT: command-disassemble.s.tmp[0x9046] <+0>: int    $0x2a
-# CHECK-NEXT: command-disassemble.s.tmp[0x9048] <+2>: int    $0x2a
-# CHECK-NEXT: command-disassemble.s.tmp[0x904a] <+4>: int    $0x2a
+# CHECK-NEXT: command-disassemble.s.tmp[0x9046] <+0>: jmp 0x6046 ; <-12288>
+## FIXME: This should resolve to `middle_of_case3`
+# CHECK-NEXT: command-disassemble.s.tmp[0x904b] <+5>: jmp 0x7046 ; n2::case3 - 8192
+# CHECK-NEXT: command-disassemble.s.tmp[0x9050] <+10>: int    $0x2a
 # CHECK-EMPTY:
 # CHECK-NEXT: command-disassemble.s.tmp`n1::case3:
 # CHECK-NEXT: command-disassemble.s.tmp[0x4046] <+0>: int    $0x2a
@@ -171,12 +172,14 @@ _ZN2n15case3Ev:
         .endr
 .L_ZN2n25case3Ev.__part.1_end:
 
-.Lpadding:
+middle_of_case3:
         .rept 0x1000
         int $42
         .endr
 
 _ZN2n25case3Ev:
+        jmp .L_ZN2n25case3Ev.__part.1
+        jmp middle_of_case3
         .rept 0x800
         int $42
         .endr