[Lldb-commits] [lldb] [lldb] Skip local variable declarations at start of Wasm function (PR #190093)

Jonas Devlieghere via lldb-commits lldb-commits at lists.llvm.org
Wed Apr 1 20:50:48 PDT 2026


https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/190093

>From 36b16c13b7b8a7930cc5f68a2b558601c6b0dc14 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 1 Apr 2026 17:18:05 -0700
Subject: [PATCH] [lldb] Skip local variable declarations at start of Wasm
 function

In WebAssembly, a function starts with a number of local variable
declarations, sometimes called a function header. These declarations are
*not* instructions, but they are considered to be part of the function,
meaning we can't just pretend like the function starts on the first
instruction. Instead, we treat them like a prologue, albeit one that you
cannot disassemble or set a breakpoint on.

With this PR, we now correctly disassemble the function, matching the
output of `objdump`, and breakpoints now resolve to the first instruction.

Fixes #189960
---
 lldb/include/lldb/Symbol/Symbol.h             |  5 ++
 lldb/source/Core/Disassembler.cpp             | 19 +++++
 .../ObjectFile/wasm/ObjectFileWasm.cpp        | 41 +++++++++--
 .../ObjectFile/wasm/wasm-local-decls.yaml     | 70 +++++++++++++++++++
 4 files changed, 131 insertions(+), 4 deletions(-)
 create mode 100644 lldb/test/Shell/ObjectFile/wasm/wasm-local-decls.yaml

diff --git a/lldb/include/lldb/Symbol/Symbol.h b/lldb/include/lldb/Symbol/Symbol.h
index 1f9d222b6ab29..bf8809f6730f1 100644
--- a/lldb/include/lldb/Symbol/Symbol.h
+++ b/lldb/include/lldb/Symbol/Symbol.h
@@ -223,6 +223,11 @@ class Symbol : public SymbolContextScope {
   // in bytes, else it will return zero.
   uint32_t GetPrologueByteSize();
 
+  void SetPrologueByteSize(uint32_t prologue_byte_size) {
+    m_type_data = prologue_byte_size;
+    m_type_data_resolved = true;
+  }
+
   bool GetDemangledNameIsSynthesized() const {
     return m_demangled_is_synthesized;
   }
diff --git a/lldb/source/Core/Disassembler.cpp b/lldb/source/Core/Disassembler.cpp
index 3011b6aede3d2..b40168f9359cd 100644
--- a/lldb/source/Core/Disassembler.cpp
+++ b/lldb/source/Core/Disassembler.cpp
@@ -1355,6 +1355,25 @@ size_t Disassembler::AppendInstructions(Target &target, Address start,
 
   start = ResolveAddress(target, start);
 
+  // WebAssembly functions begin with local variable declarations that are part
+  // of the binary format but are not executable instructions. Skip past them
+  // so the disassembler doesn't try to decode non-instruction bytes.
+  if (m_arch.GetTriple().getArch() == llvm::Triple::wasm32 ||
+      m_arch.GetTriple().getArch() == llvm::Triple::wasm64) {
+    if (ModuleSP module_sp = start.GetModule()) {
+      SymbolContext sc;
+      module_sp->ResolveSymbolContextForAddress(start, eSymbolContextSymbol,
+                                                sc);
+      if (sc.symbol && sc.symbol->GetAddress() == start) {
+        if (uint32_t skip = sc.symbol->GetPrologueByteSize()) {
+          start.Slide(skip);
+          if (limit.kind == Limit::Bytes && limit.value > skip)
+            limit.value -= skip;
+        }
+      }
+    }
+  }
+
   addr_t byte_size = limit.value;
   if (limit.kind == Limit::Instructions)
     byte_size *= m_arch.GetMaximumOpcodeByteSize();
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index ff04b68adbd6a..d66841a5cc0a1 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -308,8 +308,16 @@ bool ObjectFileWasm::ParseHeader() {
 }
 
 struct WasmFunction {
+  /// Offset from the section to the start of the function. This points past the
+  /// function size, which some other tools consider part of the function.
   lldb::offset_t section_offset = LLDB_INVALID_OFFSET;
+
+  /// Function size.
   uint32_t size = 0;
+
+  /// Offset from section_offset to the first instruction in the function, past
+  /// the local variable declarations.
+  uint32_t code_offset = 0;
 };
 
 static llvm::Expected<uint32_t> ParseImports(DataExtractor &import_data) {
@@ -353,6 +361,22 @@ static llvm::Expected<uint32_t> ParseImports(DataExtractor &import_data) {
   return function_imports;
 }
 
+/// Get the offset in the function to the first instruction.
+static llvm::Expected<uint32_t> GetFunctionCodeOffset(DataExtractor &data,
+                                                      lldb::offset_t offset) {
+  // Wasm function bodies start with:
+  //   [local_count: ULEB128]
+  //   [local_decl: {count: ULEB128, type: byte}] × local_count
+  //   [instructions...]
+  const lldb::offset_t locals_start = offset;
+  const uint32_t local_count = data.GetULEB128(&offset);
+  for (uint32_t i = 0; i < local_count; ++i) {
+    data.GetULEB128(&offset); // count
+    data.GetU8(&offset);      // valtype
+  }
+  return offset - locals_start;
+}
+
 static llvm::Expected<std::vector<WasmFunction>>
 ParseFunctions(DataExtractor &data) {
   lldb::offset_t offset = 0;
@@ -365,13 +389,20 @@ ParseFunctions(DataExtractor &data) {
   functions.reserve(*function_count);
 
   for (uint32_t i = 0; i < *function_count; ++i) {
-    llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
-    if (!function_size)
-      return function_size.takeError();
     // llvm-objdump considers the ULEB with the function size to be part of the
     // function. We can't do that here because that would break symbolic
     // breakpoints, as that address is never executed.
-    functions.push_back({offset, *function_size});
+    llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
+    if (!function_size)
+      return function_size.takeError();
+
+    // Functions start with a number of local variable declarations.
+    // They're part of the function but they're not instructions.
+    llvm::Expected<uint32_t> code_offset = GetFunctionCodeOffset(data, offset);
+    if (!code_offset)
+      return code_offset.takeError();
+
+    functions.push_back({offset, *function_size, *code_offset});
 
     std::optional<lldb::offset_t> next_offset =
         llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
@@ -503,6 +534,8 @@ ParseNames(SectionSP code_section_sp, DataExtractor &name_data,
                                /*size_is_valid=*/true,
                                /*contains_linker_annotations=*/false,
                                /*flags=*/0);
+          if (func.code_offset)
+            symbols.back().SetPrologueByteSize(func.code_offset);
         }
       }
     } break;
diff --git a/lldb/test/Shell/ObjectFile/wasm/wasm-local-decls.yaml b/lldb/test/Shell/ObjectFile/wasm/wasm-local-decls.yaml
new file mode 100644
index 0000000000000..2ba78e687b612
--- /dev/null
+++ b/lldb/test/Shell/ObjectFile/wasm/wasm-local-decls.yaml
@@ -0,0 +1,70 @@
+# REQUIRES: webassembly
+
+# RUN: yaml2obj %s -o %t.wasm
+# RUN: %lldb %t.wasm -o 'dis -n no_locals' -o 'dis -n with_locals' \
+# RUN:   -o 'b no_locals' -o 'b with_locals' -o 'breakpoint list' \
+# RUN:   2>&1 | FileCheck %s
+
+# no_locals has zero local declarations (1 byte for the count of 0).
+# The first instruction should be at <+1>.
+# CHECK-LABEL: dis -n no_locals
+# CHECK:       {{.*}}`no_locals:
+# CHECK-NEXT:  {{.*}} <+1>: i32.const 42
+# CHECK-NEXT:  {{.*}} <+3>: return
+# CHECK-NEXT:  {{.*}} <+4>: end
+
+# with_locals has one local declaration (1-byte declaration count + 1-byte local count + 1-byte i32 type = 3 bytes).
+# The first instruction should be at <+3>.
+# CHECK-LABEL: dis -n with_locals
+# CHECK:       {{.*}}`with_locals:
+# CHECK-NEXT:  {{.*}} <+3>:  local.get 0
+# CHECK-NEXT:  {{.*}} <+5>:  local.get 1
+# CHECK-NEXT:  {{.*}} <+7>:  i32.add
+# CHECK-NEXT:  {{.*}} <+8>:  local.set 2
+# CHECK-NEXT:  {{.*}} <+10>: local.get 0
+# CHECK-NEXT:  {{.*}} <+12>: local.get 1
+# CHECK-NEXT:  {{.*}} <+14>: return
+# CHECK-NEXT:  {{.*}} <+15>: end
+
+# Breakpoints should land at the first instruction.
+# CHECK-LABEL: breakpoint list
+# CHECK: where = {{.*}}`no_locals + 1
+# CHECK: where = {{.*}}`with_locals + 3
+
+--- !WASM
+FileHeader:
+  Version: 0x1
+Sections:
+  - Type: TYPE
+    Signatures:
+      - Index: 0
+        ParamTypes: []
+        ReturnTypes:
+          - I32
+      - Index: 1
+        ParamTypes:
+          - I32
+          - I32
+        ReturnTypes:
+          - I32
+  - Type: FUNCTION
+    FunctionTypes: [ 0, 1 ]
+  - Type: CODE
+    Functions:
+      # no_locals: returns 42, no local variables.
+      - Index: 0
+        Locals: []
+        Body: 412A0F0B
+      # with_locals: adds two params using a local, has 1 local decl (1 x i32).
+      - Index: 1
+        Locals:
+          - Type: I32
+            Count: 1
+        Body: 200020016A2102200020010F0B
+  - Type: CUSTOM
+    Name: name
+    FunctionNames:
+      - Index: 0
+        Name: no_locals
+      - Index: 1
+        Name: with_locals



More information about the lldb-commits mailing list