[Lldb-commits] [lldb] [lldb] Support parsing data symbols from the Wasm name section (PR #153494)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Wed Aug 13 14:24:21 PDT 2025
https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/153494
>From 2d0a3dad2321cdd34dca93919fab0dd9f0e9ed4f Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 13 Aug 2025 13:44:35 -0700
Subject: [PATCH] [lldb] Support parsing data symbols from the Wasm name
section
This PR adds support for parsing data symbols from the WebAssembly name
section.
---
.../ObjectFile/wasm/ObjectFileWasm.cpp | 114 ++++++++++++++----
.../test/Shell/Symtab/Inputs/simple.wasm.yaml | 82 ++++++++++---
lldb/test/Shell/Symtab/symtab-wasm.test | 10 +-
3 files changed, 161 insertions(+), 45 deletions(-)
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 919cc21c32ffd..b3144f28f4913 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -251,11 +251,11 @@ bool ObjectFileWasm::ParseHeader() {
static llvm::Expected<std::vector<AddressRange>>
ParseFunctions(SectionSP code_section_sp) {
- DataExtractor code_section_data;
- code_section_sp->GetSectionData(code_section_data);
+ DataExtractor data;
+ code_section_sp->GetSectionData(data);
lldb::offset_t offset = 0;
- const uint64_t function_count = code_section_data.GetULEB128(&offset);
+ const uint64_t function_count = data.GetULEB128(&offset);
if (function_count > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("function count overflows uint32_t");
@@ -263,7 +263,7 @@ ParseFunctions(SectionSP code_section_sp) {
functions.reserve(function_count);
for (uint32_t i = 0; i < function_count; ++i) {
- const uint64_t function_size = code_section_data.GetULEB128(&offset);
+ const uint64_t function_size = data.GetULEB128(&offset);
if (function_size > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("function size overflows uint32_t");
// llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
return functions;
}
+/// Parse the data section and return one AddressRange per data segment.
+///
+/// The section payload is read as a ULEB128 segment count followed, for each
+/// segment, by ULEB128 flags and a ULEB128 size. Each returned range is
+/// relative to \p data_section_sp and covers the bytes that follow the size.
+///
+/// \param data_section_sp The data section to read.
+/// \return The segment ranges in section order, or an error if the count,
+///         flags or size do not fit in 32 bits, or if advancing past a
+///         segment overflows the offset.
+static llvm::Expected<std::vector<AddressRange>>
+ParseData(SectionSP data_section_sp) {
+  DataExtractor data;
+  data_section_sp->GetSectionData(data);
+
+  lldb::offset_t offset = 0;
+
+  const uint64_t segment_count = data.GetULEB128(&offset);
+  if (segment_count > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("segment count overflows uint32_t");
+
+  std::vector<AddressRange> segments;
+  segments.reserve(segment_count);
+
+  for (uint32_t i = 0; i < segment_count; ++i) {
+    const uint64_t flags = data.GetULEB128(&offset);
+    if (flags > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment flags overflows uint32_t");
+
+    // NOTE(review): in the Wasm binary format an active segment carries an
+    // offset init expression (and optionally a memory index) between the
+    // flags and the byte vector. This loop reads the size directly after the
+    // flags -- confirm that is intended.
+    const uint64_t segment_size = data.GetULEB128(&offset);
+    // Validate the size itself; this check previously re-tested `flags`, so an
+    // oversized segment size was never rejected.
+    if (segment_size > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment size overflows uint32_t");
+
+    segments.emplace_back(data_section_sp, offset, segment_size);
+
+    // Skip over the segment contents to reach the next segment header.
+    std::optional<lldb::offset_t> next_offset =
+        llvm::checkedAddUnsigned(offset, segment_size);
+    if (!next_offset)
+      return llvm::createStringError("segment offset overflows uint64_t");
+    offset = *next_offset;
+  }
+
+  return segments;
+}
+
static llvm::Expected<std::vector<Symbol>>
ParseNames(SectionSP name_section_sp,
- const std::vector<AddressRange> &functions) {
+ const std::vector<AddressRange> &function_ranges,
+ const std::vector<AddressRange> &segment_ranges) {
DataExtractor name_section_data;
name_section_sp->GetSectionData(name_section_data);
@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
for (uint64_t i = 0; c && i < count; ++i) {
const uint64_t idx = data.getULEB128(c);
const std::optional<std::string> name = GetWasmString(data, c);
- if (!name || idx >= functions.size())
+ if (!name || idx >= function_ranges.size())
continue;
symbols.emplace_back(
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, functions[idx],
+ /*is_artificial=*/false, function_ranges[idx],
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
/*flags=*/0);
}
} break;
- case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
+ const uint64_t count = data.getULEB128(c);
+ if (count > std::numeric_limits<uint32_t>::max())
+ return llvm::createStringError("data count overflows uint32_t");
+ for (uint64_t i = 0; c && i < count; ++i) {
+ const uint64_t idx = data.getULEB128(c);
+ const std::optional<std::string> name = GetWasmString(data, c);
+ if (!name || idx >= segment_ranges.size())
+ continue;
+ symbols.emplace_back(
+ symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
+ /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+ /*is_artificial=*/false, segment_ranges[idx],
+ /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+ /*flags=*/0);
+ }
+
+ } break;
case llvm::wasm::WASM_NAMES_GLOBAL:
case llvm::wasm::WASM_NAMES_LOCAL:
default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
assert(m_sections_up && "sections must be parsed");
Log *log = GetLog(LLDBLog::Object);
- // The name section contains names and indexes. First parse the functions from
- // the code section so we can access them by their index.
- SectionSP code_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
- if (!code_section_sp) {
- LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
- return;
+ // The name section contains names and indexes. First parse the data from the
+ // relevant sections so we can access it by its index.
+ std::vector<AddressRange> function_ranges;
+ std::vector<AddressRange> segment_ranges;
+
+ // Parse the code section.
+ if (SectionSP code_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
+ llvm::Expected<std::vector<AddressRange>> functions =
+ ParseFunctions(code_section_sp);
+ if (!functions) {
+ LLDB_LOG_ERROR(log, functions.takeError(),
+ "Failed to parse Wasm code section: {0}");
+ return;
+ }
+ function_ranges = *functions;
}
- llvm::Expected<std::vector<AddressRange>> functions =
- ParseFunctions(code_section_sp);
- if (!functions) {
- LLDB_LOG_ERROR(log, functions.takeError(),
- "Failed to parse Wasm functions: {0}");
- return;
+ // Parse the data section.
+ if (SectionSP data_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
+ llvm::Expected<std::vector<AddressRange>> segments =
+ ParseData(data_section_sp);
+ if (!segments) {
+ LLDB_LOG_ERROR(log, segments.takeError(),
+ "Failed to parse Wasm data section: {0}");
+ return;
+ }
+ segment_ranges = *segments;
}
// Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
}
llvm::Expected<std::vector<Symbol>> symbols =
- ParseNames(name_section_sp, *functions);
+ ParseNames(name_section_sp, function_ranges, segment_ranges);
if (!symbols) {
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
// For this reason Section::GetFileAddress() must return zero for the
// Code section.
vm_addr = 0;
+ } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
+ section_type = eSectionTypeData;
+ section_name = ConstString("data");
} else {
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
if (section_type == eSectionTypeOther)
diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
index 165bb53662f40..088d6163d6b0b 100644
--- a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
+++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
@@ -1,3 +1,15 @@
+# clang -target wasm32 -nostdlib -Wl,--no-entry -Wl,--export-all -O0 -g -o simple.wasm simple.c
+# char* str = "data str";
+#
+# int add(int a, int b) {
+# return a + b;
+# }
+#
+# int main() {
+# int i = 1;
+# int j = 2;
+# return add(i, j);
+# }
--- !WASM
FileHeader:
Version: 0x1
@@ -37,13 +49,13 @@ Sections:
Mutable: true
InitExpr:
Opcode: I32_CONST
- Value: 66560
+ Value: 66576
- Index: 1
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1024
+ Value: 1036
- Index: 2
Type: I32
Mutable: false
@@ -55,44 +67,50 @@ Sections:
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1024
+ Value: 1040
- Index: 4
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 66560
+ Value: 1040
- Index: 5
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1024
+ Value: 66576
- Index: 6
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 66560
+ Value: 1024
- Index: 7
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 131072
+ Value: 66576
- Index: 8
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 0
+ Value: 131072
- Index: 9
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1
+ Value: 0
- Index: 10
+ Type: I32
+ Mutable: false
+ InitExpr:
+ Opcode: I32_CONST
+ Value: 1
+ - Index: 11
Type: I32
Mutable: false
InitExpr:
@@ -115,6 +133,9 @@ Sections:
- Name: main
Kind: FUNCTION
Index: 3
+ - Name: str
+ Kind: GLOBAL
+ Index: 1
- Name: __main_void
Kind: FUNCTION
Index: 2
@@ -123,34 +144,34 @@ Sections:
Index: 0
- Name: __dso_handle
Kind: GLOBAL
- Index: 1
+ Index: 2
- Name: __data_end
Kind: GLOBAL
- Index: 2
+ Index: 3
- Name: __stack_low
Kind: GLOBAL
- Index: 3
+ Index: 4
- Name: __stack_high
Kind: GLOBAL
- Index: 4
+ Index: 5
- Name: __global_base
Kind: GLOBAL
- Index: 5
+ Index: 6
- Name: __heap_base
Kind: GLOBAL
- Index: 6
+ Index: 7
- Name: __heap_end
Kind: GLOBAL
- Index: 7
+ Index: 8
- Name: __memory_base
Kind: GLOBAL
- Index: 8
+ Index: 9
- Name: __table_base
Kind: GLOBAL
- Index: 9
+ Index: 10
- Name: __wasm_first_page_end
Kind: GLOBAL
- Index: 10
+ Index: 11
- Type: CODE
Functions:
- Index: 0
@@ -169,6 +190,20 @@ Sections:
- Index: 3
Locals: []
Body: 1082808080000F0B
+ - Type: DATA
+ Segments:
+ - SectionOffset: 7
+ InitFlags: 0
+ Offset:
+ Opcode: I32_CONST
+ Value: 1024
+ Content: '646174612073747200'
+ - SectionOffset: 22
+ InitFlags: 0
+ Offset:
+ Opcode: I32_CONST
+ Value: 1036
+ Content: '00040000'
- Type: CUSTOM
Name: name
FunctionNames:
@@ -183,8 +218,17 @@ Sections:
GlobalNames:
- Index: 0
Name: __stack_pointer
+ DataSegmentNames:
+ - Index: 0
+ Name: .rodata
+ - Index: 1
+ Name: .data
- Type: CUSTOM
+ HeaderSecSizeEncodingLen: 2
Name: producers
+ Languages:
+ - Name: C11
+ Version: ''
Tools:
- Name: clang
Version: '22.0.0git'
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
index fc185cd81a0ec..5374b0c2f2892 100644
--- a/lldb/test/Shell/Symtab/symtab-wasm.test
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -1,7 +1,9 @@
# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
# RUN: %lldb %t.wasm -o 'image dump symtab'
-# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
-# CHECK: Code 0x0000000000000005 {{.*}} add
-# CHECK: Code 0x000000000000002f {{.*}} __original_main
-# CHECK: Code 0x000000000000007c {{.*}} main
+# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
+# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
+# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
+# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
+# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
+# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data
More information about the lldb-commits
mailing list