[Lldb-commits] [lldb] [lldb] Support parsing data symbols from the Wasm name section (PR #153494)
via lldb-commits
lldb-commits at lists.llvm.org
Wed Aug 13 14:23:50 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-lldb
Author: Jonas Devlieghere (JDevlieghere)
<details>
<summary>Changes</summary>
This PR adds support for parsing data symbols from the WebAssembly name section.
---
Full diff: https://github.com/llvm/llvm-project/pull/153494.diff
3 Files Affected:
- (modified) lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp (+92-22)
- (modified) lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml (+78-19)
- (modified) lldb/test/Shell/Symtab/symtab-wasm.test (+6-4)
``````````diff
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 919cc21c32ffd..b3144f28f4913 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -251,11 +251,11 @@ bool ObjectFileWasm::ParseHeader() {
static llvm::Expected<std::vector<AddressRange>>
ParseFunctions(SectionSP code_section_sp) {
- DataExtractor code_section_data;
- code_section_sp->GetSectionData(code_section_data);
+ DataExtractor data;
+ code_section_sp->GetSectionData(data);
lldb::offset_t offset = 0;
- const uint64_t function_count = code_section_data.GetULEB128(&offset);
+ const uint64_t function_count = data.GetULEB128(&offset);
if (function_count > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("function count overflows uint32_t");
@@ -263,7 +263,7 @@ ParseFunctions(SectionSP code_section_sp) {
functions.reserve(function_count);
for (uint32_t i = 0; i < function_count; ++i) {
- const uint64_t function_size = code_section_data.GetULEB128(&offset);
+ const uint64_t function_size = data.GetULEB128(&offset);
if (function_size > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("function size overflows uint32_t");
// llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
return functions;
}
+/// Parse the Wasm data section and return one AddressRange per data segment.
+///
+/// For every segment a flags ULEB128 and a size ULEB128 are decoded; the
+/// resulting range is built from \p data_section_sp, the section offset
+/// immediately following the size field, and the decoded size.
+///
+/// \param data_section_sp The section whose contents are decoded.
+/// \return The segment ranges in section order, or an error if the segment
+///         count, a segment's flags or a segment's size does not fit in a
+///         uint32_t, or if stepping past a segment overflows the offset.
+///
+/// NOTE(review): nothing is skipped between the flags and the size field. If
+/// a segment carries an offset init-expression there, its first byte would be
+/// decoded as the size -- confirm against the Wasm binary format.
+static llvm::Expected<std::vector<AddressRange>>
+ParseData(SectionSP data_section_sp) {
+  DataExtractor data;
+  data_section_sp->GetSectionData(data);
+
+  lldb::offset_t offset = 0;
+
+  const uint64_t segment_count = data.GetULEB128(&offset);
+  if (segment_count > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("segment count overflows uint32_t");
+
+  std::vector<AddressRange> segments;
+  segments.reserve(segment_count);
+
+  for (uint32_t i = 0; i < segment_count; ++i) {
+    const uint64_t flags = data.GetULEB128(&offset);
+    if (flags > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment flags overflows uint32_t");
+
+    const uint64_t segment_size = data.GetULEB128(&offset);
+    // Validate the size that was just decoded (not the flags a second time),
+    // so an oversized segment is rejected before it is recorded.
+    if (segment_size > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment size overflows uint32_t");
+
+    segments.emplace_back(data_section_sp, offset, segment_size);
+
+    // Advance past the segment payload, guarding against offset wrap-around.
+    std::optional<lldb::offset_t> next_offset =
+        llvm::checkedAddUnsigned(offset, segment_size);
+    if (!next_offset)
+      return llvm::createStringError("segment offset overflows uint64_t");
+    offset = *next_offset;
+  }
+
+  return segments;
+}
+
static llvm::Expected<std::vector<Symbol>>
ParseNames(SectionSP name_section_sp,
- const std::vector<AddressRange> &functions) {
+ const std::vector<AddressRange> &function_ranges,
+ const std::vector<AddressRange> &segment_ranges) {
DataExtractor name_section_data;
name_section_sp->GetSectionData(name_section_data);
@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
for (uint64_t i = 0; c && i < count; ++i) {
const uint64_t idx = data.getULEB128(c);
const std::optional<std::string> name = GetWasmString(data, c);
- if (!name || idx >= functions.size())
+ if (!name || idx >= function_ranges.size())
continue;
symbols.emplace_back(
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, functions[idx],
+ /*is_artificial=*/false, function_ranges[idx],
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
/*flags=*/0);
}
} break;
- case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
+ const uint64_t count = data.getULEB128(c);
+ if (count > std::numeric_limits<uint32_t>::max())
+ return llvm::createStringError("data count overflows uint32_t");
+ for (uint64_t i = 0; c && i < count; ++i) {
+ const uint64_t idx = data.getULEB128(c);
+ const std::optional<std::string> name = GetWasmString(data, c);
+ if (!name || idx >= segment_ranges.size())
+ continue;
+ symbols.emplace_back(
+ symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
+ /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+ /*is_artificial=*/false, segment_ranges[idx],
+ /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+ /*flags=*/0);
+ }
+
+ } break;
case llvm::wasm::WASM_NAMES_GLOBAL:
case llvm::wasm::WASM_NAMES_LOCAL:
default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
assert(m_sections_up && "sections must be parsed");
Log *log = GetLog(LLDBLog::Object);
- // The name section contains names and indexes. First parse the functions from
- // the code section so we can access them by their index.
- SectionSP code_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
- if (!code_section_sp) {
- LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
- return;
+ // The name section contains names and indexes. First parse the data from the
+ // relevant sections so we can access it by its index.
+ std::vector<AddressRange> function_ranges;
+ std::vector<AddressRange> segment_ranges;
+
+ // Parse the code section.
+ if (SectionSP code_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
+ llvm::Expected<std::vector<AddressRange>> functions =
+ ParseFunctions(code_section_sp);
+ if (!functions) {
+ LLDB_LOG_ERROR(log, functions.takeError(),
+ "Failed to parse Wasm code section: {0}");
+ return;
+ }
+ function_ranges = *functions;
}
- llvm::Expected<std::vector<AddressRange>> functions =
- ParseFunctions(code_section_sp);
- if (!functions) {
- LLDB_LOG_ERROR(log, functions.takeError(),
- "Failed to parse Wasm functions: {0}");
- return;
+ // Parse the data section.
+ if (SectionSP data_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
+ llvm::Expected<std::vector<AddressRange>> segments =
+ ParseData(data_section_sp);
+ if (!segments) {
+ LLDB_LOG_ERROR(log, segments.takeError(),
+ "Failed to parse Wasm data section: {0}");
+ return;
+ }
+ segment_ranges = *segments;
}
// Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
}
llvm::Expected<std::vector<Symbol>> symbols =
- ParseNames(name_section_sp, *functions);
+ ParseNames(name_section_sp, function_ranges, segment_ranges);
if (!symbols) {
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
// For this reason Section::GetFileAddress() must return zero for the
// Code section.
vm_addr = 0;
+ } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
+ section_type = eSectionTypeData;
+ section_name = ConstString("data");
} else {
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
if (section_type == eSectionTypeOther)
diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
index 165bb53662f40..67b04aa3cf81c 100644
--- a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
+++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml
@@ -1,3 +1,15 @@
+# clang -target wasm32 -nostdlib -Wl,--no-entry -Wl,--export-all -O0 -g -o simple.wasm simple.c
+# char* str = "data str";
+#
+# int add(int a, int b) {
+# return a + b;
+# }
+#
+# int main() {
+# int i = 1;
+# int j = 2;
+# return add(i, j);
+# }
--- !WASM
FileHeader:
Version: 0x1
@@ -37,13 +49,13 @@ Sections:
Mutable: true
InitExpr:
Opcode: I32_CONST
- Value: 66560
+ Value: 66576
- Index: 1
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1024
+ Value: 1036
- Index: 2
Type: I32
Mutable: false
@@ -55,44 +67,50 @@ Sections:
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1024
+ Value: 1040
- Index: 4
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 66560
+ Value: 1040
- Index: 5
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1024
+ Value: 66576
- Index: 6
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 66560
+ Value: 1024
- Index: 7
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 131072
+ Value: 66576
- Index: 8
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 0
+ Value: 131072
- Index: 9
Type: I32
Mutable: false
InitExpr:
Opcode: I32_CONST
- Value: 1
+ Value: 0
- Index: 10
+ Type: I32
+ Mutable: false
+ InitExpr:
+ Opcode: I32_CONST
+ Value: 1
+ - Index: 11
Type: I32
Mutable: false
InitExpr:
@@ -115,6 +133,9 @@ Sections:
- Name: main
Kind: FUNCTION
Index: 3
+ - Name: str
+ Kind: GLOBAL
+ Index: 1
- Name: __main_void
Kind: FUNCTION
Index: 2
@@ -123,34 +144,34 @@ Sections:
Index: 0
- Name: __dso_handle
Kind: GLOBAL
- Index: 1
+ Index: 2
- Name: __data_end
Kind: GLOBAL
- Index: 2
+ Index: 3
- Name: __stack_low
Kind: GLOBAL
- Index: 3
+ Index: 4
- Name: __stack_high
Kind: GLOBAL
- Index: 4
+ Index: 5
- Name: __global_base
Kind: GLOBAL
- Index: 5
+ Index: 6
- Name: __heap_base
Kind: GLOBAL
- Index: 6
+ Index: 7
- Name: __heap_end
Kind: GLOBAL
- Index: 7
+ Index: 8
- Name: __memory_base
Kind: GLOBAL
- Index: 8
+ Index: 9
- Name: __table_base
Kind: GLOBAL
- Index: 9
+ Index: 10
- Name: __wasm_first_page_end
Kind: GLOBAL
- Index: 10
+ Index: 11
- Type: CODE
Functions:
- Index: 0
@@ -169,6 +190,35 @@ Sections:
- Index: 3
Locals: []
Body: 1082808080000F0B
+ - Type: DATA
+ Segments:
+ - SectionOffset: 7
+ InitFlags: 0
+ Offset:
+ Opcode: I32_CONST
+ Value: 1024
+ Content: '646174612073747200'
+ - SectionOffset: 22
+ InitFlags: 0
+ Offset:
+ Opcode: I32_CONST
+ Value: 1036
+ Content: '00040000'
+ - Type: CUSTOM
+ Name: .debug_abbrev
+ Payload: 011101250E1305030E10171B0E11015517000002340049133A0B3B0B02180000030101491300000421004913370B0000052400030E3E0B0B0B0000062400030E0B0B3E0B0000073400030E49133F193A0B3B0B02180000080F0049130000092E01110112064018030E3A0B3B0B271949133F1900000A05000218030E3A0B3B0B491300000B2E01110112064018030E3A0B3B0B49133F1900000C34000218030E3A0B3B0B4913000000
+ - Type: CUSTOM
+ Name: .debug_info
+ Payload: D100000004000000000004017F0000001D005E0000000000000016000000000000000000000002330000000101050300040000033F0000000446000000090005080000000601066B000000080707040000005E000000010105030C040000083F00000009050000002900000004ED00029F5A0000000103CD0000000A02910C690000000103CD0000000A029108670000000103CD000000000B2F0000004C00000004ED00009F0D0000000107CD0000000C029108140000000108CD0000000C029104120000000109CD000000000500000000050400
+ - Type: CUSTOM
+ Name: .debug_ranges
+ Payload: 050000002E0000002F0000007B0000000000000000000000
+ - Type: CUSTOM
+ Name: .debug_str
+ Payload: 696E74007374720063686172006D61696E006A0069002F55736572732F6A6F6E61732F7761736D2D6D6963726F2D72756E74696D652F70726F647563742D6D696E692F706C6174666F726D732F64617277696E2F6275696C64006164640073696D706C652E6300620061005F5F41525241595F53495A455F545950455F5F00636C616E672076657273696F6E2032322E302E306769742028676974406769746875622E636F6D3A6C6C766D2F6C6C766D2D70726F6A6563742E67697420363363633265333930646235376362633430306235313937373162373030356561623166633736612900
+ - Type: CUSTOM
+ Name: .debug_line
+ Payload: 62000000040020000000010101FB0E0D0001010101000000010000010073696D706C652E6300000000000005020500000014050A0A08AD050E0658050C5805032002020001010005022F0000001805070A08BB75050E7505110658050A58050382020F000101
- Type: CUSTOM
Name: name
FunctionNames:
@@ -183,8 +233,17 @@ Sections:
GlobalNames:
- Index: 0
Name: __stack_pointer
+ DataSegmentNames:
+ - Index: 0
+ Name: .rodata
+ - Index: 1
+ Name: .data
- Type: CUSTOM
+ HeaderSecSizeEncodingLen: 2
Name: producers
+ Languages:
+ - Name: C11
+ Version: ''
Tools:
- Name: clang
Version: '22.0.0git'
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
index fc185cd81a0ec..5374b0c2f2892 100644
--- a/lldb/test/Shell/Symtab/symtab-wasm.test
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -1,7 +1,9 @@
# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
# RUN: %lldb %t.wasm -o 'image dump symtab'
-# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors
-# CHECK: Code 0x0000000000000005 {{.*}} add
-# CHECK: Code 0x000000000000002f {{.*}} __original_main
-# CHECK: Code 0x000000000000007c {{.*}} main
+# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
+# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
+# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
+# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
+# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
+# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data
``````````
</details>
https://github.com/llvm/llvm-project/pull/153494
More information about the lldb-commits
mailing list