[Lldb-commits] [lldb] [lldb] Support parsing data symbols from the Wasm name section (PR #153494)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Wed Aug 13 13:55:01 PDT 2025
https://github.com/JDevlieghere created https://github.com/llvm/llvm-project/pull/153494
This PR adds support for parsing data symbols from the WebAssembly name section.
>From e68b6d31ab7ba418b23c7289e12d0befeb07f6d6 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Wed, 13 Aug 2025 13:44:35 -0700
Subject: [PATCH] [lldb] Support parsing data symbols from the Wasm name
section
This PR adds support for parsing data symbols from the WebAssembly name
section.
---
.../ObjectFile/wasm/ObjectFileWasm.cpp | 114 ++++++++++++++----
1 file changed, 92 insertions(+), 22 deletions(-)
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index 919cc21c32ffd..b3144f28f4913 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -251,11 +251,11 @@ bool ObjectFileWasm::ParseHeader() {
static llvm::Expected<std::vector<AddressRange>>
ParseFunctions(SectionSP code_section_sp) {
- DataExtractor code_section_data;
- code_section_sp->GetSectionData(code_section_data);
+ DataExtractor data;
+ code_section_sp->GetSectionData(data);
lldb::offset_t offset = 0;
- const uint64_t function_count = code_section_data.GetULEB128(&offset);
+ const uint64_t function_count = data.GetULEB128(&offset);
if (function_count > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("function count overflows uint32_t");
@@ -263,7 +263,7 @@ ParseFunctions(SectionSP code_section_sp) {
functions.reserve(function_count);
for (uint32_t i = 0; i < function_count; ++i) {
- const uint64_t function_size = code_section_data.GetULEB128(&offset);
+ const uint64_t function_size = data.GetULEB128(&offset);
if (function_size > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("function size overflows uint32_t");
// llvm-objdump considers the ULEB with the function size to be part of the
@@ -281,9 +281,45 @@ ParseFunctions(SectionSP code_section_sp) {
return functions;
}
+/// Parse the Wasm data section into one AddressRange per segment. Ranges are
+/// appended in section order, so the vector index is the segment index.
+static llvm::Expected<std::vector<AddressRange>>
+ParseData(SectionSP data_section_sp) {
+  DataExtractor data;
+  data_section_sp->GetSectionData(data);
+  lldb::offset_t offset = 0;
+  const uint64_t segment_count = data.GetULEB128(&offset);
+  if (segment_count > std::numeric_limits<uint32_t>::max())
+    return llvm::createStringError("segment count overflows uint32_t");
+
+  std::vector<AddressRange> segments;
+  segments.reserve(segment_count);
+  for (uint32_t i = 0; i < segment_count; ++i) {
+    const uint64_t flags = data.GetULEB128(&offset);
+    if (flags > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment flags overflows uint32_t");
+
+    // NOTE(review): the size is read directly after the flags; confirm that
+    // no segment kind stores additional fields between the two.
+    const uint64_t segment_size = data.GetULEB128(&offset);
+    if (segment_size > std::numeric_limits<uint32_t>::max())
+      return llvm::createStringError("segment size overflows uint32_t");
+
+    // The range begins at the first byte after the size field.
+    segments.emplace_back(data_section_sp, offset, segment_size);
+    std::optional<lldb::offset_t> next_offset =
+        llvm::checkedAddUnsigned(offset, segment_size);
+    if (!next_offset)
+      return llvm::createStringError("segment offset overflows uint64_t");
+    offset = *next_offset;
+  }
+  return segments;
+}
+
static llvm::Expected<std::vector<Symbol>>
ParseNames(SectionSP name_section_sp,
- const std::vector<AddressRange> &functions) {
+ const std::vector<AddressRange> &function_ranges,
+ const std::vector<AddressRange> &segment_ranges) {
DataExtractor name_section_data;
name_section_sp->GetSectionData(name_section_data);
@@ -305,17 +341,34 @@ ParseNames(SectionSP name_section_sp,
for (uint64_t i = 0; c && i < count; ++i) {
const uint64_t idx = data.getULEB128(c);
const std::optional<std::string> name = GetWasmString(data, c);
- if (!name || idx >= functions.size())
+ if (!name || idx >= function_ranges.size())
continue;
symbols.emplace_back(
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, functions[idx],
+ /*is_artificial=*/false, function_ranges[idx],
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
/*flags=*/0);
}
} break;
- case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
+ const uint64_t count = data.getULEB128(c);
+ if (count > std::numeric_limits<uint32_t>::max())
+ return llvm::createStringError("data count overflows uint32_t");
+ for (uint64_t i = 0; c && i < count; ++i) {
+ const uint64_t idx = data.getULEB128(c);
+ const std::optional<std::string> name = GetWasmString(data, c);
+ if (!name || idx >= segment_ranges.size())
+ continue;
+ symbols.emplace_back(
+ symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
+ /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
+ /*is_artificial=*/false, segment_ranges[idx],
+ /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
+ /*flags=*/0);
+ }
+
+ } break;
case llvm::wasm::WASM_NAMES_GLOBAL:
case llvm::wasm::WASM_NAMES_LOCAL:
default:
@@ -336,21 +389,35 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
assert(m_sections_up && "sections must be parsed");
Log *log = GetLog(LLDBLog::Object);
- // The name section contains names and indexes. First parse the functions from
- // the code section so we can access them by their index.
- SectionSP code_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
- if (!code_section_sp) {
- LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
- return;
+ // The name section contains names and indexes. First parse the data from the
+ // relevant sections so we can access it by its index.
+ std::vector<AddressRange> function_ranges;
+ std::vector<AddressRange> segment_ranges;
+
+ // Parse the code section.
+ if (SectionSP code_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
+ llvm::Expected<std::vector<AddressRange>> functions =
+ ParseFunctions(code_section_sp);
+ if (!functions) {
+ LLDB_LOG_ERROR(log, functions.takeError(),
+ "Failed to parse Wasm code section: {0}");
+ return;
+ }
+ function_ranges = *functions;
}
- llvm::Expected<std::vector<AddressRange>> functions =
- ParseFunctions(code_section_sp);
- if (!functions) {
- LLDB_LOG_ERROR(log, functions.takeError(),
- "Failed to parse Wasm functions: {0}");
- return;
+ // Parse the data section.
+ if (SectionSP data_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
+ llvm::Expected<std::vector<AddressRange>> segments =
+ ParseData(data_section_sp);
+ if (!segments) {
+ LLDB_LOG_ERROR(log, segments.takeError(),
+ "Failed to parse Wasm data section: {0}");
+ return;
+ }
+ segment_ranges = *segments;
}
// Parse the name section.
@@ -362,7 +429,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
}
llvm::Expected<std::vector<Symbol>> symbols =
- ParseNames(name_section_sp, *functions);
+ ParseNames(name_section_sp, function_ranges, segment_ranges);
if (!symbols) {
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
return;
@@ -408,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
// For this reason Section::GetFileAddress() must return zero for the
// Code section.
vm_addr = 0;
+ } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
+ section_type = eSectionTypeData;
+ section_name = ConstString("data");
} else {
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
if (section_type == eSectionTypeOther)
More information about the lldb-commits
mailing list