[Lldb-commits] [lldb] [lldb] Create sections for Wasm segments (PR #153634)
Jonas Devlieghere via lldb-commits
lldb-commits at lists.llvm.org
Mon Aug 18 08:08:45 PDT 2025
https://github.com/JDevlieghere updated https://github.com/llvm/llvm-project/pull/153634
>From bfc57b337054bd1184b96baa1d59dd75a23a70c1 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Thu, 14 Aug 2025 10:49:48 -0700
Subject: [PATCH 1/3] [lldb] Create sections for Wasm segments
This is a continuation of #153494. In a WebAssembly file, the "name"
section contains names for the segments in the data section
(WASM_NAMES_DATA_SEGMENT). We already parse these as symbols, and with
this PR, we also create sub-sections for the data segments.
---
.../ObjectFile/wasm/ObjectFileWasm.cpp | 70 ++++++++++++++-----
lldb/test/Shell/Symtab/symtab-wasm.test | 20 ++++--
2 files changed, 64 insertions(+), 26 deletions(-)
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index b3144f28f4913..dc0b0241d1f24 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -281,7 +281,16 @@ ParseFunctions(SectionSP code_section_sp) {
return functions;
}
-static llvm::Expected<std::vector<AddressRange>>
+struct WasmSegment {
+ WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size,
+ uint32_t flags)
+ : address_range(section_sp, offset, size), flags(flags) {};
+ std::string name;
+ AddressRange address_range;
+ uint32_t flags = 0;
+};
+
+static llvm::Expected<std::vector<WasmSegment>>
ParseData(SectionSP data_section_sp) {
DataExtractor data;
data_section_sp->GetSectionData(data);
@@ -292,7 +301,7 @@ ParseData(SectionSP data_section_sp) {
if (segment_count > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("segment count overflows uint32_t");
- std::vector<AddressRange> segments;
+ std::vector<WasmSegment> segments;
segments.reserve(segment_count);
for (uint32_t i = 0; i < segment_count; ++i) {
@@ -304,7 +313,7 @@ ParseData(SectionSP data_section_sp) {
if (flags > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("segment size overflows uint32_t");
- segments.emplace_back(data_section_sp, offset, segment_size);
+ segments.emplace_back(data_section_sp, offset, segment_size, flags);
std::optional<lldb::offset_t> next_offset =
llvm::checkedAddUnsigned(offset, segment_size);
@@ -319,7 +328,7 @@ ParseData(SectionSP data_section_sp) {
static llvm::Expected<std::vector<Symbol>>
ParseNames(SectionSP name_section_sp,
const std::vector<AddressRange> &function_ranges,
- const std::vector<AddressRange> &segment_ranges) {
+ std::vector<WasmSegment> &segments) {
DataExtractor name_section_data;
name_section_sp->GetSectionData(name_section_data);
@@ -358,12 +367,14 @@ ParseNames(SectionSP name_section_sp,
for (uint64_t i = 0; c && i < count; ++i) {
const uint64_t idx = data.getULEB128(c);
const std::optional<std::string> name = GetWasmString(data, c);
- if (!name || idx >= segment_ranges.size())
+ if (!name || idx >= segments.size())
continue;
+ // Update the segment name.
+ segments[i].name = *name;
symbols.emplace_back(
symbols.size(), Mangled(*name), lldb::eSymbolTypeData,
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, segment_ranges[idx],
+ /*is_artificial=*/false, segments[i].address_range,
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
/*flags=*/0);
}
@@ -391,33 +402,34 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
// The name section contains names and indexes. First parse the data from the
// relevant sections so we can access it by its index.
- std::vector<AddressRange> function_ranges;
- std::vector<AddressRange> segment_ranges;
+ std::vector<AddressRange> functions;
+ std::vector<WasmSegment> segments;
// Parse the code section.
if (SectionSP code_section_sp =
m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
- llvm::Expected<std::vector<AddressRange>> functions =
+ llvm::Expected<std::vector<AddressRange>> maybe_functions =
ParseFunctions(code_section_sp);
- if (!functions) {
- LLDB_LOG_ERROR(log, functions.takeError(),
+ if (!maybe_functions) {
+ LLDB_LOG_ERROR(log, maybe_functions.takeError(),
"Failed to parse Wasm code section: {0}");
return;
}
- function_ranges = *functions;
+ functions = *maybe_functions;
}
// Parse the data section.
- if (SectionSP data_section_sp =
- m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
- llvm::Expected<std::vector<AddressRange>> segments =
+ SectionSP data_section_sp =
+ m_sections_up->FindSectionByType(lldb::eSectionTypeData, false);
+ if (data_section_sp) {
+ llvm::Expected<std::vector<WasmSegment>> maybe_segments =
ParseData(data_section_sp);
- if (!segments) {
- LLDB_LOG_ERROR(log, segments.takeError(),
+ if (!maybe_segments) {
+ LLDB_LOG_ERROR(log, maybe_segments.takeError(),
"Failed to parse Wasm data section: {0}");
return;
}
- segment_ranges = *segments;
+ segments = *maybe_segments;
}
// Parse the name section.
@@ -429,7 +441,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
}
llvm::Expected<std::vector<Symbol>> symbols =
- ParseNames(name_section_sp, function_ranges, segment_ranges);
+ ParseNames(name_section_sp, functions, segments);
if (!symbols) {
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
return;
@@ -438,6 +450,26 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
for (const Symbol &symbol : *symbols)
symtab.AddSymbol(symbol);
+ lldb::user_id_t segment_id = 0;
+ for (const WasmSegment &segment : segments) {
+ const lldb::addr_t segment_addr =
+ segment.address_range.GetBaseAddress().GetFileAddress();
+ const size_t segment_size = segment.address_range.GetByteSize();
+ SectionSP segment_sp = std::make_shared<Section>(
+ /*parent_section_sp=*/data_section_sp, GetModule(),
+ /*obj_file=*/this,
+ ++segment_id << 8, // 1-based segment index, shifted by 8 bits to avoid
+ // collision with section IDs.
+ ConstString(segment.name), eSectionTypeData,
+ /*file_vm_addr=*/segment_addr,
+ /*vm_size=*/segment_size,
+ /*file_offset=*/segment_addr,
+ /*file_size=*/segment_size,
+ /*log2align=*/0, segment.flags);
+ m_sections_up->AddSection(segment_sp);
+ GetModule()->GetSectionList()->AddSection(segment_sp);
+ }
+
symtab.Finalize();
}
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
index 5374b0c2f2892..5e7c7cabc5280 100644
--- a/lldb/test/Shell/Symtab/symtab-wasm.test
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -1,9 +1,15 @@
# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm
-# RUN: %lldb %t.wasm -o 'image dump symtab'
+# RUN: %lldb %t.wasm -o 'image dump symtab' -o 'image dump sections' | FileCheck %s
-# CHECK: Code 0x0000000000000002 0x0000000000000002 {{.*}} __wasm_call_ctors
-# CHECK: Code 0x0000000000000005 0x0000000000000029 {{.*}} add
-# CHECK: Code 0x000000000000002f 0x000000000000004c {{.*}} __original_main
-# CHECK: Code 0x000000000000007c 0x0000000000000009 {{.*}} main
-# CHECK: Data 0x000000000000022f 0x0000000000000041 {{.*}} .rodata
-# CHECK: Data 0x0000000000000270 0x0000000000000000 {{.*}} .data
+CHECK: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors
+CHECK: Code 0x0000000000000005 0x0000000000000029 0x00000000 add
+CHECK: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main
+CHECK: Code 0x000000000000007c 0x0000000000000009 0x00000000 main
+CHECK: Data 0x000000000000022f 0x0000000000000041 0x00000000 .rodata
+CHECK: Data 0x0000000000000270 0x0000000000000000 0x00000000 .data
+
+CHECK: 0x0000000000000001 code {{.*}} 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code
+CHECK: 0x0000000000000003 data {{.*}} 0x0000022c 0x0000001a 0x00000000 symtab-wasm.test.tmp.wasm.data
+CHECK: 0x0000000000000040 wasm-name {{.*}} 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name
+CHECK: 0x0000000000000100 data {{.*}} 0x0000022f 0x00000041 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata
+CHECK: 0x0000000000000200 data {{.*}} 0x00000270 0x00000000 0x00000000 symtab-wasm.test.tmp.wasm.data..data
>From 83cd4e1512c652567fa4c3deb342774dbce6d2c9 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Sun, 17 Aug 2025 10:48:12 -0700
Subject: [PATCH 2/3] Check segment_size, not flags
---
lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index dc0b0241d1f24..f1f34a7fe7fbf 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -310,7 +310,7 @@ ParseData(SectionSP data_section_sp) {
return llvm::createStringError("segment flags overflows uint32_t");
const uint64_t segment_size = data.GetULEB128(&offset);
- if (flags > std::numeric_limits<uint32_t>::max())
+ if (segment_size > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("segment size overflows uint32_t");
segments.emplace_back(data_section_sp, offset, segment_size, flags);
>From 83ae0f619a30f4c309a61389e0f5ca1b4cbda48a Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas at devlieghere.com>
Date: Mon, 18 Aug 2025 08:08:30 -0700
Subject: [PATCH 3/3] Support 'active' data segments
---
.../ObjectFile/wasm/ObjectFileWasm.cpp | 26 ++++++++++++++-----
lldb/test/Shell/Symtab/symtab-wasm.test | 8 +++---
2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index f1f34a7fe7fbf..a000b34fbb549 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -282,12 +282,10 @@ ParseFunctions(SectionSP code_section_sp) {
}
struct WasmSegment {
- WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size,
- uint32_t flags)
- : address_range(section_sp, offset, size), flags(flags) {};
+ WasmSegment(SectionSP section_sp, lldb::offset_t offset, uint32_t size)
+ : address_range(section_sp, offset, size) {};
std::string name;
AddressRange address_range;
- uint32_t flags = 0;
};
static llvm::Expected<std::vector<WasmSegment>>
@@ -309,11 +307,27 @@ ParseData(SectionSP data_section_sp) {
if (flags > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("segment flags overflows uint32_t");
+ // Data segments have a mode that identifies them as either passive or
+ // active. An active data segment copies its contents into a memory during
+ // instantiation, as specified by a memory index and a constant expression
+ // defining an offset into that memory.
+ if (flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
+ const uint64_t memidx = data.GetULEB128(&offset);
+ if (memidx > std::numeric_limits<uint32_t>::max())
+ return llvm::createStringError("memidx overflows uint32_t");
+ }
+
+ if ((flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
+ // Skip over the constant expression.
+ for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;)
+ b = data.GetU8(&offset);
+ }
+
const uint64_t segment_size = data.GetULEB128(&offset);
if (segment_size > std::numeric_limits<uint32_t>::max())
return llvm::createStringError("segment size overflows uint32_t");
- segments.emplace_back(data_section_sp, offset, segment_size, flags);
+ segments.emplace_back(data_section_sp, offset, segment_size);
std::optional<lldb::offset_t> next_offset =
llvm::checkedAddUnsigned(offset, segment_size);
@@ -465,7 +479,7 @@ void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
/*vm_size=*/segment_size,
/*file_offset=*/segment_addr,
/*file_size=*/segment_size,
- /*log2align=*/0, segment.flags);
+ /*log2align=*/0, /*flags=*/0);
m_sections_up->AddSection(segment_sp);
GetModule()->GetSectionList()->AddSection(segment_sp);
}
diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test
index 5e7c7cabc5280..4170d9aba9eea 100644
--- a/lldb/test/Shell/Symtab/symtab-wasm.test
+++ b/lldb/test/Shell/Symtab/symtab-wasm.test
@@ -5,11 +5,11 @@ CHECK: Code 0x0000000000000002 0x0000000000000002 0x00000000 __wasm_call_ctors
CHECK: Code 0x0000000000000005 0x0000000000000029 0x00000000 add
CHECK: Code 0x000000000000002f 0x000000000000004c 0x00000000 __original_main
CHECK: Code 0x000000000000007c 0x0000000000000009 0x00000000 main
-CHECK: Data 0x000000000000022f 0x0000000000000041 0x00000000 .rodata
-CHECK: Data 0x0000000000000270 0x0000000000000000 0x00000000 .data
+CHECK: Data 0x0000000000000233 0x0000000000000009 0x00000000 .rodata
+CHECK: Data 0x0000000000000242 0x0000000000000004 0x00000000 .data
CHECK: 0x0000000000000001 code {{.*}} 0x000001a1 0x00000085 0x00000000 symtab-wasm.test.tmp.wasm.code
CHECK: 0x0000000000000003 data {{.*}} 0x0000022c 0x0000001a 0x00000000 symtab-wasm.test.tmp.wasm.data
CHECK: 0x0000000000000040 wasm-name {{.*}} 0x00000251 0x00000059 0x00000000 symtab-wasm.test.tmp.wasm.name
-CHECK: 0x0000000000000100 data {{.*}} 0x0000022f 0x00000041 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata
-CHECK: 0x0000000000000200 data {{.*}} 0x00000270 0x00000000 0x00000000 symtab-wasm.test.tmp.wasm.data..data
+CHECK: 0x0000000000000100 data {{.*}} 0x00000233 0x00000009 0x00000000 symtab-wasm.test.tmp.wasm.data..rodata
+CHECK: 0x0000000000000200 data {{.*}} 0x00000242 0x00000004 0x00000000 symtab-wasm.test.tmp.wasm.data..data
More information about the lldb-commits
mailing list