[Lldb-commits] [lldb] 7cd6179 - [lldb] Improve error handling in ObjectFileWasm (#154433)
via lldb-commits
lldb-commits at lists.llvm.org
Tue Aug 19 16:00:34 PDT 2025
Author: Jonas Devlieghere
Date: 2025-08-19T16:00:31-07:00
New Revision: 7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40
URL: https://github.com/llvm/llvm-project/commit/7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40
DIFF: https://github.com/llvm/llvm-project/commit/7cd61793edbb4e6dbc0fb92e5d6d85cabfa62b40.diff
LOG: [lldb] Improve error handling in ObjectFileWasm (#154433)
Improve error handling in ObjectFileWasm by using helpers that wrap
their result in an llvm::Expected. The helper to read a Wasm string now
return an Expected<std::string> and I created a helper to parse 32-bit
ULEBs that returns an Expected<uint32_t>.
Added:
Modified:
lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
Removed:
################################################################################
diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
index a000b34fbb549..777b20e9bb0f6 100644
--- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
+++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp
@@ -36,6 +36,41 @@ LLDB_PLUGIN_DEFINE(ObjectFileWasm)
static const uint32_t kWasmHeaderSize =
sizeof(llvm::wasm::WasmMagic) + sizeof(llvm::wasm::WasmVersion);
+/// Helper to read a 32-bit ULEB using LLDB's DataExtractor.
+static inline llvm::Expected<uint32_t> GetULEB32(DataExtractor &data,
+ lldb::offset_t &offset) {
+ const uint64_t value = data.GetULEB128(&offset);
+ if (value > std::numeric_limits<uint32_t>::max())
+ return llvm::createStringError("ULEB exceeds 32 bits");
+ return value;
+}
+
+/// Helper to read a 32-bit ULEB using LLVM's DataExtractor.
+static inline llvm::Expected<uint32_t>
+GetULEB32(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
+ const uint64_t value = data.getULEB128(c);
+ if (!c)
+ return c.takeError();
+ if (value > std::numeric_limits<uint32_t>::max())
+ return llvm::createStringError("ULEB exceeds 32 bits");
+ return value;
+}
+
+/// Helper to read a Wasm string, whcih is encoded as a vector of UTF-8 codes.
+static inline llvm::Expected<std::string>
+GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
+ llvm::Expected<uint32_t> len = GetULEB32(data, c);
+ if (!len)
+ return len.takeError();
+
+ llvm::SmallVector<uint8_t, 32> str_storage;
+ data.getU8(c, str_storage, *len);
+ if (!c)
+ return c.takeError();
+
+ return std::string(toStringRef(llvm::ArrayRef(str_storage)));
+}
+
/// Checks whether the data buffer starts with a valid Wasm module header.
static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
if (!data_sp || data_sp->GetByteSize() < kWasmHeaderSize)
@@ -51,32 +86,6 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
return version == llvm::wasm::WasmVersion;
}
-// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
-static std::optional<std::string>
-GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
- // A Wasm string is encoded as a vector of UTF-8 codes.
- // Vectors are encoded with their u32 length followed by the element
- // sequence.
- uint64_t len = data.getULEB128(c);
- if (!c) {
- consumeError(c.takeError());
- return std::nullopt;
- }
-
- if (len > std::numeric_limits<uint32_t>::max()) {
- return std::nullopt;
- }
-
- llvm::SmallVector<uint8_t, 32> str_storage;
- data.getU8(c, str_storage, len);
- if (!c) {
- consumeError(c.takeError());
- return std::nullopt;
- }
-
- return std::string(toStringRef(llvm::ArrayRef(str_storage)));
-}
-
char ObjectFileWasm::ID;
void ObjectFileWasm::Initialize() {
@@ -183,9 +192,12 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
// identifying the custom section, followed by an uninterpreted sequence
// of bytes.
lldb::offset_t prev_offset = c.tell();
- std::optional<std::string> sect_name = GetWasmString(data, c);
- if (!sect_name)
+ llvm::Expected<std::string> sect_name = GetWasmString(data, c);
+ if (!sect_name) {
+ LLDB_LOG_ERROR(GetLog(LLDBLog::Object), sect_name.takeError(),
+ "failed to parse section name: {0}");
return false;
+ }
if (payload_len < c.tell() - prev_offset)
return false;
@@ -255,26 +267,26 @@ ParseFunctions(SectionSP code_section_sp) {
code_section_sp->GetSectionData(data);
lldb::offset_t offset = 0;
- const uint64_t function_count = data.GetULEB128(&offset);
- if (function_count > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("function count overflows uint32_t");
+ llvm::Expected<uint32_t> function_count = GetULEB32(data, offset);
+ if (!function_count)
+ return function_count.takeError();
std::vector<AddressRange> functions;
- functions.reserve(function_count);
+ functions.reserve(*function_count);
- for (uint32_t i = 0; i < function_count; ++i) {
- const uint64_t function_size = data.GetULEB128(&offset);
- if (function_size > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("function size overflows uint32_t");
+ for (uint32_t i = 0; i < *function_count; ++i) {
+ llvm::Expected<uint32_t> function_size = GetULEB32(data, offset);
+ if (!function_size)
+ return function_size.takeError();
// llvm-objdump considers the ULEB with the function size to be part of the
// function. We can't do that here because that would break symbolic
// breakpoints, as that address is never executed.
- functions.emplace_back(code_section_sp, offset, function_size);
+ functions.emplace_back(code_section_sp, offset, *function_size);
std::optional<lldb::offset_t> next_offset =
- llvm::checkedAddUnsigned(offset, function_size);
+ llvm::checkedAddUnsigned<lldb::offset_t>(offset, *function_size);
if (!next_offset)
- return llvm::createStringError("function offset overflows uint64_t");
+ return llvm::createStringError("function offset overflows 64 bits");
offset = *next_offset;
}
@@ -295,44 +307,44 @@ ParseData(SectionSP data_section_sp) {
lldb::offset_t offset = 0;
- const uint64_t segment_count = data.GetULEB128(&offset);
- if (segment_count > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("segment count overflows uint32_t");
+ llvm::Expected<uint32_t> segment_count = GetULEB32(data, offset);
+ if (!segment_count)
+ return segment_count.takeError();
std::vector<WasmSegment> segments;
- segments.reserve(segment_count);
+ segments.reserve(*segment_count);
- for (uint32_t i = 0; i < segment_count; ++i) {
- const uint64_t flags = data.GetULEB128(&offset);
- if (flags > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("segment flags overflows uint32_t");
+ for (uint32_t i = 0; i < *segment_count; ++i) {
+ llvm::Expected<uint32_t> flags = GetULEB32(data, offset);
+ if (!flags)
+ return flags.takeError();
// Data segments have a mode that identifies them as either passive or
// active. An active data segment copies its contents into a memory during
// instantiation, as specified by a memory index and a constant expression
// defining an offset into that memory.
- if (flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
- const uint64_t memidx = data.GetULEB128(&offset);
- if (memidx > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("memidx overflows uint32_t");
+ if (*flags & llvm::wasm::WASM_DATA_SEGMENT_HAS_MEMINDEX) {
+ llvm::Expected<uint32_t> memidx = GetULEB32(data, offset);
+ if (!memidx)
+ return memidx.takeError();
}
- if ((flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
+ if ((*flags & llvm::wasm::WASM_DATA_SEGMENT_IS_PASSIVE) == 0) {
// Skip over the constant expression.
for (uint8_t b = 0; b != llvm::wasm::WASM_OPCODE_END;)
b = data.GetU8(&offset);
}
- const uint64_t segment_size = data.GetULEB128(&offset);
- if (segment_size > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("segment size overflows uint32_t");
+ llvm::Expected<uint32_t> segment_size = GetULEB32(data, offset);
+ if (!segment_size)
+ return segment_size.takeError();
- segments.emplace_back(data_section_sp, offset, segment_size);
+ segments.emplace_back(data_section_sp, offset, *segment_size);
std::optional<lldb::offset_t> next_offset =
- llvm::checkedAddUnsigned(offset, segment_size);
+ llvm::checkedAddUnsigned<lldb::offset_t>(offset, *segment_size);
if (!next_offset)
- return llvm::createStringError("segment offset overflows uint64_t");
+ return llvm::createStringError("segment offset overflows 64 bits");
offset = *next_offset;
}
@@ -351,9 +363,9 @@ ParseNames(SectionSP name_section_sp,
std::vector<Symbol> symbols;
while (c && c.tell() < data.size()) {
const uint8_t type = data.getU8(c);
- const uint64_t size = data.getULEB128(c);
- if (size > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("size overflows uint32_t");
+ llvm::Expected<uint32_t> size = GetULEB32(data, c);
+ if (!size)
+ return size.takeError();
switch (type) {
case llvm::wasm::WASM_NAMES_FUNCTION: {
@@ -362,26 +374,34 @@ ParseNames(SectionSP name_section_sp,
return llvm::createStringError("function count overflows uint32_t");
for (uint64_t i = 0; c && i < count; ++i) {
- const uint64_t idx = data.getULEB128(c);
- const std::optional<std::string> name = GetWasmString(data, c);
- if (!name || idx >= function_ranges.size())
+ llvm::Expected<uint32_t> idx = GetULEB32(data, c);
+ if (!idx)
+ return idx.takeError();
+ llvm::Expected<std::string> name = GetWasmString(data, c);
+ if (!name)
+ return name.takeError();
+ if (*idx >= function_ranges.size())
continue;
symbols.emplace_back(
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
- /*is_artificial=*/false, function_ranges[idx],
+ /*is_artificial=*/false, function_ranges[*idx],
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
/*flags=*/0);
}
} break;
case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
- const uint64_t count = data.getULEB128(c);
- if (count > std::numeric_limits<uint32_t>::max())
- return llvm::createStringError("data count overflows uint32_t");
- for (uint64_t i = 0; c && i < count; ++i) {
- const uint64_t idx = data.getULEB128(c);
- const std::optional<std::string> name = GetWasmString(data, c);
- if (!name || idx >= segments.size())
+ llvm::Expected<uint32_t> count = GetULEB32(data, c);
+ if (!count)
+ return count.takeError();
+ for (uint32_t i = 0; c && i < *count; ++i) {
+ llvm::Expected<uint32_t> idx = GetULEB32(data, c);
+ if (!idx)
+ return idx.takeError();
+ llvm::Expected<std::string> name = GetWasmString(data, c);
+ if (!name)
+ return name.takeError();
+ if (*idx >= segments.size())
continue;
// Update the segment name.
segments[i].name = *name;
@@ -397,9 +417,10 @@ ParseNames(SectionSP name_section_sp,
case llvm::wasm::WASM_NAMES_GLOBAL:
case llvm::wasm::WASM_NAMES_LOCAL:
default:
- std::optional<uint64_t> offset = llvm::checkedAddUnsigned(c.tell(), size);
+ std::optional<lldb::offset_t> offset =
+ llvm::checkedAddUnsigned<lldb::offset_t>(c.tell(), *size);
if (!offset)
- return llvm::createStringError("offset overflows uint64_t");
+ return llvm::createStringError("offset overflows 64 bits");
c.seek(*offset);
}
}
@@ -636,11 +657,15 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
const uint32_t kBufferSize = 1024;
DataExtractor section_header_data =
ReadImageData(sect_info.offset, kBufferSize);
+
llvm::DataExtractor data = section_header_data.GetAsLLVM();
llvm::DataExtractor::Cursor c(0);
- std::optional<std::string> symbols_url = GetWasmString(data, c);
- if (symbols_url)
- return FileSpec(*symbols_url);
+ llvm::Expected<std::string> symbols_url = GetWasmString(data, c);
+ if (!symbols_url) {
+ llvm::consumeError(symbols_url.takeError());
+ return std::nullopt;
+ }
+ return FileSpec(*symbols_url);
}
}
return std::nullopt;
More information about the lldb-commits
mailing list