[Mlir-commits] [mlir] a4baf2c - Revert "[mlir][bytecode] Add support for deferred attribute/type parsing. (#170993)"
Mehdi Amini
llvmlistbot at llvm.org
Wed Dec 17 11:29:32 PST 2025
Author: Mehdi Amini
Date: 2025-12-17T11:29:13-08:00
New Revision: a4baf2c7ffc7fffd399c4093faa707fb63334c12
URL: https://github.com/llvm/llvm-project/commit/a4baf2c7ffc7fffd399c4093faa707fb63334c12
DIFF: https://github.com/llvm/llvm-project/commit/a4baf2c7ffc7fffd399c4093faa707fb63334c12.diff
LOG: Revert "[mlir][bytecode] Add support for deferred attribute/type parsing. (#170993)"
This reverts commit 93d2ef105703254769a8f182300b329dad5ed976.
A regression was found.
See: https://github.com/llvm/llvm-project/pull/170993#issuecomment-3666792469
Added:
Modified:
mlir/lib/Bytecode/Reader/BytecodeReader.cpp
mlir/unittests/Bytecode/BytecodeTest.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 0ac5fc5358ea5..1659437e1eb24 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -27,7 +27,6 @@
#include <cstddef>
#include <cstdint>
-#include <deque>
#include <list>
#include <memory>
#include <numeric>
@@ -831,23 +830,6 @@ namespace {
/// This class provides support for reading attribute and type entries from the
/// bytecode. Attribute and Type entries are read lazily on demand, so we use
/// this reader to manage when to actually parse them from the bytecode.
-///
-/// The parsing of attributes & types are generally recursive, this can lead to
-/// stack overflows for deeply nested structures, so we track a few extra pieces
-/// of information to avoid this:
-///
-/// - `depth`: The current depth while parsing nested attributes. We defer on
-/// parsing deeply nested attributes to avoid potential stack overflows. The
-/// deferred parsing is achieved by reporting a failure when parsing a nested
-/// attribute/type and registering the index of the encountered attribute/type
-/// in the deferred parsing worklist. Hence, a failure with deffered entry
-/// does not constitute a failure, it also requires that folks return on
-/// first failure rather than attempting additional parses.
-/// - `deferredWorklist`: A list of attribute/type indices that we could not
-/// parse due to hitting the depth limit. The worklist is used to capture the
-/// indices of attributes/types that need to be parsed/reparsed when we hit
-/// the depth limit. This enables moving the tracking of what needs to be
-/// parsed to the heap.
class AttrTypeReader {
/// This class represents a single attribute or type entry.
template <typename T>
@@ -881,34 +863,12 @@ class AttrTypeReader {
ArrayRef<uint8_t> sectionData,
ArrayRef<uint8_t> offsetSectionData);
- LogicalResult readAttribute(uint64_t index, Attribute &result,
- uint64_t depth = 0) {
- return readEntry(attributes, index, result, "attribute", depth);
- }
-
- LogicalResult readType(uint64_t index, Type &result, uint64_t depth = 0) {
- return readEntry(types, index, result, "type", depth);
- }
-
/// Resolve the attribute or type at the given index. Returns nullptr on
/// failure.
- Attribute resolveAttribute(size_t index, uint64_t depth = 0) {
- return resolveEntry(attributes, index, "Attribute", depth);
- }
- Type resolveType(size_t index, uint64_t depth = 0) {
- return resolveEntry(types, index, "Type", depth);
- }
-
- Attribute getAttributeOrSentinel(size_t index) {
- if (index >= attributes.size())
- return nullptr;
- return attributes[index].entry;
- }
- Type getTypeOrSentinel(size_t index) {
- if (index >= types.size())
- return nullptr;
- return types[index].entry;
+ Attribute resolveAttribute(size_t index) {
+ return resolveEntry(attributes, index, "Attribute");
}
+ Type resolveType(size_t index) { return resolveEntry(types, index, "Type"); }
/// Parse a reference to an attribute or type using the given reader.
LogicalResult parseAttribute(EncodingReader &reader, Attribute &result) {
@@ -949,33 +909,23 @@ class AttrTypeReader {
llvm::getTypeName<T>(), ", but got: ", baseResult);
}
- /// Add an index to the deferred worklist for re-parsing.
- void addDeferredParsing(uint64_t index) { deferredWorklist.push_back(index); }
-
private:
/// Resolve the given entry at `index`.
template <typename T>
- T resolveEntry(SmallVectorImpl<Entry<T>> &entries, uint64_t index,
- StringRef entryType, uint64_t depth = 0);
+ T resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
+ StringRef entryType);
- /// Read the entry at the given index, returning failure if the entry is not
- /// yet resolved.
+ /// Parse an entry using the given reader that was encoded using the textual
+ /// assembly format.
template <typename T>
- LogicalResult readEntry(SmallVectorImpl<Entry<T>> &entries, uint64_t index,
- T &result, StringRef entryType, uint64_t depth);
+ LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
+ StringRef entryType);
/// Parse an entry using the given reader that was encoded using a custom
/// bytecode format.
template <typename T>
LogicalResult parseCustomEntry(Entry<T> &entry, EncodingReader &reader,
- StringRef entryType, uint64_t index,
- uint64_t depth);
-
- /// Parse an entry using the given reader that was encoded using the textual
- /// assembly format.
- template <typename T>
- LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
- StringRef entryType);
+ StringRef entryType);
/// The string section reader used to resolve string references when parsing
/// custom encoded attribute/type entries.
@@ -1001,10 +951,6 @@ class AttrTypeReader {
/// Reference to the parser configuration.
const ParserConfig &parserConfig;
-
- /// Worklist for deferred attribute/type parsing. This is used to handle
- /// deeply nested structures like CallSiteLoc iteratively.
- std::vector<uint64_t> deferredWorklist;
};
class DialectReader : public DialectBytecodeReader {
@@ -1013,11 +959,10 @@ class DialectReader : public DialectBytecodeReader {
const StringSectionReader &stringReader,
const ResourceSectionReader &resourceReader,
const llvm::StringMap<BytecodeDialect *> &dialectsMap,
- EncodingReader &reader, uint64_t &bytecodeVersion,
- uint64_t depth = 0)
+ EncodingReader &reader, uint64_t &bytecodeVersion)
: attrTypeReader(attrTypeReader), stringReader(stringReader),
resourceReader(resourceReader), dialectsMap(dialectsMap),
- reader(reader), bytecodeVersion(bytecodeVersion), depth(depth) {}
+ reader(reader), bytecodeVersion(bytecodeVersion) {}
InFlightDiagnostic emitError(const Twine &msg) const override {
return reader.emitError(msg);
@@ -1053,40 +998,14 @@ class DialectReader : public DialectBytecodeReader {
// IR
//===--------------------------------------------------------------------===//
- /// The maximum depth to eagerly parse nested attributes/types before
- /// deferring.
- static constexpr uint64_t maxAttrTypeDepth = 5;
-
LogicalResult readAttribute(Attribute &result) override {
- uint64_t index;
- if (failed(reader.parseVarInt(index)))
- return failure();
- if (depth > maxAttrTypeDepth) {
- if (Attribute attr = attrTypeReader.getAttributeOrSentinel(index)) {
- result = attr;
- return success();
- }
- attrTypeReader.addDeferredParsing(index);
- return failure();
- }
- return attrTypeReader.readAttribute(index, result, depth + 1);
+ return attrTypeReader.parseAttribute(reader, result);
}
LogicalResult readOptionalAttribute(Attribute &result) override {
return attrTypeReader.parseOptionalAttribute(reader, result);
}
LogicalResult readType(Type &result) override {
- uint64_t index;
- if (failed(reader.parseVarInt(index)))
- return failure();
- if (depth > maxAttrTypeDepth) {
- if (Type type = attrTypeReader.getTypeOrSentinel(index)) {
- result = type;
- return success();
- }
- attrTypeReader.addDeferredParsing(index);
- return failure();
- }
- return attrTypeReader.readType(index, result, depth + 1);
+ return attrTypeReader.parseType(reader, result);
}
FailureOr<AsmDialectResourceHandle> readResourceHandle() override {
@@ -1176,7 +1095,6 @@ class DialectReader : public DialectBytecodeReader {
const llvm::StringMap<BytecodeDialect *> &dialectsMap;
EncodingReader &reader;
uint64_t &bytecodeVersion;
- uint64_t depth;
};
/// Wraps the properties section and handles reading properties out of it.
@@ -1320,112 +1238,69 @@ LogicalResult AttrTypeReader::initialize(
}
template <typename T>
-T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries,
- uint64_t index, StringRef entryType,
- uint64_t depth) {
+T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
+ StringRef entryType) {
if (index >= entries.size()) {
emitError(fileLoc) << "invalid " << entryType << " index: " << index;
return {};
}
- // Fast path: Try direct parsing without worklist overhead. This handles the
- // common case where there are no deferred dependencies.
- assert(deferredWorklist.empty());
- T result;
- if (succeeded(readEntry(entries, index, result, entryType, depth))) {
- assert(deferredWorklist.empty());
- return result;
- }
- if (deferredWorklist.empty()) {
- // Failed with no deferred entries is error.
- return T();
- }
-
- // Slow path: Use worklist to handle deferred dependencies. Use a deque to
- // iteratively resolve entries with dependencies.
- // - Pop from front to process
- // - Push new dependencies to front (depth-first)
- // - Move failed entries to back (retry after dependencies)
- std::deque<size_t> worklist;
- llvm::DenseSet<size_t> inWorklist;
-
- // Add the original index and any dependencies from the fast path attempt.
- worklist.push_back(index);
- inWorklist.insert(index);
- for (uint64_t idx : llvm::reverse(deferredWorklist)) {
- if (inWorklist.insert(idx).second)
- worklist.push_front(idx);
- }
-
- while (!worklist.empty()) {
- size_t currentIndex = worklist.front();
- worklist.pop_front();
-
- // Clear the deferred worklist before parsing to capture any new entries.
- deferredWorklist.clear();
+ // If the entry has already been resolved, there is nothing left to do.
+ Entry<T> &entry = entries[index];
+ if (entry.entry)
+ return entry.entry;
- T result;
- if (succeeded(readEntry(entries, currentIndex, result, entryType, depth))) {
- inWorklist.erase(currentIndex);
- continue;
- }
+ // Parse the entry.
+ EncodingReader reader(entry.data, fileLoc);
- if (deferredWorklist.empty()) {
- // Parsing failed with no deferred entries which implies an error.
+ // Parse based on how the entry was encoded.
+ if (entry.hasCustomEncoding) {
+ if (failed(parseCustomEntry(entry, reader, entryType)))
return T();
- }
-
- // Move this entry to the back to retry after dependencies.
- worklist.push_back(currentIndex);
+ } else if (failed(parseAsmEntry(entry.entry, reader, entryType))) {
+ return T();
+ }
- // Add dependencies to the front (in reverse so they maintain order).
- for (uint64_t idx : llvm::reverse(deferredWorklist)) {
- if (inWorklist.insert(idx).second)
- worklist.push_front(idx);
- }
- deferredWorklist.clear();
+ if (!reader.empty()) {
+ reader.emitError("unexpected trailing bytes after " + entryType + " entry");
+ return T();
}
- return entries[index].entry;
+ return entry.entry;
}
template <typename T>
-LogicalResult AttrTypeReader::readEntry(SmallVectorImpl<Entry<T>> &entries,
- uint64_t index, T &result,
- StringRef entryType, uint64_t depth) {
- if (index >= entries.size())
- return emitError(fileLoc) << "invalid " << entryType << " index: " << index;
-
- // If the entry has already been resolved, return it.
- Entry<T> &entry = entries[index];
- if (entry.entry) {
- result = entry.entry;
- return success();
- }
-
- // If the entry hasn't been resolved, try to parse it.
- EncodingReader reader(entry.data, fileLoc);
- LogicalResult parseResult =
- entry.hasCustomEncoding
- ? parseCustomEntry(entry, reader, entryType, index, depth)
- : parseAsmEntry(entry.entry, reader, entryType);
- if (failed(parseResult))
+LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
+ StringRef entryType) {
+ StringRef asmStr;
+ if (failed(reader.parseNullTerminatedString(asmStr)))
return failure();
- if (!reader.empty())
- return reader.emitError("unexpected trailing bytes after " + entryType +
- " entry");
+ // Invoke the MLIR assembly parser to parse the entry text.
+ size_t numRead = 0;
+ MLIRContext *context = fileLoc->getContext();
+ if constexpr (std::is_same_v<T, Type>)
+ result =
+ ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
+ else
+ result = ::parseAttribute(asmStr, context, Type(), &numRead,
+ /*isKnownNullTerminated=*/true);
+ if (!result)
+ return failure();
- result = entry.entry;
+ // Ensure there weren't dangling characters after the entry.
+ if (numRead != asmStr.size()) {
+ return reader.emitError("trailing characters found after ", entryType,
+ " assembly format: ", asmStr.drop_front(numRead));
+ }
return success();
}
template <typename T>
LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
EncodingReader &reader,
- StringRef entryType,
- uint64_t index, uint64_t depth) {
+ StringRef entryType) {
DialectReader dialectReader(*this, stringReader, resourceReader, dialectsMap,
- reader, bytecodeVersion, depth);
+ reader, bytecodeVersion);
if (failed(entry.dialect->load(dialectReader, fileLoc.getContext())))
return failure();
@@ -1475,33 +1350,6 @@ LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
return success(!!entry.entry);
}
-template <typename T>
-LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
- StringRef entryType) {
- StringRef asmStr;
- if (failed(reader.parseNullTerminatedString(asmStr)))
- return failure();
-
- // Invoke the MLIR assembly parser to parse the entry text.
- size_t numRead = 0;
- MLIRContext *context = fileLoc->getContext();
- if constexpr (std::is_same_v<T, Type>)
- result =
- ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
- else
- result = ::parseAttribute(asmStr, context, Type(), &numRead,
- /*isKnownNullTerminated=*/true);
- if (!result)
- return failure();
-
- // Ensure there weren't dangling characters after the entry.
- if (numRead != asmStr.size()) {
- return reader.emitError("trailing characters found after ", entryType,
- " assembly format: ", asmStr.drop_front(numRead));
- }
- return success();
-}
-
//===----------------------------------------------------------------------===//
// Bytecode Reader
//===----------------------------------------------------------------------===//
diff --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index 30e7ed9b6cb7e..d7b442f6832d0 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -15,7 +15,6 @@
#include "mlir/IR/OwningOpRef.h"
#include "mlir/Parser/Parser.h"
-#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Endian.h"
@@ -229,39 +228,3 @@ TEST(Bytecode, OpWithoutProperties) {
EXPECT_TRUE(OperationEquivalence::computeHash(op.get()) ==
OperationEquivalence::computeHash(roundtripped));
}
-
-TEST(Bytecode, DeepCallSiteLoc) {
- MLIRContext context;
- ParserConfig config(&context);
-
- // Create a deep CallSiteLoc chain to test iterative parsing.
- Location baseLoc = FileLineColLoc::get(&context, "test.mlir", 1, 1);
- Location loc = baseLoc;
- constexpr int kDepth = 1000;
- for (int i = 0; i < kDepth; ++i) {
- loc = CallSiteLoc::get(loc, baseLoc);
- }
-
- // Create a simple module with the deep location.
- Builder builder(&context);
- OwningOpRef<ModuleOp> module =
- ModuleOp::create(loc, /*attributes=*/std::nullopt);
- ASSERT_TRUE(module);
-
- // Write to bytecode.
- std::string bytecode;
- llvm::raw_string_ostream os(bytecode);
- ASSERT_TRUE(succeeded(writeBytecodeToFile(module.get(), os)));
-
- // Parse it back using the bytecode reader.
- std::unique_ptr<Block> block = std::make_unique<Block>();
- ASSERT_TRUE(succeeded(readBytecodeFile(
- llvm::MemoryBufferRef(bytecode, "string-buffer"), block.get(), config)));
-
- // Verify we got the roundtripped module.
- ASSERT_FALSE(block->empty());
- Operation *roundTripped = &block->front();
-
- // Verify the location matches.
- EXPECT_EQ(module.get()->getLoc(), roundTripped->getLoc());
-}
More information about the Mlir-commits
mailing list