[Mlir-commits] [mlir] a4baf2c - Revert "[mlir][bytecode] Add support for deferred attribute/type parsing. (#170993)"

Mehdi Amini llvmlistbot at llvm.org
Wed Dec 17 11:29:32 PST 2025


Author: Mehdi Amini
Date: 2025-12-17T11:29:13-08:00
New Revision: a4baf2c7ffc7fffd399c4093faa707fb63334c12

URL: https://github.com/llvm/llvm-project/commit/a4baf2c7ffc7fffd399c4093faa707fb63334c12
DIFF: https://github.com/llvm/llvm-project/commit/a4baf2c7ffc7fffd399c4093faa707fb63334c12.diff

LOG: Revert "[mlir][bytecode] Add support for deferred attribute/type parsing. (#170993)"

This reverts commit 93d2ef105703254769a8f182300b329dad5ed976.

A regression was found.

See: https://github.com/llvm/llvm-project/pull/170993#issuecomment-3666792469

Added: 
    

Modified: 
    mlir/lib/Bytecode/Reader/BytecodeReader.cpp
    mlir/unittests/Bytecode/BytecodeTest.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 0ac5fc5358ea5..1659437e1eb24 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -27,7 +27,6 @@
 
 #include <cstddef>
 #include <cstdint>
-#include <deque>
 #include <list>
 #include <memory>
 #include <numeric>
@@ -831,23 +830,6 @@ namespace {
 /// This class provides support for reading attribute and type entries from the
 /// bytecode. Attribute and Type entries are read lazily on demand, so we use
 /// this reader to manage when to actually parse them from the bytecode.
-///
-/// The parsing of attributes & types are generally recursive, this can lead to
-/// stack overflows for deeply nested structures, so we track a few extra pieces
-/// of information to avoid this:
-///
-/// - `depth`: The current depth while parsing nested attributes. We defer on
-///   parsing deeply nested attributes to avoid potential stack overflows. The
-///   deferred parsing is achieved by reporting a failure when parsing a nested
-///   attribute/type and registering the index of the encountered attribute/type
-///   in the deferred parsing worklist. Hence, a failure with deffered entry
-///   does not constitute a failure, it also requires that folks return on
-///   first failure rather than attempting additional parses.
-/// - `deferredWorklist`: A list of attribute/type indices that we could not
-///   parse due to hitting the depth limit. The worklist is used to capture the
-///   indices of attributes/types that need to be parsed/reparsed when we hit
-///   the depth limit. This enables moving the tracking of what needs to be
-///   parsed to the heap.
 class AttrTypeReader {
   /// This class represents a single attribute or type entry.
   template <typename T>
@@ -881,34 +863,12 @@ class AttrTypeReader {
              ArrayRef<uint8_t> sectionData,
              ArrayRef<uint8_t> offsetSectionData);
 
-  LogicalResult readAttribute(uint64_t index, Attribute &result,
-                              uint64_t depth = 0) {
-    return readEntry(attributes, index, result, "attribute", depth);
-  }
-
-  LogicalResult readType(uint64_t index, Type &result, uint64_t depth = 0) {
-    return readEntry(types, index, result, "type", depth);
-  }
-
   /// Resolve the attribute or type at the given index. Returns nullptr on
   /// failure.
-  Attribute resolveAttribute(size_t index, uint64_t depth = 0) {
-    return resolveEntry(attributes, index, "Attribute", depth);
-  }
-  Type resolveType(size_t index, uint64_t depth = 0) {
-    return resolveEntry(types, index, "Type", depth);
-  }
-
-  Attribute getAttributeOrSentinel(size_t index) {
-    if (index >= attributes.size())
-      return nullptr;
-    return attributes[index].entry;
-  }
-  Type getTypeOrSentinel(size_t index) {
-    if (index >= types.size())
-      return nullptr;
-    return types[index].entry;
+  Attribute resolveAttribute(size_t index) {
+    return resolveEntry(attributes, index, "Attribute");
   }
+  Type resolveType(size_t index) { return resolveEntry(types, index, "Type"); }
 
   /// Parse a reference to an attribute or type using the given reader.
   LogicalResult parseAttribute(EncodingReader &reader, Attribute &result) {
@@ -949,33 +909,23 @@ class AttrTypeReader {
                             llvm::getTypeName<T>(), ", but got: ", baseResult);
   }
 
-  /// Add an index to the deferred worklist for re-parsing.
-  void addDeferredParsing(uint64_t index) { deferredWorklist.push_back(index); }
-
 private:
   /// Resolve the given entry at `index`.
   template <typename T>
-  T resolveEntry(SmallVectorImpl<Entry<T>> &entries, uint64_t index,
-                 StringRef entryType, uint64_t depth = 0);
+  T resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
+                 StringRef entryType);
 
-  /// Read the entry at the given index, returning failure if the entry is not
-  /// yet resolved.
+  /// Parse an entry using the given reader that was encoded using the textual
+  /// assembly format.
   template <typename T>
-  LogicalResult readEntry(SmallVectorImpl<Entry<T>> &entries, uint64_t index,
-                          T &result, StringRef entryType, uint64_t depth);
+  LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
+                              StringRef entryType);
 
   /// Parse an entry using the given reader that was encoded using a custom
   /// bytecode format.
   template <typename T>
   LogicalResult parseCustomEntry(Entry<T> &entry, EncodingReader &reader,
-                                 StringRef entryType, uint64_t index,
-                                 uint64_t depth);
-
-  /// Parse an entry using the given reader that was encoded using the textual
-  /// assembly format.
-  template <typename T>
-  LogicalResult parseAsmEntry(T &result, EncodingReader &reader,
-                              StringRef entryType);
+                                 StringRef entryType);
 
   /// The string section reader used to resolve string references when parsing
   /// custom encoded attribute/type entries.
@@ -1001,10 +951,6 @@ class AttrTypeReader {
 
   /// Reference to the parser configuration.
   const ParserConfig &parserConfig;
-
-  /// Worklist for deferred attribute/type parsing. This is used to handle
-  /// deeply nested structures like CallSiteLoc iteratively.
-  std::vector<uint64_t> deferredWorklist;
 };
 
 class DialectReader : public DialectBytecodeReader {
@@ -1013,11 +959,10 @@ class DialectReader : public DialectBytecodeReader {
                 const StringSectionReader &stringReader,
                 const ResourceSectionReader &resourceReader,
                 const llvm::StringMap<BytecodeDialect *> &dialectsMap,
-                EncodingReader &reader, uint64_t &bytecodeVersion,
-                uint64_t depth = 0)
+                EncodingReader &reader, uint64_t &bytecodeVersion)
       : attrTypeReader(attrTypeReader), stringReader(stringReader),
         resourceReader(resourceReader), dialectsMap(dialectsMap),
-        reader(reader), bytecodeVersion(bytecodeVersion), depth(depth) {}
+        reader(reader), bytecodeVersion(bytecodeVersion) {}
 
   InFlightDiagnostic emitError(const Twine &msg) const override {
     return reader.emitError(msg);
@@ -1053,40 +998,14 @@ class DialectReader : public DialectBytecodeReader {
   // IR
   //===--------------------------------------------------------------------===//
 
-  /// The maximum depth to eagerly parse nested attributes/types before
-  /// deferring.
-  static constexpr uint64_t maxAttrTypeDepth = 5;
-
   LogicalResult readAttribute(Attribute &result) override {
-    uint64_t index;
-    if (failed(reader.parseVarInt(index)))
-      return failure();
-    if (depth > maxAttrTypeDepth) {
-      if (Attribute attr = attrTypeReader.getAttributeOrSentinel(index)) {
-        result = attr;
-        return success();
-      }
-      attrTypeReader.addDeferredParsing(index);
-      return failure();
-    }
-    return attrTypeReader.readAttribute(index, result, depth + 1);
+    return attrTypeReader.parseAttribute(reader, result);
   }
   LogicalResult readOptionalAttribute(Attribute &result) override {
     return attrTypeReader.parseOptionalAttribute(reader, result);
   }
   LogicalResult readType(Type &result) override {
-    uint64_t index;
-    if (failed(reader.parseVarInt(index)))
-      return failure();
-    if (depth > maxAttrTypeDepth) {
-      if (Type type = attrTypeReader.getTypeOrSentinel(index)) {
-        result = type;
-        return success();
-      }
-      attrTypeReader.addDeferredParsing(index);
-      return failure();
-    }
-    return attrTypeReader.readType(index, result, depth + 1);
+    return attrTypeReader.parseType(reader, result);
   }
 
   FailureOr<AsmDialectResourceHandle> readResourceHandle() override {
@@ -1176,7 +1095,6 @@ class DialectReader : public DialectBytecodeReader {
   const llvm::StringMap<BytecodeDialect *> &dialectsMap;
   EncodingReader &reader;
   uint64_t &bytecodeVersion;
-  uint64_t depth;
 };
 
 /// Wraps the properties section and handles reading properties out of it.
@@ -1320,112 +1238,69 @@ LogicalResult AttrTypeReader::initialize(
 }
 
 template <typename T>
-T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries,
-                               uint64_t index, StringRef entryType,
-                               uint64_t depth) {
+T AttrTypeReader::resolveEntry(SmallVectorImpl<Entry<T>> &entries, size_t index,
+                               StringRef entryType) {
   if (index >= entries.size()) {
     emitError(fileLoc) << "invalid " << entryType << " index: " << index;
     return {};
   }
 
-  // Fast path: Try direct parsing without worklist overhead. This handles the
-  // common case where there are no deferred dependencies.
-  assert(deferredWorklist.empty());
-  T result;
-  if (succeeded(readEntry(entries, index, result, entryType, depth))) {
-    assert(deferredWorklist.empty());
-    return result;
-  }
-  if (deferredWorklist.empty()) {
-    // Failed with no deferred entries is error.
-    return T();
-  }
-
-  // Slow path: Use worklist to handle deferred dependencies. Use a deque to
-  // iteratively resolve entries with dependencies.
-  // - Pop from front to process
-  // - Push new dependencies to front (depth-first)
-  // - Move failed entries to back (retry after dependencies)
-  std::deque<size_t> worklist;
-  llvm::DenseSet<size_t> inWorklist;
-
-  // Add the original index and any dependencies from the fast path attempt.
-  worklist.push_back(index);
-  inWorklist.insert(index);
-  for (uint64_t idx : llvm::reverse(deferredWorklist)) {
-    if (inWorklist.insert(idx).second)
-      worklist.push_front(idx);
-  }
-
-  while (!worklist.empty()) {
-    size_t currentIndex = worklist.front();
-    worklist.pop_front();
-
-    // Clear the deferred worklist before parsing to capture any new entries.
-    deferredWorklist.clear();
+  // If the entry has already been resolved, there is nothing left to do.
+  Entry<T> &entry = entries[index];
+  if (entry.entry)
+    return entry.entry;
 
-    T result;
-    if (succeeded(readEntry(entries, currentIndex, result, entryType, depth))) {
-      inWorklist.erase(currentIndex);
-      continue;
-    }
+  // Parse the entry.
+  EncodingReader reader(entry.data, fileLoc);
 
-    if (deferredWorklist.empty()) {
-      // Parsing failed with no deferred entries which implies an error.
+  // Parse based on how the entry was encoded.
+  if (entry.hasCustomEncoding) {
+    if (failed(parseCustomEntry(entry, reader, entryType)))
       return T();
-    }
-
-    // Move this entry to the back to retry after dependencies.
-    worklist.push_back(currentIndex);
+  } else if (failed(parseAsmEntry(entry.entry, reader, entryType))) {
+    return T();
+  }
 
-    // Add dependencies to the front (in reverse so they maintain order).
-    for (uint64_t idx : llvm::reverse(deferredWorklist)) {
-      if (inWorklist.insert(idx).second)
-        worklist.push_front(idx);
-    }
-    deferredWorklist.clear();
+  if (!reader.empty()) {
+    reader.emitError("unexpected trailing bytes after " + entryType + " entry");
+    return T();
   }
-  return entries[index].entry;
+  return entry.entry;
 }
 
 template <typename T>
-LogicalResult AttrTypeReader::readEntry(SmallVectorImpl<Entry<T>> &entries,
-                                        uint64_t index, T &result,
-                                        StringRef entryType, uint64_t depth) {
-  if (index >= entries.size())
-    return emitError(fileLoc) << "invalid " << entryType << " index: " << index;
-
-  // If the entry has already been resolved, return it.
-  Entry<T> &entry = entries[index];
-  if (entry.entry) {
-    result = entry.entry;
-    return success();
-  }
-
-  // If the entry hasn't been resolved, try to parse it.
-  EncodingReader reader(entry.data, fileLoc);
-  LogicalResult parseResult =
-      entry.hasCustomEncoding
-          ? parseCustomEntry(entry, reader, entryType, index, depth)
-          : parseAsmEntry(entry.entry, reader, entryType);
-  if (failed(parseResult))
+LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
+                                            StringRef entryType) {
+  StringRef asmStr;
+  if (failed(reader.parseNullTerminatedString(asmStr)))
     return failure();
 
-  if (!reader.empty())
-    return reader.emitError("unexpected trailing bytes after " + entryType +
-                            " entry");
+  // Invoke the MLIR assembly parser to parse the entry text.
+  size_t numRead = 0;
+  MLIRContext *context = fileLoc->getContext();
+  if constexpr (std::is_same_v<T, Type>)
+    result =
+        ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
+  else
+    result = ::parseAttribute(asmStr, context, Type(), &numRead,
+                              /*isKnownNullTerminated=*/true);
+  if (!result)
+    return failure();
 
-  result = entry.entry;
+  // Ensure there weren't dangling characters after the entry.
+  if (numRead != asmStr.size()) {
+    return reader.emitError("trailing characters found after ", entryType,
+                            " assembly format: ", asmStr.drop_front(numRead));
+  }
   return success();
 }
 
 template <typename T>
 LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
                                                EncodingReader &reader,
-                                               StringRef entryType,
-                                               uint64_t index, uint64_t depth) {
+                                               StringRef entryType) {
   DialectReader dialectReader(*this, stringReader, resourceReader, dialectsMap,
-                              reader, bytecodeVersion, depth);
+                              reader, bytecodeVersion);
   if (failed(entry.dialect->load(dialectReader, fileLoc.getContext())))
     return failure();
 
@@ -1475,33 +1350,6 @@ LogicalResult AttrTypeReader::parseCustomEntry(Entry<T> &entry,
   return success(!!entry.entry);
 }
 
-template <typename T>
-LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
-                                            StringRef entryType) {
-  StringRef asmStr;
-  if (failed(reader.parseNullTerminatedString(asmStr)))
-    return failure();
-
-  // Invoke the MLIR assembly parser to parse the entry text.
-  size_t numRead = 0;
-  MLIRContext *context = fileLoc->getContext();
-  if constexpr (std::is_same_v<T, Type>)
-    result =
-        ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
-  else
-    result = ::parseAttribute(asmStr, context, Type(), &numRead,
-                              /*isKnownNullTerminated=*/true);
-  if (!result)
-    return failure();
-
-  // Ensure there weren't dangling characters after the entry.
-  if (numRead != asmStr.size()) {
-    return reader.emitError("trailing characters found after ", entryType,
-                            " assembly format: ", asmStr.drop_front(numRead));
-  }
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // Bytecode Reader
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/unittests/Bytecode/BytecodeTest.cpp b/mlir/unittests/Bytecode/BytecodeTest.cpp
index 30e7ed9b6cb7e..d7b442f6832d0 100644
--- a/mlir/unittests/Bytecode/BytecodeTest.cpp
+++ b/mlir/unittests/Bytecode/BytecodeTest.cpp
@@ -15,7 +15,6 @@
 #include "mlir/IR/OwningOpRef.h"
 #include "mlir/Parser/Parser.h"
 
-#include "mlir/IR/BuiltinOps.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/Endian.h"
@@ -229,39 +228,3 @@ TEST(Bytecode, OpWithoutProperties) {
   EXPECT_TRUE(OperationEquivalence::computeHash(op.get()) ==
               OperationEquivalence::computeHash(roundtripped));
 }
-
-TEST(Bytecode, DeepCallSiteLoc) {
-  MLIRContext context;
-  ParserConfig config(&context);
-
-  // Create a deep CallSiteLoc chain to test iterative parsing.
-  Location baseLoc = FileLineColLoc::get(&context, "test.mlir", 1, 1);
-  Location loc = baseLoc;
-  constexpr int kDepth = 1000;
-  for (int i = 0; i < kDepth; ++i) {
-    loc = CallSiteLoc::get(loc, baseLoc);
-  }
-
-  // Create a simple module with the deep location.
-  Builder builder(&context);
-  OwningOpRef<ModuleOp> module =
-      ModuleOp::create(loc, /*attributes=*/std::nullopt);
-  ASSERT_TRUE(module);
-
-  // Write to bytecode.
-  std::string bytecode;
-  llvm::raw_string_ostream os(bytecode);
-  ASSERT_TRUE(succeeded(writeBytecodeToFile(module.get(), os)));
-
-  // Parse it back using the bytecode reader.
-  std::unique_ptr<Block> block = std::make_unique<Block>();
-  ASSERT_TRUE(succeeded(readBytecodeFile(
-      llvm::MemoryBufferRef(bytecode, "string-buffer"), block.get(), config)));
-
-  // Verify we got the roundtripped module.
-  ASSERT_FALSE(block->empty());
-  Operation *roundTripped = &block->front();
-
-  // Verify the location matches.
-  EXPECT_EQ(module.get()->getLoc(), roundTripped->getLoc());
-}


        


More information about the Mlir-commits mailing list