[Mlir-commits] [mlir] [MLIR] Fix duplicated attribute nodes in MLIR bytecode deserialization (PR #151267)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Jul 31 01:01:06 PDT 2025


https://github.com/hankluo6 updated https://github.com/llvm/llvm-project/pull/151267

>From 158f2643ac6bb48cc4b12553ef7c7fbca39a0a52 Mon Sep 17 00:00:00 2001
From: hankluo6 <hankluo6 at gmail.com>
Date: Sun, 27 Jul 2025 18:38:02 -0700
Subject: [PATCH 1/2] Fix duplicated attribute nodes in MLIR bytecode
 deserialization

---
 mlir/include/mlir/AsmParser/AsmParser.h     |  3 ++-
 mlir/lib/AsmParser/DialectSymbolParser.cpp  | 25 +++++++++++++++++++--
 mlir/lib/AsmParser/ParserState.h            |  3 +++
 mlir/lib/Bytecode/Reader/BytecodeReader.cpp |  6 ++++-
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/AsmParser/AsmParser.h b/mlir/include/mlir/AsmParser/AsmParser.h
index 33daf7ca26f49..f39b3bd853a2a 100644
--- a/mlir/include/mlir/AsmParser/AsmParser.h
+++ b/mlir/include/mlir/AsmParser/AsmParser.h
@@ -53,7 +53,8 @@ parseAsmSourceFile(const llvm::SourceMgr &sourceMgr, Block *block,
 /// null terminated.
 Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context,
                          Type type = {}, size_t *numRead = nullptr,
-                         bool isKnownNullTerminated = false);
+                         bool isKnownNullTerminated = false,
+                         llvm::StringMap<Attribute> *attributesCache = nullptr);
 
 /// This parses a single MLIR type to an MLIR context if it was valid. If not,
 /// an error diagnostic is emitted to the context.
diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp
index 8b14e71118c3a..de8e3c1fc1e72 100644
--- a/mlir/lib/AsmParser/DialectSymbolParser.cpp
+++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp
@@ -245,6 +245,14 @@ static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState,
       return nullptr;
   }
 
+  if constexpr (std::is_same_v<Symbol, Attribute>) {
+    auto &cache = p.getState().symbols.attributesCache;
+
+    auto cacheIt = cache.find(symbolData);
+    if (cacheIt != cache.end()) {
+      return cacheIt->second;
+    }
+  }
   return createSymbol(dialectName, symbolData, loc);
 }
 
@@ -337,6 +345,7 @@ Type Parser::parseExtendedType() {
 template <typename T, typename ParserFn>
 static T parseSymbol(StringRef inputStr, MLIRContext *context,
                      size_t *numReadOut, bool isKnownNullTerminated,
+                     llvm::StringMap<Attribute> *attributesCache,
                      ParserFn &&parserFn) {
   // Set the buffer name to the string being parsed, so that it appears in error
   // diagnostics.
@@ -348,6 +357,9 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context,
   SourceMgr sourceMgr;
   sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
   SymbolState aliasState;
+  if (attributesCache)
+    aliasState.attributesCache = *attributesCache;
+
   ParserConfig config(context);
   ParserState state(sourceMgr, config, aliasState, /*asmState=*/nullptr,
                     /*codeCompleteContext=*/nullptr);
@@ -358,6 +370,13 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context,
   if (!symbol)
     return T();
 
+  if constexpr (std::is_same_v<T, Attribute>) {
+    // Cache key: the text after the first '.', i.e. sans the dialect name.
+    StringRef cacheKey = inputStr.split('.').second;
+    if (attributesCache && !cacheKey.empty()) {
+      (*attributesCache)[cacheKey] = symbol;
+    }
+  }
   // Provide the number of bytes that were read.
   Token endTok = parser.getToken();
   size_t numRead =
@@ -374,13 +393,15 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context,
 
 Attribute mlir::parseAttribute(StringRef attrStr, MLIRContext *context,
                                Type type, size_t *numRead,
-                               bool isKnownNullTerminated) {
+                               bool isKnownNullTerminated,
+                               llvm::StringMap<Attribute> *attributesCache) {
   return parseSymbol<Attribute>(
-      attrStr, context, numRead, isKnownNullTerminated,
+      attrStr, context, numRead, isKnownNullTerminated, attributesCache,
       [type](Parser &parser) { return parser.parseAttribute(type); });
 }
 Type mlir::parseType(StringRef typeStr, MLIRContext *context, size_t *numRead,
                      bool isKnownNullTerminated) {
   return parseSymbol<Type>(typeStr, context, numRead, isKnownNullTerminated,
+                           /*attributesCache=*/nullptr,
                            [](Parser &parser) { return parser.parseType(); });
 }
diff --git a/mlir/lib/AsmParser/ParserState.h b/mlir/lib/AsmParser/ParserState.h
index 159058a18fa4e..aa53032107cbf 100644
--- a/mlir/lib/AsmParser/ParserState.h
+++ b/mlir/lib/AsmParser/ParserState.h
@@ -40,6 +40,9 @@ struct SymbolState {
 
   /// A map from unique integer identifier to DistinctAttr.
   DenseMap<uint64_t, DistinctAttr> distinctAttributes;
+
+  /// A cache from parsed attribute symbol data to the resulting Attribute.
+  llvm::StringMap<Attribute> attributesCache;
 };
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 44458d010c6c8..0f97443433774 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -895,6 +895,10 @@ class AttrTypeReader {
   SmallVector<AttrEntry> attributes;
   SmallVector<TypeEntry> types;
 
+  /// The map of cached attributes, used to avoid re-parsing the same
+  /// attribute multiple times.
+  llvm::StringMap<Attribute> attributesCache;
+
   /// A location used for error emission.
   Location fileLoc;
 
@@ -1235,7 +1239,7 @@ LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
         ::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
   else
     result = ::parseAttribute(asmStr, context, Type(), &numRead,
-                              /*isKnownNullTerminated=*/true);
+                              /*isKnownNullTerminated=*/true, &attributesCache);
   if (!result)
     return failure();
 

>From 5e3bddf42fa5f7abbf43d2a6515c904ce7f2cbdb Mon Sep 17 00:00:00 2001
From: hankluo6 <hankluo6 at gmail.com>
Date: Thu, 31 Jul 2025 00:59:27 -0700
Subject: [PATCH 2/2] Add test

---
 .../Dialect/LLVMIR/recursive-debuginfo.mlir   | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir

diff --git a/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir
new file mode 100644
index 0000000000000..45617ef422482
--- /dev/null
+++ b/mlir/test/Dialect/LLVMIR/recursive-debuginfo.mlir
@@ -0,0 +1,32 @@
+// RUN: mlir-opt -emit-bytecode %s | mlir-translate --mlir-to-llvmir | FileCheck %s
+
+#di_basic_type = #llvm.di_basic_type<tag = DW_TAG_base_type, name = "int", sizeInBits = 32, encoding = DW_ATE_signed>
+#di_file = #llvm.di_file<"foo.c" in "/mlir/">
+#di_file1 = #llvm.di_file<"foo.c" in "/mlir/">
+#di_subprogram = #llvm.di_subprogram<recId = distinct[0]<>, isRecSelf = true>
+#di_compile_unit = #llvm.di_compile_unit<id = distinct[1]<>, sourceLanguage = DW_LANG_C11, file = #di_file, producer = "MLIR", isOptimized = true, emissionKind = Full, nameTableKind = None>
+#di_local_variable = #llvm.di_local_variable<scope = #di_subprogram, name = "a", file = #di_file1, line = 2, type = #di_basic_type>
+#di_subroutine_type = #llvm.di_subroutine_type<types = #di_basic_type>
+#di_subprogram1 = #llvm.di_subprogram<recId = distinct[0]<>, id = distinct[2]<>, compileUnit = #di_compile_unit, scope = #di_file1, name = "main", file = #di_file1, line = 1, scopeLine = 1, subprogramFlags = "Definition|Optimized", type = #di_subroutine_type, retainedNodes = #di_local_variable>
+#di_local_variable1 = #llvm.di_local_variable<scope = #di_subprogram1, name = "a", file = #di_file1, line = 2, type = #di_basic_type>
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<i64 = dense<64> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>>, llvm.ident = "MLIR", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  llvm.module_flags [#llvm.mlir.module_flag<warning, "Debug Info Version", 3 : i32>]
+
+  // CHECK: define i32 @main
+  llvm.func @main() -> i32 attributes {passthrough = ["noinline"]} {
+    %0 = llvm.mlir.constant(0 : i32) : i32 loc(#loc3)
+    llvm.intr.dbg.value #di_local_variable1 = %0 : i32 loc(#loc7)
+    llvm.return %0 : i32 loc(#loc8)
+  } loc(#loc6)
+} loc(#loc)
+#loc = loc("foo.c":0:0)
+#loc1 = loc("main")
+#loc2 = loc("foo.c":1:0)
+#loc3 = loc(unknown)
+#loc4 = loc("foo.c":0:0)
+#loc5 = loc("foo.c":3:0)
+#loc6 = loc(fused<#di_subprogram1>[#loc1, #loc2])
+#loc7 = loc(fused<#di_subprogram1>[#loc4])
+#loc8 = loc(fused<#di_subprogram1>[#loc5])
+



More information about the Mlir-commits mailing list