[Mlir-commits] [mlir] Fix duplicated attribute nodes in MLIR bytecode deserialization (PR #151267)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Jul 29 19:26:55 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Hank (hankluo6)
<details>
<summary>Changes</summary>
Fixes #<!-- -->150163
MLIR bytecode does not preserve alias definitions, so each attribute encountered during deserialization is treated as a new one. This can generate duplicate `DISubprogram` nodes during deserialization.
The patch adds a `StringMap` cache that records attributes and fetches them when encountered again.
---
Full diff: https://github.com/llvm/llvm-project/pull/151267.diff
4 Files Affected:
- (modified) mlir/include/mlir/AsmParser/AsmParser.h (+2-1)
- (modified) mlir/lib/AsmParser/DialectSymbolParser.cpp (+23-2)
- (modified) mlir/lib/AsmParser/ParserState.h (+3)
- (modified) mlir/lib/Bytecode/Reader/BytecodeReader.cpp (+5-1)
``````````diff
diff --git a/mlir/include/mlir/AsmParser/AsmParser.h b/mlir/include/mlir/AsmParser/AsmParser.h
index 33daf7ca26f49..f39b3bd853a2a 100644
--- a/mlir/include/mlir/AsmParser/AsmParser.h
+++ b/mlir/include/mlir/AsmParser/AsmParser.h
@@ -53,7 +53,8 @@ parseAsmSourceFile(const llvm::SourceMgr &sourceMgr, Block *block,
/// null terminated.
Attribute parseAttribute(llvm::StringRef attrStr, MLIRContext *context,
Type type = {}, size_t *numRead = nullptr,
- bool isKnownNullTerminated = false);
+ bool isKnownNullTerminated = false,
+ llvm::StringMap<Attribute> *attributesCache = nullptr);
/// This parses a single MLIR type to an MLIR context if it was valid. If not,
/// an error diagnostic is emitted to the context.
diff --git a/mlir/lib/AsmParser/DialectSymbolParser.cpp b/mlir/lib/AsmParser/DialectSymbolParser.cpp
index 8b14e71118c3a..de8e3c1fc1e72 100644
--- a/mlir/lib/AsmParser/DialectSymbolParser.cpp
+++ b/mlir/lib/AsmParser/DialectSymbolParser.cpp
@@ -245,6 +245,14 @@ static Symbol parseExtendedSymbol(Parser &p, AsmParserState *asmState,
return nullptr;
}
+ if constexpr (std::is_same_v<Symbol, Attribute>) {
+ auto &cache = p.getState().symbols.attributesCache;
+
+ auto cacheIt = cache.find(symbolData);
+ if (cacheIt != cache.end()) {
+ return cacheIt->second;
+ }
+ }
return createSymbol(dialectName, symbolData, loc);
}
@@ -337,6 +345,7 @@ Type Parser::parseExtendedType() {
template <typename T, typename ParserFn>
static T parseSymbol(StringRef inputStr, MLIRContext *context,
size_t *numReadOut, bool isKnownNullTerminated,
+ llvm::StringMap<Attribute> *attributesCache,
ParserFn &&parserFn) {
// Set the buffer name to the string being parsed, so that it appears in error
// diagnostics.
@@ -348,6 +357,9 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context,
SourceMgr sourceMgr;
sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
SymbolState aliasState;
+ if (attributesCache)
+ aliasState.attributesCache = *attributesCache;
+
ParserConfig config(context);
ParserState state(sourceMgr, config, aliasState, /*asmState=*/nullptr,
/*codeCompleteContext=*/nullptr);
@@ -358,6 +370,13 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context,
if (!symbol)
return T();
+ if constexpr (std::is_same_v<T, Attribute>) {
+ // Cache key is the symbol data without the dialect prefix.
+ StringRef cacheKey = inputStr.split('.').second;
+ if (attributesCache && !cacheKey.empty()) {
+ (*attributesCache)[cacheKey] = symbol;
+ }
+ }
// Provide the number of bytes that were read.
Token endTok = parser.getToken();
size_t numRead =
@@ -374,13 +393,15 @@ static T parseSymbol(StringRef inputStr, MLIRContext *context,
Attribute mlir::parseAttribute(StringRef attrStr, MLIRContext *context,
Type type, size_t *numRead,
- bool isKnownNullTerminated) {
+ bool isKnownNullTerminated,
+ llvm::StringMap<Attribute> *attributesCache) {
return parseSymbol<Attribute>(
- attrStr, context, numRead, isKnownNullTerminated,
+ attrStr, context, numRead, isKnownNullTerminated, attributesCache,
[type](Parser &parser) { return parser.parseAttribute(type); });
}
Type mlir::parseType(StringRef typeStr, MLIRContext *context, size_t *numRead,
bool isKnownNullTerminated) {
return parseSymbol<Type>(typeStr, context, numRead, isKnownNullTerminated,
+ /*attributesCache=*/nullptr,
[](Parser &parser) { return parser.parseType(); });
}
diff --git a/mlir/lib/AsmParser/ParserState.h b/mlir/lib/AsmParser/ParserState.h
index 159058a18fa4e..aa53032107cbf 100644
--- a/mlir/lib/AsmParser/ParserState.h
+++ b/mlir/lib/AsmParser/ParserState.h
@@ -40,6 +40,9 @@ struct SymbolState {
/// A map from unique integer identifier to DistinctAttr.
DenseMap<uint64_t, DistinctAttr> distinctAttributes;
+
+ /// A map from unique string identifier to Attribute.
+ llvm::StringMap<Attribute> attributesCache;
};
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 44458d010c6c8..0f97443433774 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -895,6 +895,10 @@ class AttrTypeReader {
SmallVector<AttrEntry> attributes;
SmallVector<TypeEntry> types;
+ /// The map of cached attributes, used to avoid re-parsing the same
+ /// attribute multiple times.
+ llvm::StringMap<Attribute> attributesCache;
+
/// A location used for error emission.
Location fileLoc;
@@ -1235,7 +1239,7 @@ LogicalResult AttrTypeReader::parseAsmEntry(T &result, EncodingReader &reader,
::parseType(asmStr, context, &numRead, /*isKnownNullTerminated=*/true);
else
result = ::parseAttribute(asmStr, context, Type(), &numRead,
- /*isKnownNullTerminated=*/true);
+ /*isKnownNullTerminated=*/true, &attributesCache);
if (!result)
return failure();
``````````
</details>
https://github.com/llvm/llvm-project/pull/151267
More information about the Mlir-commits
mailing list