[Mlir-commits] [mlir] [MLIR] Add two-phase region parsing API to OpAsmParser (PR #190545)
Henrich Lauko
llvmlistbot at llvm.org
Sun Apr 5 11:26:33 PDT 2026
https://github.com/xlauko created https://github.com/llvm/llvm-project/pull/190545
None
>From 61550ef9e788a68b2d560cec73fe9501fb09b622 Mon Sep 17 00:00:00 2001
From: xlauko <xlauko at mail.muni.cz>
Date: Sun, 5 Apr 2026 20:22:58 +0200
Subject: [PATCH] [MLIR] Add two-phase region parsing API to OpAsmParser
---
mlir/include/mlir/IR/OpImplementation.h | 23 ++++
mlir/lib/AsmParser/Parser.cpp | 34 ++++++
mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp | 33 ++++++
mlir/test/lib/Dialect/Test/TestOpsSyntax.td | 35 ++++++
mlir/test/mlir-tblgen/op-format.mlir | 109 +++++++++++++++++++
5 files changed, 234 insertions(+)
diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h
index e66f149b6f812..42b1e3099b08a 100644
--- a/mlir/include/mlir/IR/OpImplementation.h
+++ b/mlir/include/mlir/IR/OpImplementation.h
@@ -1782,6 +1782,29 @@ class OpAsmParser : public AsmParser {
ArrayRef<Argument> arguments = {},
bool enableNameShadowing = false) = 0;
+  /// Store region entry arguments for later use by parseRegion or
+  /// parseOptionalRegion. This enables a two-phase region parsing pattern
+  /// where argument names are parsed before other constructs (e.g.,
+  /// attr-dict), then the region body is parsed in a separate step.
+  ///
+  /// Example ODS assembly format:
+  ///   custom<FuncHeader>($body) attr-dict-with-keyword
+  ///   custom<FuncBody>(ref($body))
+  ///
+  /// In the custom parser for FuncHeader:
+  ///   parser.parseArgumentList(args, Delimiter::Paren, /*allowType=*/true);
+  ///   parser.stashRegionArguments(region, args);
+  ///
+  /// In the custom parser for FuncBody:
+  ///   auto args = parser.takeRegionArguments(region);
+  ///   return parser.parseOptionalRegion(region, args);
+  virtual void stashRegionArguments(Region &region,
+                                    ArrayRef<Argument> arguments) = 0;
+
+  /// Retrieve and consume the arguments previously stashed for `region`.
+  /// Returns an empty vector if no arguments were stashed.
+  virtual SmallVector<Argument> takeRegionArguments(Region &region) = 0;
+
//===--------------------------------------------------------------------===//
// Successor Parsing
//===--------------------------------------------------------------------===//
diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 06a3dfa65e933..9634aff1e8076 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -747,6 +747,24 @@ class OperationParser : public Parser {
ArrayRef<Argument> entryArguments,
bool isIsolatedNameScope);
+  /// Copy `arguments` into the stash, keyed by the address of `region`.
+  void stashRegionArguments(Region &region, ArrayRef<Argument> arguments) {
+    assert(!stashedRegionArgs.count(&region) &&
+           "region arguments already stashed; stashRegionArguments() called "
+           "twice for the same region");
+    stashedRegionArgs[&region] = SmallVector<Argument>(arguments);
+  }
+
+  /// Remove and return the arguments stashed for `region` (empty if none).
+  SmallVector<Argument> takeRegionArguments(Region &region) {
+    auto it = stashedRegionArgs.find(&region);
+    if (it == stashedRegionArgs.end())
+      return {};
+    SmallVector<Argument> args = std::move(it->second); // move out, then erase
+    stashedRegionArgs.erase(it);
+    return args;
+  }
+
//===--------------------------------------------------------------------===//
// Block Parsing
//===--------------------------------------------------------------------===//
@@ -860,6 +878,10 @@ class OperationParser : public Parser {
/// their first reference, to allow checking for use of undefined values.
DenseMap<Value, SMLoc> forwardRefPlaceholders;
+ /// Stashed region arguments for two-phase region parsing. Custom parsers
+ /// can stash arguments in one directive and retrieve them in another.
+ llvm::SmallDenseMap<Region *, SmallVector<Argument>, 1> stashedRegionArgs;
+
/// Operations that define the placeholders. These are kept until the end of
/// of the lifetime of the parser because some custom parsers may store
/// references to them in local state and use them after forward references
@@ -893,6 +915,9 @@ OperationParser::OperationParser(ParserState &state, ModuleOp topLevelOp)
}
OperationParser::~OperationParser() {
+ assert(stashedRegionArgs.empty() &&
+ "all stashRegionArguments() calls must be paired with "
+ "takeRegionArguments()");
for (Operation *op : forwardRefOps) {
// Drop all uses of undefined forward declared reference and destroy
// defining operation.
@@ -1945,6 +1970,15 @@ class CustomOpAsmParser : public AsmParserImpl<OpAsmParser> {
return success();
}
+  void stashRegionArguments(Region &region,
+                            ArrayRef<Argument> arguments) override {
+    parser.stashRegionArguments(region, arguments); // delegate to `parser`
+  }
+
+  SmallVector<Argument> takeRegionArguments(Region &region) override {
+    return parser.takeRegionArguments(region); // delegate to `parser`
+  }
+
//===--------------------------------------------------------------------===//
// Successor Parsing
//===--------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp b/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp
index 5880c2a2302b0..bfd8bbc1e43c5 100644
--- a/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp
@@ -153,6 +153,23 @@ static ParseResult parseCustomDirectiveOptionalOperandRef(
bool expectedOptionalOperand = operandCount == 0;
return success(expectedOptionalOperand != optOperand.has_value());
}
+static ParseResult parseTwoPhaseRegionHeader(OpAsmParser &parser,
+                                             Region &region) {
+  SmallVector<OpAsmParser::Argument> args; // the parenthesized argument list
+  if (parser.parseArgumentList(args, OpAsmParser::Delimiter::Paren,
+                               /*allowType=*/true))
+    return failure();
+  parser.stashRegionArguments(region, args); // taken by the body directive
+  return success();
+}
+static ParseResult parseTwoPhaseRegionBody(OpAsmParser &parser,
+                                           Region &region) {
+  auto args = parser.takeRegionArguments(region); // stashed by the header
+  auto result = parser.parseOptionalRegion(region, args);
+  if (result.has_value())
+    return *result;
+  return success(); // no region present: not an error for this test op
+}
//===----------------------------------------------------------------------===//
// Printing
@@ -234,6 +251,22 @@ static void printCustomDirectiveOptionalOperandRef(OpAsmPrinter &printer,
Value optOperand) {
printer << (optOperand ? "1" : "0");
}
+static void printTwoPhaseRegionHeader(OpAsmPrinter &printer, Operation *op,
+                                      Region &region) {
+  printer << '(';
+  if (!region.empty()) { // an empty region has no entry block to read from
+    llvm::interleaveComma(
+        region.front().getArguments(), printer,
+        [&](BlockArgument arg) { printer.printRegionArgument(arg); });
+  }
+  printer << ')';
+}
+static void printTwoPhaseRegionBody(OpAsmPrinter &printer, Operation *op,
+                                    Region &region) {
+  if (!region.empty()) // entry args are emitted by printTwoPhaseRegionHeader
+    printer.printRegion(region, /*printEntryBlockArgs=*/false,
+                        /*printBlockTerminators=*/false);
+}
//===----------------------------------------------------------------------===//
// Test parser.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOpsSyntax.td b/mlir/test/lib/Dialect/Test/TestOpsSyntax.td
index 096d4d255b729..0693b50f2301b 100644
--- a/mlir/test/lib/Dialect/Test/TestOpsSyntax.td
+++ b/mlir/test/lib/Dialect/Test/TestOpsSyntax.td
@@ -552,6 +552,41 @@ def FormatCustomDirectiveRegions : TEST_Op<"format_custom_directive_regions"> {
}];
}
+def FormatTwoPhaseRegionOp // Region header is parsed before attr-dict, body after.
+ : TEST_Op<"format_two_phase_region_op", [IsolatedFromAbove, NoTerminator]> {
+ let regions = (region AnyRegion:$body); // used by both custom directives below
+ let assemblyFormat = [{
+ custom<TwoPhaseRegionHeader>($body) attr-dict-with-keyword
+ custom<TwoPhaseRegionBody>(ref($body))
+ }];
+}
+
+def FormatTwoPhaseRegionWithOperandOp // Operand clause sits between header and body.
+ : TEST_Op<"format_two_phase_region_with_operand_op", [IsolatedFromAbove,
+ NoTerminator]> {
+ let arguments = (ins AnyType:$value); // appears after the region header in the format
+ let regions = (region AnyRegion:$body); // used by both custom directives below
+ let assemblyFormat = [{
+ custom<TwoPhaseRegionHeader>($body)
+ `(` $value `:` type($value) `)`
+ attr-dict-with-keyword
+ custom<TwoPhaseRegionBody>(ref($body))
+ }];
+}
+
+def FormatTwoPhaseMultiRegionOp // Two regions: both headers come before either body.
+ : TEST_Op<"format_two_phase_multi_region_op", [IsolatedFromAbove,
+ NoTerminator]> {
+ let regions = (region AnyRegion:$first, AnyRegion:$second); // one header/body pair each
+ let assemblyFormat = [{
+ custom<TwoPhaseRegionHeader>($first)
+ custom<TwoPhaseRegionHeader>($second)
+ attr-dict-with-keyword
+ custom<TwoPhaseRegionBody>(ref($first))
+ custom<TwoPhaseRegionBody>(ref($second))
+ }];
+}
+
def FormatCustomDirectiveResults
: TEST_Op<"format_custom_directive_results", [AttrSizedResultSegments]> {
let results = (outs AnyType:$result, Optional<AnyType>:$optResult,
diff --git a/mlir/test/mlir-tblgen/op-format.mlir b/mlir/test/mlir-tblgen/op-format.mlir
index 7ff9091d5500d..a78ecf9c3e6c0 100644
--- a/mlir/test/mlir-tblgen/op-format.mlir
+++ b/mlir/test/mlir-tblgen/op-format.mlir
@@ -413,6 +413,115 @@ test.format_custom_directive_regions {
"test.return"() : () -> ()
}
+// CHECK: test.format_two_phase_region_op (%[[A:.*]]: i32, %[[B:.*]]: f64) {
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32, %arg1 : f64) {
+}
+
+// Declaration (no body) - region is empty, args are not preserved.
+// CHECK: test.format_two_phase_region_op ()
+test.format_two_phase_region_op (%arg0 : i32)
+
+// CHECK: test.format_two_phase_region_op ()
+test.format_two_phase_region_op ()
+
+// Empty body with no args round-trips to no body (NoTerminator).
+// CHECK: test.format_two_phase_region_op ()
+test.format_two_phase_region_op () {
+}
+
+// CHECK: test.format_two_phase_region_op (%{{.*}}: i32) attributes {foo = "bar"} {
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) attributes {foo = "bar"} {
+}
+
+// CHECK: test.format_two_phase_region_op (%[[C:.*]]: i32, %[[D:.*]]: f64) {
+// CHECK-NEXT: "test.op"(%[[C]], %[[D]]) : (i32, f64) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32, %arg1 : f64) {
+ "test.op"(%arg0, %arg1) : (i32, f64) -> ()
+}
+
+// Multiple attributes between header and body.
+// CHECK: test.format_two_phase_region_op (%{{.*}}: i32) attributes {baz = 42 : i64, foo = "bar"} {
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) attributes {foo = "bar", baz = 42 : i64} {
+}
+
+// Multiple blocks in the region.
+// CHECK: test.format_two_phase_region_op (%[[E:.*]]: i32) {
+// CHECK-NEXT: "test.op"(%[[E]]) : (i32) -> ()
+// CHECK-NEXT: ^bb1:
+// CHECK-NEXT: "test.op2"() : () -> ()
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) {
+ "test.op"(%arg0) : (i32) -> ()
+^bb1:
+ "test.op2"() : () -> ()
+}
+
+// Nested operation using two-phase region parser.
+// CHECK: test.format_two_phase_region_op (%[[F:.*]]: i32) {
+// CHECK-NEXT: test.format_two_phase_region_op (%[[G:.*]]: f64) {
+// CHECK-NEXT: "test.op"(%[[G]]) : (f64) -> ()
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) {
+ test.format_two_phase_region_op (%arg1 : f64) {
+ "test.op"(%arg1) : (f64) -> ()
+ }
+}
+
+// Operand and type between header and body.
+// CHECK: %[[V:.*]] = "test.op"() : () -> i32
+// CHECK: test.format_two_phase_region_with_operand_op (%[[A:.*]]: f64)(%[[V]] : i32) attributes {tag = "hello"} {
+// CHECK-NEXT: "test.use"(%[[A]]) : (f64) -> ()
+// CHECK-NEXT: }
+%tpr_v = "test.op"() : () -> i32
+test.format_two_phase_region_with_operand_op (%arg0 : f64) (%tpr_v : i32) attributes {tag = "hello"} {
+ "test.use"(%arg0) : (f64) -> ()
+}
+
+// Operand between header and body, no region body (declaration).
+// CHECK: test.format_two_phase_region_with_operand_op ()(%[[V]] : i32)
+test.format_two_phase_region_with_operand_op (%arg0 : f64) (%tpr_v : i32)
+
+// Multi-region two-phase parsing: both regions have args and bodies.
+// CHECK: test.format_two_phase_multi_region_op (%[[MR1:.*]]: i32) (%[[MR2:.*]]: f64) {
+// CHECK-NEXT: "test.op"(%[[MR1]]) : (i32) -> ()
+// CHECK-NEXT: } {
+// CHECK-NEXT: "test.op"(%[[MR2]]) : (f64) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64) {
+ "test.op"(%arg0) : (i32) -> ()
+} {
+ "test.op"(%arg1) : (f64) -> ()
+}
+
+// Multi-region: no bodies (declaration).
+// CHECK: test.format_two_phase_multi_region_op () ()
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64)
+
+// Multi-region: only first region has a body.
+// CHECK: test.format_two_phase_multi_region_op (%[[MR3:.*]]: i32) () {
+// CHECK-NEXT: "test.op"(%[[MR3]]) : (i32) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64) {
+ "test.op"(%arg0) : (i32) -> ()
+}
+
+// Multi-region with attributes between header and body.
+// CHECK: test.format_two_phase_multi_region_op (%[[MR4:.*]]: i32) (%[[MR5:.*]]: f64) attributes {key = "val"} {
+// CHECK-NEXT: "test.op"(%[[MR4]]) : (i32) -> ()
+// CHECK-NEXT: } {
+// CHECK-NEXT: "test.op"(%[[MR5]]) : (f64) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64) attributes {key = "val"} {
+ "test.op"(%arg0) : (i32) -> ()
+} {
+ "test.op"(%arg1) : (f64) -> ()
+}
+
// CHECK: test.format_custom_directive_results : i64, i64 -> (i64)
test.format_custom_directive_results : i64, i64 -> (i64)
More information about the Mlir-commits
mailing list