[Mlir-commits] [mlir] [MLIR] Add two-phase region parsing API to OpAsmParser (PR #190545)

Henrich Lauko llvmlistbot at llvm.org
Sun Apr 5 11:26:33 PDT 2026


https://github.com/xlauko created https://github.com/llvm/llvm-project/pull/190545

None

>From 61550ef9e788a68b2d560cec73fe9501fb09b622 Mon Sep 17 00:00:00 2001
From: xlauko <xlauko at mail.muni.cz>
Date: Sun, 5 Apr 2026 20:22:58 +0200
Subject: [PATCH] [MLIR] Add two-phase region parsing API to OpAsmParser

---
 mlir/include/mlir/IR/OpImplementation.h      |  23 ++++
 mlir/lib/AsmParser/Parser.cpp                |  34 ++++++
 mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp |  33 ++++++
 mlir/test/lib/Dialect/Test/TestOpsSyntax.td  |  35 ++++++
 mlir/test/mlir-tblgen/op-format.mlir         | 109 +++++++++++++++++++
 5 files changed, 234 insertions(+)

diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h
index e66f149b6f812..42b1e3099b08a 100644
--- a/mlir/include/mlir/IR/OpImplementation.h
+++ b/mlir/include/mlir/IR/OpImplementation.h
@@ -1782,6 +1782,29 @@ class OpAsmParser : public AsmParser {
                       ArrayRef<Argument> arguments = {},
                       bool enableNameShadowing = false) = 0;
 
+  /// Stash the entry arguments of `region` so a later directive can pass them
+  /// to parseRegion or parseOptionalRegion. This enables two-phase region
+  /// parsing: argument names are parsed before other constructs (e.g.,
+  /// attr-dict), then fetched again with takeRegionArguments for the body.
+  ///
+  /// Example ODS assembly format:
+  ///   custom<FuncHeader>($body) attr-dict-with-keyword
+  ///   custom<FuncBody>(ref($body))
+  ///
+  /// In the custom parser for FuncHeader:
+  ///   parser.parseArgumentList(args, Delimiter::Paren, /*allowType=*/true);
+  ///   parser.stashRegionArguments(region, args);
+  ///
+  /// In the custom parser for FuncBody:
+  ///   auto args = parser.takeRegionArguments(region);
+  ///   return parser.parseOptionalRegion(region, args);
+  virtual void stashRegionArguments(Region &region,
+                                    ArrayRef<Argument> arguments) = 0;
+
+  /// Return the arguments previously stashed for `region` and remove them
+  /// from the stash. Returns an empty vector if nothing was stashed.
+  virtual SmallVector<Argument> takeRegionArguments(Region &region) = 0;
+
   //===--------------------------------------------------------------------===//
   // Successor Parsing
   //===--------------------------------------------------------------------===//
diff --git a/mlir/lib/AsmParser/Parser.cpp b/mlir/lib/AsmParser/Parser.cpp
index 06a3dfa65e933..9634aff1e8076 100644
--- a/mlir/lib/AsmParser/Parser.cpp
+++ b/mlir/lib/AsmParser/Parser.cpp
@@ -747,6 +747,24 @@ class OperationParser : public Parser {
                               ArrayRef<Argument> entryArguments,
                               bool isIsolatedNameScope);
 
+  /// Remember `arguments` as the pending entry arguments of `region`.
+  void stashRegionArguments(Region &region, ArrayRef<Argument> arguments) {
+    assert(stashedRegionArgs.find(&region) == stashedRegionArgs.end() &&
+           "region arguments already stashed; stashRegionArguments() called "
+           "twice for the same region");
+    stashedRegionArgs[&region].assign(arguments.begin(), arguments.end());
+  }
+
+  /// Hand back the arguments stashed for `region`, if any, and forget them.
+  SmallVector<Argument> takeRegionArguments(Region &region) {
+    auto entry = stashedRegionArgs.find(&region);
+    if (entry == stashedRegionArgs.end())
+      return SmallVector<Argument>();
+    SmallVector<Argument> taken(std::move(entry->second));
+    stashedRegionArgs.erase(entry);
+    return taken;
+  }
+
   //===--------------------------------------------------------------------===//
   // Block Parsing
   //===--------------------------------------------------------------------===//
@@ -860,6 +878,10 @@ class OperationParser : public Parser {
   /// their first reference, to allow checking for use of undefined values.
   DenseMap<Value, SMLoc> forwardRefPlaceholders;
 
+  /// Pending region arguments for two-phase region parsing, keyed by region
+  /// address. Filled by stashRegionArguments, drained by takeRegionArguments.
+  llvm::SmallDenseMap<Region *, SmallVector<Argument>, 1> stashedRegionArgs;
+
   /// Operations that define the placeholders. These are kept until the end of
   /// of the lifetime of the parser because some custom parsers may store
   /// references to them in local state and use them after forward references
@@ -893,6 +915,9 @@ OperationParser::OperationParser(ParserState &state, ModuleOp topLevelOp)
 }
 
 OperationParser::~OperationParser() {
+  // Stashed region arguments may legitimately remain here: when parsing fails
+  // between stashRegionArguments() and takeRegionArguments(), the entry is
+  // never consumed. That is a user input error, so it must not assert.
   for (Operation *op : forwardRefOps) {
     // Drop all uses of undefined forward declared reference and destroy
     // defining operation.
@@ -1945,6 +1970,15 @@ class CustomOpAsmParser : public AsmParserImpl<OpAsmParser> {
     return success();
   }
 
+  void stashRegionArguments(Region &region,
+                            ArrayRef<Argument> arguments) override {
+    parser.stashRegionArguments(region, arguments); // Forward to `parser`.
+  }
+
+  SmallVector<Argument> takeRegionArguments(Region &region) override {
+    return parser.takeRegionArguments(region); // Forward to `parser`.
+  }
+
   //===--------------------------------------------------------------------===//
   // Successor Parsing
   //===--------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp b/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp
index 5880c2a2302b0..bfd8bbc1e43c5 100644
--- a/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp
+++ b/mlir/test/lib/Dialect/Test/TestOpsSyntax.cpp
@@ -153,6 +153,23 @@ static ParseResult parseCustomDirectiveOptionalOperandRef(
   bool expectedOptionalOperand = operandCount == 0;
   return success(expectedOptionalOperand != optOperand.has_value());
 }
+static ParseResult parseTwoPhaseRegionHeader(OpAsmParser &parser,
+                                             Region &region) {
+  SmallVector<OpAsmParser::Argument> entryArgs; // Typed `(%a : t, ...)` list.
+  ParseResult parsed = parser.parseArgumentList(
+      entryArgs, OpAsmParser::Delimiter::Paren, /*allowType=*/true);
+  if (succeeded(parsed))
+    parser.stashRegionArguments(region, entryArgs); // Consumed by the body.
+  return parsed;
+}
+static ParseResult parseTwoPhaseRegionBody(OpAsmParser &parser,
+                                           Region &region) {
+  // Fetch the arguments stashed by the header; the region body is optional.
+  SmallVector<OpAsmParser::Argument> entryArgs =
+      parser.takeRegionArguments(region);
+  OptionalParseResult body = parser.parseOptionalRegion(region, entryArgs);
+  return body.has_value() ? *body : ParseResult(success());
+}
 
 //===----------------------------------------------------------------------===//
 // Printing
@@ -234,6 +251,22 @@ static void printCustomDirectiveOptionalOperandRef(OpAsmPrinter &printer,
                                                    Value optOperand) {
   printer << (optOperand ? "1" : "0");
 }
+static void printTwoPhaseRegionHeader(OpAsmPrinter &printer, Operation *op,
+                                      Region &region) {
+  // Prints the parenthesized entry-argument list. A region without any block
+  // has no entry arguments to show, so it is printed as a bare `()`.
+  auto printArg = [&](BlockArgument arg) { printer.printRegionArgument(arg); };
+  printer << '(';
+  if (!region.empty())
+    llvm::interleaveComma(region.front().getArguments(), printer, printArg);
+  printer << ')';
+}
+static void printTwoPhaseRegionBody(OpAsmPrinter &printer, Operation *op,
+                                    Region &region) {
+  if (!region.empty()) // An empty region prints no body at all.
+    printer.printRegion(region, /*printEntryBlockArgs=*/false,
+                        /*printBlockTerminators=*/false);
+}
 //===----------------------------------------------------------------------===//
 // Test parser.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOpsSyntax.td b/mlir/test/lib/Dialect/Test/TestOpsSyntax.td
index 096d4d255b729..0693b50f2301b 100644
--- a/mlir/test/lib/Dialect/Test/TestOpsSyntax.td
+++ b/mlir/test/lib/Dialect/Test/TestOpsSyntax.td
@@ -552,6 +552,41 @@ def FormatCustomDirectiveRegions : TEST_Op<"format_custom_directive_regions"> {
   }];
 }
 
+def FormatTwoPhaseRegionOp
+    : TEST_Op<"format_two_phase_region_op", [IsolatedFromAbove, NoTerminator]> {
+  let regions = (region AnyRegion:$body); // Args come first, body comes last.
+  let assemblyFormat = [{
+    custom<TwoPhaseRegionHeader>($body) attr-dict-with-keyword
+    custom<TwoPhaseRegionBody>(ref($body))
+  }];
+}
+
+def FormatTwoPhaseRegionWithOperandOp
+    : TEST_Op<"format_two_phase_region_with_operand_op", [IsolatedFromAbove,
+                                                          NoTerminator]> {
+  let arguments = (ins AnyType:$value); // Parsed between header and body.
+  let regions = (region AnyRegion:$body);
+  let assemblyFormat = [{
+    custom<TwoPhaseRegionHeader>($body)
+    `(` $value `:` type($value) `)`
+    attr-dict-with-keyword
+    custom<TwoPhaseRegionBody>(ref($body))
+  }];
+}
+
+def FormatTwoPhaseMultiRegionOp
+    : TEST_Op<"format_two_phase_multi_region_op", [IsolatedFromAbove,
+                                                   NoTerminator]> {
+  let regions = (region AnyRegion:$first, AnyRegion:$second); // Both two-phase.
+  let assemblyFormat = [{
+    custom<TwoPhaseRegionHeader>($first)
+    custom<TwoPhaseRegionHeader>($second)
+    attr-dict-with-keyword
+    custom<TwoPhaseRegionBody>(ref($first))
+    custom<TwoPhaseRegionBody>(ref($second))
+  }];
+}
+
 def FormatCustomDirectiveResults
     : TEST_Op<"format_custom_directive_results", [AttrSizedResultSegments]> {
   let results = (outs AnyType:$result, Optional<AnyType>:$optResult,
diff --git a/mlir/test/mlir-tblgen/op-format.mlir b/mlir/test/mlir-tblgen/op-format.mlir
index 7ff9091d5500d..a78ecf9c3e6c0 100644
--- a/mlir/test/mlir-tblgen/op-format.mlir
+++ b/mlir/test/mlir-tblgen/op-format.mlir
@@ -413,6 +413,115 @@ test.format_custom_directive_regions {
   "test.return"() : () -> ()
 }
 
+// CHECK: test.format_two_phase_region_op (%[[A:.*]]: i32, %[[B:.*]]: f64) {
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32, %arg1 : f64) {
+}
+
+// Declaration (no body) - region is empty, args are not preserved.
+// CHECK: test.format_two_phase_region_op ()
+test.format_two_phase_region_op (%arg0 : i32)
+
+// CHECK: test.format_two_phase_region_op ()
+test.format_two_phase_region_op ()
+
+// Empty body with no args round-trips to no body (NoTerminator).
+// CHECK: test.format_two_phase_region_op ()
+test.format_two_phase_region_op () {
+}
+
+// CHECK: test.format_two_phase_region_op (%{{.*}}: i32) attributes {foo = "bar"} {
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) attributes {foo = "bar"} {
+}
+
+// CHECK: test.format_two_phase_region_op (%[[C:.*]]: i32, %[[D:.*]]: f64) {
+// CHECK-NEXT:   "test.op"(%[[C]], %[[D]]) : (i32, f64) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32, %arg1 : f64) {
+  "test.op"(%arg0, %arg1) : (i32, f64) -> ()
+}
+
+// Multiple attributes between header and body.
+// CHECK: test.format_two_phase_region_op (%{{.*}}: i32) attributes {baz = 42 : i64, foo = "bar"} {
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) attributes {foo = "bar", baz = 42 : i64} {
+}
+
+// Multiple blocks in the region.
+// CHECK: test.format_two_phase_region_op (%[[E:.*]]: i32) {
+// CHECK-NEXT:   "test.op"(%[[E]]) : (i32) -> ()
+// CHECK-NEXT: ^bb1:
+// CHECK-NEXT:   "test.op2"() : () -> ()
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) {
+  "test.op"(%arg0) : (i32) -> ()
+^bb1:
+  "test.op2"() : () -> ()
+}
+
+// Nested operation using two-phase region parser.
+// CHECK: test.format_two_phase_region_op (%[[F:.*]]: i32) {
+// CHECK-NEXT:   test.format_two_phase_region_op (%[[G:.*]]: f64) {
+// CHECK-NEXT:     "test.op"(%[[G]]) : (f64) -> ()
+// CHECK-NEXT:   }
+// CHECK-NEXT: }
+test.format_two_phase_region_op (%arg0 : i32) {
+  test.format_two_phase_region_op (%arg1 : f64) {
+    "test.op"(%arg1) : (f64) -> ()
+  }
+}
+
+// Operand and type between header and body.
+// CHECK: %[[V:.*]] = "test.op"() : () -> i32
+// CHECK: test.format_two_phase_region_with_operand_op (%[[A:.*]]: f64)(%[[V]] : i32) attributes {tag = "hello"} {
+// CHECK-NEXT:   "test.use"(%[[A]]) : (f64) -> ()
+// CHECK-NEXT: }
+%tpr_v = "test.op"() : () -> i32
+test.format_two_phase_region_with_operand_op (%arg0 : f64) (%tpr_v : i32) attributes {tag = "hello"} {
+  "test.use"(%arg0) : (f64) -> ()
+}
+
+// Operand between header and body, no region body (declaration).
+// CHECK: test.format_two_phase_region_with_operand_op ()(%[[V]] : i32)
+test.format_two_phase_region_with_operand_op (%arg0 : f64) (%tpr_v : i32)
+
+// Multi-region two-phase parsing: both regions have args and bodies.
+// CHECK: test.format_two_phase_multi_region_op (%[[MR1:.*]]: i32) (%[[MR2:.*]]: f64) {
+// CHECK-NEXT:   "test.op"(%[[MR1]]) : (i32) -> ()
+// CHECK-NEXT: } {
+// CHECK-NEXT:   "test.op"(%[[MR2]]) : (f64) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64) {
+  "test.op"(%arg0) : (i32) -> ()
+} {
+  "test.op"(%arg1) : (f64) -> ()
+}
+
+// Multi-region: no bodies (declaration).
+// CHECK: test.format_two_phase_multi_region_op () ()
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64)
+
+// Multi-region: only first region has a body.
+// CHECK: test.format_two_phase_multi_region_op (%[[MR3:.*]]: i32) () {
+// CHECK-NEXT:   "test.op"(%[[MR3]]) : (i32) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64) {
+  "test.op"(%arg0) : (i32) -> ()
+}
+
+// Multi-region with attributes between header and body.
+// CHECK: test.format_two_phase_multi_region_op (%[[MR4:.*]]: i32) (%[[MR5:.*]]: f64) attributes {key = "val"} {
+// CHECK-NEXT:   "test.op"(%[[MR4]]) : (i32) -> ()
+// CHECK-NEXT: } {
+// CHECK-NEXT:   "test.op"(%[[MR5]]) : (f64) -> ()
+// CHECK-NEXT: }
+test.format_two_phase_multi_region_op (%arg0 : i32)(%arg1 : f64) attributes {key = "val"} {
+  "test.op"(%arg0) : (i32) -> ()
+} {
+  "test.op"(%arg1) : (f64) -> ()
+}
+
 // CHECK: test.format_custom_directive_results : i64, i64 -> (i64)
 test.format_custom_directive_results : i64, i64 -> (i64)
 



More information about the Mlir-commits mailing list