[Mlir-commits] [mlir] [mlir][TD] Fix the order of return handles (PR #76929)

Thu Jan 4 01:50:38 PST 2024

https://github.com/banach-space updated https://github.com/llvm/llvm-project/pull/76929

>From ace1ed950ef69859dfdc61af24253ad1ed71196f Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski at arm.com>
Date: Thu, 4 Jan 2024 09:11:08 +0000
Subject: [PATCH] [mlir][TD] Fix the order of return handles

Replace (in tests and docs):

  %forall, %tiled = transform.structured.tile_using_forall

with:

  %tiled, %forall = transform.structured.tile_using_forall

Apply a similar change to (in the TD tutorial):

  transform.structured.fuse_into_containing_op

This update makes sure that the tests/documentation are consistent with
the Op specifications. This is a follow-up for #67320, which updated the
order of the return handles for `tile_using_forall`.
---
 mlir/docs/Tutorials/transform/Ch1.md             | 16 ++++++++--------
 .../Linalg/TransformOps/LinalgTransformOps.td    |  4 ++--
 mlir/test/Dialect/GPU/transform-gpu-failing.mlir |  2 +-
 mlir/test/Dialect/Linalg/tile-to-forall.mlir     |  4 ++--
 .../Examples/transform/Ch1/invalidation-1.mlir   |  4 ++--
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/mlir/docs/Tutorials/transform/Ch1.md b/mlir/docs/Tutorials/transform/Ch1.md
index 95b37eb6ca4130..a44e93f65c531a 100644
--- a/mlir/docs/Tutorials/transform/Ch1.md
+++ b/mlir/docs/Tutorials/transform/Ch1.md
@@ -261,7 +261,7 @@ transform.sequence failures(propagate) {
 
   // The actual tiling transformation takes tile sizes as attributes. It
   // produces a handle to the loop generated during tiling.
-  %loop, %tiled_max =
+  %tiled_max, %loop =
       transform.structured.tile_using_forall %max tile_sizes [8, 32]
         : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 
@@ -271,12 +271,12 @@ transform.sequence failures(propagate) {
   // important. We could also use "transform.merge_handles" to obtain a single
   // handle to all operations and give it to `fuse_into_containing_op` that
   // would take care of the ordering in this case.
-  %loop_0, %tiled_add =
+  %add_fused, %loop_0 =
       transform.structured.fuse_into_containing_op %add into %loop
         : (!transform.any_op, !transform.any_op)
           -> (!transform.any_op, !transform.any_op)
-  %loop_1, %tiled_matmul =
-      transform.structured.fuse_into_containing_op %arg1 into %loop
+  %matmul_fused, %loop_1 =
+      transform.structured.fuse_into_containing_op %arg1 into %loop_0
         : (!transform.op<"linalg.matmul">, !transform.any_op)
           -> (!transform.any_op, !transform.any_op)
 
@@ -304,7 +304,7 @@ transform.sequence failures(propagate) {
 
   // The actual tiling transformation takes tile sizes as attributes. It
   // produces a handle to the loop generated during tiling.
-  %loop, %tiled = transform.structured.tile_using_forall %max tile_sizes [8, 32]
+  %tiled, %loop  = transform.structured.tile_using_forall %max tile_sizes [8, 32]
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
 
   // We can now fuse the other operations into the loop. Here, we fuse
@@ -318,7 +318,7 @@ transform.sequence failures(propagate) {
         : (!transform.any_op, !transform.any_op)
           -> (!transform.any_op, !transform.any_op)
   %matmul_fused, %loop_1 =
-      transform.structured.fuse_into_containing_op %arg1 into %loop
+      transform.structured.fuse_into_containing_op %arg1 into %loop_0
         : (!transform.op<"linalg.matmul">, !transform.any_op)
           -> (!transform.any_op, !transform.any_op)
 
@@ -327,7 +327,7 @@ transform.sequence failures(propagate) {
   // "max" operation. This illustrates the precise targeting with the transform
   // dialect. Otherwise, it is difficult to differentiate "add" and "max", both
   // of which having the same kind.
-  %loop_2, %tiled_2 =
+  %tiled_2, %loop_2 =
       transform.structured.tile_using_forall %add_fused tile_sizes [4, 4]
         : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
   %matmul_fused_2, %loop_3 =
@@ -338,7 +338,7 @@ transform.sequence failures(propagate) {
   // Since outlining is currently only implemented for region-holding operations
   // such as loops, use tiling to size 1 to materialize the outer loop that is
   // going to be outlined.
-  %outline_target, %_ =
+  %_, %outline_target =
       transform.structured.tile_using_forall %tiled_2 tile_sizes [1]
         : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
   transform.structured.fuse_into_containing_op %matmul_fused_2
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 77ed9db5e71bd1..bc257d17483e3b 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -1911,8 +1911,8 @@ def TileUsingForallOp :
     tiled operations, which can all be empty.
 
     These two returned handles point to:
-      - the new scf.forall op,
-      - the tiled op that implements TilingInterface.
+      - the tiled op that implements TilingInterface,
+      - the new scf.forall op.
 
     #### Example using `num_threads`
 
diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
index f81f8b64afdfc6..8d7a1aa2a55fca 100644
--- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
@@ -144,7 +144,7 @@ func.func @map_nested_forall_to_threads_not_buffer(%x: tensor<32x32xf32>, %y: te
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
     %matmul = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op
-    %forall, %tiled = transform.structured.tile_using_forall %matmul num_threads [2, 3, 1] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
+    %tiled, %forall = transform.structured.tile_using_forall %matmul num_threads [2, 3, 1] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
       : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
     %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!transform.any_op) -> !transform.any_op
     // expected-error @below {{only bufferized scf.forall can be mapped}}
diff --git a/mlir/test/Dialect/Linalg/tile-to-forall.mlir b/mlir/test/Dialect/Linalg/tile-to-forall.mlir
index 38742028e48101..2192d160b1150f 100644
--- a/mlir/test/Dialect/Linalg/tile-to-forall.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-forall.mlir
@@ -389,7 +389,7 @@ module attributes {transform.with_named_sequence} {
   module attributes {transform.with_named_sequence} {
     transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
       %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-      %forall, %tiled_generic = transform.structured.tile_using_forall %0 num_threads [7]
+      %tiled_generic, %forall = transform.structured.tile_using_forall %0 num_threads [7]
            : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
       transform.yield
     }
@@ -445,7 +445,7 @@ module attributes {transform.with_named_sequence} {
   module attributes {transform.with_named_sequence} {
     transform.named_sequence @__transform_main(%IN_MAT2: !transform.any_op {transform.readonly}) {
       %0 = transform.structured.match ops{["linalg.generic"]} in %IN_MAT2 : (!transform.any_op) -> !transform.any_op
-      %forall, %tiled_generic = transform.structured.tile_using_forall %0 num_threads [4]
+      %tiled_generic, %forall = transform.structured.tile_using_forall %0 num_threads [4]
            : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
       transform.yield
     }
diff --git a/mlir/test/Examples/transform/Ch1/invalidation-1.mlir b/mlir/test/Examples/transform/Ch1/invalidation-1.mlir
index 1b71cfbe9b3607..825e2abe48229f 100644
--- a/mlir/test/Examples/transform/Ch1/invalidation-1.mlir
+++ b/mlir/test/Examples/transform/Ch1/invalidation-1.mlir
@@ -19,7 +19,7 @@ transform.sequence failures(propagate) {
      %arg2: !transform.op<"linalg.elemwise_binary">):
   // The actual tiling transformation takes tile sizes as attributes.
   // expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them}}
-  %loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
+  %tiled, %loop = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
       : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)
 
   // This is trying to use an invalidated handle leading to undefined behavior.
@@ -64,7 +64,7 @@ transform.sequence failures(propagate) {
 
   // The actual tiling transformation takes tile sizes as attributes.
   // expected-note @below {{invalidated by this transform op that consumes its operand #0 and invalidates all handles to payload IR entities associated with this operand and entities nested in them}}
-  %loop, %tiled = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
+  %tiled, %loop = transform.structured.tile_using_forall %arg1 tile_sizes [4, 32]
     : (!transform.op<"linalg.matmul">) -> (!transform.any_op, !transform.any_op)
 
   // Consuming an operand invalidates the consumed handle and any other handle that is