[Mlir-commits] [mlir] [mlir][linalg][elementwise] Add tests for tiling elementwise operation. (PR #192798)

Sat Apr 18 10:08:38 PDT 2026

https://github.com/javedabsar1 updated https://github.com/llvm/llvm-project/pull/192798

>From 39cf0e35f16e6924d4415367401a549cb4b685fc Mon Sep 17 00:00:00 2001
From: Javed Absar <javed.absar at gmail.com>
Date: Sat, 18 Apr 2026 12:33:56 -0400
Subject: [PATCH] [mlir][linalg][elementwise] Add tests for tiling elementwise
 operation.

Add missing tests showcasing tiling of the `linalg.elementwise` op.
As `linalg.elementwise` is a structured op, the tiling interface is
already implemented.

Signed-off-by: Javed Absar <javed.absar at gmail.com>
---
 .../TilingInterface/tile-elementwise.mlir     | 265 ++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir

diff --git a/mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir b/mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir
new file mode 100644
index 0000000000000..dd574e9be6b91
--- /dev/null
+++ b/mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir
@@ -0,0 +1,265 @@
+// RUN: mlir-opt --transform-interpreter --cse -split-input-file %s | FileCheck %s
+
+// Static binary add: tile both dims of a 128x256 tensor op with scf.for, tile sizes [32, 64].
+
+func.func @tile_elementwise(%A: tensor<128x256xf32>, %B: tensor<128x256xf32>,
+                            %C: tensor<128x256xf32>) -> tensor<128x256xf32> {
+  %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+      ins(%A, %B : tensor<128x256xf32>, tensor<128x256xf32>)
+      outs(%C : tensor<128x256xf32>) -> tensor<128x256xf32>
+  return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+      : (!transform.any_op) -> !transform.any_op
+    %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [32, 64]
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+// CHECK-LABEL: func.func @tile_elementwise(
+// CHECK-SAME:    %[[A:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME:    %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME:    %[[C:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
+//  CHECK-DAG:   %[[C32:.+]] = arith.constant 32 : index
+//  CHECK-DAG:   %[[C64:.+]] = arith.constant 64 : index
+//  CHECK-DAG:   %[[C128:.+]] = arith.constant 128 : index
+//  CHECK-DAG:   %[[C256:.+]] = arith.constant 256 : index
+//      CHECK:   scf.for %[[IV0:.+]] = %[[C0]] to %[[C128]] step %[[C32]]
+// CHECK-SAME:       iter_args(%[[INIT0:.+]] = %[[C]])
+//      CHECK:     scf.for %[[IV1:.+]] = %[[C0]] to %[[C256]] step %[[C64]]
+// CHECK-SAME:         iter_args(%[[INIT1:.+]] = %[[INIT0]])
+//  CHECK-DAG:       %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//  CHECK-DAG:       %[[BT:.+]] = tensor.extract_slice %[[B]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//  CHECK-DAG:       %[[CT:.+]] = tensor.extract_slice %[[INIT1]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//      CHECK:       %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME:           ins(%[[AT]], %[[BT]] :
+// CHECK-SAME:           outs(%[[CT]] :
+//      CHECK:       %[[INS:.+]] = tensor.insert_slice %[[TILED]] into %[[INIT1]]
+// CHECK-SAME:           [%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//      CHECK:       scf.yield %[[INS]]
+//      CHECK:     scf.yield
+
+// -----
+
+// Dynamic binary add: tile sizes [10, 20] on ?x? tensors; per-tile extents come from affine.min.
+
+func.func @tile_elementwise_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+                                    %C: tensor<?x?xf32>) -> tensor<?x?xf32> {
+  %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+      ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+      outs(%C : tensor<?x?xf32>) -> tensor<?x?xf32>
+  return %r : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+      : (!transform.any_op) -> !transform.any_op
+    %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [10, 20]
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+//  CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)>
+//  CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)>
+// CHECK-LABEL: func.func @tile_elementwise_dynamic(
+// CHECK-SAME:    %[[A:[a-zA-Z0-9]+]]: tensor<?x?xf32>
+// CHECK-SAME:    %[[B:[a-zA-Z0-9]+]]: tensor<?x?xf32>
+// CHECK-SAME:    %[[C:[a-zA-Z0-9]+]]: tensor<?x?xf32>
+//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
+//  CHECK-DAG:   %[[D0:.+]] = tensor.dim %[[A]], %[[C0]]
+//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
+//  CHECK-DAG:   %[[D1:.+]] = tensor.dim %[[A]], %[[C1]]
+//  CHECK-DAG:   %[[C10:.+]] = arith.constant 10 : index
+//  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
+//      CHECK:   scf.for %[[IV0:.+]] = %[[C0]] to %[[D0]] step %[[C10]]
+// CHECK-SAME:       iter_args(%[[INIT0:.+]] = %[[C]])
+//      CHECK:     scf.for %[[IV1:.+]] = %[[C0]] to %[[D1]] step %[[C20]]
+// CHECK-SAME:         iter_args(%[[INIT1:.+]] = %[[INIT0]])
+//      CHECK:       %[[TS0:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[D0]]]
+//      CHECK:       %[[TS1:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[D1]]]
+//  CHECK-DAG:       %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+//  CHECK-DAG:       %[[BT:.+]] = tensor.extract_slice %[[B]][%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+//  CHECK-DAG:       %[[CT:.+]] = tensor.extract_slice %[[INIT1]][%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+//      CHECK:       %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME:           ins(%[[AT]], %[[BT]] :
+// CHECK-SAME:           outs(%[[CT]] :
+//      CHECK:       tensor.insert_slice %[[TILED]] into %[[INIT1]]
+// CHECK-SAME:           [%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+
+// -----
+
+// Memref variant (unary negf): no iter_args; tiles are memref.subview, not tensor.extract_slice.
+
+func.func @tile_elementwise_memref(%A: memref<128x256xf32>,
+                                   %B: memref<128x256xf32>) {
+  linalg.elementwise kind=#linalg.elementwise_kind<negf>
+      ins(%A : memref<128x256xf32>)
+      outs(%B : memref<128x256xf32>)
+  return
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+      : (!transform.any_op) -> !transform.any_op
+    %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [32, 64]
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+// CHECK-LABEL: func.func @tile_elementwise_memref(
+// CHECK-SAME:    %[[A:[a-zA-Z0-9]+]]: memref<128x256xf32>
+// CHECK-SAME:    %[[B:[a-zA-Z0-9]+]]: memref<128x256xf32>
+//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
+//  CHECK-DAG:   %[[C32:.+]] = arith.constant 32 : index
+//  CHECK-DAG:   %[[C64:.+]] = arith.constant 64 : index
+//  CHECK-DAG:   %[[C128:.+]] = arith.constant 128 : index
+//  CHECK-DAG:   %[[C256:.+]] = arith.constant 256 : index
+//      CHECK:   scf.for %[[IV0:.+]] = %[[C0]] to %[[C128]] step %[[C32]]
+//  CHECK-NOT:     iter_args
+//      CHECK:     scf.for %[[IV1:.+]] = %[[C0]] to %[[C256]] step %[[C64]]
+//  CHECK-DAG:       %[[AT:.+]] = memref.subview %[[A]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//  CHECK-DAG:       %[[BT:.+]] = memref.subview %[[B]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//      CHECK:       linalg.elementwise kind=#linalg.elementwise_kind<negf>
+// CHECK-SAME:           ins(%[[AT]] :
+// CHECK-SAME:           outs(%[[BT]] :
+
+// -----
+
+// Parallel tiling via tile_using_forall: expects scf.forall with tensor.parallel_insert_slice.
+
+func.func @tile_elementwise_forall(%A: tensor<128x256xf32>, %B: tensor<128x256xf32>,
+                                   %C: tensor<128x256xf32>) -> tensor<128x256xf32> {
+  %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+      ins(%A, %B : tensor<128x256xf32>, tensor<128x256xf32>)
+      outs(%C : tensor<128x256xf32>) -> tensor<128x256xf32>
+  return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+      : (!transform.any_op) -> !transform.any_op
+    %tiled, %forall = transform.structured.tile_using_forall %op tile_sizes [32, 64]
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+//  CHECK-DAG: #[[$MAPX:.+]] = affine_map<(d0) -> (d0 * 32)>
+//  CHECK-DAG: #[[$MAPY:.+]] = affine_map<(d0) -> (d0 * 64)>
+// CHECK-LABEL: func.func @tile_elementwise_forall(
+// CHECK-SAME:    %[[A:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME:    %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME:    %[[C:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+//      CHECK:   %[[RESULT:.+]] = scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (4, 4)
+// CHECK-SAME:       shared_outs(%[[INIT:.+]] = %[[C]])
+//  CHECK-DAG:     %[[OFF0:.+]] = affine.apply #[[$MAPX]](%[[IV0]])
+//  CHECK-DAG:     %[[OFF1:.+]] = affine.apply #[[$MAPY]](%[[IV1]])
+//  CHECK-DAG:     %[[AT:.+]] = tensor.extract_slice %[[A]][%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+//  CHECK-DAG:     %[[BT:.+]] = tensor.extract_slice %[[B]][%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+//  CHECK-DAG:     %[[CT:.+]] = tensor.extract_slice %[[INIT]][%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+//      CHECK:     %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME:         ins(%[[AT]], %[[BT]] :
+// CHECK-SAME:         outs(%[[CT]] :
+//      CHECK:     scf.forall.in_parallel
+//      CHECK:       tensor.parallel_insert_slice %[[TILED]] into %[[INIT]]
+// CHECK-SAME:           [%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+//      CHECK:   return %[[RESULT]]
+
+// -----
+
+// Broadcast: non-identity input indexing map.  The input has rank 1 (indexed by
+// d1 only), so it is sliced along d1 but never along d0.
+
+#map_in  = affine_map<(d0, d1) -> (d1)>
+#map_out = affine_map<(d0, d1) -> (d0, d1)>
+
+func.func @tile_elementwise_broadcast(%A: tensor<256xf32>,
+                                      %B: tensor<128x256xf32>) -> tensor<128x256xf32> {
+  %r = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+      indexing_maps = [#map_in, #map_out]
+      ins(%A : tensor<256xf32>)
+      outs(%B : tensor<128x256xf32>) -> tensor<128x256xf32>
+  return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+      : (!transform.any_op) -> !transform.any_op
+    %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [32, 64]
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+// CHECK-LABEL: func.func @tile_elementwise_broadcast(
+// CHECK-SAME:    %[[A:[a-zA-Z0-9]+]]: tensor<256xf32>
+// CHECK-SAME:    %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+//      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] =
+// CHECK-SAME:       iter_args(%[[INIT0:[a-zA-Z0-9]+]] = %[[B]])
+//      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] =
+// CHECK-SAME:         iter_args(%[[INIT1:[a-zA-Z0-9]+]] = %[[INIT0]])
+// Input is 1-D: sliced only along d1, not d0.
+//      CHECK:       %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV1]]] [64] [1]
+//      CHECK:       %[[BT:.+]] = tensor.extract_slice %[[INIT1]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//      CHECK:       %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+// CHECK-SAME:           ins(%[[AT]] : tensor<64xf32>)
+// CHECK-SAME:           outs(%[[BT]] : tensor<32x64xf32>)
+//      CHECK:       tensor.insert_slice %[[TILED]] into %[[INIT1]]
+// CHECK-SAME:           [%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+
+// -----
+
+// Tile-and-fuse: only the add is matched (by its `kind` attribute); its exp producer is fused in.
+
+func.func @tile_and_fuse_elementwise(%A: tensor<128x256xf32>,
+                                     %B: tensor<128x256xf32>) -> tensor<128x256xf32> {
+  %empty0 = tensor.empty() : tensor<128x256xf32>
+  %exp = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+      ins(%A : tensor<128x256xf32>)
+      outs(%empty0 : tensor<128x256xf32>) -> tensor<128x256xf32>
+  %empty1 = tensor.empty() : tensor<128x256xf32>
+  %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+      ins(%exp, %B : tensor<128x256xf32>, tensor<128x256xf32>)
+      outs(%empty1 : tensor<128x256xf32>) -> tensor<128x256xf32>
+  return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+    %add = transform.structured.match ops{["linalg.elementwise"]}
+        attributes{kind = #linalg.elementwise_kind<add>} in %root
+      : (!transform.any_op) -> !transform.any_op
+    %tiled, %loop0, %loop1 = transform.structured.fuse %add tile_sizes [32, 64]
+      : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+// CHECK-LABEL: func.func @tile_and_fuse_elementwise(
+// CHECK-SAME:    %[[A:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME:    %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+//      CHECK:   %[[EMPTY:.+]] = tensor.empty()
+//      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] =
+// CHECK-SAME:       iter_args(%[[INIT0:[a-zA-Z0-9]+]] = %[[EMPTY]])
+//      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] =
+// CHECK-SAME:         iter_args(%[[INIT1:[a-zA-Z0-9]+]] = %[[INIT0]])
+// exp tile is generated from the original input, not the full exp result.
+//  CHECK-DAG:       %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//      CHECK:       %[[EXP_TILE:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+// CHECK-SAME:           ins(%[[AT]] :
+//  CHECK-DAG:       %[[BT:.+]] = tensor.extract_slice %[[B]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+//      CHECK:       %[[ADD_TILE:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME:           ins(%[[EXP_TILE]], %[[BT]] :
+//      CHECK:       tensor.insert_slice %[[ADD_TILE]] into %[[INIT1]]
+// CHECK-SAME:           [%[[IV0]], %[[IV1]]] [32, 64] [1, 1]