[Mlir-commits] [mlir] [mlir][linalg][elementwise] Add tests for tiling elementwise operation. (PR #192798)
Javed Absar
llvmlistbot at llvm.org
Sat Apr 18 10:08:38 PDT 2026
https://github.com/javedabsar1 updated https://github.com/llvm/llvm-project/pull/192798
>From 39cf0e35f16e6924d4415367401a549cb4b685fc Mon Sep 17 00:00:00 2001
From: Javed Absar <javed.absar at gmail.com>
Date: Sat, 18 Apr 2026 12:33:56 -0400
Subject: [PATCH] [mlir][linalg][elementwise] Add tests for tiling elementwise
operation.
Add the missing tests that showcase tiling of the `linalg.elementwise` op.
Because `linalg.elementwise` is a structured op, it already implements the
tiling interface, so this patch only adds tests.
Signed-off-by: Javed Absar <javed.absar at gmail.com>
---
.../TilingInterface/tile-elementwise.mlir | 265 ++++++++++++++++++
1 file changed, 265 insertions(+)
create mode 100644 mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir
diff --git a/mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir b/mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir
new file mode 100644
index 0000000000000..dd574e9be6b91
--- /dev/null
+++ b/mlir/test/Interfaces/TilingInterface/tile-elementwise.mlir
@@ -0,0 +1,265 @@
+// RUN: mlir-opt --transform-interpreter --cse -split-input-file %s | FileCheck %s
+
+// Static-shape binary add on 128x256 tensors: tile_using_for with tile sizes [32, 64] tiles both dims.
+
+func.func @tile_elementwise(%A: tensor<128x256xf32>, %B: tensor<128x256xf32>,
+ %C: tensor<128x256xf32>) -> tensor<128x256xf32> {
+ %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+ ins(%A, %B : tensor<128x256xf32>, tensor<128x256xf32>)
+ outs(%C : tensor<128x256xf32>) -> tensor<128x256xf32>
+ return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [32, 64]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// CHECK-LABEL: func.func @tile_elementwise(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME: %[[C:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
+// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
+// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
+// CHECK-DAG: %[[C256:.+]] = arith.constant 256 : index
+// CHECK: scf.for %[[IV0:.+]] = %[[C0]] to %[[C128]] step %[[C32]]
+// CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[C]])
+// CHECK: scf.for %[[IV1:.+]] = %[[C0]] to %[[C256]] step %[[C64]]
+// CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]])
+// CHECK-DAG: %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK-DAG: %[[BT:.+]] = tensor.extract_slice %[[B]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK-DAG: %[[CT:.+]] = tensor.extract_slice %[[INIT1]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK: %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME: ins(%[[AT]], %[[BT]] :
+// CHECK-SAME: outs(%[[CT]] :
+// CHECK: %[[INS:.+]] = tensor.insert_slice %[[TILED]] into %[[INIT1]]
+// CHECK-SAME: [%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK: scf.yield %[[INS]]
+// CHECK: scf.yield
+
+// -----
+
+// Dynamic-shape binary add (tensor<?x?xf32>): tile_using_for with tile sizes [10, 20]; the checks expect affine.min tile sizes.
+
+func.func @tile_elementwise_dynamic(%A: tensor<?x?xf32>, %B: tensor<?x?xf32>,
+ %C: tensor<?x?xf32>) -> tensor<?x?xf32> {
+ %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+ ins(%A, %B : tensor<?x?xf32>, tensor<?x?xf32>)
+ outs(%C : tensor<?x?xf32>) -> tensor<?x?xf32>
+ return %r : tensor<?x?xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [10, 20]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 10)>
+// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0)[s0] -> (-d0 + s0, 20)>
+// CHECK-LABEL: func.func @tile_elementwise_dynamic(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]+]]: tensor<?x?xf32>
+// CHECK-SAME: %[[C:[a-zA-Z0-9]+]]: tensor<?x?xf32>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[D0:.+]] = tensor.dim %[[A]], %[[C0]]
+// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
+// CHECK-DAG: %[[D1:.+]] = tensor.dim %[[A]], %[[C1]]
+// CHECK-DAG: %[[C10:.+]] = arith.constant 10 : index
+// CHECK-DAG: %[[C20:.+]] = arith.constant 20 : index
+// CHECK: scf.for %[[IV0:.+]] = %[[C0]] to %[[D0]] step %[[C10]]
+// CHECK-SAME: iter_args(%[[INIT0:.+]] = %[[C]])
+// CHECK: scf.for %[[IV1:.+]] = %[[C0]] to %[[D1]] step %[[C20]]
+// CHECK-SAME: iter_args(%[[INIT1:.+]] = %[[INIT0]])
+// CHECK: %[[TS0:.+]] = affine.min #[[$MAP0]](%[[IV0]])[%[[D0]]]
+// CHECK: %[[TS1:.+]] = affine.min #[[$MAP1]](%[[IV1]])[%[[D1]]]
+// CHECK-DAG: %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+// CHECK-DAG: %[[BT:.+]] = tensor.extract_slice %[[B]][%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+// CHECK-DAG: %[[CT:.+]] = tensor.extract_slice %[[INIT1]][%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+// CHECK: %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME: ins(%[[AT]], %[[BT]] :
+// CHECK-SAME: outs(%[[CT]] :
+// CHECK: tensor.insert_slice %[[TILED]] into %[[INIT1]]
+// CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS0]], %[[TS1]]] [1, 1]
+
+// -----
+
+// Memref variant (unary negf): the checks expect loops without iter_args and memref.subview in place of tensor.extract_slice.
+
+func.func @tile_elementwise_memref(%A: memref<128x256xf32>,
+ %B: memref<128x256xf32>) {
+ linalg.elementwise kind=#linalg.elementwise_kind<negf>
+ ins(%A : memref<128x256xf32>)
+ outs(%B : memref<128x256xf32>)
+ return
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [32, 64]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// CHECK-LABEL: func.func @tile_elementwise_memref(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: memref<128x256xf32>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]+]]: memref<128x256xf32>
+// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
+// CHECK-DAG: %[[C32:.+]] = arith.constant 32 : index
+// CHECK-DAG: %[[C64:.+]] = arith.constant 64 : index
+// CHECK-DAG: %[[C128:.+]] = arith.constant 128 : index
+// CHECK-DAG: %[[C256:.+]] = arith.constant 256 : index
+// CHECK: scf.for %[[IV0:.+]] = %[[C0]] to %[[C128]] step %[[C32]]
+// CHECK-NOT: iter_args
+// CHECK: scf.for %[[IV1:.+]] = %[[C0]] to %[[C256]] step %[[C64]]
+// CHECK-DAG: %[[AT:.+]] = memref.subview %[[A]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK-DAG: %[[BT:.+]] = memref.subview %[[B]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK: linalg.elementwise kind=#linalg.elementwise_kind<negf>
+// CHECK-SAME: ins(%[[AT]] :
+// CHECK-SAME: outs(%[[BT]] :
+
+// -----
+
+// Parallel tiling: tile_using_forall with tile sizes [32, 64]; the checks expect one scf.forall ending in tensor.parallel_insert_slice.
+
+func.func @tile_elementwise_forall(%A: tensor<128x256xf32>, %B: tensor<128x256xf32>,
+ %C: tensor<128x256xf32>) -> tensor<128x256xf32> {
+ %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+ ins(%A, %B : tensor<128x256xf32>, tensor<128x256xf32>)
+ outs(%C : tensor<128x256xf32>) -> tensor<128x256xf32>
+ return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %forall = transform.structured.tile_using_forall %op tile_sizes [32, 64]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// CHECK-DAG: #[[$MAPX:.+]] = affine_map<(d0) -> (d0 * 32)>
+// CHECK-DAG: #[[$MAPY:.+]] = affine_map<(d0) -> (d0 * 64)>
+// CHECK-LABEL: func.func @tile_elementwise_forall(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME: %[[C:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK: %[[RESULT:.+]] = scf.forall (%[[IV0:.+]], %[[IV1:.+]]) in (4, 4)
+// CHECK-SAME: shared_outs(%[[INIT:.+]] = %[[C]])
+// CHECK-DAG: %[[OFF0:.+]] = affine.apply #[[$MAPX]](%[[IV0]])
+// CHECK-DAG: %[[OFF1:.+]] = affine.apply #[[$MAPY]](%[[IV1]])
+// CHECK-DAG: %[[AT:.+]] = tensor.extract_slice %[[A]][%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+// CHECK-DAG: %[[BT:.+]] = tensor.extract_slice %[[B]][%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+// CHECK-DAG: %[[CT:.+]] = tensor.extract_slice %[[INIT]][%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+// CHECK: %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME: ins(%[[AT]], %[[BT]] :
+// CHECK-SAME: outs(%[[CT]] :
+// CHECK: scf.forall.in_parallel
+// CHECK: tensor.parallel_insert_slice %[[TILED]] into %[[INIT]]
+// CHECK-SAME: [%[[OFF0]], %[[OFF1]]] [32, 64] [1, 1]
+// CHECK: return %[[RESULT]]
+
+// -----
+
+// Broadcast via a non-identity indexing map: the rank-1 input is indexed by d1
+// only, so the checks expect it to be sliced along d1 (size 64) and never along d0.
+
+#map_in = affine_map<(d0, d1) -> (d1)>
+#map_out = affine_map<(d0, d1) -> (d0, d1)>
+
+func.func @tile_elementwise_broadcast(%A: tensor<256xf32>,
+ %B: tensor<128x256xf32>) -> tensor<128x256xf32> {
+ %r = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+ indexing_maps = [#map_in, #map_out]
+ ins(%A : tensor<256xf32>)
+ outs(%B : tensor<128x256xf32>) -> tensor<128x256xf32>
+ return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %op = transform.structured.match ops{["linalg.elementwise"]} in %root
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %loop0, %loop1 = transform.structured.tile_using_for %op tile_sizes [32, 64]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// CHECK-LABEL: func.func @tile_elementwise_broadcast(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: tensor<256xf32>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
+// CHECK-SAME: iter_args(%[[INIT0:[a-zA-Z0-9]+]] = %[[B]])
+// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
+// CHECK-SAME: iter_args(%[[INIT1:[a-zA-Z0-9]+]] = %[[INIT0]])
+// Input is 1-D: sliced only along d1, not d0.
+// CHECK: %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV1]]] [64] [1]
+// CHECK: %[[BT:.+]] = tensor.extract_slice %[[INIT1]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK: %[[TILED:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+// CHECK-SAME: ins(%[[AT]] : tensor<64xf32>)
+// CHECK-SAME: outs(%[[BT]] : tensor<32x64xf32>)
+// CHECK: tensor.insert_slice %[[TILED]] into %[[INIT1]]
+// CHECK-SAME: [%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+
+// -----
+
+// Tile-and-fuse: transform.structured.fuse tiles the add consumer by [32, 64]; the checks expect the exp producer fused into the loops.
+
+func.func @tile_and_fuse_elementwise(%A: tensor<128x256xf32>,
+ %B: tensor<128x256xf32>) -> tensor<128x256xf32> {
+ %empty0 = tensor.empty() : tensor<128x256xf32>
+ %exp = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+ ins(%A : tensor<128x256xf32>)
+ outs(%empty0 : tensor<128x256xf32>) -> tensor<128x256xf32>
+ %empty1 = tensor.empty() : tensor<128x256xf32>
+ %r = linalg.elementwise kind=#linalg.elementwise_kind<add>
+ ins(%exp, %B : tensor<128x256xf32>, tensor<128x256xf32>)
+ outs(%empty1 : tensor<128x256xf32>) -> tensor<128x256xf32>
+ return %r : tensor<128x256xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%root : !transform.any_op {transform.readonly}) {
+ %add = transform.structured.match ops{["linalg.elementwise"]}
+ attributes{kind = #linalg.elementwise_kind<add>} in %root
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %loop0, %loop1 = transform.structured.fuse %add tile_sizes [32, 64]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// CHECK-LABEL: func.func @tile_and_fuse_elementwise(
+// CHECK-SAME: %[[A:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK-SAME: %[[B:[a-zA-Z0-9]+]]: tensor<128x256xf32>
+// CHECK: %[[EMPTY:.+]] = tensor.empty()
+// CHECK: scf.for %[[IV0:[a-zA-Z0-9]+]] =
+// CHECK-SAME: iter_args(%[[INIT0:[a-zA-Z0-9]+]] = %[[EMPTY]])
+// CHECK: scf.for %[[IV1:[a-zA-Z0-9]+]] =
+// CHECK-SAME: iter_args(%[[INIT1:[a-zA-Z0-9]+]] = %[[INIT0]])
+// exp tile is generated from the original input, not the full exp result.
+// CHECK-DAG: %[[AT:.+]] = tensor.extract_slice %[[A]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK: %[[EXP_TILE:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<exp>
+// CHECK-SAME: ins(%[[AT]] :
+// CHECK-DAG: %[[BT:.+]] = tensor.extract_slice %[[B]][%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
+// CHECK: %[[ADD_TILE:.+]] = linalg.elementwise kind=#linalg.elementwise_kind<add>
+// CHECK-SAME: ins(%[[EXP_TILE]], %[[BT]] :
+// CHECK: tensor.insert_slice %[[ADD_TILE]] into %[[INIT1]]
+// CHECK-SAME: [%[[IV0]], %[[IV1]]] [32, 64] [1, 1]
More information about the Mlir-commits
mailing list