[Mlir-commits] [mlir] [mlir][Tiling] Remove zero slice guard in Pad tiling (PR #175827)
Nirvedh Meshram
llvmlistbot at llvm.org
Tue Jan 13 12:48:50 PST 2026
https://github.com/nirvedhmeshram updated https://github.com/llvm/llvm-project/pull/175827
>From b108a7539f5d6a94459572ccd53669989401179e Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Tue, 13 Jan 2026 12:45:27 -0800
Subject: [PATCH] [mlir][Tiling] Remove zero slice guard in Pad tiling
Dynamically zero-sized tensors are already handled by backends, and the if/else guard generates sub-optimal code that is actually harder for the backends to optimize.
Signed-off-by: Nirvedh Meshram <nirvedh at gmail.com>
---
.../Tensor/IR/TensorTilingInterfaceImpl.cpp | 4 +-
.../Dialect/Linalg/transform-op-tile.mlir | 5 +-
mlir/test/Dialect/Tensor/tiling.mlir | 56 ++++++-------------
.../tile-and-fuse-using-interface.mlir | 15 ++---
.../tile-pad-using-interface.mlir | 36 ++++--------
5 files changed, 39 insertions(+), 77 deletions(-)
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
index 124a63281a37c..0e865cfa83893 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -47,8 +47,8 @@ struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
getTiledImplementation(Operation *op, OpBuilder &b,
ArrayRef<OpFoldResult> offsets,
ArrayRef<OpFoldResult> sizes) const {
- FailureOr<TilingResult> result =
- tensor::bubbleUpPadSlice(b, cast<PadOp>(op), offsets, sizes);
+ FailureOr<TilingResult> result = tensor::bubbleUpPadSlice(
+ b, cast<PadOp>(op), offsets, sizes, /*generateZeroSliceGuard=*/false);
if (failed(result))
return failure();
return result.value();
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
index 0466a7ba3e2ea..61230502f1d1d 100644
--- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
@@ -140,10 +140,7 @@ func.func @tile_tensor_pad(
-> tensor<20x40xf32>
{
// CHECK: scf.forall
- // CHECK: scf.if
- // CHECK: tensor.generate
- // CHECK: else
- // CHECK: tensor.pad {{.*}} nofold
+ // CHECK: tensor.pad {{.*}} nofold
%0 = tensor.pad %arg0 nofold low[%low, %low] high[%high, %high] {
^bb0(%arg9: index, %arg10: index):
tensor.yield %cst : f32
diff --git a/mlir/test/Dialect/Tensor/tiling.mlir b/mlir/test/Dialect/Tensor/tiling.mlir
index 32fb0c9e41c39..c6c28f1e17ae4 100644
--- a/mlir/test/Dialect/Tensor/tiling.mlir
+++ b/mlir/test/Dialect/Tensor/tiling.mlir
@@ -14,12 +14,9 @@
// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM0]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
func.func @dynamic_pad_tensor_3_4(%input_tensor: tensor<?x?xf32>,
@@ -53,12 +50,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[DIM_IN0:.*]] = tensor.dim %[[IN]], %[[C0]]
// CHECK-DAG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN0]]]
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
func.func @dynamic_pad_tensor_0_3(%input_tensor: tensor<?x?xf32>,
@@ -89,12 +83,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
func.func @static_pad_tensor_3_4(%input_tensor: tensor<7x9xf32>,
@@ -125,12 +116,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: %[[COPY:.*]] = linalg.copy ins(%[[SWAP_RESULT:.*]]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: %[[COPY:.*]] = linalg.copy ins(%[[PAD:.*]]
// CHECK: tensor.insert_slice %[[COPY]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
@@ -163,12 +151,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
func.func @static_pad_tensor_0_3(%input_tensor: tensor<7x9xf32>,
@@ -196,14 +181,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C15:.*]] = arith.constant 15 : index
// CHECK: %[[RESULT:.*]] = scf.for %[[IV:.*]] = %[[C0]] to %[[C15]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[R2:.*]] = scf.if
-// CHECK: %[[GEN:.*]] = tensor.generate
-// CHECK: scf.yield %[[GEN]] : tensor<14x3xf32>
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
-// CHECK: scf.yield %[[PAD]] : tensor<14x3xf32>
-// CHECK: %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+// CHECK: %[[R3:.*]] = tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
// CHECK: scf.yield %[[R3]] : tensor<14x15xf32>
// CHECK: return %[[RESULT]] : tensor<14x15xf32>
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 21d7816934bf9..91935dd3ac664 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -576,15 +576,12 @@ module attributes {transform.with_named_sequence} {
// CHECK-LABEL: func @pad_producer_fusion
// CHECK-SAME: %[[ARG0:.+]]: tensor<10xf32>
// CHECK: %[[FOR_RESULT:.+]] = scf.for
-// CHECK: %[[IF_RESULT:.+]] = scf.if
-// CHECK: else
-// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
-// CHECK: %[[GENERIC:.+]] = linalg.generic
-// CHECK-SAME: ins(%[[SLICE]] :
-// CHECK: %[[PAD:.+]] = tensor.pad %[[GENERIC]]
-// CHECK: %[[CAST:.+]] = tensor.cast %[[PAD]]
-// CHECK: scf.yield %[[CAST]]
-// CHECK: %[[INSERT_SLICE:.+]] = tensor.insert_slice %[[IF_RESULT]]
+// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[ARG0]]
+// CHECK: %[[GENERIC:.+]] = linalg.generic
+// CHECK-SAME: ins(%[[SLICE]] :
+// CHECK: %[[PAD:.+]] = tensor.pad %[[GENERIC]]
+// CHECK: %[[CAST:.+]] = tensor.cast %[[PAD]]
+// CHECK: %[[INSERT_SLICE:.+]] = tensor.insert_slice %[[CAST]]
// CHECK: scf.yield %[[INSERT_SLICE]]
// CHECK: return %[[FOR_RESULT]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
index ccf8e37c094f4..169cbd37d01a1 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-pad-using-interface.mlir
@@ -34,12 +34,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
// CHECK: %[[RESULT:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[DIM0]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
// -----
@@ -74,12 +71,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[DIM_IN1]]]
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[DIM1]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
// -----
@@ -111,12 +105,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C15]] step %[[C2]]
// CHECK: scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
// -----
@@ -145,12 +136,9 @@ module attributes {transform.with_named_sequence} {
// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index
// CHECK-DAG: %[[C16:.*]] = arith.constant 16 : index
// CHECK: %[[RESULT:.*]] = scf.for {{.*}} = %[[C0]] to %[[C16]] step %[[C3]] iter_args(%[[INNER_OUT:.*]] =
-// CHECK: %[[SWAP_RESULT:.*]] = scf.if
-// CHECK: tensor.generate
-// CHECK: else
-// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
-// CHECK: tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
+// CHECK: %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
+// CHECK: %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+// CHECK: tensor.insert_slice %[[PAD]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
// CHECK: return %[[RESULT]]
/// Rest of the tests only check that they dont fail.
More information about the Mlir-commits
mailing list