[Mlir-commits] [mlir] [mlir][Tensor] Add rank-reducing slice in generatedSlices (PR #174248)
Bangtian Liu
llvmlistbot at llvm.org
Tue Jan 6 09:01:09 PST 2026
https://github.com/bangtianliu updated https://github.com/llvm/llvm-project/pull/174248
>From 51fff993d557b3f79de40b247526f060bb4ec361 Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian at gmail.com>
Date: Fri, 2 Jan 2026 14:54:43 -0800
Subject: [PATCH 1/3] [mlir][Tensor] Add rank-reducing slice in generatedSlices
Signed-off-by: Bangtian Liu <liubangtian at gmail.com>
---
.../Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp
index 549ac7afca8ca..7903f3c51b73b 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/SwapExtractSliceWithProducerPatterns.cpp
@@ -53,6 +53,7 @@ FailureOr<TilingResult> tensor::replaceExtractSliceWithTiledProducer(
builder, sliceOp.getLoc(), sliceOp.getType(),
tiledResult->tiledValues[0], offsets, sliceOp.getMixedSizes(), strides);
tiledResult->tiledValues[0] = newSliceOp;
+ tiledResult->generatedSlices.push_back(newSliceOp);
}
return *tiledResult;
>From 252732345b3a3fa1a19ed118d9687b7ffe4331a0 Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian at gmail.com>
Date: Fri, 2 Jan 2026 17:27:51 -0800
Subject: [PATCH 2/3] fix timeout issue
Signed-off-by: Bangtian Liu <liubangtian at gmail.com>
---
.../Dialect/SCF/Transforms/TileUsingInterface.cpp | 13 ++++++++++++-
.../tile-and-fuse-with-reduction-tiling.mlir | 5 ++++-
2 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
index 009c2c3537411..33960093d51c6 100644
--- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp
@@ -1327,7 +1327,18 @@ getUntiledProducerFromSliceSource(OpOperand *source,
}
if (loopIt == loops.rend())
destinationIterArg = source;
- return {dyn_cast<OpResult>(source->get()), destinationIterArg};
+
+ OpResult result = dyn_cast<OpResult>(source->get());
+ if (result) {
+ Operation *producer = result.getOwner();
+ Operation *innermostLoop = loops.back();
+ // If the producer is already inside the innermost loop (where the slice
+ // is), it has already been fused. Skip it to avoid infinite loops.
+ if (innermostLoop->isProperAncestor(producer))
+ return {OpResult(), std::nullopt};
+ }
+
+ return {result, destinationIterArg};
}
/// Implementation of fusing producer of a single slice by computing the
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-with-reduction-tiling.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-with-reduction-tiling.mlir
index 8cace28d441c6..62c82a15a5417 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-with-reduction-tiling.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-with-reduction-tiling.mlir
@@ -1,6 +1,9 @@
// RUN: mlir-opt -transform-interpreter -cse -mlir-print-local-scope -split-input-file -verify-diagnostics %s | FileCheck %s
-// Check tile+ fuse works with partial reduction outer parallel strategy.
+// Check tile + fuse works with partial reduction outer parallel strategy.
+// This also tests that the fusion logic correctly skips producers that are
+// already inside the innermost loop (e.g., the rank-reducing slice of the
+// fused fill), avoiding infinite loops in the fusion worklist.
module{
func.func @tile_and_fuse_with_partial_reduction_outer_parallel(
>From 792ac2e4a0de5ab57996b33ae63c38551dc3fcdf Mon Sep 17 00:00:00 2001
From: Bangtian Liu <liubangtian at gmail.com>
Date: Tue, 6 Jan 2026 09:05:32 -0800
Subject: [PATCH 3/3] add a test about rank-reducing slices
Signed-off-by: Bangtian Liu <liubangtian at gmail.com>
---
.../tile-and-fuse-using-interface.mlir | 62 +++++++++++++++++++
1 file changed, 62 insertions(+)
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
index 21d7816934bf9..aeb65ecef61c1 100644
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -675,3 +675,65 @@ module attributes {transform.with_named_sequence} {
// CHECK-SAME: ins(%[[TILEDARG0]]
// CHECK-SAME: outs(%[[TILEDARG1]]
// CHECK: tensor.insert_slice %[[RES:.*]]
+
+// -----
+
+// Test that tile-and-fuse correctly handles rank-reducing extract_slice operations.
+//
+// The rank-reducing slice created during fusion is recorded in generatedSlices
+// (see SwapExtractSliceWithProducerPatterns.cpp). Note: this does NOT enable
+// additional fusion with the default cleanup patterns, because the slice's source
+// is the tiled producer (already inside the loop) and the isProperAncestor check
+// prevents re-fusion. Recording it lets cleanup patterns such as SwapExtractSliceWithFillPatterns
+// (when added to cleanupPatterns) transform the slice during tile-and-fuse.
+
+func.func @fuse_through_rank_reducing_slice(
+ %arg0: tensor<4x96xf16>) -> tensor<4x96xf16> {
+ %cst = arith.constant 1.0 : f16
+
+ // Producer: fill on 3D tensor with unit dimension
+ %empty_3d = tensor.empty() : tensor<4x1x96xf16>
+ %fill = linalg.fill ins(%cst : f16) outs(%empty_3d : tensor<4x1x96xf16>) -> tensor<4x1x96xf16>
+
+ // Rank-reducing slice: 3D (4x1x96) -> 2D (4x96), dropping the unit dimension
+ %reduced = tensor.extract_slice %fill[0, 0, 0] [4, 1, 96] [1, 1, 1]
+ : tensor<4x1x96xf16> to tensor<4x96xf16>
+
+ // Consumer: 2D operation
+ %result = linalg.generic {
+ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
+ iterator_types = ["parallel", "parallel"]}
+ ins(%reduced : tensor<4x96xf16>)
+ outs(%arg0 : tensor<4x96xf16>) {
+ ^bb0(%in: f16, %out: f16):
+ %sum = arith.addf %in, %out : f16
+ linalg.yield %sum : f16
+ } -> tensor<4x96xf16>
+
+ return %result : tensor<4x96xf16>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %consumer = transform.structured.match ops{["linalg.generic"]} in %arg0
+ : (!transform.any_op) -> !transform.any_op
+
+ %tiled, %loop = transform.structured.fuse %consumer tile_sizes [0, 32] {apply_cleanup}
+ : (!transform.any_op) -> (!transform.any_op, !transform.op<"scf.for">)
+ transform.yield
+ }
+}
+// CHECK-LABEL: func.func @fuse_through_rank_reducing_slice
+// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: tensor<4x96xf16>
+// CHECK-DAG: %[[EMPTY_3D:.+]] = tensor.empty() : tensor<4x1x96xf16>
+// CHECK: scf.for %[[IV:[a-zA-Z0-9_]+]] = {{.*}} iter_args(%[[ITERARG:.+]] = %[[ARG0]])
+// CHECK: %[[FILL_DEST:.+]] = tensor.extract_slice %[[EMPTY_3D]][0, 0, %[[IV]]] [4, 1, 32]
+// CHECK-SAME: tensor<4x1x96xf16> to tensor<4x1x32xf16>
+// CHECK: %[[TILED_FILL:.+]] = linalg.fill
+// CHECK-SAME: outs(%[[FILL_DEST]] : tensor<4x1x32xf16>)
+// CHECK: %[[RANK_REDUCED:.+]] = tensor.extract_slice %[[TILED_FILL]][0, 0, 0] [4, 1, 32]
+// CHECK-SAME: tensor<4x1x32xf16> to tensor<4x32xf16>
+// CHECK: %[[CONSUMER_DEST:.+]] = tensor.extract_slice %[[ITERARG]]
+// CHECK: linalg.generic
+// CHECK-SAME: ins(%[[RANK_REDUCED]] : tensor<4x32xf16>)
+// CHECK-SAME: outs(%[[CONSUMER_DEST]] : tensor<4x32xf16>)
More information about the Mlir-commits mailing list