[Mlir-commits] [mlir] [MLIR][Linalg] Fix crash in tileToForallOpImpl for rank-0 ops (PR #189000)
Mehdi Amini
llvmlistbot at llvm.org
Fri Mar 27 08:57:43 PDT 2026
https://github.com/joker-eph updated https://github.com/llvm/llvm-project/pull/189000
>From b2e624135fd29e0b32499753d6637f0b244e7925 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Fri, 27 Mar 2026 03:33:13 -0700
Subject: [PATCH] [MLIR][Linalg] Fix crash in tileToForallOpImpl for rank-0 ops
When tiling a rank-0 linalg.generic op, tileUsingSCF returns an empty
loops vector (rank-0 ops have no parallel dimensions and produce no
scf.forall). Two call sites unconditionally accessed
tilingResult.loops.front(), causing a crash:
- tileToForallOpImpl: the loop normalization block was entered whenever
mixedNumThreads was empty, regardless of whether any loops exist.
Guard it with !tilingResult.loops.empty().
- TileUsingForallOp::apply: tileOps.push_back was called
unconditionally. Guard it with !tilingResult.loops.empty().
Add regression tests for both the tile_sizes and num_threads paths,
verifying that the linalg.generic is preserved and no scf.forall is
emitted.
Fixes #187073
Assisted-by: Claude Code
---
.../TransformOps/LinalgTransformOps.cpp | 7 ++-
mlir/test/Dialect/Linalg/tile-to-forall.mlir | 58 +++++++++++++++++++
2 files changed, 63 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 5f530a585ddb9..baa57f8920094 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -3890,7 +3890,9 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
tilingResult = *maybeTilingResult;
- if (mixedNumThreads.empty()) {
+ // Rank-0 ops produce no loops; skip normalization when there is nothing
+ // to normalize.
+ if (mixedNumThreads.empty() && !tilingResult.loops.empty()) {
auto generatedForallOp = cast<scf::ForallOp>(tilingResult.loops.front());
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(generatedForallOp);
@@ -3938,7 +3940,8 @@ DiagnosedSilenceableFailure transform::TileUsingForallOp::apply(
getMapping(), tilingResult);
if (!diag.succeeded())
return diag;
- tileOps.push_back(tilingResult.loops.front());
+ if (!tilingResult.loops.empty())
+ tileOps.push_back(tilingResult.loops.front());
tiledOps.append(tilingResult.tiledOps);
}
diff --git a/mlir/test/Dialect/Linalg/tile-to-forall.mlir b/mlir/test/Dialect/Linalg/tile-to-forall.mlir
index 1b0bade728b44..f0ba58c55e11f 100644
--- a/mlir/test/Dialect/Linalg/tile-to-forall.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-forall.mlir
@@ -723,3 +723,61 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+// -----
+
+// Tiling a rank-0 linalg op with tile_sizes should not crash even when
+// tileUsingSCF produces no loops. (https://github.com/llvm/llvm-project/issues/187073)
+#rankZeroMap = affine_map<() -> ()>
+// CHECK-LABEL: @tile_rank_zero_op_no_forall
+func.func @tile_rank_zero_op_no_forall(%arg0: tensor<i64>) -> tensor<i64> {
+ // CHECK-NOT: scf.forall
+ // CHECK: return %{{.*}} : tensor<i64>
+ %empty = tensor.empty() : tensor<i64>
+ %copy = linalg.generic {indexing_maps = [#rankZeroMap, #rankZeroMap], iterator_types = []}
+ ins(%arg0 : tensor<i64>) outs(%empty : tensor<i64>) {
+ ^bb0(%in: i64, %out: i64):
+ linalg.yield %in : i64
+ } -> tensor<i64>
+ return %copy : tensor<i64>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(
+ %arg0: !transform.any_op {transform.readonly}) {
+ %ops = transform.structured.match ops{["linalg.generic"]} in %arg0
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %forall = transform.structured.tile_using_forall %ops tile_sizes [32]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
+
+// -----
+
+// Tiling a rank-0 linalg op with num_threads should also not crash.
+// (https://github.com/llvm/llvm-project/issues/187073)
+#rankZeroMap = affine_map<() -> ()>
+// CHECK-LABEL: @tile_rank_zero_op_no_forall_num_threads
+func.func @tile_rank_zero_op_no_forall_num_threads(%arg0: tensor<i64>) -> tensor<i64> {
+ // CHECK-NOT: scf.forall
+ // CHECK: return %{{.*}} : tensor<i64>
+ %empty = tensor.empty() : tensor<i64>
+ %copy = linalg.generic {indexing_maps = [#rankZeroMap, #rankZeroMap], iterator_types = []}
+ ins(%arg0 : tensor<i64>) outs(%empty : tensor<i64>) {
+ ^bb0(%in: i64, %out: i64):
+ linalg.yield %in : i64
+ } -> tensor<i64>
+ return %copy : tensor<i64>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(
+ %arg0: !transform.any_op {transform.readonly}) {
+ %ops = transform.structured.match ops{["linalg.generic"]} in %arg0
+ : (!transform.any_op) -> !transform.any_op
+ %tiled, %forall = transform.structured.tile_using_forall %ops num_threads [4]
+ : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+ transform.yield
+ }
+}
More information about the Mlir-commits
mailing list