[Mlir-commits] [mlir] [MLIR][Linalg] Fix crash in tileToForallOpImpl for rank-0 ops (PR #189000)

Fri Mar 27 08:57:43 PDT 2026

https://github.com/joker-eph updated https://github.com/llvm/llvm-project/pull/189000

>From b2e624135fd29e0b32499753d6637f0b244e7925 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Fri, 27 Mar 2026 03:33:13 -0700
Subject: [PATCH] [MLIR][Linalg] Fix crash in tileToForallOpImpl for rank-0 ops

When tiling a rank-0 linalg.generic op, tileUsingSCF returns an empty
loops vector (rank-0 ops have no parallel dimensions and produce no
scf.forall). Two call sites unconditionally accessed
tilingResult.loops.front(), causing a crash:

- tileToForallOpImpl: the loop normalization block was entered whenever
  mixedNumThreads was empty, regardless of whether any loops exist.
  Guard it with !tilingResult.loops.empty().

- TileUsingForallOp::apply: tileOps.push_back was called
  unconditionally. Guard it with !tilingResult.loops.empty().

Add regression tests for both the tile_sizes and num_threads paths,
verifying that the linalg.generic is preserved and no scf.forall is
emitted.

Fixes #187073

Assisted-by: Claude Code
---
 .../TransformOps/LinalgTransformOps.cpp       |  7 ++-
 mlir/test/Dialect/Linalg/tile-to-forall.mlir  | 58 +++++++++++++++++++
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 5f530a585ddb9..baa57f8920094 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -3890,7 +3890,9 @@ DiagnosedSilenceableFailure transform::tileToForallOpImpl(
 
   tilingResult = *maybeTilingResult;
 
-  if (mixedNumThreads.empty()) {
+  // Rank-0 ops produce no loops; skip normalization when there is nothing
+  // to normalize.
+  if (mixedNumThreads.empty() && !tilingResult.loops.empty()) {
     auto generatedForallOp = cast<scf::ForallOp>(tilingResult.loops.front());
     OpBuilder::InsertionGuard g(rewriter);
     rewriter.setInsertionPoint(generatedForallOp);
@@ -3938,7 +3940,8 @@ DiagnosedSilenceableFailure transform::TileUsingForallOp::apply(
         getMapping(), tilingResult);
     if (!diag.succeeded())
       return diag;
-    tileOps.push_back(tilingResult.loops.front());
+    if (!tilingResult.loops.empty())
+      tileOps.push_back(tilingResult.loops.front());
     tiledOps.append(tilingResult.tiledOps);
   }
 
diff --git a/mlir/test/Dialect/Linalg/tile-to-forall.mlir b/mlir/test/Dialect/Linalg/tile-to-forall.mlir
index 1b0bade728b44..f0ba58c55e11f 100644
--- a/mlir/test/Dialect/Linalg/tile-to-forall.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-forall.mlir
@@ -723,3 +723,61 @@ module attributes {transform.with_named_sequence} {
     transform.yield
   }
 }
+
+// -----
+
+// Tiling a rank-0 linalg op with tile_sizes should not crash even when
+// tileUsingSCF produces no loops. (https://github.com/llvm/llvm-project/issues/187073)
+#rankZeroMap = affine_map<() -> ()>
+// CHECK-LABEL: @tile_rank_zero_op_no_forall
+func.func @tile_rank_zero_op_no_forall(%arg0: tensor<i64>) -> tensor<i64> {
+  // CHECK-NOT: scf.forall
+  // CHECK: return %{{.*}} : tensor<i64>
+  %empty = tensor.empty() : tensor<i64>
+  %copy = linalg.generic {indexing_maps = [#rankZeroMap, #rankZeroMap], iterator_types = []}
+      ins(%arg0 : tensor<i64>) outs(%empty : tensor<i64>) {
+  ^bb0(%in: i64, %out: i64):
+    linalg.yield %in : i64
+  } -> tensor<i64>
+  return %copy : tensor<i64>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(
+      %arg0: !transform.any_op {transform.readonly}) {
+    %ops = transform.structured.match ops{["linalg.generic"]} in %arg0
+        : (!transform.any_op) -> !transform.any_op
+    %tiled, %forall = transform.structured.tile_using_forall %ops tile_sizes [32]
+        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}
+
+// -----
+
+// Tiling a rank-0 linalg op with num_threads should also not crash.
+// (https://github.com/llvm/llvm-project/issues/187073)
+#rankZeroMap = affine_map<() -> ()>
+// CHECK-LABEL: @tile_rank_zero_op_no_forall_num_threads
+func.func @tile_rank_zero_op_no_forall_num_threads(%arg0: tensor<i64>) -> tensor<i64> {
+  // CHECK-NOT: scf.forall
+  // CHECK: return %{{.*}} : tensor<i64>
+  %empty = tensor.empty() : tensor<i64>
+  %copy = linalg.generic {indexing_maps = [#rankZeroMap, #rankZeroMap], iterator_types = []}
+      ins(%arg0 : tensor<i64>) outs(%empty : tensor<i64>) {
+  ^bb0(%in: i64, %out: i64):
+    linalg.yield %in : i64
+  } -> tensor<i64>
+  return %copy : tensor<i64>
+}
+
+module attributes {transform.with_named_sequence} {
+  transform.named_sequence @__transform_main(
+      %arg0: !transform.any_op {transform.readonly}) {
+    %ops = transform.structured.match ops{["linalg.generic"]} in %arg0
+        : (!transform.any_op) -> !transform.any_op
+    %tiled, %forall = transform.structured.tile_using_forall %ops num_threads [4]
+        : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
+    transform.yield
+  }
+}