[Mlir-commits] [mlir] [mlir][sparse] Fix crash in ForeachRewriter for rank-0 dense tensors (PR #183903)

Sat Feb 28 03:48:59 PST 2026

https://github.com/joker-eph created https://github.com/llvm/llvm-project/pull/183903

sparse_tensor.foreach over a rank-0 (scalar) dense tensor crashed because ForeachRewriter delegated entirely to LoopEmitter, which builds one loop level per tensor dimension. For rank-0 tensors no loops are created, so getValPosits() called std::vector::back() on an empty container.

Add a rank-0 early-return path in ForeachRewriter::matchAndRewrite that handles dense scalar tensors directly: bufferize the input to a rank-0 memref, load the single element with empty indices, then inline the body block exactly once. Yield operands that are block arguments of the body (the element value or a reduction argument) are remapped to the values substituted for them before the block is inlined, to avoid dangling references.

For sparse rank-0 tensors the pattern returns notifyMatchFailure: they are genuinely unsupported, but are rejected cleanly instead of crashing.

Fixes #177856

>From f660666c3f8b571932183abe73c799e0b3e25ad4 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Sat, 28 Feb 2026 03:46:26 -0800
Subject: [PATCH] [mlir][sparse] Fix crash in ForeachRewriter for rank-0 dense
 tensors

sparse_tensor.foreach over a rank-0 (scalar) dense tensor crashed because
ForeachRewriter delegated entirely to LoopEmitter, which builds one loop
level per tensor dimension. For rank-0 tensors no loops are created, so
getValPosits() called std::vector::back() on an empty container.

Add a rank-0 early-return path in ForeachRewriter::matchAndRewrite that
handles dense scalar tensors directly: bufferize the input to a rank-0
memref, load the single element with empty indices, then inline the body
block exactly once. Yield operands that are block arguments of the body
(the element value or a reduction argument) are remapped to the values
substituted for them before the block is inlined, to avoid dangling
references.

For sparse rank-0 tensors the pattern returns notifyMatchFailure: they
are genuinely unsupported, but are rejected cleanly instead of crashing.

Fixes #177856
---
 .../Transforms/SparseTensorRewriting.cpp      | 40 +++++++++++++++++++
 .../SparseTensor/sparse_foreach_rank0.mlir    | 31 ++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir

diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 24290bde62f49..89ed468d2e1b9 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -1381,6 +1381,46 @@ struct ForeachRewriter : public OpRewritePattern<ForeachOp> {
     // Otherwise, use loop emitter to generate loops.
     const auto enc = stt.getEncoding();
 
+    // Special-case: rank-0 tensors have no dimensions to loop over.
+    // The LoopEmitter (getValPosits) requires at least one loop level, so
+    // handle scalar tensors separately.
+    if (lvlRank == 0) {
+      // Sparse rank-0 tensors are not yet supported.
+      if (enc)
+        return rewriter.notifyMatchFailure(
+            op, "foreach over rank-0 sparse tensors is not supported");
+      // Dense rank-0 tensor: bufferize and load the single element once,
+      // then inline the body without any surrounding loop.
+      LoopEmitter loopEmitter(
+          ValueRange{input},
+          StringAttr::get(getContext(), ForeachOp::getOperationName()));
+      loopEmitter.initializeLoopEmit(rewriter, loc);
+      Value vals = loopEmitter.getValBuffer()[0];
+      Value val = memref::LoadOp::create(rewriter, loc, vals, ValueRange{});
+      // Rank-0 has no coordinates; body args = [value, reductions...].
+      SmallVector<Value> args = {val};
+      args.append(reduc);
+      Block *srcBlock = op.getBody();
+      Operation *terminator = srcBlock->getTerminator();
+      SmallVector<Value> reducValue(terminator->getOperands());
+      // Remap any block-arg entries in reducValue to their post-inline values
+      // before the terminator is erased and the block is inlined, because
+      // inlineBlockBefore() will detach the block args.
+      for (Value &v : reducValue)
+        if (auto ba = dyn_cast<BlockArgument>(v))
+          if (ba.getOwner() == srcBlock)
+            v = args[ba.getArgNumber()];
+      rewriter.eraseOp(terminator);
+      Operation &last = rewriter.getBlock()->back();
+      if (llvm::isa<scf::YieldOp>(last))
+        rewriter.setInsertionPoint(&last);
+      rewriter.inlineBlockBefore(srcBlock, rewriter.getBlock(),
+                                 rewriter.getInsertionPoint(), args);
+      rewriter.setInsertionPointToEnd(rewriter.getBlock());
+      rewriter.replaceOp(op, reducValue);
+      return success();
+    }
+
     // 1. Generates loop for the sparse input.
     LoopEmitter loopEmitter(
         ValueRange{input},
diff --git a/mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir b/mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir
new file mode 100644
index 0000000000000..bb0d51d71a4e0
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s
+
+// Regression test for https://github.com/llvm/llvm-project/issues/177856:
+// sparse_tensor.foreach over a rank-0 (scalar) dense tensor must not crash.
+// The LoopEmitter called getValPosits() which invoked std::vector::back()
+// on an empty container because no loop levels were entered for rank-0.
+
+// CHECK-LABEL: func.func @foreach_scalar_no_reduc(
+// CHECK-SAME:    %[[A:.*]]: memref<i32>)
+// CHECK-NOT:   memref.load
+// CHECK:       return
+func.func @foreach_scalar_no_reduc(%arg0: tensor<i32>) {
+  sparse_tensor.foreach in %arg0 : tensor<i32> do {
+    ^bb0(%v: i32):
+  }
+  return
+}
+
+// CHECK-LABEL: func.func @foreach_scalar_with_reduc(
+// CHECK-SAME:    %[[A:.*]]: memref<i32>
+// CHECK-SAME:    %[[B:.*]]: i32)
+// CHECK:         %[[VAL:.*]] = memref.load %[[A]][] : memref<i32>
+// CHECK:         return %[[VAL]] : i32
+func.func @foreach_scalar_with_reduc(%arg0: tensor<i32>, %arg1: i32) -> i32 {
+  %ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<i32>, i32 -> i32
+  do {
+    ^bb0(%v: i32, %r: i32):
+      sparse_tensor.yield %v : i32
+  }
+  return %ret : i32
+}