[Mlir-commits] [mlir] [mlir][sparse] Fix crash in ForeachRewriter for rank-0 dense tensors (PR #183903)
Mehdi Amini
llvmlistbot at llvm.org
Sat Feb 28 03:48:59 PST 2026
https://github.com/joker-eph created https://github.com/llvm/llvm-project/pull/183903
sparse_tensor.foreach over a rank-0 (scalar) dense tensor crashed because ForeachRewriter delegated entirely to LoopEmitter, which builds one loop level per tensor dimension. For rank-0 tensors no loops are created, so getValPosits() called std::vector::back() on an empty container.
Add a rank-0 early-return path in ForeachRewriter::matchAndRewrite that handles dense scalar tensors directly: bufferize the input to a rank-0 memref, load the single element with empty indices, then inline the body block exactly once. Reduction block-argument values in the yield are remapped to their post-inline equivalents before the block is inlined to avoid dangling references.
Sparse rank-0 tensors remain unsupported: the pattern returns notifyMatchFailure for them, so they are rejected without crashing.
Fixes #177856
From f660666c3f8b571932183abe73c799e0b3e25ad4 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph at gmail.com>
Date: Sat, 28 Feb 2026 03:46:26 -0800
Subject: [PATCH] [mlir][sparse] Fix crash in ForeachRewriter for rank-0 dense
tensors
sparse_tensor.foreach over a rank-0 (scalar) dense tensor crashed because
ForeachRewriter delegated entirely to LoopEmitter, which builds one loop
level per tensor dimension. For rank-0 tensors no loops are created, so
getValPosits() called std::vector::back() on an empty container.
Add a rank-0 early-return path in ForeachRewriter::matchAndRewrite that
handles dense scalar tensors directly: bufferize the input to a rank-0
memref, load the single element with empty indices, then inline the body
block exactly once. Reduction block-argument values in the yield are
remapped to their post-inline equivalents before the block is inlined to
avoid dangling references.
Sparse rank-0 tensors remain unsupported: the pattern returns
notifyMatchFailure for them, so they are rejected without crashing.
Fixes #177856
---
.../Transforms/SparseTensorRewriting.cpp | 40 +++++++++++++++++++
.../SparseTensor/sparse_foreach_rank0.mlir | 31 ++++++++++++++
2 files changed, 71 insertions(+)
create mode 100644 mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
index 24290bde62f49..89ed468d2e1b9 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorRewriting.cpp
@@ -1381,6 +1381,46 @@ struct ForeachRewriter : public OpRewritePattern<ForeachOp> {
// Otherwise, use loop emitter to generate loops.
const auto enc = stt.getEncoding();
+ // Special-case: rank-0 tensors have no dimensions to loop over.
+ // The LoopEmitter (getValPosits) requires at least one loop level, so
+ // handle scalar tensors separately.
+ if (lvlRank == 0) {
+ // Sparse rank-0 tensors are not yet supported.
+ if (enc)
+ return rewriter.notifyMatchFailure(
+ op, "foreach over rank-0 sparse tensors is not supported");
+ // Dense rank-0 tensor: bufferize and load the single element once,
+ // then inline the body without any surrounding loop.
+ LoopEmitter loopEmitter(
+ ValueRange{input},
+ StringAttr::get(getContext(), ForeachOp::getOperationName()));
+ loopEmitter.initializeLoopEmit(rewriter, loc);
+ Value vals = loopEmitter.getValBuffer()[0];
+ Value val = memref::LoadOp::create(rewriter, loc, vals, ValueRange{});
+ // Rank-0 has no coordinates; body args = [value, reductions...].
+ SmallVector<Value> args = {val};
+ args.append(reduc);
+ Block *srcBlock = op.getBody();
+ Operation *terminator = srcBlock->getTerminator();
+ SmallVector<Value> reducValue(terminator->getOperands());
+ // Remap any block-arg entries in reducValue to their post-inline values
+ // before the terminator is erased and the block is inlined, because
+ // inlineBlockBefore() will detach the block args.
+ for (Value &v : reducValue)
+ if (auto ba = dyn_cast<BlockArgument>(v))
+ if (ba.getOwner() == srcBlock)
+ v = args[ba.getArgNumber()];
+ rewriter.eraseOp(terminator);
+ Operation &last = rewriter.getBlock()->back();
+ if (llvm::isa<scf::YieldOp>(last))
+ rewriter.setInsertionPoint(&last);
+ rewriter.inlineBlockBefore(srcBlock, rewriter.getBlock(),
+ rewriter.getInsertionPoint(), args);
+ rewriter.setInsertionPointToEnd(rewriter.getBlock());
+ rewriter.replaceOp(op, reducValue);
+ return success();
+ }
+
// 1. Generates loop for the sparse input.
LoopEmitter loopEmitter(
ValueRange{input},
diff --git a/mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir b/mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir
new file mode 100644
index 0000000000000..bb0d51d71a4e0
--- /dev/null
+++ b/mlir/test/Dialect/SparseTensor/sparse_foreach_rank0.mlir
@@ -0,0 +1,31 @@
+// RUN: mlir-opt %s --sparsification-and-bufferization | FileCheck %s
+
+// Regression test for https://github.com/llvm/llvm-project/issues/177856:
+// sparse_tensor.foreach over a rank-0 (scalar) dense tensor must not crash.
+// The LoopEmitter called getValPosits() which invoked std::vector::back()
+// on an empty container because no loop levels were entered for rank-0.
+
+// CHECK-LABEL: func.func @foreach_scalar_no_reduc(
+// CHECK-SAME: %[[A:.*]]: memref<i32>)
+// CHECK-NOT: memref.load
+// CHECK: return
+func.func @foreach_scalar_no_reduc(%arg0: tensor<i32>) {
+ sparse_tensor.foreach in %arg0 : tensor<i32> do {
+ ^bb0(%v: i32):
+ }
+ return
+}
+
+// CHECK-LABEL: func.func @foreach_scalar_with_reduc(
+// CHECK-SAME: %[[A:.*]]: memref<i32>
+// CHECK-SAME: %[[B:.*]]: i32)
+// CHECK: %[[VAL:.*]] = memref.load %[[A]][] : memref<i32>
+// CHECK: return %[[VAL]] : i32
+func.func @foreach_scalar_with_reduc(%arg0: tensor<i32>, %arg1: i32) -> i32 {
+ %ret = sparse_tensor.foreach in %arg0 init(%arg1): tensor<i32>, i32 -> i32
+ do {
+ ^bb0(%v: i32, %r: i32):
+ sparse_tensor.yield %v : i32
+ }
+ return %ret : i32
+}
More information about the Mlir-commits
mailing list