[Mlir-commits] [mlir] [mlir][linalg] Fix crash in linalg-specialize-generic-ops with scalar inputs (PR #189212)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Sun Mar 29 00:01:41 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir
Author: Mehdi Amini (joker-eph)
<details>
<summary>Changes</summary>
## Summary
`DecomposeProjectedPermutation` (invoked by `--linalg-specialize-generic-ops`) used `cast<RankedTensorType>` unconditionally on every operand of a `linalg.generic`. However, `linalg.generic` permits scalar (non-tensor) inputs — e.g. an `i32` operand whose indexing map has no results, such as `(d0, d1) -> ()` — and `hasPureTensorSemantics()` does not exclude them (it only checks that no operand is a memref). When such a scalar operand was present, the hard cast caused an assertion failure.
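Fix: replace `cast<RankedTensorType>` with `dyn_cast<RankedTensorType>` and return `failure()` (skip decomposition) when any operand is not a ranked tensor type. A regression test is added.
Note: the full diff below also modifies `PipelineDataTransfer.cpp` so that the double-buffer `memref.alloc` inherits the alignment attribute of the original alloc, with accompanying tests in `pipeline-data-transfer.mlir`; that change is not otherwise described in this summary.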
Fixes #<!-- -->122094
## Test plan
- New FileCheck test in `decompose-generic-by-unfolding-projected-permutation.mlir` that runs the pass on a `linalg.generic` with a scalar `i32` input and verifies it does not crash and leaves the op unchanged.
- All existing linalg specialize/decompose tests continue to pass.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
---
Full diff: https://github.com/llvm/llvm-project/pull/189212.diff
4 Files Affected:
- (modified) mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp (+7-2)
- (modified) mlir/lib/Dialect/Linalg/Transforms/DecomposeGenericByUnfoldingPermutation.cpp (+3-1)
- (modified) mlir/test/Dialect/Affine/pipeline-data-transfer.mlir (+48)
- (modified) mlir/test/Dialect/Linalg/decompose-generic-by-unfolding-projected-permutation.mlir (+28)
``````````diff
diff --git a/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
index d84cb4f0cde5f..660418480be02 100644
--- a/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
@@ -99,9 +99,14 @@ static bool doubleBuffer(Value oldMemRef, AffineForOp forOp) {
forOp.getLoc(), oldMemRef, dim.index()));
}
+ // Propagate alignment from the original alloc op, if any.
+ IntegerAttr alignment;
+ if (auto oldAllocOp = oldMemRef.getDefiningOp<memref::AllocOp>())
+ alignment = oldAllocOp.getAlignmentAttr();
+
// Create and place the alloc right before the 'affine.for' operation.
- Value newMemRef = memref::AllocOp::create(bOuter, forOp.getLoc(),
- newMemRefType, allocOperands);
+ Value newMemRef = memref::AllocOp::create(
+ bOuter, forOp.getLoc(), newMemRefType, allocOperands, alignment);
// Create 'iv mod 2' value to index the leading dimension.
auto d0 = bInner.getAffineDimExpr(0);
diff --git a/mlir/lib/Dialect/Linalg/Transforms/DecomposeGenericByUnfoldingPermutation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DecomposeGenericByUnfoldingPermutation.cpp
index 9015cbb096f88..2cf99011725fe 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/DecomposeGenericByUnfoldingPermutation.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/DecomposeGenericByUnfoldingPermutation.cpp
@@ -164,7 +164,9 @@ LogicalResult DecomposeProjectedPermutation::matchAndRewrite(
// out which operand can supply that runtime-value (tensor.dim).
// Leaving it as a future TODO.
if (llvm::any_of(op->getOpOperands(), [](OpOperand &oper) {
- auto opType = cast<RankedTensorType>(oper.get().getType());
+ auto opType = dyn_cast<RankedTensorType>(oper.get().getType());
+ if (!opType)
+ return true;
return ShapedType::isDynamicShape(opType.getShape());
}))
return failure();
diff --git a/mlir/test/Dialect/Affine/pipeline-data-transfer.mlir b/mlir/test/Dialect/Affine/pipeline-data-transfer.mlir
index 35507c37be79b..120cde8639bae 100644
--- a/mlir/test/Dialect/Affine/pipeline-data-transfer.mlir
+++ b/mlir/test/Dialect/Affine/pipeline-data-transfer.mlir
@@ -396,3 +396,51 @@ func.func @same_memref_source_and_tag(%arg0: index, %arg1: index) {
return
}
// CHECK: affine.for
+
+// -----
+
+// Regression test for https://github.com/llvm/llvm-project/issues/146015.
+// The double-buffer alloc created by pipeline-data-transfer should preserve
+// the alignment attribute from the original alloc.
+// CHECK-LABEL: func @preserve_alignment
+func.func @preserve_alignment() {
+ %A = memref.alloc() : memref<256 x f32>
+ // CHECK: memref.alloc() {alignment = 1024 : i64} : memref<2x32xf32, 1>
+ %Ah = memref.alloc() {alignment = 1024} : memref<32 x f32, 1>
+ %tag = memref.alloc() : memref<1 x f32>
+ %zero = arith.constant 0 : index
+ %num_elts = arith.constant 32 : index
+
+ affine.for %i = 0 to 8 {
+ affine.dma_start %A[%i], %Ah[%i], %tag[%zero], %num_elts : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
+ affine.dma_wait %tag[%zero], %num_elts : memref<1 x f32>
+ %v = affine.load %Ah[%i] : memref<32 x f32, 1>
+ }
+ memref.dealloc %tag : memref<1 x f32>
+ memref.dealloc %Ah : memref<32 x f32, 1>
+ return
+}
+
+// -----
+
+// Negative test: alloc without alignment must NOT gain a spurious alignment
+// attribute on the double-buffer alloc.
+// CHECK-LABEL: func @no_alignment_not_propagated
+func.func @no_alignment_not_propagated() {
+ %A = memref.alloc() : memref<256 x f32>
+ // CHECK: memref.alloc() : memref<2x32xf32, 1>
+ // CHECK-NOT: {alignment
+ %Ah = memref.alloc() : memref<32 x f32, 1>
+ %tag = memref.alloc() : memref<1 x f32>
+ %zero = arith.constant 0 : index
+ %num_elts = arith.constant 32 : index
+
+ affine.for %i = 0 to 8 {
+ affine.dma_start %A[%i], %Ah[%i], %tag[%zero], %num_elts : memref<256 x f32>, memref<32 x f32, 1>, memref<1 x f32>
+ affine.dma_wait %tag[%zero], %num_elts : memref<1 x f32>
+ %v = affine.load %Ah[%i] : memref<32 x f32, 1>
+ }
+ memref.dealloc %tag : memref<1 x f32>
+ memref.dealloc %Ah : memref<32 x f32, 1>
+ return
+}
diff --git a/mlir/test/Dialect/Linalg/decompose-generic-by-unfolding-projected-permutation.mlir b/mlir/test/Dialect/Linalg/decompose-generic-by-unfolding-projected-permutation.mlir
index 38e406a13ec08..86c522569cc55 100644
--- a/mlir/test/Dialect/Linalg/decompose-generic-by-unfolding-projected-permutation.mlir
+++ b/mlir/test/Dialect/Linalg/decompose-generic-by-unfolding-projected-permutation.mlir
@@ -69,3 +69,31 @@ func.func @broadcast_only(%x : tensor<2x16x32xf32>, %y: tensor<2x32xf32>, %z :
// CHECK: %[[X_bc:.+]] = linalg.broadcast ins(%[[Y]] : tensor<2x32xf32>) outs(%[[E0]] : tensor<2x16x32xf32>) dimensions = [1]
// CHECK: {{.*}} = linalg.div ins(%[[X]], %[[X_bc]] : tensor<2x16x32xf32>, tensor<2x16x32xf32>) outs(%arg2 : tensor<2x16x32xf32>) -> tensor<2x16x32xf32>
// CHECK-NOT: linalg.generic
+
+// -----
+
+// Verify that linalg.generic with scalar (non-tensor) inputs is not decomposed
+// and does not crash. Scalar inputs have 0-D affine maps and are not
+// RankedTensorType; the pass must handle them gracefully by bailing out.
+
+#map = affine_map<(d0, d1) -> (d0)>
+#map1 = affine_map<(d0, d1) -> (d1)>
+#map2 = affine_map<(d0, d1) -> ()>
+#map3 = affine_map<(d0, d1) -> (d0, d1)>
+
+func.func @scalar_input(%arg0: tensor<4xi32>, %arg1: tensor<4xi32>, %arg2: i32) -> tensor<4x4xi32> {
+ %0 = tensor.empty() : tensor<4x4xi32>
+ %1 = linalg.generic {indexing_maps = [#map, #map1, #map2, #map3],
+ iterator_types = ["parallel", "parallel"]}
+ ins(%arg0, %arg1, %arg2 : tensor<4xi32>, tensor<4xi32>, i32)
+ outs(%0 : tensor<4x4xi32>) {
+ ^bb0(%in: i32, %in2: i32, %in3: i32, %out: i32):
+ %2 = arith.muli %in, %in2 : i32
+ %3 = arith.addi %in3, %2 : i32
+ linalg.yield %3 : i32
+ } -> tensor<4x4xi32>
+ return %1 : tensor<4x4xi32>
+}
+
+// CHECK-LABEL: scalar_input
+// CHECK: linalg.generic
``````````
</details>
https://github.com/llvm/llvm-project/pull/189212
More information about the Mlir-commits mailing list