[Mlir-commits] [mlir] [mlir][vector] Missing indices on vectorization of 1-d reduction to rank-1 memref (PR #166959)
Simone Pellegrini
llvmlistbot at llvm.org
Mon Nov 17 05:17:41 PST 2025
https://github.com/simpel01 updated https://github.com/llvm/llvm-project/pull/166959
From 6e25c588c7c585acc5159399a65573620c5016c5 Mon Sep 17 00:00:00 2001
From: Simone Pellegrini <simone.pellegrini at arm.com>
Date: Fri, 7 Nov 2025 13:48:48 +0100
Subject: [PATCH] [mlir][vector] Missing indices on vectorization of 1-d
reductions to rank-1 shaped-types
Vectorization of a 1-d reduction whose output is a rank-1 shaped type could
generate an invalid `vector.transfer_write` with no indices for the
destination operand, e.g.:
vector.transfer_write"(%vec, %t) <{...}> : (vector<f32>, tensor<1xf32>) -> ()
This patch fixes the problem by providing the expected number of indices
(i.e. matching the rank of the shaped type).
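For the example above, the fix is expected to produce a write with one zero
index per destination dimension, roughly of the form below (a sketch mirroring
the CHECK lines in the new test; attributes are omitted and the value names
%c0, %vec and %t are placeholders):

  %c0 = arith.constant 0 : index
  vector.transfer_write %vec, %t[%c0] : vector<f32>, tensor<1xf32>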
---
.../Linalg/Transforms/Vectorization.cpp | 8 +--
.../linalg-ops-with-patterns.mlir | 68 ++++++++++++++++---
2 files changed, 64 insertions(+), 12 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 19d2d854a3838..4eb2a0cb200a0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -746,12 +746,12 @@ static Value buildVectorWrite(RewriterBase &rewriter, Value value,
auto vectorType = state.getCanonicalVecType(
getElementTypeOrSelf(outputOperand->get().getType()), vectorTypeMap);
+ SmallVector<Value> indices(linalgOp.getRank(outputOperand),
+ arith::ConstantIndexOp::create(rewriter, loc, 0));
+
Operation *write;
if (vectorType.getRank() > 0) {
AffineMap writeMap = inversePermutation(reindexIndexingMap(opOperandMap));
- SmallVector<Value> indices(
- linalgOp.getRank(outputOperand),
- arith::ConstantIndexOp::create(rewriter, loc, 0));
value = broadcastIfNeeded(rewriter, value, vectorType);
assert(value.getType() == vectorType && "Incorrect type");
write = vector::TransferWriteOp::create(
@@ -762,7 +762,7 @@ static Value buildVectorWrite(RewriterBase &rewriter, Value value,
value = vector::BroadcastOp::create(rewriter, loc, vectorType, value);
assert(value.getType() == vectorType && "Incorrect type");
write = vector::TransferWriteOp::create(rewriter, loc, value,
- outputOperand->get(), ValueRange{});
+ outputOperand->get(), indices);
}
write = state.maskOperation(rewriter, write, linalgOp, opOperandMap);
diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
index 9a14ab7d38d3e..95959fcf085fc 100644
--- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir
@@ -1481,23 +1481,23 @@ module attributes {transform.with_named_sequence} {
// -----
-// CHECK-LABEL: func @reduce_1d(
-// CHECK-SAME: %[[A:.*]]: tensor<32xf32>
-func.func @reduce_1d(%arg0: tensor<32xf32>) -> tensor<f32> {
+// CHECK-LABEL: func @reduce_to_rank_0(
+// CHECK-SAME: %[[SRC:.*]]: tensor<32xf32>
+func.func @reduce_to_rank_0(%arg0: tensor<32xf32>) -> tensor<f32> {
// CHECK-DAG: %[[F0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
%f0 = arith.constant 0.000000e+00 : f32
- // CHECK: %[[init:.*]] = tensor.empty() : tensor<f32>
+ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<f32>
%0 = tensor.empty() : tensor<f32>
%1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<f32>) -> tensor<f32>
- // CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
+ // CHECK: %[[R:.*]] = vector.transfer_read %[[SRC]][%[[C0]]]
// CHECK-SAME: : tensor<32xf32>, vector<32xf32>
- // CHECK: %[[red:.*]] = vector.multi_reduction <add>, %[[r]], %[[F0]] [0]
+ // CHECK: %[[RED:.*]] = vector.multi_reduction <add>, %[[R]], %[[F0]] [0]
// CHECK-SAME: : vector<32xf32> to f32
- // CHECK: %[[red_v1:.*]] = vector.broadcast %[[red]] : f32 to vector<f32>
- // CHECK: %[[res:.*]] = vector.transfer_write %[[red_v1]], %[[init]][]
+ // CHECK: %[[RED_V1:.*]] = vector.broadcast %[[RED]] : f32 to vector<f32>
+ // CHECK: %[[RES:.*]] = vector.transfer_write %[[RED_V1]], %[[INIT]][]
// CHECK-SAME: : vector<f32>, tensor<f32>
%2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>,
@@ -1523,6 +1523,58 @@ module attributes {transform.with_named_sequence} {
}
+// -----
+
+// CHECK-LABEL: func @reduce_to_rank_1(
+// CHECK-SAME: %[[SRC:.*]]: tensor<32xf32>
+func.func @reduce_to_rank_1(%arg0: tensor<32xf32>) -> tensor<1xf32> {
+ // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK-DAG: %[[F0:.*]] = arith.constant dense<0.000000e+00> : vector<1xf32>
+ %f0 = arith.constant 0.000000e+00 : f32
+
+ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1xf32>
+ %0 = tensor.empty() : tensor<1xf32>
+
+ // CHECK: %[[INIT_ZERO:.*]] = vector.transfer_write %[[F0]], %[[INIT]][%[[C0]]]
+ // CHECK-SAME: : vector<1xf32>, tensor<1xf32>
+ %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor<1xf32>) -> tensor<1xf32>
+
+ // CHECK: %[[R:.*]] = vector.transfer_read %[[SRC]][%[[C0]]]
+ // CHECK-SAME: : tensor<32xf32>, vector<32xf32>
+ // CHECK: %[[INIT_ZERO_VEC:.*]] = vector.transfer_read %[[INIT_ZERO]][%[[C0]]]
+ // CHECK-SAME: : tensor<1xf32>, vector<f32>
+ // CHECK: %[[INIT_ZERO_SCL:.*]] = vector.extract %[[INIT_ZERO_VEC]][]
+ // CHECK-SAME: : f32 from vector<f32>
+ // CHECK: %[[RED:.*]] = vector.multi_reduction <add>, %[[R]], %[[INIT_ZERO_SCL]] [0]
+ // CHECK-SAME: : vector<32xf32> to f32
+ // CHECK: %[[RED_V1:.*]] = vector.broadcast %[[RED]] : f32 to vector<f32>
+ // CHECK: vector.transfer_write %[[RED_V1]], %[[INIT_ZERO]][%[[C0]]]
+ // CHECK-SAME: : vector<f32>, tensor<1xf32>
+
+ %2 = linalg.generic {
+ indexing_maps = [affine_map<(d0) -> (d0)>,
+ affine_map<(d0) -> (0)>],
+ iterator_types = ["reduction"]}
+ ins(%arg0 : tensor<32xf32>)
+ outs(%1 : tensor<1xf32>) {
+ ^bb0(%a: f32, %b: f32):
+ %3 = arith.addf %a, %b : f32
+ linalg.yield %3 : f32
+ } -> tensor<1xf32>
+
+ return %2 : tensor<1xf32>
+}
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
+ %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
+ transform.yield
+ }
+}
+
+
// -----
// This test checks that vectorization does not occur when an input indexing map