[Mlir-commits] [mlir] [mlir][linalg] Support scalable vectorization of linalg.index operations (PR #96778)
Cullen Rhodes
llvmlistbot at llvm.org
Mon Jul 8 02:04:25 PDT 2024
https://github.com/c-rhodes updated https://github.com/llvm/llvm-project/pull/96778
>From 0b5246c3cfd5ee3f9148d09fd8c212be2d6a8ed8 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Thu, 20 Jun 2024 13:47:19 +0000
Subject: [PATCH 1/3] [mlir][linalg] Add scalable vectorization of tensor
extract test
---
.../Linalg/vectorize-tensor-extract.mlir | 50 +++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
index 85e1c56dd45a0..4b0df6a01c8fc 100644
--- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
@@ -113,6 +113,56 @@ module attributes {transform.with_named_sequence} {
}
}
+// -----
+
+#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+func.func @vectorize_scalable_nd_tensor_extract_transfer_read_basic(%arg0: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
+ %1 = linalg.generic {
+ indexing_maps = [#map1],
+ iterator_types = ["parallel", "parallel", "parallel"]
+ } outs(%arg2 : tensor<?x?x?xf32>) {
+ ^bb0(%arg4: f32):
+ %2 = linalg.index 0 : index
+ %3 = linalg.index 1 : index
+ %4 = linalg.index 2 : index
+ %5 = tensor.extract %arg0[%2, %3, %4] : tensor<?x?x?xf32>
+ linalg.yield %5 : f32
+ } -> tensor<?x?x?xf32>
+ return %1 : tensor<?x?x?xf32>
+}
+
+// CHECK-LABEL: @vectorize_scalable_nd_tensor_extract_transfer_read_basic
+// CHECK-SAME: %[[BASE:.*]]: tensor<?x?x?xf32>, %[[DEST:.*]]: tensor<?x?x?xf32>
+// CHECK: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x[4]xf32>
+// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1x1x[4]xi1>
+// CHECK: %[[INDEX_VEC:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
+// CHECK: %[[C2:.*]] = arith.constant 2 : index
+// CHECK: %[[C1:.*]] = arith.constant 1 : index
+// CHECK: %[[C0:.*]] = arith.constant 0 : index
+// CHECK: %[[DEST_DIM0:.*]] = tensor.dim %[[DEST]], %[[C0]] : tensor<?x?x?xf32>
+// CHECK: %[[DEST_DIM1:.*]] = tensor.dim %[[DEST]], %[[C1]] : tensor<?x?x?xf32>
+// CHECK: %[[DEST_DIM2:.*]] = tensor.dim %[[DEST]], %[[C2]] : tensor<?x?x?xf32>
+// CHECK: %[[DEST_MASK:.*]] = vector.create_mask %[[DEST_DIM0]], %[[DEST_DIM1]], %[[DEST_DIM2]] : vector<1x1x[4]xi1>
+// CHECK: %[[INDEX_VEC_BCAST:.*]] = vector.broadcast %[[INDEX_VEC]] : vector<4xindex> to vector<1x1x[4]xindex>
+// CHECK: %[[GATHER:.*]] = vector.mask %[[DEST_MASK]] { vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {{\[}}%[[INDEX_VEC_BCAST]]], %[[MASK]], %[[PASSTHRU]] : tensor<?x?x?xf32>, vector<1x1x[4]xindex>, vector<1x1x[4]xi1>, vector<1x1x[4]xf32> into vector<1x1x[4]xf32> } : vector<1x1x[4]xi1> -> vector<1x1x[4]xf32>
+// CHECK: %[[OUT:.*]] = vector.mask %[[DEST_MASK]] { vector.transfer_write %[[GATHER]], %[[DEST]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x[4]xf32>, tensor<?x?x?xf32> } : vector<1x1x[4]xi1> -> tensor<?x?x?xf32>
+// CHECK: return %[[OUT]] : tensor<?x?x?xf32>
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [1, 1, [4]] {vectorize_nd_extract} : !transform.any_op
+
+ %func = transform.structured.match ops{["func.func"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_patterns to %func {
+ transform.apply_patterns.canonicalization
+ transform.apply_patterns.linalg.tiling_canonicalization
+ } : !transform.any_op
+ transform.yield
+ }
+}
+
// -----
func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16xf32>, %arg0: index, %arg2: index, %arg1: index, %arg4: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
>From 90b2114798fa0d97b5ab845e515c3ed3d37b48f1 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Wed, 26 Jun 2024 14:27:26 +0000
Subject: [PATCH 2/3] [mlir][linalg] Support scalable vectorization of
linalg.index operations
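The vectorization of linalg.index operations doesn't support scalable
vectors when computing the index vector, because the index vector is
materialized as a fixed-length constant sequence (arith.constant). This
patch fixes that by generating the index vector with the vector.step
operation instead, using the scalable flag of the indexed dimension.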
---
.../Linalg/Transforms/Vectorization.cpp | 21 ++++++++++++-------
.../vectorize-tensor-extract-masked.mlir | 4 ++--
.../Linalg/vectorize-tensor-extract.mlir | 4 ++--
3 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 3a75d2ac08157..a4c0508d0d8fa 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -195,6 +195,10 @@ struct VectorizationState {
/// Returns the canonical vector shape used to vectorize the iteration space.
ArrayRef<int64_t> getCanonicalVecShape() const { return canonicalVecShape; }
+ /// Returns the vector dimensions that are scalable in the canonical vector
+ /// shape.
+ ArrayRef<bool> getScalableVecDims() const { return scalableVecDims; }
+
/// Returns a vector type of the provided `elementType` with the canonical
/// vector shape and the corresponding fixed/scalable dimensions bit. If
/// `dimPermutation` is provided, the canonical vector dimensions are permuted
@@ -694,23 +698,24 @@ static VectorizationResult vectorizeLinalgIndex(RewriterBase &rewriter,
return VectorizationResult{VectorizationStatus::Failure, nullptr};
auto loc = indexOp.getLoc();
// Compute the static loop sizes of the index op.
- auto targetShape = state.getCanonicalVecShape();
+ ArrayRef<int64_t> targetShape = state.getCanonicalVecShape();
+ auto dim = indexOp.getDim();
// Compute a one-dimensional index vector for the index op dimension.
- auto constantSeq =
- llvm::to_vector(llvm::seq<int64_t>(0, targetShape[indexOp.getDim()]));
- auto indexSteps = rewriter.create<arith::ConstantOp>(
- loc, rewriter.getIndexVectorAttr(constantSeq));
+ auto indexVectorType =
+ VectorType::get({targetShape[dim]}, rewriter.getIndexType(),
+ state.getScalableVecDims()[dim]);
+ auto indexSteps = rewriter.create<vector::StepOp>(loc, indexVectorType);
// Return the one-dimensional index vector if it lives in the trailing
// dimension of the iteration space since the vectorization algorithm in this
// case can handle the broadcast.
- if (indexOp.getDim() == targetShape.size() - 1)
+ if (dim == targetShape.size() - 1)
return VectorizationResult{VectorizationStatus::NewOp, indexSteps};
// Otherwise permute the targetShape to move the index dimension last,
// broadcast the one-dimensional index vector to the permuted shape, and
// finally transpose the broadcasted index vector to undo the permutation.
auto permPattern =
llvm::to_vector(llvm::seq<unsigned>(0, targetShape.size()));
- std::swap(permPattern[indexOp.getDim()], permPattern.back());
+ std::swap(permPattern[dim], permPattern.back());
auto permMap =
AffineMap::getPermutationMap(permPattern, linalgOp.getContext());
@@ -719,7 +724,7 @@ static VectorizationResult vectorizeLinalgIndex(RewriterBase &rewriter,
indexSteps);
SmallVector<int64_t> transposition =
llvm::to_vector<16>(llvm::seq<int64_t>(0, linalgOp.getNumLoops()));
- std::swap(transposition.back(), transposition[indexOp.getDim()]);
+ std::swap(transposition.back(), transposition[dim]);
auto transposeOp =
rewriter.create<vector::TransposeOp>(loc, broadCastOp, transposition);
return VectorizationResult{VectorizationStatus::NewOp, transposeOp};
diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir
index e68d297dc41f2..f042753780013 100644
--- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract-masked.mlir
@@ -63,7 +63,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_contiguo
// CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_7]] : vector<1x4xi1>
// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_2]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32>
-// CHECK: %[[VAL_12:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
+// CHECK: %[[VAL_12:.*]] = vector.step : vector<4xindex>
// CHECK: %[[VAL_13:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex>
// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_13]] : vector<4xindex>
// CHECK-DAG: %[[VAL_15:.*]] = arith.constant dense<true> : vector<1x4xi1>
@@ -160,7 +160,7 @@ func.func @masked_dynamic_vectorize_nd_tensor_extract_with_affine_apply_gather(%
// CHECK: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[VAL_10:.*]] = vector.create_mask %[[VAL_5]], %[[VAL_7]] : vector<1x4xi1>
// CHECK: %[[VAL_11:.*]] = vector.mask %[[VAL_10]] { vector.transfer_read %[[VAL_2]]{{\[}}%[[VAL_8]], %[[VAL_8]]], %[[VAL_9]] {in_bounds = [true, true]} : tensor<?x?xf32>, vector<1x4xf32> } : vector<1x4xi1> -> vector<1x4xf32>
-// CHECK: %[[VAL_12:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
+// CHECK: %[[VAL_12:.*]] = vector.step : vector<4xindex>
// CHECK: %[[VAL_13:.*]] = vector.broadcast %[[VAL_1]] : index to vector<4xindex>
// CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_13]] : vector<4xindex>
// CHECK: %[[VAL_15:.*]] = arith.constant dense<true> : vector<1x4xi1>
diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
index 4b0df6a01c8fc..8ec1cdc609742 100644
--- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
@@ -135,7 +135,6 @@ func.func @vectorize_scalable_nd_tensor_extract_transfer_read_basic(%arg0: tenso
// CHECK-SAME: %[[BASE:.*]]: tensor<?x?x?xf32>, %[[DEST:.*]]: tensor<?x?x?xf32>
// CHECK: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x[4]xf32>
// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1x1x[4]xi1>
-// CHECK: %[[INDEX_VEC:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
// CHECK: %[[C2:.*]] = arith.constant 2 : index
// CHECK: %[[C1:.*]] = arith.constant 1 : index
// CHECK: %[[C0:.*]] = arith.constant 0 : index
@@ -143,7 +142,8 @@ func.func @vectorize_scalable_nd_tensor_extract_transfer_read_basic(%arg0: tenso
// CHECK: %[[DEST_DIM1:.*]] = tensor.dim %[[DEST]], %[[C1]] : tensor<?x?x?xf32>
// CHECK: %[[DEST_DIM2:.*]] = tensor.dim %[[DEST]], %[[C2]] : tensor<?x?x?xf32>
// CHECK: %[[DEST_MASK:.*]] = vector.create_mask %[[DEST_DIM0]], %[[DEST_DIM1]], %[[DEST_DIM2]] : vector<1x1x[4]xi1>
-// CHECK: %[[INDEX_VEC_BCAST:.*]] = vector.broadcast %[[INDEX_VEC]] : vector<4xindex> to vector<1x1x[4]xindex>
+// CHECK: %[[INDEX_VEC:.*]] = vector.step : vector<[4]xindex>
+// CHECK: %[[INDEX_VEC_BCAST:.*]] = vector.broadcast %[[INDEX_VEC]] : vector<[4]xindex> to vector<1x1x[4]xindex>
// CHECK: %[[GATHER:.*]] = vector.mask %[[DEST_MASK]] { vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {{\[}}%[[INDEX_VEC_BCAST]]], %[[MASK]], %[[PASSTHRU]] : tensor<?x?x?xf32>, vector<1x1x[4]xindex>, vector<1x1x[4]xi1>, vector<1x1x[4]xf32> into vector<1x1x[4]xf32> } : vector<1x1x[4]xi1> -> vector<1x1x[4]xf32>
// CHECK: %[[OUT:.*]] = vector.mask %[[DEST_MASK]] { vector.transfer_write %[[GATHER]], %[[DEST]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x[4]xf32>, tensor<?x?x?xf32> } : vector<1x1x[4]xi1> -> tensor<?x?x?xf32>
// CHECK: return %[[OUT]] : tensor<?x?x?xf32>
>From 91c7608e4a50938af14d52bee19453a2862c67e5 Mon Sep 17 00:00:00 2001
From: Cullen Rhodes <cullen.rhodes at arm.com>
Date: Mon, 8 Jul 2024 09:02:06 +0000
Subject: [PATCH 3/3] address comments
---
.../Linalg/vectorization-scalable.mlir | 47 +++++++++++++++++
.../Linalg/vectorize-tensor-extract.mlir | 50 -------------------
2 files changed, 47 insertions(+), 50 deletions(-)
diff --git a/mlir/test/Dialect/Linalg/vectorization-scalable.mlir b/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
index d6f8d78358370..4423ee6ea6a51 100644
--- a/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-scalable.mlir
@@ -142,3 +142,50 @@ module attributes {transform.with_named_sequence} {
}
}
+// -----
+
+#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+func.func @vectorize_linalg_index(%arg0: tensor<3x3x?xf32>, %arg1: tensor<1x1x?xf32>) -> tensor<1x1x?xf32> {
+ %0 = linalg.generic {
+ indexing_maps = [#map],
+ iterator_types = ["parallel", "parallel", "parallel"]
+ } outs(%arg1 : tensor<1x1x?xf32>) {
+ ^bb0(%in: f32):
+ %1 = linalg.index 0 : index
+ %2 = linalg.index 1 : index
+ %3 = linalg.index 2 : index
+ %4 = tensor.extract %arg0[%1, %2, %3] : tensor<3x3x?xf32>
+ linalg.yield %4 : f32
+ } -> tensor<1x1x?xf32>
+ return %0 : tensor<1x1x?xf32>
+}
+
+// CHECK-LABEL: @vectorize_linalg_index
+// CHECK-SAME: %[[SRC:.*]]: tensor<3x3x?xf32>, %[[DST:.*]]: tensor<1x1x?xf32>
+// CHECK-DAG: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x[4]xf32>
+// CHECK-DAG: %[[MASK:.*]] = arith.constant dense<true> : vector<1x1x[4]xi1>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+// CHECK: %[[DST_DIM2:.*]] = tensor.dim %[[DST]], %[[C2]] : tensor<1x1x?xf32>
+// CHECK: %[[DST_MASK:.*]] = vector.create_mask %[[C1]], %[[C1]], %[[DST_DIM2]] : vector<1x1x[4]xi1>
+// CHECK: %[[INDEX_VEC:.*]] = vector.step : vector<[4]xindex>
+// CHECK: %[[INDEX_VEC_BCAST:.*]] = vector.broadcast %[[INDEX_VEC]] : vector<[4]xindex> to vector<1x1x[4]xindex>
+// CHECK: %[[GATHER:.*]] = vector.mask %[[DST_MASK]] { vector.gather %[[SRC]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {{\[}}%[[INDEX_VEC_BCAST]]], %[[MASK]], %[[PASSTHRU]] : tensor<3x3x?xf32>, vector<1x1x[4]xindex>, vector<1x1x[4]xi1>, vector<1x1x[4]xf32> into vector<1x1x[4]xf32> } : vector<1x1x[4]xi1> -> vector<1x1x[4]xf32>
+// CHECK: %[[OUT:.*]] = vector.mask %[[DST_MASK]] { vector.transfer_write %[[GATHER]], %[[DST]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x[4]xf32>, tensor<1x1x?xf32> } : vector<1x1x[4]xi1> -> tensor<1x1x?xf32>
+// CHECK: return %[[OUT]] : tensor<1x1x?xf32>
+
+module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 vector_sizes [1, 1, [4]] {vectorize_nd_extract} : !transform.any_op
+
+ %func = transform.structured.match ops{["func.func"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_patterns to %func {
+ transform.apply_patterns.canonicalization
+ transform.apply_patterns.linalg.tiling_canonicalization
+ } : !transform.any_op
+ transform.yield
+ }
+}
diff --git a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
index 8ec1cdc609742..85e1c56dd45a0 100644
--- a/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir
@@ -113,56 +113,6 @@ module attributes {transform.with_named_sequence} {
}
}
-// -----
-
-#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
-func.func @vectorize_scalable_nd_tensor_extract_transfer_read_basic(%arg0: tensor<?x?x?xf32>, %arg2: tensor<?x?x?xf32>) -> tensor<?x?x?xf32> {
- %1 = linalg.generic {
- indexing_maps = [#map1],
- iterator_types = ["parallel", "parallel", "parallel"]
- } outs(%arg2 : tensor<?x?x?xf32>) {
- ^bb0(%arg4: f32):
- %2 = linalg.index 0 : index
- %3 = linalg.index 1 : index
- %4 = linalg.index 2 : index
- %5 = tensor.extract %arg0[%2, %3, %4] : tensor<?x?x?xf32>
- linalg.yield %5 : f32
- } -> tensor<?x?x?xf32>
- return %1 : tensor<?x?x?xf32>
-}
-
-// CHECK-LABEL: @vectorize_scalable_nd_tensor_extract_transfer_read_basic
-// CHECK-SAME: %[[BASE:.*]]: tensor<?x?x?xf32>, %[[DEST:.*]]: tensor<?x?x?xf32>
-// CHECK: %[[PASSTHRU:.*]] = arith.constant dense<0.000000e+00> : vector<1x1x[4]xf32>
-// CHECK: %[[MASK:.*]] = arith.constant dense<true> : vector<1x1x[4]xi1>
-// CHECK: %[[C2:.*]] = arith.constant 2 : index
-// CHECK: %[[C1:.*]] = arith.constant 1 : index
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[DEST_DIM0:.*]] = tensor.dim %[[DEST]], %[[C0]] : tensor<?x?x?xf32>
-// CHECK: %[[DEST_DIM1:.*]] = tensor.dim %[[DEST]], %[[C1]] : tensor<?x?x?xf32>
-// CHECK: %[[DEST_DIM2:.*]] = tensor.dim %[[DEST]], %[[C2]] : tensor<?x?x?xf32>
-// CHECK: %[[DEST_MASK:.*]] = vector.create_mask %[[DEST_DIM0]], %[[DEST_DIM1]], %[[DEST_DIM2]] : vector<1x1x[4]xi1>
-// CHECK: %[[INDEX_VEC:.*]] = vector.step : vector<[4]xindex>
-// CHECK: %[[INDEX_VEC_BCAST:.*]] = vector.broadcast %[[INDEX_VEC]] : vector<[4]xindex> to vector<1x1x[4]xindex>
-// CHECK: %[[GATHER:.*]] = vector.mask %[[DEST_MASK]] { vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {{\[}}%[[INDEX_VEC_BCAST]]], %[[MASK]], %[[PASSTHRU]] : tensor<?x?x?xf32>, vector<1x1x[4]xindex>, vector<1x1x[4]xi1>, vector<1x1x[4]xf32> into vector<1x1x[4]xf32> } : vector<1x1x[4]xi1> -> vector<1x1x[4]xf32>
-// CHECK: %[[OUT:.*]] = vector.mask %[[DEST_MASK]] { vector.transfer_write %[[GATHER]], %[[DEST]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x[4]xf32>, tensor<?x?x?xf32> } : vector<1x1x[4]xi1> -> tensor<?x?x?xf32>
-// CHECK: return %[[OUT]] : tensor<?x?x?xf32>
-
-module attributes {transform.with_named_sequence} {
- transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
- %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- transform.structured.vectorize %0 vector_sizes [1, 1, [4]] {vectorize_nd_extract} : !transform.any_op
-
- %func = transform.structured.match ops{["func.func"]} in %arg1
- : (!transform.any_op) -> !transform.any_op
- transform.apply_patterns to %func {
- transform.apply_patterns.canonicalization
- transform.apply_patterns.linalg.tiling_canonicalization
- } : !transform.any_op
- transform.yield
- }
-}
-
// -----
func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16xf32>, %arg0: index, %arg2: index, %arg1: index, %arg4: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
More information about the Mlir-commits
mailing list