[Mlir-commits] [mlir] [mlir][linalg] Vectorize unpack op without masking (PR #89067)
Prashant Kumar
llvmlistbot at llvm.org
Thu May 2 05:37:40 PDT 2024
https://github.com/pashu123 updated https://github.com/llvm/llvm-project/pull/89067
From 17ec14f92f2206b783df4a557955ed3318de7b82 Mon Sep 17 00:00:00 2001
From: Prashant Kumar <pk5561 at gmail.com>
Date: Wed, 17 Apr 2024 08:54:28 -0400
Subject: [PATCH] [mlir] Vectorize unpack op given no vector sizes
Enables vectorization of the unpack op when no vector sizes are provided.
The vector sizes are inferred from the source shape and the op's attributes.
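For reference, the simplest case this enables (taken from the tests added
below) is an unpack op with fully static shapes:

  %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16]
      into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>

which can now be vectorized with a plain transform.structured.vectorize and
no vector_sizes argument.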
---
.../Linalg/Transforms/Vectorization.cpp | 72 ++++++++++++++-----
mlir/test/Dialect/Linalg/vectorization.mlir | 70 ++++++++++++++++++
2 files changed, 126 insertions(+), 16 deletions(-)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index ef9a30be9a0153..9cb910c22c7335 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1414,27 +1414,39 @@ static SmallVector<int64_t> getTiledPackShape(tensor::PackOp packOp,
/// create an empty destination tensor and create a TransferWriteOp from the
/// input to the empty tensor. If the destination shape is not the same as the
/// inputVectorSizes for the first rank(inputVectorSizes) dims, then create a
-/// mask for the write.
+/// mask for the write. If `useInBoundsInsteadOfMasking` is set, the inBounds
+/// attribute of the transfer write op is updated instead of masking.
static Operation *createWriteOrMaskedWrite(OpBuilder &builder, Location loc,
Value input,
SmallVector<OpFoldResult> destSizes,
- ArrayRef<int64_t> inputVectorSizes) {
+ ArrayRef<int64_t> inputVectorSizes,
+ bool useInBoundsInsteadOfMasking) {
+
auto inputType = cast<VectorType>(input.getType());
Value dest = builder.create<tensor::EmptyOp>(loc, destSizes,
inputType.getElementType());
int64_t rank = cast<ShapedType>(dest.getType()).getRank();
auto zero = builder.create<arith::ConstantIndexOp>(loc, 0);
+ auto destShape = cast<ShapedType>(dest.getType()).getShape();
+ SmallVector<bool> inBoundsVal(rank, true);
+ if (useInBoundsInsteadOfMasking) {
+ // Update the inBounds attribute.
+ for (unsigned i = 0; i < rank; i++)
+ inBoundsVal[i] = (destShape[i] == inputVectorSizes[i]) &&
+ !ShapedType::isDynamic(destShape[i]);
+ }
Operation *write = builder.create<vector::TransferWriteOp>(
loc,
/*vector=*/input,
/*source=*/dest,
/*indices=*/SmallVector<Value>(rank, zero),
- /*inBounds=*/SmallVector<bool>(rank, true));
- auto destShape = cast<ShapedType>(dest.getType()).getShape();
+ /*inBounds=*/inBoundsVal);
assert(llvm::none_of(
destShape.drop_front(inputVectorSizes.size()),
[](int64_t size) { return size == ShapedType::kDynamic; }) &&
"Only dims aligned with inputVectorSizes may be dynamic");
+ if (useInBoundsInsteadOfMasking)
+ return write;
bool needMaskForWrite = !llvm::equal(
inputVectorSizes, destShape.take_front(inputVectorSizes.size()));
if (needMaskForWrite) {
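To make the new flag concrete with numbers from the slice-output test added
below: the write vector has shape [64, 128] while the destination is
tensor<64x127xf32>, so the loop above produces inBounds = [true, false]
(dim 0: 64 == 64 and static; dim 1: 128 != 127), which shows up as the
in_bounds = [true, false] attribute on the generated vector.transfer_write.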
@@ -1535,9 +1547,9 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
loc, shapeCastOp.getResult(), destPermutation);
// Create TransferWriteOp.
- Operation *write =
- createWriteOrMaskedWrite(rewriter, loc, transposeOp.getResult(),
- reifiedReturnShapes[0], inputVectorSizes);
+ Operation *write = createWriteOrMaskedWrite(
+ rewriter, loc, transposeOp.getResult(), reifiedReturnShapes[0],
+ inputVectorSizes, /*useInBoundsInsteadOfMasking=*/false);
newResults.push_back(write->getResult(0));
return success();
}
@@ -1547,7 +1559,9 @@ vectorizeAsTensorPackOp(RewriterBase &rewriter, tensor::PackOp packOp,
/// vector::TransposeOp - Transpose the Source tensor
/// ShapeCastOp - Reshape the data based on the target.
/// vector::TransferWriteOp. - Write the result vector back to the destination
-/// tensor
+/// tensor. If the vector sizes are not provided:
+///   * the vector sizes are determined by the input operand and attributes,
+///   * the inBounds attribute is used instead of masking.
static LogicalResult
vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
ArrayRef<int64_t> inputVectorSizes,
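Concretely, the op sequence described above looks like this for the first
test added below (a condensed sketch; shapes copied from the CHECK lines):

  vector.transfer_read           : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
  vector.transpose [0, 2, 1, 3]  : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
  vector.shape_cast              : vector<8x32x8x16xf32> to vector<256x128xf32>
  vector.transfer_write          : vector<256x128xf32>, tensor<256x128xf32>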
@@ -1560,11 +1574,33 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
ArrayRef<int64_t> innerDimPos = unpackOp.getInnerDimsPos();
ArrayRef<int64_t> innerTiles = unpackOp.getStaticInnerTiles();
+ ArrayRef<int64_t> sourceShape = unpackTensorType.getShape();
+ bool useInBoundsInsteadOfMasking = false;
+ ArrayRef<int64_t> outerDimsPerm = unpackOp.getOuterDimsPerm();
+
+ auto destSize = unpackOp.getDestRank();
+
+  // initVectorShape is the shape of the vector used for the final write on
+  // the destination tensor. It is computed as follows: let the source tensor
+  // have rank 'M' and the destination tensor rank 'N', where N <= M. Then:
+  // - initVectorShape = sourceShape.take_front(N)
+  // - if outer_dims_perm is present: apply that permutation to
+  //   initVectorShape.
+  // - multiply the entries at the positions given by innerDimPos by the
+  //   corresponding innerTiles value.
+ SmallVector<int64_t> initVectorShape(sourceShape.take_front(destSize));
+ if (inputVectorSizes.empty()) {
+ if (!outerDimsPerm.empty())
+ applyPermutationToVector(initVectorShape, outerDimsPerm);
+ for (auto [i, pos] : llvm::enumerate(innerDimPos))
+ initVectorShape[pos] *= innerTiles[i];
+
+ inputVectorSizes = initVectorShape;
+ useInBoundsInsteadOfMasking = true;
+ }
SmallVector<int64_t> readMaskShape(inputVectorSizes.begin(),
inputVectorSizes.end());
- ArrayRef<int64_t> outerDimsPerm = unpackOp.getOuterDimsPerm();
- ArrayRef<int64_t> sourceShape = unpackTensorType.getShape();
// ReadMask is the size of tensor used to read and apply mask. It is
// set like this: Let's say the vectorSize (VS) array is size 'N' and
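As a worked example of the recipe above, take the permute test added below:

  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1]
      inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>

The destination rank is 2, so initVectorShape starts as [4, 7] (the first two
source dims); applying outer_dims_perm = [1, 0] gives [7, 4]; multiplying
position 1 by the inner tile size 4 gives [7, 16], matching the
vector<7x16xf32> that is written in the expected output.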
@@ -1642,9 +1678,9 @@ vectorizeAsTensorUnpackOp(RewriterBase &rewriter, tensor::UnPackOp unpackOp,
unpackOp.getDestType().hasStaticShape()
? inputVectorSizes
: shapeCastOp.getResultVectorType().getShape());
- Operation *write =
- createWriteOrMaskedWrite(rewriter, loc, shapeCastOp.getResult(),
- reifiedRetShapes[0], writeMaskShape);
+ Operation *write = createWriteOrMaskedWrite(
+ rewriter, loc, shapeCastOp.getResult(), reifiedRetShapes[0],
+ writeMaskShape, useInBoundsInsteadOfMasking);
newResults.push_back(write->getResult(0));
return success();
}
@@ -1673,7 +1709,8 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
rewriter, loc, padOp.getSource(), inputVectorSizes, padValue,
/*useInBoundsInsteadOfMasking=*/false);
Operation *write = createWriteOrMaskedWrite(
- rewriter, loc, maskedRead, reifiedReturnShapes[0], inputVectorSizes);
+ rewriter, loc, maskedRead, reifiedReturnShapes[0], inputVectorSizes,
+ /*useInBoundsInsteadOfMasking=*/false);
newResults.push_back(write->getResult(0));
return success();
}
@@ -1755,8 +1792,11 @@ vectorizeUnPackOpPrecondition(tensor::UnPackOp unpackOp,
LDBG("Inner-tiles must be constant: " << unpackOp << "\n");
return failure();
}
- llvm::ArrayRef<int64_t> resultShape = unpackOp.getDestType().getShape();
- if (!inputVectorSizes.empty() &&
+ ArrayRef<int64_t> resultShape = unpackOp.getDestType().getShape();
+ bool satisfyEmptyCond = inputVectorSizes.empty() &&
+ unpackOp.getDestType().hasStaticShape() &&
+ unpackOp.getSourceType().hasStaticShape();
+ if (!satisfyEmptyCond &&
failed(vector::isValidMaskedInputVector(resultShape, inputVectorSizes)))
return failure();
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index 80a5a4c6702ac1..bbeccc7fecd68b 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -985,3 +985,73 @@ module attributes {transform.with_named_sequence} {
transform.yield
}
}
+
+ // -----
+
+func.func @test_vectorize_unpack_no_vector_sizes(%source: tensor<8x8x32x16xf32>, %dest: tensor<256x128xf32>) -> tensor<256x128xf32> {
+ // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x8x32x16xf32>, vector<8x8x32x16xf32>
+ // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [0, 2, 1, 3] : vector<8x8x32x16xf32> to vector<8x32x8x16xf32>
+ // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<8x32x8x16xf32> to vector<256x128xf32>
+ // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<256x128xf32>
+ // CHECK: %[[C00:.*]] = arith.constant 0 : index
+ // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<256x128xf32>, tensor<256x128xf32>
+ // CHECK: return %[[WRIT]] : tensor<256x128xf32>
+ %0 = tensor.unpack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<8x8x32x16xf32> -> tensor<256x128xf32>
+ return %0 : tensor<256x128xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 : !transform.any_op
+ transform.yield
+ }
+ }
+
+ // -----
+
+func.func @test_vectorize_unpack_no_vector_sizes_slice_output(%source: tensor<8x4x16x16xf32>, %dest: tensor<64x127xf32>) -> tensor<64x127xf32> {
+ // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<8x4x16x16xf32>, vector<8x4x16x16xf32>
+ // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 2, 0, 3] : vector<8x4x16x16xf32> to vector<4x16x8x16xf32>
+ // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<4x16x8x16xf32> to vector<64x128xf32>
+ // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<64x127xf32>
+ // CHECK: %[[C00:.*]] = arith.constant 0 : index
+ // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], %[[EMPT]]{{\[}}%[[C00]], %[[C00]]]
+ // CHECK-SAME: {in_bounds = [true, false]} : vector<64x128xf32>, tensor<64x127xf32>
+ // CHECK: return %[[WRIT]] : tensor<64x127xf32>
+ %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [16, 16] into %dest : tensor<8x4x16x16xf32> -> tensor<64x127xf32>
+ return %0 : tensor<64x127xf32>
+ }
+ module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 : !transform.any_op
+ transform.yield
+ }
+ }
+
+ // -----
+
+func.func @test_vectorize_unpack_no_vector_sizes_permute(%source: tensor<4x7x4xf32>, %dest: tensor<7x16xf32>) -> tensor<7x16xf32> {
+  %0 = tensor.unpack %source outer_dims_perm = [1, 0] inner_dims_pos = [1] inner_tiles = [4] into %dest : tensor<4x7x4xf32> -> tensor<7x16xf32>
+ return %0 : tensor<7x16xf32>
+ }
+ // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
+ // CHECK: %[[READ:.*]] = vector.transfer_read {{.*}} : tensor<4x7x4xf32>, vector<4x7x4xf32>
+ // CHECK: %[[TRANSP:.*]] = vector.transpose %[[READ]], [1, 0, 2] : vector<4x7x4xf32> to vector<7x4x4xf32>
+ // CHECK: %[[SHAPC:.*]] = vector.shape_cast %[[TRANSP]] : vector<7x4x4xf32> to vector<7x16xf32>
+ // CHECK: %[[EMPT:.*]] = tensor.empty() : tensor<7x16xf32>
+ // CHECK: %[[C00:.*]] = arith.constant 0 : index
+ // CHECK: %[[WRIT:.*]] = vector.transfer_write %[[SHAPC]], {{.*}} : vector<7x16xf32>, tensor<7x16xf32>
+ // CHECK: return %[[WRIT]] : tensor<7x16xf32>
+ module attributes {transform.with_named_sequence} {
+ transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) {
+ %0 = transform.structured.match ops{["tensor.unpack"]} in %arg0 : (!transform.any_op) -> !transform.any_op
+ transform.structured.vectorize %0 : !transform.any_op
+ transform.yield
+ }
+ }