[Mlir-commits] [mlir] 6abd8e3 - [mlir][linalg] Add support for dynamic pad op masking vectorization.
Hanhan Wang
llvmlistbot at llvm.org
Thu May 18 15:05:30 PDT 2023
Author: Hanhan Wang
Date: 2023-05-18T15:05:18-07:00
New Revision: 6abd8e30f443b9b17935c40e4451cbc63e3bd49d
URL: https://github.com/llvm/llvm-project/commit/6abd8e30f443b9b17935c40e4451cbc63e3bd49d
DIFF: https://github.com/llvm/llvm-project/commit/6abd8e30f443b9b17935c40e4451cbc63e3bd49d.diff
LOG: [mlir][linalg] Add support for dynamic pad op masking vectorization.
Reviewed By: dcaballe, awarzynski
Differential Revision: https://reviews.llvm.org/D150497
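In short: vectorizing a tensor.pad whose result type has dynamic dimensions now reifies the result shape to build the tensor.empty destination and masks both the transfer_read and the transfer_write with the mask built from the source dimensions. A sketch of the resulting IR is below, based on the new test case further down in the diff; the [2, 4] vector sizes come from that test, while the SSA names (%src, %sz0, %sz1, ...) are only illustrative. A dynamic pad such as

  // Dynamic pad: the result extents are not known statically.
  %padded = tensor.pad %src low[0, 0] high[%h0, %h1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_value : f32
  } : tensor<?x?xf32> to tensor<?x?xf32>

is rewritten roughly to

  // Destination allocated with the reified (dynamic) result sizes.
  %empty = tensor.empty(%sz0, %sz1) : tensor<?x?xf32>
  // Mask built from the dynamic source extents.
  %d0 = tensor.dim %src, %c0 : tensor<?x?xf32>
  %d1 = tensor.dim %src, %c1 : tensor<?x?xf32>
  %mask = vector.create_mask %d0, %d1 : vector<2x4xi1>
  // Masked read: masked-off lanes take the pad value.
  %read = vector.mask %mask {
    vector.transfer_read %src[%c0, %c0], %pad_value {in_bounds = [true, true]}
      : tensor<?x?xf32>, vector<2x4xf32>
  } : vector<2x4xi1> -> vector<2x4xf32>
  // Masked write into the reified destination.
  %padded_vec = vector.mask %mask {
    vector.transfer_write %read, %empty[%c0, %c0] {in_bounds = [true, true]}
      : vector<2x4xf32>, tensor<?x?xf32>
  } : vector<2x4xi1> -> tensor<?x?xf32>

where %sz0/%sz1 stand for the reified result sizes and %h0/%h1 are the dynamic high padding amounts.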
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
mlir/test/Dialect/Linalg/vectorization-masked.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index fbdc7d7f57e59..93939544e4910 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1296,14 +1296,19 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
// transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value))
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(padOp);
- auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
- auto emptyOp =
- rewriter.create<tensor::EmptyOp>(loc, padOp.getResultType(),
- /*dynamicSizes=*/ValueRange{});
+
+ ReifiedRankedShapedTypeDims reifiedReturnShapes;
+ LogicalResult status =
+ cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation())
+ .reifyResultShapes(rewriter, reifiedReturnShapes);
+ assert(succeeded(status) && "failed to reify result shapes");
+ auto emptyOp = rewriter.create<tensor::EmptyOp>(loc, reifiedReturnShapes[0],
+ padValue.getType());
SmallVector<OpFoldResult> mixedSourceDims =
getMixedDimensions(rewriter, loc, padOp.getSource());
Value mask =
rewriter.create<vector::CreateMaskOp>(loc, maskType, mixedSourceDims);
+ auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
auto transferReadOp = rewriter.create<vector::TransferReadOp>(
loc,
/*vectorType=*/vectorType,
@@ -1313,13 +1318,14 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
/*inBounds=*/SmallVector<bool>(rank, true));
auto maskedOp = cast<vector::MaskOp>(
mlir::vector::maskOperation(rewriter, transferReadOp, mask));
- auto transferWriteOp = rewriter.create<vector::TransferWriteOp>(
+ Operation *write = rewriter.create<vector::TransferWriteOp>(
loc,
/*vector=*/maskedOp->getResult(0),
/*source=*/emptyOp,
/*indices=*/SmallVector<Value>(rank, zero),
/*inBounds=*/SmallVector<bool>(rank, true));
- newResults.push_back(transferWriteOp.getResult());
+ write = mlir::vector::maskOperation(rewriter, write, mask);
+ newResults.push_back(write->getResult(0));
return success();
}
@@ -1354,6 +1360,37 @@ static LogicalResult vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op) {
return success();
}
+/// Returns success if `inputVectorSizes` is a valid masking configuration for
+/// the given `shape`, i.e., it meets:
+/// 1. The number of elements in both arrays is equal.
+/// 2. `inputVectorSizes` does not have dynamic dimensions.
+/// 3. All the values in `inputVectorSizes` are greater than or equal to
+///    the static sizes in `shape`.
+static LogicalResult
+isValidMaskedInputVector(ArrayRef<int64_t> shape,
+ ArrayRef<int64_t> inputVectorSizes) {
+ if (inputVectorSizes.size() != shape.size()) {
+ LDBG("Input vector sizes don't match the number of loops");
+ return failure();
+ }
+ if (ShapedType::isDynamicShape(inputVectorSizes)) {
+ LDBG("Input vector sizes can't have dynamic dimensions");
+ return failure();
+ }
+ if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
+ [](std::tuple<int64_t, int64_t> sizePair) {
+ int64_t staticSize = std::get<0>(sizePair);
+ int64_t inputSize = std::get<1>(sizePair);
+ return ShapedType::isDynamic(staticSize) ||
+ staticSize <= inputSize;
+ })) {
+ LDBG("Input vector sizes must be greater than or equal to iteration space "
+ "static sizes");
+ return failure();
+ }
+ return success();
+}
+
static LogicalResult
vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
ArrayRef<int64_t> inputVectorSizes,
@@ -1363,23 +1400,10 @@ vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
[](int64_t dim) { return dim == 0; }))
return failure();
// Check API contract for input vector sizes.
- if (!inputVectorSizes.empty()) {
- assert(inputVectorSizes.size() == linalgOp.getNumLoops() &&
- "Input vector sizes don't match the number of loops");
- assert(!ShapedType::isDynamicShape(inputVectorSizes) &&
- "Input vector sizes can't have dynamic dimensions");
- assert(
- llvm::all_of(
- llvm::zip(linalgOp.getStaticLoopRanges(), inputVectorSizes),
- [](std::tuple<int64_t, int64_t> sizePair) {
- int64_t staticSize = std::get<0>(sizePair);
- int64_t inputSize = std::get<1>(sizePair);
- return ShapedType::isDynamic(staticSize) ||
- staticSize <= inputSize;
- }) &&
- "Input vector sizes must be greater than or equal to iteration space "
- "static sizes");
- }
+ if (!inputVectorSizes.empty() &&
+ failed(isValidMaskedInputVector(linalgOp.getStaticLoopRanges(),
+ inputVectorSizes)))
+ return failure();
if (linalgOp.hasDynamicShape() &&
failed(vectorizeDynamicLinalgOpPrecondition(linalgOp))) {
@@ -1445,11 +1469,8 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
}
ArrayRef<int64_t> resultTensorShape = padOp.getResultType().getShape();
- if (!(resultTensorShape == inputVectorSizes)) {
- LDBG("result tensor shape must match input vector sizes: " << padOp
- << "\n");
+ if (failed(isValidMaskedInputVector(resultTensorShape, inputVectorSizes)))
return failure();
- }
if (llvm::any_of(padOp.getLow(), [](Value v) {
std::optional<int64_t> res = getConstantIntValue(v);
diff --git a/mlir/test/Dialect/Linalg/vectorization-masked.mlir b/mlir/test/Dialect/Linalg/vectorization-masked.mlir
index 95fe18bc4c7c7..7b363e7df61db 100644
--- a/mlir/test/Dialect/Linalg/vectorization-masked.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-masked.mlir
@@ -374,13 +374,13 @@ func.func @test_masked_vectorize_pad(
%0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
-> tensor<2x4xf32>
{
- // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
- // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4xf32>
// CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
// CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
// CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+ // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
// CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
@@ -405,6 +405,44 @@ transform.sequence failures(propagate) {
// -----
+// CHECK-LABEL: func @test_masked_vectorize_dynamic_pad
+func.func @test_masked_vectorize_dynamic_pad(
+ %0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
+ -> tensor<?x?xf32>
+{
+ // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+ // CHECK: %[[empty:.*]] = tensor.empty({{.+}}) : tensor<?x?xf32>
+ // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+ // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+ // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+ // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
+ // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
+ // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
+ // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
+ // CHECK: %[[masked_write:.*]] = vector.mask %[[mask]] {
+ // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+ // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<?x?xf32>
+ // CHECK: return %[[masked_write]] : tensor<?x?xf32>
+ %cst = arith.constant 42.43 : f32
+ %c0 = arith.constant 0 : index
+ %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
+ ^bb0(%hh1: index, %hh2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<?x?xf32>
+ return %1: tensor<?x?xf32>
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.structured.masked_vectorize %0 vector_sizes [2, 4] : !transform.any_op
+}
+
+// -----
+
func.func @vectorize_dynamic_matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C: memref<?x?xf32>)