[Mlir-commits] [mlir] 6abd8e3 - [mlir][linalg] Add support for dynamic pad op masking vectorization.
Hanhan Wang
llvmlistbot at llvm.org
Thu May 18 15:05:30 PDT 2023
Author: Hanhan Wang
Date: 2023-05-18T15:05:18-07:00
New Revision: 6abd8e30f443b9b17935c40e4451cbc63e3bd49d
URL: https://github.com/llvm/llvm-project/commit/6abd8e30f443b9b17935c40e4451cbc63e3bd49d
DIFF: https://github.com/llvm/llvm-project/commit/6abd8e30f443b9b17935c40e4451cbc63e3bd49d.diff
LOG: [mlir][linalg] Add support for dynamic pad op masking vectorization.
Reviewed By: dcaballe, awarzynski
Differential Revision: https://reviews.llvm.org/D150497
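In short: vectorizing a tensor.pad whose result type has dynamic dimensions now reifies the result shape to build the tensor.empty destination and masks both the transfer_read and the transfer_write with the mask built from the source dimensions. A sketch of the resulting IR is below, based on the new test case further down in the diff; the [2, 4] vector sizes come from that test, while the SSA names (%src, %sz0, %sz1, ...) are only illustrative. A dynamic pad such as

  // Dynamic pad: the result extents are not known statically.
  %padded = tensor.pad %src low[0, 0] high[%h0, %h1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %pad_value : f32
  } : tensor<?x?xf32> to tensor<?x?xf32>

is rewritten roughly to

  // Destination allocated with the reified (dynamic) result sizes.
  %empty = tensor.empty(%sz0, %sz1) : tensor<?x?xf32>
  // Mask built from the dynamic source extents.
  %d0 = tensor.dim %src, %c0 : tensor<?x?xf32>
  %d1 = tensor.dim %src, %c1 : tensor<?x?xf32>
  %mask = vector.create_mask %d0, %d1 : vector<2x4xi1>
  // Masked read: masked-off lanes take the pad value.
  %read = vector.mask %mask {
    vector.transfer_read %src[%c0, %c0], %pad_value {in_bounds = [true, true]}
      : tensor<?x?xf32>, vector<2x4xf32>
  } : vector<2x4xi1> -> vector<2x4xf32>
  // Masked write into the reified destination.
  %padded_vec = vector.mask %mask {
    vector.transfer_write %read, %empty[%c0, %c0] {in_bounds = [true, true]}
      : vector<2x4xf32>, tensor<?x?xf32>
  } : vector<2x4xi1> -> tensor<?x?xf32>

where %sz0/%sz1 stand for the reified result sizes and %h0/%h1 are the dynamic high padding amounts.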
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
mlir/test/Dialect/Linalg/vectorization-masked.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index fbdc7d7f57e59..93939544e4910 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -1296,14 +1296,19 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
// transfer_write_in_bounds(transfer_read_masked(pad_source, pad_value))
OpBuilder::InsertionGuard g(rewriter);
rewriter.setInsertionPoint(padOp);
- auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
- auto emptyOp =
- rewriter.create<tensor::EmptyOp>(loc, padOp.getResultType(),
- /*dynamicSizes=*/ValueRange{});
+
+ ReifiedRankedShapedTypeDims reifiedReturnShapes;
+ LogicalResult status =
+ cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation())
+ .reifyResultShapes(rewriter, reifiedReturnShapes);
+ assert(succeeded(status) && "failed to reify result shapes");
+ auto emptyOp = rewriter.create<tensor::EmptyOp>(loc, reifiedReturnShapes[0],
+ padValue.getType());
SmallVector<OpFoldResult> mixedSourceDims =
getMixedDimensions(rewriter, loc, padOp.getSource());
Value mask =
rewriter.create<vector::CreateMaskOp>(loc, maskType, mixedSourceDims);
+ auto zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
auto transferReadOp = rewriter.create<vector::TransferReadOp>(
loc,
/*vectorType=*/vectorType,
@@ -1313,13 +1318,14 @@ vectorizeAsTensorPadOp(RewriterBase &rewriter, tensor::PadOp padOp,
/*inBounds=*/SmallVector<bool>(rank, true));
auto maskedOp = cast<vector::MaskOp>(
mlir::vector::maskOperation(rewriter, transferReadOp, mask));
- auto transferWriteOp = rewriter.create<vector::TransferWriteOp>(
+ Operation *write = rewriter.create<vector::TransferWriteOp>(
loc,
/*vector=*/maskedOp->getResult(0),
/*source=*/emptyOp,
/*indices=*/SmallVector<Value>(rank, zero),
/*inBounds=*/SmallVector<bool>(rank, true));
- newResults.push_back(transferWriteOp.getResult());
+ write = mlir::vector::maskOperation(rewriter, write, mask);
+ newResults.push_back(write->getResult(0));
return success();
}
@@ -1354,6 +1360,37 @@ static LogicalResult vectorizeDynamicLinalgOpPrecondition(linalg::LinalgOp op) {
return success();
}
+/// Returns success if `inputVectorSizes` is a valid masking configuration for
+/// the given `shape`, i.e., it meets:
+/// 1. The number of elements in both arrays is equal.
+/// 2. `inputVectorSizes` does not have dynamic dimensions.
+/// 3. All the values in `inputVectorSizes` are greater than or equal to
+///    the static sizes in `shape`.
+static LogicalResult
+isValidMaskedInputVector(ArrayRef<int64_t> shape,
+ ArrayRef<int64_t> inputVectorSizes) {
+ if (inputVectorSizes.size() != shape.size()) {
+ LDBG("Input vector sizes don't match the number of loops");
+ return failure();
+ }
+ if (ShapedType::isDynamicShape(inputVectorSizes)) {
+ LDBG("Input vector sizes can't have dynamic dimensions");
+ return failure();
+ }
+ if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
+ [](std::tuple<int64_t, int64_t> sizePair) {
+ int64_t staticSize = std::get<0>(sizePair);
+ int64_t inputSize = std::get<1>(sizePair);
+ return ShapedType::isDynamic(staticSize) ||
+ staticSize <= inputSize;
+ })) {
+ LDBG("Input vector sizes must be greater than or equal to iteration space "
+ "static sizes");
+ return failure();
+ }
+ return success();
+}
+
static LogicalResult
vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
ArrayRef<int64_t> inputVectorSizes,
@@ -1363,23 +1400,10 @@ vectorizeLinalgOpPrecondition(LinalgOp linalgOp,
[](int64_t dim) { return dim == 0; }))
return failure();
// Check API contract for input vector sizes.
- if (!inputVectorSizes.empty()) {
- assert(inputVectorSizes.size() == linalgOp.getNumLoops() &&
- "Input vector sizes don't match the number of loops");
- assert(!ShapedType::isDynamicShape(inputVectorSizes) &&
- "Input vector sizes can't have dynamic dimensions");
- assert(
- llvm::all_of(
- llvm::zip(linalgOp.getStaticLoopRanges(), inputVectorSizes),
- [](std::tuple<int64_t, int64_t> sizePair) {
- int64_t staticSize = std::get<0>(sizePair);
- int64_t inputSize = std::get<1>(sizePair);
- return ShapedType::isDynamic(staticSize) ||
- staticSize <= inputSize;
- }) &&
- "Input vector sizes must be greater than or equal to iteration space "
- "static sizes");
- }
+ if (!inputVectorSizes.empty() &&
+ failed(isValidMaskedInputVector(linalgOp.getStaticLoopRanges(),
+ inputVectorSizes)))
+ return failure();
if (linalgOp.hasDynamicShape() &&
failed(vectorizeDynamicLinalgOpPrecondition(linalgOp))) {
@@ -1445,11 +1469,8 @@ vectorizePadOpPrecondition(tensor::PadOp padOp,
}
ArrayRef<int64_t> resultTensorShape = padOp.getResultType().getShape();
- if (!(resultTensorShape == inputVectorSizes)) {
- LDBG("result tensor shape must match input vector sizes: " << padOp
- << "\n");
+ if (failed(isValidMaskedInputVector(resultTensorShape, inputVectorSizes)))
return failure();
- }
if (llvm::any_of(padOp.getLow(), [](Value v) {
std::optional<int64_t> res = getConstantIntValue(v);
diff --git a/mlir/test/Dialect/Linalg/vectorization-masked.mlir b/mlir/test/Dialect/Linalg/vectorization-masked.mlir
index 95fe18bc4c7c7..7b363e7df61db 100644
--- a/mlir/test/Dialect/Linalg/vectorization-masked.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization-masked.mlir
@@ -374,13 +374,13 @@ func.func @test_masked_vectorize_pad(
%0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
-> tensor<2x4xf32>
{
- // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
- // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[empty:.*]] = tensor.empty() : tensor<2x4xf32>
// CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
// CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
// CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+ // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
// CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
// CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
// CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
@@ -405,6 +405,44 @@ transform.sequence failures(propagate) {
// -----
+// CHECK-LABEL: func @test_masked_vectorize_dynamic_pad
+func.func @test_masked_vectorize_dynamic_pad(
+ %0 : tensor<?x?xf32>, %h0 : index, %h1 : index)
+ -> tensor<?x?xf32>
+{
+ // CHECK-DAG: %[[c42:.*]] = arith.constant 4.243000e+01 : f32
+ // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+ // CHECK: %[[empty:.*]] = tensor.empty({{.+}}) : tensor<?x?xf32>
+ // CHECK: %[[d0:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+ // CHECK: %[[d1:.*]] = tensor.dim {{.*}} : tensor<?x?xf32>
+ // CHECK: %[[mask:.*]] = vector.create_mask %[[d0]], %[[d1]] : vector<2x4xi1>
+ // CHECK-DAG: %[[c0_2:.*]] = arith.constant 0 : index
+ // CHECK: %[[masked_read:.*]] = vector.mask %[[mask]] {
+ // CHECK-SAME: vector.transfer_read %{{.*}}[%[[c0_2]], %[[c0_2]]], %[[c42]]
+ // CHECK-SAME: {in_bounds = [true, true]} : tensor<?x?xf32>, vector<2x4xf32>
+ // CHECK-SAME: } : vector<2x4xi1> -> vector<2x4xf32>
+ // CHECK: %[[masked_write:.*]] = vector.mask %[[mask]] {
+ // CHECK-SAME: vector.transfer_write %[[masked_read]], %[[empty]][%[[c0_2]], %[[c0_2]]]
+ // CHECK-SAME: {in_bounds = [true, true]} : vector<2x4xf32>, tensor<?x?xf32>
+ // CHECK: return %[[masked_write]] : tensor<?x?xf32>
+ %cst = arith.constant 42.43 : f32
+ %c0 = arith.constant 0 : index
+ %1 = tensor.pad %0 low[0, %c0] high[%h0, %h1] {
+ ^bb0(%hh1: index, %hh2: index):
+ tensor.yield %cst : f32
+ } : tensor<?x?xf32> to tensor<?x?xf32>
+ return %1: tensor<?x?xf32>
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
+ : (!transform.any_op) -> !transform.any_op
+ transform.structured.masked_vectorize %0 vector_sizes [2, 4] : !transform.any_op
+}
+
+// -----
+
func.func @vectorize_dynamic_matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>, %C: memref<?x?xf32>) {
linalg.matmul ins(%A, %B: memref<?x?xf32>, memref<?x?xf32>)
outs(%C: memref<?x?xf32>)