[Mlir-commits] [mlir] 579bca1 - [mlir][linalg] BufferizeToAllocation: Add custom memcpy op

Matthias Springer llvmlistbot at llvm.org
Tue Jul 11 07:48:07 PDT 2023


Author: Matthias Springer
Date: 2023-07-11T16:47:42+02:00
New Revision: 579bca12652ad0391df52c63704392d34bf13f09

URL: https://github.com/llvm/llvm-project/commit/579bca12652ad0391df52c63704392d34bf13f09
DIFF: https://github.com/llvm/llvm-project/commit/579bca12652ad0391df52c63704392d34bf13f09.diff

LOG: [mlir][linalg] BufferizeToAllocation: Add custom memcpy op

Add a new option that allows users to specify the memcpy op to use: "memref.tensor_store" (the default), "memref.copy", or "linalg.copy".
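
For illustration, a minimal transform-IR usage sketch, adapted from the test
added in this commit (the payload function containing the tensor.pad is
elided):

    transform.sequence failures(propagate) {
    ^bb1(%arg1: !transform.any_op):
      %0 = transform.structured.match ops{["tensor.pad"]} in %arg1
          : (!transform.any_op) -> !transform.any_op
      // The default is memcpy_op = "memref.tensor_store"; request linalg.copy.
      %buffer, %new_ops = transform.structured.bufferize_to_allocation %0
          {memory_space = 3, memcpy_op = "linalg.copy"} : !transform.any_op
    }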

Differential Revision: https://reviews.llvm.org/D154968
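
With memcpy_op = "linalg.copy", the tensor.pad rewrite implemented in
ConvertToDestinationStyle.cpp below produces IR of roughly the following
shape (an illustrative sketch, not verbatim compiler output; the SSA names
and dynamic-size values %d0, %d1, %sz, %c50 are assumed to be defined from
the padded source):

    %alloc = memref.alloc(%d0, %d1) : memref<?x?xindex, 3>
    linalg.fill ins(%c50 : index) outs(%alloc : memref<?x?xindex, 3>)
    %view = memref.subview %alloc[5, %l2] [%sz, 10] [1, 1]
        : memref<?x?xindex, 3>
          to memref<?x10xindex, strided<[?, 1], offset: ?>, 3>
    // The source layout is not known yet, so a fully dynamic layout is used.
    %src = bufferization.to_memref %t read_only
        : memref<?x10xindex, strided<[?, ?], offset: ?>>
    linalg.copy ins(%src : memref<?x10xindex, strided<[?, ?], offset: ?>>)
                outs(%view : memref<?x10xindex, strided<[?, 1], offset: ?>, 3>)
    %padded = bufferization.to_tensor %alloc restrict writable
        : memref<?x?xindex, 3>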

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
    mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
    mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
    mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
    mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 4a143a158867cb..7a2bc02451dec9 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -118,6 +118,16 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
     An optional memory space attribute can be specified for the materialized
     buffer allocation.
 
+    If a memory copy is needed, a "memref.tensor_store" is used when possible.
+    This is an op with tensor semantics that will bufferize to a memory copy
+    later. Which concrete op will be used for the memory copy is up to the
+    bufferization framework. Alternatively, a custom memcpy op can be specified
+    via `memcpy_op`. Currently supported are "memref.copy" and "linalg.copy".
+    In that case, the source of each memcpy must not have a custom memory space.
+    Furthermore, because the future buffer layout is unknown for a given
+    tensor, a fully dynamic layout is assumed for best compatibility. Users
+    should use "memref.tensor_store" when possible.
+
     #### Return modes
 
     This operation consumes the `target` handle and produces the
@@ -125,7 +135,10 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
   }];
 
   let arguments = (ins TransformHandleTypeInterface:$target,
-                       OptionalAttr<AnyAttr>:$memory_space);
+                       OptionalAttr<AnyAttr>:$memory_space,
+                       DefaultValuedAttr<StrAttr, "\"memref.tensor_store\"">:
+                           $memcpy_op);
+  let hasVerifier = 1;
   let results = (outs Transform_AnyValue:$allocated_buffer,
                       Transform_AnyOpType:$new_ops);
   let assemblyFormat = "$target attr-dict `:` type($target)";

diff  --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index d02f798c72030a..3491eebc84d694 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -46,6 +46,12 @@ std::optional<vector::CombiningKind> getCombinerOpKind(Operation *combinerOp);
 // Bufferization-related transforms.
 //===----------------------------------------------------------------------===//
 
+struct BufferizeToAllocationOptions {
+  enum class MemcpyOp { MemrefTensorStore = 0, MemrefCopy = 1, LinalgCopy = 2 };
+
+  MemcpyOp memcpyOp = MemcpyOp::MemrefTensorStore;
+};
+
 /// Materialize a buffer allocation for the given tensor.pad op and lower the
 /// op to linalg.fill/linalg.generic + memref.tensor_store. E.g.:
 ///
@@ -62,8 +68,9 @@ std::optional<vector::CombiningKind> getCombinerOpKind(Operation *combinerOp);
 /// In addition to rewriting the IR as shown above, this function returns the
 /// newly allocated buffer. The `insertionPoint` parameter can be used to
 /// specify a custom insertion point for the buffer allocation.
-Value bufferizeToAllocation(RewriterBase &rewriter, tensor::PadOp padOp,
-                            Attribute memorySpace = {},
+Value bufferizeToAllocation(RewriterBase &rewriter,
+                            const BufferizeToAllocationOptions &options,
+                            tensor::PadOp padOp, Attribute memorySpace = {},
                             Operation *insertionPoint = nullptr);
 
 /// Materialize a buffer allocation for the given vector.mask op and bufferize
@@ -85,8 +92,9 @@ Value bufferizeToAllocation(RewriterBase &rewriter, tensor::PadOp padOp,
 /// In addition to rewriting the IR as shown above, this function returns the
 /// newly allocated buffer. The `insertionPoint` parameter can be used to
 /// specify a custom insertion point for the buffer allocation.
-Value bufferizeToAllocation(RewriterBase &rewriter, vector::MaskOp maskOp,
-                            Attribute memorySpace = {},
+Value bufferizeToAllocation(RewriterBase &rewriter,
+                            const BufferizeToAllocationOptions &options,
+                            vector::MaskOp maskOp, Attribute memorySpace = {},
                             Operation *insertionPoint = nullptr);
 
 /// Bufferize the given op with tensor semantics and materialize the result in
@@ -105,8 +113,9 @@ Value bufferizeToAllocation(RewriterBase &rewriter, vector::MaskOp maskOp,
 /// This function returns the newly allocated buffer. The `insertionPoint`
 /// parameter can be used to specify a custom insertion point for the buffer
 /// allocation.
-Value bufferizeToAllocation(RewriterBase &rewriter, Operation *op,
-                            Attribute memorySpace = {},
+Value bufferizeToAllocation(RewriterBase &rewriter,
+                            const BufferizeToAllocationOptions &options,
+                            Operation *op, Attribute memorySpace = {},
                             Operation *insertionPoint = nullptr);
 
 /// Try to eliminate tensor::EmptyOps inside `op` that are anchored on a

diff  --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 5474377ee364b0..4e0aa88464647e 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -235,12 +235,27 @@ DiagnosedSilenceableFailure transform::BufferizeToAllocationOp::apply(
   NewOpsListener newOpsListener(previousListener);
   rewriter.setListener(&newOpsListener);
 
+  linalg::BufferizeToAllocationOptions options;
+  if (getMemcpyOp() == "memref.tensor_store") {
+    options.memcpyOp =
+        linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore;
+  } else if (getMemcpyOp() == "memref.copy") {
+    options.memcpyOp =
+        linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy;
+  } else if (getMemcpyOp() == "linalg.copy") {
+    options.memcpyOp =
+        linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy;
+  } else {
+    llvm_unreachable("invalid memcpy op");
+  }
+
   // Bufferize ops.
   Attribute memorySpace =
       getMemorySpace().has_value() ? getMemorySpace().value() : Attribute();
   SmallVector<Value> allocatedBuffers;
   for (Operation *op : state.getPayloadOps(getTarget())) {
-    Value buffer = linalg::bufferizeToAllocation(rewriter, op, memorySpace);
+    Value buffer =
+        linalg::bufferizeToAllocation(rewriter, options, op, memorySpace);
     if (!buffer) {
       DiagnosedSilenceableFailure diag = emitSilenceableError()
                                          << "failed to bufferize operation";
@@ -264,6 +279,13 @@ void transform::BufferizeToAllocationOp::getEffects(
   modifiesPayload(effects);
 }
 
+LogicalResult transform::BufferizeToAllocationOp::verify() {
+  if (getMemcpyOp() != "memref.tensor_store" &&
+      getMemcpyOp() != "memref.copy" && getMemcpyOp() != "linalg.copy")
+    return emitOpError() << "unsupported memcpy op";
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // DecomposeOp
 //===----------------------------------------------------------------------===//

diff  --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
index e5f7f6128c17bc..d75891af7e45d0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -53,6 +53,42 @@ static Value createInserts(RewriterBase &rewriter, Location loc, int dim,
   return destination;
 }
 
+/// Create a memcpy from the given source tensor to the given destination
+/// memref. The copy op type can be specified in the `options`.
+static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource,
+                         Value memrefDest,
+                         const linalg::BufferizeToAllocationOptions &options) {
+  auto tensorType = dyn_cast<RankedTensorType>(tensorSource.getType());
+  assert(tensorType && "expected ranked tensor");
+  assert(memrefDest.getType().isa<MemRefType>() && "expected ranked memref");
+
+  switch (options.memcpyOp) {
+  case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefTensorStore:
+    // Note: This is the preferred way of copying because no layout map or
+    // memory space needs to be specified for the source.
+    b.create<memref::TensorStoreOp>(loc, tensorSource, memrefDest);
+    break;
+  case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy: {
+    // TODO: Support custom memory space on source.
+    // We do not know the layout map of the source yet, so use a fully dynamic
+    // layout for best compatibility.
+    Value toMemref = b.create<bufferization::ToMemrefOp>(
+        loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType),
+        tensorSource, /*readOnly=*/true);
+    b.create<memref::CopyOp>(loc, toMemref, memrefDest);
+  } break;
+  case linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy: {
+    // TODO: Support custom memory space on source.
+    // We do not know the layout map of the source yet, so use a fully dynamic
+    // layout for best compatibility.
+    Value toMemref = b.create<bufferization::ToMemrefOp>(
+        loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType),
+        tensorSource, /*readOnly=*/true);
+    b.create<linalg::CopyOp>(loc, toMemref, memrefDest);
+  } break;
+  }
+}
+
 static Operation *movePaddingToFillOrGenericOp(RewriterBase &rewriter,
                                                Location loc, PadOp padOp,
                                                Value dest) {
@@ -169,9 +205,9 @@ static Value createAllocationForTensor(RewriterBase &rewriter, Location loc,
   return alloc;
 }
 
-Value linalg::bufferizeToAllocation(RewriterBase &rewriter, PadOp padOp,
-                                    Attribute memorySpace,
-                                    Operation *insertionPoint) {
+Value linalg::bufferizeToAllocation(
+    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
+    PadOp padOp, Attribute memorySpace, Operation *insertionPoint) {
   OpBuilder::InsertionGuard g(rewriter);
   rewriter.setInsertionPoint(insertionPoint ? insertionPoint : padOp);
   Location loc = padOp.getLoc();
@@ -195,7 +231,7 @@ Value linalg::bufferizeToAllocation(RewriterBase &rewriter, PadOp padOp,
                                     rewriter.getIndexAttr(1));
   Value subview = rewriter.create<memref::SubViewOp>(
       loc, alloc, /*offsets=*/padOp.getMixedLowPad(), sizes, strides);
-  rewriter.create<memref::TensorStoreOp>(loc, padOp.getSource(), subview);
+  createMemcpy(rewriter, loc, padOp.getSource(), subview, options);
 
   // Create bufferization.to_tensor with "restrict" and "writable". The returned
   // tensor is a new buffer allocation, so it does not alias with any buffer.
@@ -205,27 +241,26 @@ Value linalg::bufferizeToAllocation(RewriterBase &rewriter, PadOp padOp,
   return alloc;
 }
 
-Value linalg::bufferizeToAllocation(RewriterBase &rewriter,
-                                    vector::MaskOp maskOp,
-                                    Attribute memorySpace,
-                                    Operation *insertionPoint) {
+Value linalg::bufferizeToAllocation(
+    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
+    vector::MaskOp maskOp, Attribute memorySpace, Operation *insertionPoint) {
   assert(llvm::range_size(maskOp.getMaskBlock()->without_terminator()) == 1 &&
          "expected single masked op");
   OpBuilder::InsertionGuard g(rewriter);
-  bufferization::BufferizationOptions options;
+  bufferization::BufferizationOptions bufferizationOptions;
   Operation *yieldOp = maskOp.getMaskRegion().front().getTerminator();
   assert(isa<vector::YieldOp>(yieldOp) && "expected yield op terminator");
 
   // Bufferize maskable op. By default, place the buffer allocation right before
   // the mask op.
   Value alloc = bufferizeToAllocation(
-      rewriter, maskOp.getMaskableOp(), memorySpace,
+      rewriter, options, maskOp.getMaskableOp(), memorySpace,
       /*insertionPoint=*/insertionPoint ? insertionPoint : maskOp);
 
   // Bufferize terminator.
   rewriter.setInsertionPoint(yieldOp);
   if (failed(cast<bufferization::BufferizableOpInterface>(yieldOp).bufferize(
-          rewriter, options)))
+          rewriter, bufferizationOptions)))
     return nullptr;
 
   // Erase dead to_tensor ops inside of the mask op. This is necessary because
@@ -247,7 +282,7 @@ Value linalg::bufferizeToAllocation(RewriterBase &rewriter,
         resultUses.push_back(&use);
   rewriter.setInsertionPoint(maskOp);
   if (failed(cast<bufferization::BufferizableOpInterface>(maskOp.getOperation())
-                 .bufferize(rewriter, options)))
+                 .bufferize(rewriter, bufferizationOptions)))
     return nullptr;
 
   // Set "restrict" attribute, indicating that no other tensor aliases with
@@ -392,23 +427,23 @@ mlir::linalg::rewriteInDestinationPassingStyle(RewriterBase &rewriter,
   return insertSliceOp.getOperation();
 }
 
-Value linalg::bufferizeToAllocation(RewriterBase &rewriter, Operation *op,
-                                    Attribute memorySpace,
-                                    Operation *insertionPoint) {
+Value linalg::bufferizeToAllocation(
+    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
+    Operation *op, Attribute memorySpace, Operation *insertionPoint) {
   using namespace bufferization;
 
   // Call specialized overload for certain ops.
   if (auto padOp = dyn_cast<tensor::PadOp>(op))
-    return bufferizeToAllocation(rewriter, padOp, memorySpace);
+    return bufferizeToAllocation(rewriter, options, padOp, memorySpace);
   if (auto maskOp = dyn_cast<vector::MaskOp>(op))
-    return bufferizeToAllocation(rewriter, maskOp, memorySpace);
+    return bufferizeToAllocation(rewriter, options, maskOp, memorySpace);
 
   // Only bufferizable ops are supported.
   auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op);
   if (!bufferizableOp)
     return nullptr;
-  BufferizationOptions options;
-  AnalysisState state(options);
+  BufferizationOptions bufferizationOptions;
+  AnalysisState state(bufferizationOptions);
 
   // Gather tensor results.
   SmallVector<OpResult> tensorResults;
@@ -462,8 +497,7 @@ Value linalg::bufferizeToAllocation(RewriterBase &rewriter, Operation *op,
     if (!state.findDefinitions(operand->get()).empty()) {
       // Initialize buffer with a copy of the operand data. Not needed if the
       // tensor is uninitialized.
-      rewriter.create<memref::TensorStoreOp>(op->getLoc(), operand->get(),
-                                             alloc);
+      createMemcpy(rewriter, op->getLoc(), operand->get(), alloc, options);
     }
     rewriter.updateRootInPlace(op, [&]() {
       operand->set(rewriter.create<ToTensorOp>(op->getLoc(), alloc));
@@ -472,7 +506,7 @@ Value linalg::bufferizeToAllocation(RewriterBase &rewriter, Operation *op,
 
   // Bufferize the op.
   rewriter.setInsertionPoint(op);
-  if (failed(bufferizableOp.bufferize(rewriter, options)))
+  if (failed(bufferizableOp.bufferize(rewriter, bufferizationOptions)))
     return nullptr;
 
   // Set "restrict" attribute, indicating that no other tensor aliases with

diff  --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
index 45efde3b077a44..dcac1f77a8b4fc 100644
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -39,7 +39,7 @@ transform.sequence failures(propagate) {
   // expected-remark @below{{1}}
   test_print_number_of_associated_payload_ir_ops %fill_op : !transform.any_op
 
-  // Ensure that one memref.tensor_store was generated.
+  // Ensure that one linalg.copy was generated.
   %tensor_store = transform.select "memref.tensor_store" in %new : (!transform.any_op) -> !transform.any_op
   // expected-remark @below{{1}}
   test_print_number_of_associated_payload_ir_ops %tensor_store : !transform.any_op
@@ -47,6 +47,43 @@ transform.sequence failures(propagate) {
 
 // -----
 
+// CHECK-LABEL: func @tensor_pad_constant_with_custom_copy(
+//   CHECK-NOT:   memref.tensor_store
+//   CHECK-NOT:   memref.copy
+//       CHECK:   linalg.copy
+func.func @tensor_pad_constant_with_custom_copy(
+    %t: tensor<?x10xindex>, %l2: index, %h1: index, %h2: index)
+        -> tensor<?x?xindex>
+{
+  %0 = tensor.pad %t low[5, %l2] high[%h1, %h2] {
+  ^bb0(%arg0: index, %arg1: index):
+    %c = arith.constant 50 : index
+    tensor.yield %c : index
+  } : tensor<?x10xindex> to tensor<?x?xindex>
+  return %0 : tensor<?x?xindex>
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 3, memcpy_op = "linalg.copy"}: !transform.any_op
+
+  // Ensure that one linalg.fill was generated.
+  %fill_op = transform.select "linalg.fill" in %new : (!transform.any_op) -> !transform.any_op
+  // expected-remark @below{{1}}
+  test_print_number_of_associated_payload_ir_ops %fill_op : !transform.any_op
+
+  // Ensure that one linalg.copy was generated.
+  %linalg_copy = transform.select "linalg.copy" in %new : (!transform.any_op) -> !transform.any_op
+  // expected-remark @below{{1}}
+  test_print_number_of_associated_payload_ir_ops %linalg_copy : !transform.any_op
+
+  // Make sure that One-Shot Bufferize can bufferize the rest.
+  %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
+}
+
+// -----
+
 // CHECK-LABEL: func @tensor_pad_constant(
 //  CHECK-SAME:     %[[t:.*]]: tensor<?x10xindex>
 //       CHECK:   %[[src:.*]] = bufferization.to_memref %[[t]]


        

