[Mlir-commits] [mlir] 412c2fd - [mlir][linalg] Optional dealloc insertion for bufferize_to_allocation (#65610)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu Sep 7 08:49:51 PDT 2023


Author: Martin Erhart
Date: 2023-09-07T17:49:48+02:00
New Revision: 412c2fd27030f084088e0f395c28e0767df8b212

URL: https://github.com/llvm/llvm-project/commit/412c2fd27030f084088e0f395c28e0767df8b212
DIFF: https://github.com/llvm/llvm-project/commit/412c2fd27030f084088e0f395c28e0767df8b212.diff

LOG: [mlir][linalg] Optional dealloc insertion for bufferize_to_allocation (#65610)

This commit makes it possible to omit the insertion of the memref.dealloc operation
when linalg.structured.bufferize_to_allocation is run and makes this the
default behavior. This is desirable when the
buffer-deallocation-pipeline is run after bufferization to handle buffer
deallocation.

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
    mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
    mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
    mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
    mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
    mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
    mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir

Removed: 
    


################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index 6011663e432c1e1..f9b94eef22446d2 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -129,9 +129,13 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
     "memref.tensor_store" when possible.
 
     "memref.alloc" is used for new buffer allocations. The buffer is deallocated
-    at the end of the block. Custom allocation ops can be specified via
-    `alloc_op`. Currently supported are "memref.alloc" and "memref.alloca". In
-    case of a "memref.alloca", the buffer is not deallocated.
+    at the end of the block if the "emit_dealloc" attribute is present. If this
+    attribute is not present, the allocated memory will be leaked. However,
+    running the `-buffer-deallocation-pipeline` after all bufferization is done
+    will properly insert the corresponding deallocation(s). Custom allocation
+    ops can be specified via `alloc_op`. Currently supported are "memref.alloc"
+    and "memref.alloca". In case of a "memref.alloca", the buffer is not
+    deallocated.
 
     If `bufferize_destination_only` is set, only the destination operands of the
     op are bufferized to a new memory allocation, but not the op itself.
@@ -148,7 +152,8 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
                            $memcpy_op,
                        DefaultValuedAttr<StrAttr, "\"memref.alloc\"">:
                            $alloc_op,
-                       UnitAttr:$bufferize_destination_only);
+                       UnitAttr:$bufferize_destination_only,
+                       UnitAttr:$emit_dealloc);
   let results = (outs Transform_AnyValue:$allocated_buffer,
                       Transform_AnyOpType:$new_ops);
   let assemblyFormat = "$target attr-dict `:` type($target)";

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 94a39ad186f54a3..0c341209a933122 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -57,6 +57,12 @@ struct BufferizeToAllocationOptions {
   /// a new allocation (and wrapped in "bufferization.to_tensor"), but not the
   /// targeted op itself.
   bool bufferizeDestinationOnly = false;
+
+  /// If set to "true", a `memref.dealloc` operation will be emitted for each
+  /// allocated buffer. Otherwise, the memory is leaked, which is useful if
+  /// the buffer deallocation pipeline should be run after bufferization is
+  /// done.
+  bool emitDealloc = false;
 };
 
 /// Materialize a buffer allocation for the given tensor.pad op and lower the

diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 7a701e44a9cda4a..cab646ebd4e2253 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -263,6 +263,7 @@ DiagnosedSilenceableFailure transform::BufferizeToAllocationOp::apply(
     llvm_unreachable("invalid alloc op");
   }
   options.bufferizeDestinationOnly = getBufferizeDestinationOnly();
+  options.emitDealloc = getEmitDealloc();
 
   // Bufferize ops.
   Attribute memorySpace =

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
index 0c559db60ee88d2..f7340844f7e1977 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -202,9 +202,11 @@ createAllocationForTensor(RewriterBase &rewriter, Location loc, Value value,
   if (options.allocOp ==
       linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloc) {
     alloc = rewriter.create<memref::AllocOp>(loc, memrefType, dynamicSizes);
-    // Place deallocation at the end of the block.
-    rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
-    rewriter.create<memref::DeallocOp>(loc, alloc);
+    if (options.emitDealloc) {
+      // Place deallocation at the end of the block.
+      rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
+      rewriter.create<memref::DeallocOp>(loc, alloc);
+    }
   } else if (options.allocOp ==
              linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloca) {
     alloc = rewriter.create<memref::AllocaOp>(loc, memrefType, dynamicSizes);

diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
index a809939e7539eef..da6ebdbd24ded48 100644
--- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
+++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
@@ -85,7 +85,7 @@ transform.sequence failures(propagate) {
 
   // Assign shared memory buffer to padding.
   %buffer, %new_ops = transform.structured.bufferize_to_allocation
-      %pad_forall_op {memory_space = 3, bufferize_destination_only}
+      %pad_forall_op {memory_space = 3, bufferize_destination_only, emit_dealloc}
       : !transform.any_op
 
   // Bufferize.
@@ -197,7 +197,7 @@ transform.sequence failures(propagate) {
 
   // Assign shared memory buffer to padding.
   %buffer, %new_ops = transform.structured.bufferize_to_allocation
-      %pad_forall_op {memory_space = 3, bufferize_destination_only}
+      %pad_forall_op {memory_space = 3, bufferize_destination_only, emit_dealloc}
       : !transform.any_op
 
   // Bufferize.

diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
index 5b20627e19dc209..45c2eb5dfdf5022 100644
--- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
+++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
@@ -54,7 +54,7 @@ transform.sequence failures(propagate) {
     padding_dimensions=[0, 1, 2],
     pack_paddings=[1, 1, 1]
   } : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
-  %buffer, %new_ops = transform.structured.bufferize_to_allocation %pad {memory_space = 3} : !transform.any_op
+  %buffer, %new_ops = transform.structured.bufferize_to_allocation %pad {memory_space = 3, emit_dealloc} : !transform.any_op
   %2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
 
 }
@@ -114,6 +114,6 @@ transform.sequence failures(propagate) {
   transform.structured.masked_vectorize %pad vector_sizes [10, 12] : !transform.any_op
   %vector_write = transform.structured.match ops{["vector.transfer_write"]} in %arg1 : (!transform.any_op) -> !transform.any_op
   %mask_op = transform.get_parent_op %vector_write {op_name = "vector.mask"} : (!transform.any_op) -> !transform.any_op
-  %buffer, %new_ops = transform.structured.bufferize_to_allocation %mask_op {memory_space = 3} : !transform.any_op
+  %buffer, %new_ops = transform.structured.bufferize_to_allocation %mask_op {memory_space = 3, emit_dealloc} : !transform.any_op
   %2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
 }

diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
index ddb953237b008d8..7dee14f22df5d08 100644
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -32,7 +32,7 @@ func.func @tensor_pad_constant(%t: tensor<?x10xindex>, %l2: index, %h1: index,
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {emit_dealloc} : !transform.any_op
 
   // Ensure that one linalg.fill was generated.
   %fill_op = transform.select "linalg.fill" in %new : (!transform.any_op) -> !transform.any_op
@@ -67,7 +67,7 @@ func.func @tensor_pad_constant_with_custom_copy(
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 3, alloc_op = "memref.alloca", memcpy_op = "linalg.copy"}: !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 3, alloc_op = "memref.alloca", memcpy_op = "linalg.copy", emit_dealloc}: !transform.any_op
 
   // Ensure that one linalg.fill was generated.
   %fill_op = transform.select "linalg.fill" in %new : (!transform.any_op) -> !transform.any_op
@@ -110,7 +110,7 @@ func.func @tensor_pad_constant(%t: tensor<?x10xindex>, %l2: index, %h1: index,
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {emit_dealloc} : !transform.any_op
   // Make sure that One-Shot Bufferize can bufferize the rest.
   %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
 }
@@ -134,7 +134,7 @@ func.func @tensor_insert(%t: tensor<?x10xindex>, %idx: index, %v: index) -> tens
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
   // Make sure that One-Shot Bufferize can bufferize the rest.
   %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
 }
@@ -157,7 +157,7 @@ func.func @tensor_insert_into_empty(%idx: index, %v: index) -> tensor<10xindex>
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
   // Make sure that One-Shot Bufferize can bufferize the rest.
   %4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
 }
@@ -174,7 +174,7 @@ transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.extract"]} in %arg1 : (!transform.any_op) -> !transform.any_op
   // expected-error @below{{failed to bufferize operation}}
-  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
 }
 
 // -----
@@ -195,7 +195,7 @@ func.func @vector_mask(%t: tensor<?xf32>, %val: vector<16xf32>, %idx: index, %m0
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["vector.mask"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
 }
 
 // -----
@@ -216,7 +216,7 @@ func.func @tensor_insert_destination(%t: tensor<?x10xindex>, %idx: index, %v: in
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
-  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only} : !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only, emit_dealloc} : !transform.any_op
 }
 
 // -----
@@ -240,5 +240,20 @@ func.func @scf_for_destination(%t: tensor<?x10xindex>, %lb: index, %ub: index, %
 transform.sequence failures(propagate) {
 ^bb1(%arg1: !transform.any_op):
   %0 = transform.structured.match ops{["scf.for"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+  %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only, emit_dealloc} : !transform.any_op
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_insert_destination_no_dealloc
+//   CHECK-NOT: dealloc
+func.func @tensor_insert_destination_no_dealloc(%t: tensor<?x10xindex>, %idx: index, %v: index) -> tensor<?x10xindex> {
+  %r = tensor.insert %v into %t[%idx, %idx] : tensor<?x10xindex>
+  return %r : tensor<?x10xindex>
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+  %0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
   %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only} : !transform.any_op
 }


        


More information about the Mlir-commits mailing list