[Mlir-commits] [mlir] [mlir][linalg] Optional dealloc insertion for bufferize_to_allocation (PR #65610)
Martin Erhart
llvmlistbot at llvm.org
Thu Sep 7 07:24:36 PDT 2023
https://github.com/maerhart created https://github.com/llvm/llvm-project/pull/65610:
This commit makes the insertion of the memref.dealloc operation optional when transform.structured.bufferize_to_allocation is run, with omission being the new default behavior. This is desirable when the buffer-deallocation-pipeline is run after bufferization to handle buffer deallocation.
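For illustration, a minimal sketch of the intended usage, adapted from the test cases in this patch (the tensor.pad target and the surrounding sequence are placeholders):

  transform.sequence failures(propagate) {
  ^bb1(%arg1: !transform.any_op):
    %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // New default: no memref.dealloc is emitted for the new allocation; the
    // buffer is expected to be freed later by the buffer deallocation
    // pipeline.
    %2, %new = transform.structured.bufferize_to_allocation %0 : !transform.any_op
  }

Adding the new emit_dealloc unit attribute restores the previous behavior, i.e. a memref.dealloc is placed at the end of the block:

  %2, %new = transform.structured.bufferize_to_allocation %0 {emit_dealloc} : !transform.any_op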
From 497802628f7cb73c5aae59d86518573d296de09a Mon Sep 17 00:00:00 2001
From: Martin Erhart <merhart at google.com>
Date: Thu, 7 Sep 2023 14:09:33 +0000
Subject: [PATCH] [mlir][linalg] Optional dealloc insertion for
bufferize_to_allocation
This commit makes the insertion of the memref.dealloc operation optional when
transform.structured.bufferize_to_allocation is run, with omission being the
new default behavior. This is desirable when the buffer-deallocation-pipeline
is run after bufferization to handle buffer deallocation.
---
.../Linalg/TransformOps/LinalgTransformOps.td | 13 +++++---
.../Dialect/Linalg/Transforms/Transforms.h | 6 ++++
.../TransformOps/LinalgTransformOps.cpp | 1 +
.../Transforms/ConvertToDestinationStyle.cpp | 8 +++--
.../Linalg/matmul-shared-memory-padding.mlir | 4 +--
.../Linalg/pad-to-specific-memory-space.mlir | 4 +--
.../transform-op-bufferize-to-allocation.mlir | 31 ++++++++++++++-----
7 files changed, 48 insertions(+), 19 deletions(-)
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
index ee6e12f72b80bab..ecfa37d35138c83 100644
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -129,9 +129,13 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
"memref.tensor_store" when possible.
"memref.alloc" is used for new buffer allocations. The buffer is deallocated
- at the end of the block. Custom allocation ops can be specified via
- `alloc_op`. Currently supported are "memref.alloc" and "memref.alloca". In
- case of a "memref.alloca", the buffer is not deallocated.
+ at the end of the block if the "emit_dealloc" attribute is present. If this
+ attribute is not present, the allocated memory will be leaked. However,
+ running the `-buffer-deallocation-pipeline` after all bufferization is done
+ will properly insert the corresponding deallocation(s). Custom allocation
+ ops can be specified via `alloc_op`. Currently supported are "memref.alloc"
+ and "memref.alloca". In case of a "memref.alloca", the buffer is not
+ deallocated.
If `bufferize_destination_only` is set, only the destination operands of the
op are bufferized to a new memory allocation, but not the op itself.
@@ -148,7 +152,8 @@ def BufferizeToAllocationOp : Op<Transform_Dialect,
$memcpy_op,
DefaultValuedAttr<StrAttr, "\"memref.alloc\"">:
$alloc_op,
- UnitAttr:$bufferize_destination_only);
+ UnitAttr:$bufferize_destination_only,
+ UnitAttr:$emit_dealloc);
let results = (outs Transform_AnyValue:$allocated_buffer,
Transform_AnyOpType:$new_ops);
let assemblyFormat = "$target attr-dict `:` type($target)";
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index fd82c67ede5fa97..0a3597d35f0eaa9 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -57,6 +57,12 @@ struct BufferizeToAllocationOptions {
/// a new allocation (and wrapped in "bufferization.to_tensor"), but not the
/// targeted op itself.
bool bufferizeDestinationOnly = false;
+
+ /// If set to "true", a `memref.dealloc` operation will be emitted for each
+ /// allocated buffer. Otherwise, the memory is leaked, which is useful if
+ /// the buffer deallocation pipeline should be run after bufferization is
+ /// done.
+ bool emitDealloc = false;
};
/// Materialize a buffer allocation for the given tensor.pad op and lower the
diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
index 3421a3c169dbba1..a3e264aee00f169 100644
--- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
+++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp
@@ -263,6 +263,7 @@ DiagnosedSilenceableFailure transform::BufferizeToAllocationOp::apply(
llvm_unreachable("invalid alloc op");
}
options.bufferizeDestinationOnly = getBufferizeDestinationOnly();
+ options.emitDealloc = getEmitDealloc();
// Bufferize ops.
Attribute memorySpace =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
index 0c559db60ee88d2..f7340844f7e1977 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertToDestinationStyle.cpp
@@ -202,9 +202,11 @@ createAllocationForTensor(RewriterBase &rewriter, Location loc, Value value,
if (options.allocOp ==
linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloc) {
alloc = rewriter.create<memref::AllocOp>(loc, memrefType, dynamicSizes);
- // Place deallocation at the end of the block.
- rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
- rewriter.create<memref::DeallocOp>(loc, alloc);
+ if (options.emitDealloc) {
+ // Place deallocation at the end of the block.
+ rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
+ rewriter.create<memref::DeallocOp>(loc, alloc);
+ }
} else if (options.allocOp ==
linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloca) {
alloc = rewriter.create<memref::AllocaOp>(loc, memrefType, dynamicSizes);
diff --git a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
index a809939e7539eef..da6ebdbd24ded48 100644
--- a/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
+++ b/mlir/test/Dialect/Linalg/matmul-shared-memory-padding.mlir
@@ -85,7 +85,7 @@ transform.sequence failures(propagate) {
// Assign shared memory buffer to padding.
%buffer, %new_ops = transform.structured.bufferize_to_allocation
- %pad_forall_op {memory_space = 3, bufferize_destination_only}
+ %pad_forall_op {memory_space = 3, bufferize_destination_only, emit_dealloc}
: !transform.any_op
// Bufferize.
@@ -197,7 +197,7 @@ transform.sequence failures(propagate) {
// Assign shared memory buffer to padding.
%buffer, %new_ops = transform.structured.bufferize_to_allocation
- %pad_forall_op {memory_space = 3, bufferize_destination_only}
+ %pad_forall_op {memory_space = 3, bufferize_destination_only, emit_dealloc}
: !transform.any_op
// Bufferize.
diff --git a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
index 5b20627e19dc209..45c2eb5dfdf5022 100644
--- a/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
+++ b/mlir/test/Dialect/Linalg/pad-to-specific-memory-space.mlir
@@ -54,7 +54,7 @@ transform.sequence failures(propagate) {
padding_dimensions=[0, 1, 2],
pack_paddings=[1, 1, 1]
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
- %buffer, %new_ops = transform.structured.bufferize_to_allocation %pad {memory_space = 3} : !transform.any_op
+ %buffer, %new_ops = transform.structured.bufferize_to_allocation %pad {memory_space = 3, emit_dealloc} : !transform.any_op
%2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
}
@@ -114,6 +114,6 @@ transform.sequence failures(propagate) {
transform.structured.masked_vectorize %pad vector_sizes [10, 12] : !transform.any_op
%vector_write = transform.structured.match ops{["vector.transfer_write"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%mask_op = transform.get_parent_op %vector_write {op_name = "vector.mask"} : (!transform.any_op) -> !transform.any_op
- %buffer, %new_ops = transform.structured.bufferize_to_allocation %mask_op {memory_space = 3} : !transform.any_op
+ %buffer, %new_ops = transform.structured.bufferize_to_allocation %mask_op {memory_space = 3, emit_dealloc} : !transform.any_op
%2 = transform.bufferization.one_shot_bufferize %arg1 {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op
}
diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
index ddb953237b008d8..7dee14f22df5d08 100644
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -32,7 +32,7 @@ func.func @tensor_pad_constant(%t: tensor<?x10xindex>, %l2: index, %h1: index,
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {emit_dealloc} : !transform.any_op
// Ensure that one linalg.fill was generated.
%fill_op = transform.select "linalg.fill" in %new : (!transform.any_op) -> !transform.any_op
@@ -67,7 +67,7 @@ func.func @tensor_pad_constant_with_custom_copy(
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 3, alloc_op = "memref.alloca", memcpy_op = "linalg.copy"}: !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 3, alloc_op = "memref.alloca", memcpy_op = "linalg.copy", emit_dealloc}: !transform.any_op
// Ensure that one linalg.fill was generated.
%fill_op = transform.select "linalg.fill" in %new : (!transform.any_op) -> !transform.any_op
@@ -110,7 +110,7 @@ func.func @tensor_pad_constant(%t: tensor<?x10xindex>, %l2: index, %h1: index,
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {emit_dealloc} : !transform.any_op
// Make sure that One-Shot Bufferize can bufferize the rest.
%4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
}
@@ -134,7 +134,7 @@ func.func @tensor_insert(%t: tensor<?x10xindex>, %idx: index, %v: index) -> tens
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
// Make sure that One-Shot Bufferize can bufferize the rest.
%4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
}
@@ -157,7 +157,7 @@ func.func @tensor_insert_into_empty(%idx: index, %v: index) -> tensor<10xindex>
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
// Make sure that One-Shot Bufferize can bufferize the rest.
%4 = transform.bufferization.one_shot_bufferize %arg1 : (!transform.any_op) -> !transform.any_op
}
@@ -174,7 +174,7 @@ transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.extract"]} in %arg1 : (!transform.any_op) -> !transform.any_op
// expected-error @below{{failed to bufferize operation}}
- %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
}
// -----
@@ -195,7 +195,7 @@ func.func @vector_mask(%t: tensor<?xf32>, %val: vector<16xf32>, %idx: index, %m0
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["vector.mask"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4} : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, emit_dealloc} : !transform.any_op
}
// -----
@@ -216,7 +216,7 @@ func.func @tensor_insert_destination(%t: tensor<?x10xindex>, %idx: index, %v: in
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
- %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only} : !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only, emit_dealloc} : !transform.any_op
}
// -----
@@ -240,5 +240,20 @@ func.func @scf_for_destination(%t: tensor<?x10xindex>, %lb: index, %ub: index, %
transform.sequence failures(propagate) {
^bb1(%arg1: !transform.any_op):
%0 = transform.structured.match ops{["scf.for"]} in %arg1 : (!transform.any_op) -> !transform.any_op
+ %2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only, emit_dealloc} : !transform.any_op
+}
+
+// -----
+
+// CHECK-LABEL: func @tensor_insert_destination_no_dealloc
+// CHECK-NOT: dealloc
+func.func @tensor_insert_destination_no_dealloc(%t: tensor<?x10xindex>, %idx: index, %v: index) -> tensor<?x10xindex> {
+ %r = tensor.insert %v into %t[%idx, %idx] : tensor<?x10xindex>
+ return %r : tensor<?x10xindex>
+}
+
+transform.sequence failures(propagate) {
+^bb1(%arg1: !transform.any_op):
+ %0 = transform.structured.match ops{["tensor.insert"]} in %arg1 : (!transform.any_op) -> !transform.any_op
%2, %new = transform.structured.bufferize_to_allocation %0 {memory_space = 4, bufferize_destination_only} : !transform.any_op
}
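For reference, a sketch of the intended follow-up (the input file name is a placeholder; the pipeline is the one referenced in the op documentation above), run after all bufferization is done to insert the missing deallocations:

  mlir-opt input.mlir --buffer-deallocation-pipeline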