[Mlir-commits] [mlir] 79f1159 - [mlir][bufferize] Avoid tensor copies when the data is not read
Matthias Springer
llvmlistbot at llvm.org
Fri Jun 10 01:26:14 PDT 2022
Author: Matthias Springer
Date: 2022-06-10T10:26:07+02:00
New Revision: 79f115911eebbd3c8f7758135e2533e041b0c2a3
URL: https://github.com/llvm/llvm-project/commit/79f115911eebbd3c8f7758135e2533e041b0c2a3
DIFF: https://github.com/llvm/llvm-project/commit/79f115911eebbd3c8f7758135e2533e041b0c2a3.diff
LOG: [mlir][bufferize] Avoid tensor copies when the data is not read
There are various shortcuts in `BufferizationState::getBuffer` that avoid a buffer copy when we just need an allocation (and no initialization). This change adds those shortcuts to the TensorCopyInsertion pass, so that `getBuffer` can be simplified in a subsequent change.
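For illustration, a minimal sketch of the intended effect, modeled on the `do_not_copy_when_overwritten` test case added below; the `copy(%t)` operand form shown for the old output is an assumption about the printed IR and may differ:

  // Input: %t is returned and also used as a linalg "outs" operand, so the
  // operand bufferizes out of place. The generic op overwrites the buffer
  // without reading it, so canOmitTensorCopy returns true.
  %r = linalg.generic {
    indexing_maps = [affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]}
    outs(%t : tensor<5xf32>) {
  ^bb0(%arg0 : f32) :
    linalg.yield %f : f32
  } -> tensor<5xf32>

  // TensorCopyInsertion before this change: allocate and copy the operand.
  //   %alloc = bufferization.alloc_tensor() copy(%t) {escape = false} : tensor<5xf32>
  // TensorCopyInsertion with this change: allocate only, contents undefined.
  //   %alloc = bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
  //   %r = linalg.generic {...} outs(%alloc : tensor<5xf32>) {...} -> tensor<5xf32>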
Differential Revision: https://reviews.llvm.org/D126821
Added:
Modified:
mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index d80ba8ff56fd5..01cda7d5aa1f1 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -356,6 +356,10 @@ class AnalysisState {
/// an alias. Return false if the op is not bufferizable.
bool bufferizesToAliasOnly(OpOperand &opOperand) const;
+ /// Return true if a copy can always be avoided when allocating a new tensor
+ /// for the given OpOperand.
+ bool canOmitTensorCopy(OpOperand &opOperand) const;
+
/// Return true if the given value is read by an op that bufferizes to a
/// memory read. Also takes into account ops that create an alias but do not
/// read by themselves (e.g., ExtractSliceOp).
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index eb9b5e4cca8be..e1241e2ef8e77 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -10,6 +10,7 @@
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/AsmState.h"
#include "mlir/IR/BlockAndValueMapping.h"
#include "mlir/IR/BuiltinOps.h"
@@ -42,12 +43,40 @@ using namespace bufferization;
constexpr const ::llvm::StringLiteral
bufferization::BufferizableOpInterface::kInplaceableAttrName;
+/// Create an AllocTensorOp for the given shaped value. Only ranked tensors are
+/// supported at the moment. If `copy` is set, the shaped value is copied.
+/// Otherwise, a tensor with undefined contents is allocated.
+static Value allocateTensorForShapedValue(OpBuilder &b, Location loc,
+ Value shapedValue, bool escape,
+ bool copy = true) {
+ auto tensorType = shapedValue.getType().dyn_cast<RankedTensorType>();
+ assert(tensorType && "only RankedTensorType supported at the moment");
+ Value alloc;
+ if (!copy) {
+ // No copy needed: Just allocate.
+ SmallVector<Value> dynamicSizes;
+ for (int64_t i = 0; i < tensorType.getRank(); ++i)
+ if (tensorType.isDynamicDim(i))
+ dynamicSizes.push_back(b.create<tensor::DimOp>(loc, shapedValue, i));
+ alloc = b.create<AllocTensorOp>(loc, tensorType, dynamicSizes,
+ /*copy=*/Value(), escape);
+ } else {
+ // Allocate and copy.
+ alloc = b.create<AllocTensorOp>(loc, tensorType,
+ /*dynamicSizes=*/ValueRange(), shapedValue,
+ escape);
+ }
+ return alloc;
+}
+
LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
RewriterBase &rewriter, const AnalysisState &state) {
OpBuilder::InsertionGuard g(rewriter);
Operation *op = getOperation();
SmallVector<OpOperand *> outOfPlaceOpOperands;
+ DenseSet<OpOperand *> copiedOpOperands;
SmallVector<OpResult> outOfPlaceOpResults;
+ DenseSet<OpResult> copiedOpResults;
// Find all out-of-place OpOperands.
for (OpOperand &opOperand : op->getOpOperands()) {
@@ -69,32 +98,36 @@ LogicalResult BufferizableOpInterface::resolveTensorOpOperandConflicts(
// be smaller than the OpOperand (e.g., in the case of an extract_slice,
// where the result is usually a smaller part of the source).
outOfPlaceOpResults.push_back(aliasingOpResults.front());
+ if (!state.canOmitTensorCopy(opOperand))
+ copiedOpResults.insert(aliasingOpResults.front());
} else {
// In all other cases, make a copy of the OpOperand.
outOfPlaceOpOperands.push_back(&opOperand);
+ if (!state.canOmitTensorCopy(opOperand))
+ copiedOpOperands.insert(&opOperand);
}
}
// Insert copies of OpOperands.
rewriter.setInsertionPoint(op);
for (OpOperand *opOperand : outOfPlaceOpOperands) {
- auto tensorType = opOperand->get().getType().cast<RankedTensorType>();
SmallVector<OpResult> aliasingOpResults =
state.getAliasingOpResult(*opOperand);
bool escape = llvm::any_of(
aliasingOpResults, [&](Value v) { return state.isTensorYielded(v); });
- Value copy = rewriter.create<AllocTensorOp>(
- op->getLoc(), tensorType, ValueRange(), opOperand->get(), escape);
+ Value copy = allocateTensorForShapedValue(
+ rewriter, op->getLoc(), opOperand->get(), escape,
+ copiedOpOperands.contains(opOperand));
rewriter.updateRootInPlace(op, [&]() { opOperand->set(copy); });
}
// Insert copies of OpResults.
rewriter.setInsertionPointAfter(op);
for (OpResult opResult : outOfPlaceOpResults) {
- auto tensorType = opResult.getType().cast<RankedTensorType>();
bool escape = state.isTensorYielded(opResult);
- Value copy = rewriter.create<AllocTensorOp>(op->getLoc(), tensorType,
- ValueRange(), opResult, escape);
+ Value copy =
+ allocateTensorForShapedValue(rewriter, op->getLoc(), opResult, escape,
+ copiedOpResults.count(opResult));
SmallVector<OpOperand *> uses = llvm::to_vector(llvm::map_range(
opResult.getUses(), [](OpOperand &use) { return &use; }));
for (OpOperand *use : uses) {
@@ -313,6 +346,27 @@ AnalysisState::AnalysisState(const BufferizationOptions &options)
fn(*this);
}
+bool AnalysisState::canOmitTensorCopy(OpOperand &opOperand) const {
+ // Do not copy if the tensor has undefined contents.
+ if (hasUndefinedContents(&opOperand))
+ return true;
+
+ // Do not copy if the buffer of the tensor is entirely overwritten (with
+ // values that do not depend on the old tensor).
+ if (bufferizesToMemoryWrite(opOperand) && !bufferizesToMemoryRead(opOperand))
+ return true;
+
+ // Do not copy if the tensor is never read.
+ SmallVector<OpResult> aliasingOpResults = getAliasingOpResult(opOperand);
+ if (!bufferizesToMemoryRead(opOperand) &&
+ llvm::none_of(aliasingOpResults,
+ [&](OpResult opResult) { return isValueRead(opResult); }))
+ return true;
+
+ // Default: Cannot omit the copy.
+ return false;
+}
+
// bufferization.to_memref is not allowed to change the rank.
static void ensureToMemrefOpIsValid(Value tensor, Type memrefType) {
#ifndef NDEBUG
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
index b54e07e5b1331..cb6977c013a4f 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -25,3 +25,54 @@ func.func @return_alloc_tensor() -> (tensor<5xf32>) {
%0 = bufferization.alloc_tensor() : tensor<5xf32>
return %0 : tensor<5xf32>
}
+
+// -----
+
+// CHECK-LABEL: func @do_not_copy_undefined_tensor
+func.func @do_not_copy_undefined_tensor(%f: f32, %idx: index)
+ -> (tensor<5xf32>, tensor<5xf32>)
+{
+ // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+ // The second alloc_tensor should not have a copy operand.
+ // CHECK: bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+ %0 = bufferization.alloc_tensor() : tensor<5xf32>
+ %1 = tensor.insert %f into %0[%idx] : tensor<5xf32>
+ return %0, %1 : tensor<5xf32>, tensor<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @do_not_copy_when_overwritten
+func.func @do_not_copy_when_overwritten(%t: tensor<5xf32>, %f: f32)
+ -> (tensor<5xf32>, tensor<5xf32>)
+{
+ // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {escape = false} : tensor<5xf32>
+ // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
+ %r = linalg.generic {
+ indexing_maps = [affine_map<(d0) -> (d0)>],
+ iterator_types = ["parallel"]}
+ outs(%t : tensor<5xf32>) {
+ ^bb0(%arg0 : f32) :
+ linalg.yield %f : f32
+ } -> tensor<5xf32>
+ return %t, %r : tensor<5xf32>, tensor<5xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @do_not_copy_when_result_not_read
+func.func @do_not_copy_when_result_not_read(%t: tensor<5xf32>, %f: f32)
+ -> (tensor<3xf32>)
+{
+ %0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
+ // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {escape = false} : tensor<3xf32>
+ // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
+ %r = linalg.generic {
+ indexing_maps = [affine_map<(d0) -> (d0)>],
+ iterator_types = ["parallel"]}
+ outs(%0 : tensor<3xf32>) {
+ ^bb0(%arg0 : f32) :
+ linalg.yield %f : f32
+ } -> tensor<3xf32>
+ return %r : tensor<3xf32>
+}