[Mlir-commits] [mlir] ad2e635 - [mlir][linalg][bufferize] Remove always-aliasing-with-dest option
Matthias Springer
llvmlistbot at llvm.org
Wed Jun 15 00:57:08 PDT 2022
Author: Matthias Springer
Date: 2022-06-15T09:56:53+02:00
New Revision: ad2e635faea75cbb15f108dc219c79bdd5638023
URL: https://github.com/llvm/llvm-project/commit/ad2e635faea75cbb15f108dc219c79bdd5638023
DIFF: https://github.com/llvm/llvm-project/commit/ad2e635faea75cbb15f108dc219c79bdd5638023.diff
LOG: [mlir][linalg][bufferize] Remove always-aliasing-with-dest option
This flag was introduced for a use case in IREE, but it is no longer needed.
Differential Revision: https://reviews.llvm.org/D126965
Added:
Modified:
mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
Removed:
mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
################################################################################
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 7c84b6fd3d059..72ec758606d92 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -293,20 +293,6 @@ struct BufferizationOptions {
/// For debugging only. Should be used together with `testAnalysisOnly`.
bool printConflicts = false;
- /// If set to `true`, `getAliasingOpOperand` will return the corresponding
- /// "out"/"dest" OpOperand for every op that has the notion of an "out"/"dest"
- /// operand. I.e., the aliasing OpOperand of the i-th tensor OpResult is
- /// usually the i-th "out" tensor OpOperand. This is in line with
- /// destination-passing style and the default behavior. Op interface
- /// implementations must follow this contract to avoid surprising behavior.
- ///
- /// If set to `false`, BufferizableOpInterface implementations can try to be
- /// smart and choose to alias with "in" operands or other operands. E.g., the
- /// result of a `linalg.generic` op could bufferize in-place with an "in"
- /// OpOperand if the corresponding "out" operand is not used within the
- /// computation. Whether this pays off or not can be very input IR-specific.
- bool alwaysAliasingWithDest = true;
-
/// Buffer alignment for new memory allocations.
unsigned int bufferAlignment = 128;
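For context, the destination-passing-style contract that is now the only behavior looks like this in IR (a minimal sketch adapted from the tests removed below; %t1 and %t2 are "in" operands, %t3 is the "out" operand, all names illustrative):

    %r = linalg.generic {
        indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                         affine_map<(d0, d1) -> (d1)>,
                         affine_map<(d0, d1) -> (d0, d1)>],
        iterator_types = ["parallel", "parallel"]}
        ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
        outs(%t3 : tensor<?x?xf32>) {
      ^bb0(%a : f32, %b : f32, %c : f32):
        // %c (from the "out" tensor %t3) is not used in the payload.
        %add = arith.addf %a, %b : f32
        linalg.yield %add : f32
    } -> tensor<?x?xf32>

With the option gone, %r may bufferize in-place only with %t3. The removed `false` mode would instead have allowed %r to alias the identically-indexed "in" operand %t1, because %t3 is unused in the payload.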
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 16426744cc77a..1fa4090a0e133 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -270,10 +270,6 @@ def OneShotBufferize : Pass<"one-shot-bufferize", "ModuleOp"> {
Option<"allowUnknownOps", "allow-unknown-ops", "bool",
/*default=*/"false",
"Allows unknown (not bufferizable) ops in the input IR.">,
- Option<"alwaysAliasingWithDest", "always-aliasing-with-dest", "bool",
- /*default=*/"true",
- "Tensor OpResult cannot bufferize inplace OpOperands other than "
- "out/dest OpOperands (if the op has such operands; experimental)">,
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
"Test only: Analyze ops in random order with a given seed (fuzzer)">,
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index f3a8d5306de79..6b6eaf15c55ac 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -182,7 +182,6 @@ struct OneShotBufferizePass
// pass.
opt.allowReturnAllocs = allowReturnAllocs;
opt.allowUnknownOps = allowUnknownOps;
- opt.alwaysAliasingWithDest = alwaysAliasingWithDest;
opt.analysisFuzzerSeed = analysisFuzzerSeed;
opt.createDeallocs = createDeallocs;
opt.functionBoundaryTypeConversion =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
index a924e8c8c2640..bccae3b8ba69b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -86,69 +86,6 @@ static LogicalResult bufferizeLinalgOp(RewriterBase &rewriter, LinalgOp op,
return success();
}
-/// Linalg OpResults usually bufferize inplace with their tied (output)
-/// OpOperands. However, if an output OpOperand is not used in the computation,
-/// it is better to bufferize inplace with an actually used input OpOperand;
-/// less memory will be touched that way.
-///
-/// Example:
-/// O(i, j) = A(i, j) + B(j) --> bufferizes inplace to: A(i, j) += B(j)
-///
-/// O(i, j) = A(j, i) + B(j) --> cannot bufferize inplace with A because
-/// indexing maps are not identical
-///
-/// O(i, j) += A(i, j) + B(j) --> Output is used in computation.
-/// This could bufferize inplace with A:
-/// A(i, j) += O(i, j) + B(j)
-/// However, we choose to bufferize inplace with O here, as there is no clear
-/// benefit of choosing A. TODO: We may want to consider both options and make
-/// an informed decision during analysis in the future.
-static DenseMap<OpOperand *, OpResult> computeAliasingPairs(LinalgOp op) {
- DenseMap<OpOperand *, OpResult> mapping;
- for (OpResult opResult : op->getOpResults()) {
- OpOperand *tiedOperand =
- op.getOutputTensorOperands()[opResult.getResultNumber()];
- AffineMap outputIndexingMap = op.getTiedIndexingMap(tiedOperand);
- bool onlyParallelIterators = op.getNumParallelLoops() == op.getNumLoops();
- bool tiedOperandUsed = op.payloadUsesValueFromOperand(tiedOperand);
-
- // If the output arg is used in the computation or at least one iterator is
- // not parallel, try to bufferize inplace with the corresponding output
- // tensor.
- if (tiedOperandUsed || !onlyParallelIterators) {
- mapping[tiedOperand] = opResult;
- continue;
- }
-
- // Otherwise, try to bufferize inplace with one of the inputs.
- OpOperand *chosenOperand = nullptr;
- for (OpOperand *opOperand : op.getInputTensorOperands()) {
- if (opOperand->get().getType() != opResult.getType())
- continue;
- if (!op.payloadUsesValueFromOperand(opOperand))
- continue;
- if (op.getTiedIndexingMap(opOperand) != outputIndexingMap)
- continue;
- // Make sure that no other OpResult already aliases with this OpOperand.
- if (mapping.count(opOperand))
- continue;
- assert(op.getTiedIndexingMap(opOperand).isProjectedPermutation() &&
- "expected projected permutation");
- chosenOperand = opOperand;
- break;
- }
-
- // No suitable input tensor found. Use output tensor.
- // TODO: This OpResult could bufferize inplace with OpOperands that have the
- // correct type, even if they are not used inside the computation.
- if (!chosenOperand)
- chosenOperand = tiedOperand;
-
- mapping[chosenOperand] = opResult;
- }
- return mapping;
-}
-
/// Bufferization of linalg.generic. Replace with a new linalg.generic that
/// operates entirely on memrefs.
template <typename OpTy>
@@ -174,37 +111,18 @@ struct LinalgOpInterface
const AnalysisState &state) const {
auto genericOp = cast<linalg::LinalgOp>(op);
- // By default, the i-th OpResult may alias with the i-th "out" tensor.
- if (state.getOptions().alwaysAliasingWithDest)
- return {genericOp.getOutputOperand(opResult.getResultNumber())};
-
- // We can try to be smart and alias in-place with an "in" tensor if the
- // corresponding "out" tensor is not used in the computation.
- // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
- DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
- for (OpOperand *opOperand : genericOp.getInputAndOutputOperands())
- if (pairs[opOperand] == opResult)
- return {opOperand};
- return {};
+ // The i-th OpResult may alias with the i-th "out" tensor.
+ return {genericOp.getOutputOperand(opResult.getResultNumber())};
}
SmallVector<OpResult> getAliasingOpResult(Operation *op, OpOperand &opOperand,
const AnalysisState &state) const {
auto genericOp = cast<linalg::LinalgOp>(op);
- // By default, the i-th "out" tensor may alias with the i-th OpResult.
- if (state.getOptions().alwaysAliasingWithDest) {
- if (genericOp.isOutputTensor(&opOperand))
- return {genericOp.getTiedOpResult(&opOperand)};
- return {};
- }
-
- // We can try to be smart. See comment in `getAliasingOpOperand`.
- // Aliasing OpOperand/OpResult pairs are computed by `computeAliasingPairs`.
- DenseMap<OpOperand *, OpResult> pairs = computeAliasingPairs(genericOp);
- if (!pairs.count(&opOperand))
- return {};
- return {pairs[&opOperand]};
+ // The i-th "out" tensor may alias with the i-th OpResult.
+ if (genericOp.isOutputTensor(&opOperand))
+ return {genericOp.getTiedOpResult(&opOperand)};
+ return {};
}
BufferRelation bufferRelation(Operation *op, OpResult opResult,
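A hedged usage sketch of the simplified contract (this assumes the `AnalysisState::getAliasingOpOperand` helper matching the interface method above; the loop itself is illustrative and not part of this commit):

    // After this change, the aliasing relation for linalg ops is purely
    // positional: OpResult #i may alias only the i-th "out" OpOperand.
    auto linalgOp = cast<linalg::LinalgOp>(op);
    for (OpResult opResult : linalgOp->getOpResults()) {
      SmallVector<OpOperand *> aliasing = state.getAliasingOpOperand(opResult);
      assert(aliasing.size() == 1 &&
             aliasing.front() ==
                 linalgOp.getOutputOperand(opResult.getResultNumber()) &&
             "expected positional out-operand aliasing");
    }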
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
deleted file mode 100644
index 6d475bac61a08..0000000000000
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-aliasing-in.mlir
+++ /dev/null
@@ -1,75 +0,0 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries allow-return-allocs always-aliasing-with-dest=0" -split-input-file | FileCheck %s
-
-// CHECK-LABEL: func @linalg_op_bufferizes_inplace_with_input
-// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func.func @linalg_op_bufferizes_inplace_with_input(
- %t1: tensor<?x?xf32> {bufferization.writable = true},
- %t2: tensor<?xf32> {bufferization.writable = false},
- %t3: tensor<?x?xf32> {bufferization.writable = false},
- %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t1]] : {{.*}})
- %r = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d1)>,
- affine_map<(d0, d1)-> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%t3 : tensor<?x?xf32>) {
- ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
- %add = arith.addf %arg0, %arg1 : f32
- linalg.yield %add : f32
- } -> tensor<?x?xf32>
- return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_bufferizes_out_of_place_with_input
-// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func.func @linalg_op_bufferizes_out_of_place_with_input(
- %t1: tensor<?x?xf32> {bufferization.writable = false},
- %t2: tensor<?xf32> {bufferization.writable = false},
- %t3: tensor<?x?xf32> {bufferization.writable = false},
- %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
- // CHECK: %[[alloc:.*]] = memref.alloc
- // CHECK: memref.copy %[[t1]], %[[alloc]]
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[alloc]] : {{.*}})
- %r = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
- affine_map<(d0, d1) -> (d1)>,
- affine_map<(d0, d1)-> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%t3 : tensor<?x?xf32>) {
- ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
- %add = arith.addf %arg0, %arg1 : f32
- linalg.yield %add : f32
- } -> tensor<?x?xf32>
- // CHECK: return %[[alloc]]
- return %r : tensor<?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @linalg_op_output_cannot_alias_with_input
-// CHECK-SAME: %[[t1:.*]]: memref<?x?xf32, #{{.*}}>, %[[t2:.*]]: memref<?xf32, #{{.*}}>, %[[t3:.*]]: memref<?x?xf32, #{{.*}}>
-func.func @linalg_op_output_cannot_alias_with_input(
- %t1: tensor<?x?xf32> {bufferization.writable = true},
- %t2: tensor<?xf32> {bufferization.writable = false},
- %t3: tensor<?x?xf32> {bufferization.writable = true},
- %s1: index, %s2: index, %cst: f32) -> tensor<?x?xf32> {
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
- %r = linalg.generic {
- indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>,
- affine_map<(d0, d1) -> (d1)>,
- affine_map<(d0, d1)-> (d0, d1)>],
- iterator_types = ["parallel", "parallel"]}
- ins(%t1, %t2 : tensor<?x?xf32>, tensor<?xf32>)
- outs(%t3 : tensor<?x?xf32>) {
- ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
- %add = arith.addf %arg0, %arg1 : f32
- linalg.yield %add : f32
- } -> tensor<?x?xf32>
- return %r : tensor<?x?xf32>
-}
-
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
deleted file mode 100644
index 1d9d066f45855..0000000000000
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-aliasing-in.mlir
+++ /dev/null
@@ -1,83 +0,0 @@
-// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs always-aliasing-with-dest=0" -split-input-file | FileCheck %s
-
-// This is a test case for alwaysAliasingWithDest = 0. In that case, an OpResult
-// may bufferize in-place with an "in" OpOperand or any non-"out" OpOperand.
-
-
-#accesses = [
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>
-]
-#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @linalg_op_same_out_tensors(
-func.func @linalg_op_same_out_tensors(
- %t1: tensor<?xf32> {bufferization.writable = true},
-// CHECK-SAME: bufferization.access = "read-write"
- %t2: tensor<?xf32> {bufferization.writable = true})
-// CHECK-SAME: bufferization.access = "write"
- -> (tensor<?xf32>, tensor<?xf32>){
-
- // %1 and %2 are not used in the computation, so the two OpResults do not
- // necessarily have to bufferize in-place with the two "out" OpOperands. They
- // bufferize in-place with the first and second OpOperand (one of which is an
- // "in" OpOperand).
- // CHECK: linalg.generic
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]
- %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
- outs (%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
- ^bb(%0: f32, %1: f32, %2 : f32) :
- linalg.yield %0, %0 : f32, f32
- } -> (tensor<?xf32>, tensor<?xf32>)
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, 1]
- return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-#accesses = [
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>,
- affine_map<(i) -> (i)>
-]
-#trait = {
- indexing_maps = #accesses,
- iterator_types = ["parallel"]
-}
-
-// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
-func.func @linalg_op_same_out_tensors_2(
- %t1: tensor<?xf32> {bufferization.writable = true},
-// CHECK-SAME: bufferization.access = "read-write"
- %t2: tensor<?xf32> {bufferization.writable = true})
-// CHECK-SAME: bufferization.access = "write"
- -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){
-
- // %1, %2 and %3 are not used in the computation, so the three OpResults do
- // not necessarily have to bufferize in-place with the three "out" OpOperands.
- // They bufferize in-place with the first, second and third OpOperand (one of
- // which is an "in" OpOperand).
- // In contrast to the previous test case, two of the chosen OpOperands are the
- // same (aliasing) SSA value, which is why one of them must bufferize
- // out-of-place.
- // CHECK: linalg.generic
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true", "false"]
- %o:3 = linalg.generic #trait
- ins(%t1 : tensor<?xf32>)
- outs (%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
- ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
- linalg.yield %0, %0, %0 : f32, f32, f32
- } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, 1, -1]
- return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
-}
-