[Mlir-commits] [mlir] 101d017 - [mlir][Linalg] Revisit heuristic ordering of tensor.insert_slice in comprehensive bufferize.
Nicolas Vasilache
llvmlistbot at llvm.org
Tue Sep 21 07:31:13 PDT 2021
Author: Nicolas Vasilache
Date: 2021-09-21T14:22:45Z
New Revision: 101d017a643845b537687467e3f7c2a5d963df6e
URL: https://github.com/llvm/llvm-project/commit/101d017a643845b537687467e3f7c2a5d963df6e
DIFF: https://github.com/llvm/llvm-project/commit/101d017a643845b537687467e3f7c2a5d963df6e.diff
LOG: [mlir][Linalg] Revisit heuristic ordering of tensor.insert_slice in comprehensive bufferize.
It was previously assumed that tensor.insert_slice should be bufferized first in a greedy fashion to avoid out-of-place bufferization of the large tensor. This heuristic does not hold upon further inspection.
This CL removes the special handling of such ops and adds a test that exhibits better behavior and appears in real use cases.
The only test adversely affected is an artificial test which results in a returned memref: this pattern is not allowed by comprehensive bufferization in real scenarios anyway and the offending test is deleted.
Differential Revision: https://reviews.llvm.org/D110072
Added:
Modified:
mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
index de1b9e63e8b2..1edab1e26813 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferize.cpp
@@ -2467,33 +2467,18 @@ inPlaceAnalysisFuncOpBody(FuncOp funcOp, BufferizationAliasInfo &aliasInfo,
assert(funcOp && funcOp->getNumRegions() > 0 && !funcOp.body().empty() &&
"expected a funcOp definition with a body");
- // Collect ops so we can build our own traversal.
- SmallVector<Operation *> otherOps;
- SmallVector<InsertSliceOp> insertSliceOps;
+ // Collect ops so we can build our own reverse traversal.
+ SmallVector<Operation *> ops;
funcOp.walk([&](Operation *op) {
- if (auto insertSliceOp = dyn_cast<InsertSliceOp>(op))
- return insertSliceOps.push_back(insertSliceOp);
// No tensors => no buffers.
if (none_of(op->getOperandTypes(), isaTensor) &&
none_of(op->getResultTypes(), isaTensor))
return;
- otherOps.push_back(op);
+ ops.push_back(op);
});
- // First, analyze InsertSliceOp greedily: we almost never want to bufferize
- // the tensor "inserted into" to become out-of-place. This implementation
- // does not distinguish between different InsertSliceOp. If we want
- // finer-grained behavior, we could order the InsertSliceOp with some metric.
- for (InsertSliceOp insertSliceOp : reverse(insertSliceOps)) {
- OpOperand &destOpOperand = insertSliceOp->getOpOperand(1);
- if (failed(bufferizableInPlaceAnalysis(
- destOpOperand, getInplaceableOpResult(destOpOperand), aliasInfo,
- domInfo)))
- return failure();
- }
-
// Walk ops in reverse for better interference analysis.
- for (Operation *op : reverse(otherOps)) {
+ for (Operation *op : reverse(ops)) {
for (OpOperand &opOperand : op->getOpOperands()) {
if (OpResult result = getInplaceableOpResult(opOperand))
if (result.getType().isa<TensorType>() &&
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
index a435a2539220..effb510a49b6 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
@@ -705,6 +705,53 @@ builtin.func @matmul_on_tensors(
// -----
+//===----------------------------------------------------------------------===//
+// Chain of tensor.insert_slice is better traversed in reverse order without
+// prioritizing the tensor.insert_slice ops.
+//===----------------------------------------------------------------------===//
+
+func @insert_slice_chain(
+ %v1: vector<32x90xf32>,
+ %v2: vector<30x90xf32>,
+ %arg0: tensor<62x126xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
+ %arg1: tensor<126x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = false},
+ %arg2: tensor<62x90xf32> {linalg.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, linalg.inplaceable = true})
+ -> tensor<62x90xf32> attributes {passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]}
+{
+ %c0 = constant 0 : index
+ %cst = constant 0.000000e+00 : f32
+
+ // CHECK: linalg.fill
+ // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+ %0 = linalg.fill(%cst, %arg2) : f32, tensor<62x90xf32> -> tensor<62x90xf32>
+
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_results_attr__ = ["false"]
+ // TODO: in order to have this extract_slice bufferize inplace, we need to write a range
+ // analysis and determine that intersection([0, 32)x[0, 90), [32, 62)x[0, 90)) is empty.
+ %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+ %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+ %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>
+
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+ %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>
+ // CHECK: vector.transfer_write
+ // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+ %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_results_attr__ = ["true"]
+ %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>
+
+ return %15 : tensor<62x90xf32>
+}
+
+// -----
+
//===----------------------------------------------------------------------===//
// Insert point issue cases.
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 88a209a0be66..d06c5162c1ce 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -269,34 +269,6 @@ func @insert_slice_fun_not_inplace(%A : tensor<?xf32>, %t : tensor<4xf32>)
return %r0: tensor<?xf32>
}
-// -----
-
-// CHECK-DAG: #[[$map_1d_dyn:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_slice_fun_not_inplace
-// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
-// CHECK-SAME: %[[t:[a-zA-Z0-9]*]]: memref<4xf32, #[[$map_1d_dyn]]>
-func @insert_slice_fun_not_inplace(%A : tensor<?xf32> {linalg.inplaceable = true}, %t : tensor<4xf32>)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- %f0 = constant 0.0 : f32
-
- // tensor.insert_slice is bufferized first, %A is inplaceable so we can make this inplace
- // CHECK-DAG: %[[SV_A:.*]] = memref.subview %[[A]][0] [4] [1] : memref<?xf32, {{.*}}> to memref<4xf32, {{.*}}>
- // CHECK-DAG: linalg.copy(%[[t]], %[[SV_A]]) : memref<4xf32, {{.*}}>, memref<4xf32, {{.*}}>
- %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>
-
- // fill would interfere with %r0 that is also being returned.
- // So we need to bufferize it out of place and make a new alloc.
- // CHECK-DAG: %[[ALLOC:.*]] = memref.alloc({{.*}}) {alignment = 128 : i64} : memref<?xf32>
- // CHECK: linalg.fill(%{{.*}}, %[[ALLOC]]
- %r1 = linalg.fill(%f0, %A) : f32, tensor<?xf32> -> tensor<?xf32>
-
- // CHECK: memref.dealloc %[[ALLOC]] : memref<?xf32>
- // CHECK: return %[[ALLOC]] : memref<?xf32>
- return %r1, %r0: tensor<?xf32>, tensor<?xf32>
-}
-
//===----------------------------------------------------------------------===//
// Simple loop cases
//===----------------------------------------------------------------------===//
More information about the Mlir-commits
mailing list