[Mlir-commits] [mlir] 1a2474b - [mlir][Linalg] Disable init_tensor elimination by default
Nicolas Vasilache
llvmlistbot at llvm.org
Mon Jan 10 06:23:52 PST 2022
Author: Nicolas Vasilache
Date: 2022-01-10T09:19:18-05:00
New Revision: 1a2474b786318c75cef1981eae38268f1701cf4e
URL: https://github.com/llvm/llvm-project/commit/1a2474b786318c75cef1981eae38268f1701cf4e
DIFF: https://github.com/llvm/llvm-project/commit/1a2474b786318c75cef1981eae38268f1701cf4e.diff
LOG: [mlir][Linalg] Disable init_tensor elimination by default
init_tensor elimination is arguably a pre-optimization that should be separated from comprehensive bufferization.
In any case, it is still experimental and can easily produce wrong IR that violates SSA def-use ordering.
Isolate the optimization behind a flag, separate the test cases, and add a test case that would result in wrong IR.
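For reference, the elimination can now be re-enabled explicitly via the new flag. This is a usage sketch based on the RUN lines of the added tests; "input.mlir" is a placeholder file name:

  mlir-opt input.mlir -split-input-file \
    -linalg-comprehensive-module-bufferize="allow-return-memref init-tensor-elimination"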
Differential Revision: https://reviews.llvm.org/D116936
Added:
mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-init-tensor-elimination.mlir
mlir/test/Dialect/Linalg/comprehensive-module-bufferize-init-tensor-elimination.mlir
Modified:
mlir/include/mlir/Dialect/Linalg/Passes.td
mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index 893487df4a35..b9ac21641e1b 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -53,7 +53,11 @@ def LinalgComprehensiveModuleBufferize :
"Use stack allocations for memrefs (for testing purposes only)">,
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
- "Analyze ops in random order with a given seed (fuzzer)">
+ "Analyze ops in random order with a given seed (fuzzer)">,
+ Option<"initTensorElimination", "init-tensor-elimination", "bool",
+ /*default=*/"false",
+ "(Experimental) Try to eliminate init_tensor operations that are "
+ "anchored at an insert_slice op">,
];
let constructor = "mlir::createLinalgComprehensiveModuleBufferizePass()";
}
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 21d7c4e62a45..6bc3ece8693e 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -92,8 +92,10 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
options->printConflicts = printConflicts;
// Enable InitTensorOp elimination.
- options->addPostAnalysisStep<
- linalg_ext::InsertSliceAnchoredInitTensorEliminationStep>();
+ if (initTensorElimination) {
+ options->addPostAnalysisStep<
+ linalg_ext::InsertSliceAnchoredInitTensorEliminationStep>();
+ }
if (!allowReturnMemref)
options->addPostAnalysisStep<scf_ext::AssertDestinationPassingStyle>();
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-init-tensor-elimination.mlir
new file mode 100644
index 000000000000..b2116e5d6d32
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis-init-tensor-elimination.mlir
@@ -0,0 +1,55 @@
+// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="test-analysis-only allow-return-memref init-tensor-elimination" -split-input-file | FileCheck %s
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// InitTensorOp elimination
+//===----------------------------------------------------------------------===//
+
+// CHECK-LABEL: func @buffer_forwarding_conflict
+func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
+ %cst = arith.constant 0.000000e+00 : f32
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
+ // Instead of allocating, share buffer with some inplace bufferization?
+ %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
+
+ // CHECK: linalg.fill
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
+ %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
+
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
+ %2 = tensor.insert_slice %1 into %arg0[0] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
+ %3 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [-1, 0]
+ return %2, %3 : tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @buffer_forwarding_no_conflict
+func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
+ %cst = arith.constant 0.000000e+00 : f32
+ // CHECK: tensor.extract_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
+ // Instead of allocating, share buffer with some inplace bufferization?
+ %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
+
+ // CHECK: linalg.fill
+ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
+ %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
+
+ // CHECK: tensor.insert_slice
+ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
+ %2 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: return
+ // CHECK-SAME: __equivalent_func_args__ = [0, 0]
+ return %2, %2 : tensor<?xf32>, tensor<?xf32>
+}
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
index b51cd921e3f0..46f442b8d297 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-analysis.mlir
@@ -1249,60 +1249,6 @@ func @non_reading_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
// -----
-//===----------------------------------------------------------------------===//
-// InitTensorOp elimination
-//===----------------------------------------------------------------------===//
-
-// CHECK-LABEL: func @buffer_forwarding_conflict
-func @buffer_forwarding_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
- %cst = arith.constant 0.000000e+00 : f32
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]
- // Instead of allocating, share buffer with some inplace bufferization?
- %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
-
- // CHECK: linalg.fill
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
- %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
-
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
- %2 = tensor.insert_slice %1 into %arg0[0] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
-
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
- %3 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [-1, 0]
- return %2, %3 : tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @buffer_forwarding_no_conflict
-func @buffer_forwarding_no_conflict(%arg0: tensor<?xf32> {linalg.inplaceable = true}, %arg1: index) -> (tensor<?xf32>, tensor<?xf32>) {
- %cst = arith.constant 0.000000e+00 : f32
- // CHECK: tensor.extract_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
- // Instead of allocating, share buffer with some inplace bufferization?
- %0 = linalg.init_tensor [%arg1] : tensor<?xf32>
-
- // CHECK: linalg.fill
- // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
- %1 = linalg.fill(%cst, %0) : f32, tensor<?xf32> -> tensor<?xf32>
-
- // CHECK: tensor.insert_slice
- // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
- %2 = tensor.insert_slice %1 into %arg0[42] [%arg1] [1] : tensor<?xf32> into tensor<?xf32>
-
- // CHECK: return
- // CHECK-SAME: __equivalent_func_args__ = [0, 0]
- return %2, %2 : tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
//===----------------------------------------------------------------------===//
// scf.if cases
//===----------------------------------------------------------------------===//
@@ -1764,3 +1710,26 @@ func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf
}
return %1: tensor<?xf32>
}
+
+// -----
+
+//===----------------------------------------------------------------------===//
+// InitTensorOp elimination would produce SSA violations for the example below.
+//===----------------------------------------------------------------------===//
+
+func @depthwise_conv_1d_nwc_wc(%arg0: index, %arg1: index, %arg2: tensor<8x18x32xf32>)
+ -> tensor<?x1x6x8xf32> {
+ %c0 = arith.constant 0 : index
+ %c32 = arith.constant 32 : index
+ %c8 = arith.constant 8 : index
+ %0 = linalg.init_tensor [4, 1, 6, 8] : tensor<4x1x6x8xf32>
+ %1 = tensor.cast %0 : tensor<4x1x6x8xf32> to tensor<?x1x6x8xf32>
+ %2 = linalg.init_tensor [1, 6, 8] : tensor<1x6x8xf32>
+ %3 = scf.for %arg3 = %c0 to %c32 step %c8 iter_args(%arg4 = %1) -> (tensor<?x1x6x8xf32>) {
+ %4 = affine.apply affine_map<(d0) -> (d0 ceildiv 8)>(%arg3)
+ %5 = tensor.insert_slice %2 into %arg4[%4,0, 0, 0] [1, 1, 6, 8] [1, 1, 1, 1] :
+ tensor<1x6x8xf32> into tensor<?x1x6x8xf32>
+ scf.yield %5 : tensor<?x1x6x8xf32>
+ }
+ return %3 : tensor<?x1x6x8xf32>
+}
\ No newline at end of file
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-init-tensor-elimination.mlir
new file mode 100644
index 000000000000..d30ab5ac4f9a
--- /dev/null
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-init-tensor-elimination.mlir
@@ -0,0 +1,64 @@
+// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-memref init-tensor-elimination" -split-input-file | FileCheck %s
+
+// -----
+
+// CHECK: func @buffer_forwarding_conflict(
+// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
+// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
+func @buffer_forwarding_conflict(
+ %t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
+ %sz: index)
+ -> (tensor<?xf32>, tensor<?xf32>)
+{
+ %f0 = arith.constant 0.0: f32
+ // Alloc is needed for the **first** insert_slice (due to backward traversal during analysis).
+ // CHECK: %[[DIM:.*]] = memref.dim %[[FUNC_ARG]]
+ // This allocs the whole dim to allow for a full clone of t.
+ // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]])
+
+ // init_tensor itself does not alloc but forwards to the **second**
+ // insert_slice. InitTensorOp replaces the init_tensor with an out-of-place
+ // extract_slice.
+ // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]])
+ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
+ %a = linalg.init_tensor[%sz] : tensor<?xf32>
+
+ // CHECK: linalg.fill({{.*}}, %[[EXTRACT_SLICE_ALLOC]]) : f32, memref<?xf32>
+ %f = linalg.fill(%f0, %a) : f32, tensor<?xf32> -> tensor<?xf32>
+
+ // CHECK: linalg.copy(%[[FUNC_ARG]], %[[ALLOC]]) : memref<?xf32>, memref<?xf32>
+ // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref<?xf32> to memref<?xf32>
+ // CHECK: linalg.copy(%[[EXTRACT_SLICE_ALLOC]], %[[SV0_ALLOC]]) : memref<?xf32>, memref<?xf32>
+ %r0 = tensor.insert_slice %f into %t[0][%sz][1]: tensor<?xf32> into tensor<?xf32>
+
+ // CHECK: linalg.copy(%[[EXTRACT_SLICE_ALLOC]], %[[T_SUBVIEW]])
+ %r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor<?xf32> into tensor<?xf32>
+
+ return %r0, %r1: tensor<?xf32>, tensor<?xf32>
+}
+
+// -----
+
+// CHECK: func @buffer_forwarding_no_conflict(
+// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
+// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
+func @buffer_forwarding_no_conflict(
+ %t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
+ %sz: index)
+ -> (tensor<?xf32>)
+{
+ %f0 = arith.constant 0.0: f32
+
+ // init_tensor itself does not alloc but forwards to the insert_slice.
+ // InitTensorOp replaces the init_tensor with an inplace extract_slice.
+ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
+ %a = linalg.init_tensor[%sz] : tensor<?xf32>
+
+ // CHECK: linalg.fill({{.*}}, %[[T_SUBVIEW]]) : f32, memref<?xf32
+ %f = linalg.fill(%f0, %a) : f32, tensor<?xf32> -> tensor<?xf32>
+
+ // Self-copy canonicalizes away later.
+ %r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor<?xf32> into tensor<?xf32>
+
+ return %r1: tensor<?xf32>
+}
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 4d6341501682..4501a3a075dd 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -868,69 +868,6 @@ func @dominance_violation_bug_1(
// -----
-// CHECK: func @buffer_forwarding_conflict(
-// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
-// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
-func @buffer_forwarding_conflict(
- %t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
- %sz: index)
- -> (tensor<?xf32>, tensor<?xf32>)
-{
- %f0 = arith.constant 0.0: f32
- // Alloc is needed for the **first** insert_slice (due to backward traversal during analysis).
- // CHECK: %[[DIM:.*]] = memref.dim %[[FUNC_ARG]]
- // This allocs the whole dim to allow for a full clone of t.
- // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DIM]])
-
- // init_tensor itself does not alloc but forwards to the **second**
- // insert_slice. InitTensorOp replaces the init_tensor with an out-of-place
- // extract_slice.
- // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]])
- // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
- %a = linalg.init_tensor[%sz] : tensor<?xf32>
-
- // CHECK: linalg.fill({{.*}}, %[[EXTRACT_SLICE_ALLOC]]) : f32, memref<?xf32>
- %f = linalg.fill(%f0, %a) : f32, tensor<?xf32> -> tensor<?xf32>
-
- // CHECK: linalg.copy(%[[FUNC_ARG]], %[[ALLOC]]) : memref<?xf32>, memref<?xf32>
- // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref<?xf32> to memref<?xf32>
- // CHECK: linalg.copy(%[[EXTRACT_SLICE_ALLOC]], %[[SV0_ALLOC]]) : memref<?xf32>, memref<?xf32>
- %r0 = tensor.insert_slice %f into %t[0][%sz][1]: tensor<?xf32> into tensor<?xf32>
-
- // CHECK: linalg.copy(%[[EXTRACT_SLICE_ALLOC]], %[[T_SUBVIEW]])
- %r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor<?xf32> into tensor<?xf32>
-
- return %r0, %r1: tensor<?xf32>, tensor<?xf32>
-}
-
-// -----
-
-// CHECK: func @buffer_forwarding_no_conflict(
-// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
-// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
-func @buffer_forwarding_no_conflict(
- %t: tensor<?xf32> {linalg.buffer_layout = affine_map<(d0) -> (d0)>, linalg.inplaceable = true},
- %sz: index)
- -> (tensor<?xf32>)
-{
- %f0 = arith.constant 0.0: f32
-
- // init_tensor itself does not alloc but forwards to the insert_slice.
- // InitTensorOp replaces the init_tensor with an inplace extract_slice.
- // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
- %a = linalg.init_tensor[%sz] : tensor<?xf32>
-
- // CHECK: linalg.fill({{.*}}, %[[T_SUBVIEW]]) : f32, memref<?xf32
- %f = linalg.fill(%f0, %a) : f32, tensor<?xf32> -> tensor<?xf32>
-
- // Self-copy canonicalizes away later.
- %r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor<?xf32> into tensor<?xf32>
-
- return %r1: tensor<?xf32>
-}
-
-// -----
-
// CHECK-LABEL: func @scf_if_inplace(
// CHECK-SAME: %[[cond:.*]]: i1, %[[t1:.*]]: memref<?xf32{{.*}}>, %[[v:.*]]: vector
func @scf_if_inplace(%cond: i1,