[Mlir-commits] [mlir] [mlir][bufferization] Remove `resolveUsesInRepetitiveRegions` (PR #67927)
Matthias Springer
llvmlistbot at llvm.org
Sun Oct 1 09:30:04 PDT 2023
https://github.com/matthias-springer created https://github.com/llvm/llvm-project/pull/67927
The bufferization analysis has been improved over the last few months, and this workaround is no longer needed.
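For context, the removed helper eagerly copied tensors that a loop-like op used both as an operand and directly inside one of its repetitive regions. Reproducing the example from the doc comment of the deleted function, IR such as

  %r = scf.for ... iter_args(%t = %tensor) -> tensor<?xf32> {
    "some_use"(%tensor)
    ...
  }

was rewritten during preprocessing to

  %tensor_copy = bufferization.alloc_tensor copy(%tensor)
  %r = scf.for ... iter_args(%t = %tensor) -> tensor<?xf32> {
    "some_use"(%tensor_copy)
    ...
  }

The One-Shot Bufferize analysis now resolves such uses itself, so the unconditional pre-copy is unnecessary. As the updated FileCheck expectations below show, this also avoids some allocations and copies (e.g., the hoisted memref<128x192xf32> alloc in the Linalg matmul test shrinks to memref<8x16xf32>).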
From 12252bea27ccf2debf74ee0ffd36245ecdd208c6 Mon Sep 17 00:00:00 2001
From: Matthias Springer <me at m-sp.org>
Date: Sun, 1 Oct 2023 18:29:00 +0200
Subject: [PATCH] [mlir][bufferization] Remove `resolveUsesInRepetitiveRegions`
The bufferization analysis has been improved over the last few months, and this workaround is no longer needed.
---
 .../Transforms/TensorCopyInsertion.cpp           | 73 -------------------
 mlir/test/Dialect/Linalg/one-shot-bufferize.mlir | 15 ++--
 mlir/test/Dialect/SCF/one-shot-bufferize.mlir    | 19 ++---
 3 files changed, 16 insertions(+), 91 deletions(-)
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
index ad7da317f5db14d..6db60b75b302b84 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/TensorCopyInsertion.cpp
@@ -26,82 +26,9 @@ namespace bufferization {
 using namespace mlir;
 using namespace mlir::bufferization;
 
-/// Resolve all operands that are also used inside of repetitive regions of the
-/// same op. Such cases are not fully supported by One-Shot Bufferize.
-///
-/// E.g.:
-/// %r = scf.for ... iter_args(%t = %tensor) -> tensor<?xf32> {
-///   "some_use"(%tensor)
-///   ...
-/// }
-///
-/// Is converted to:
-/// %tensor_copy = bufferization.alloc_tensor copy(%tensor)
-/// %r = scf.for ... iter_args(%t = %tensor) -> tensor<?xf32> {
-///   "some_use"(%tensor_copy)
-///   ...
-/// }
-static void
-resolveUsesInRepetitiveRegions(Operation *op,
-                               const BufferizationOptions &options) {
-  IRRewriter rewriter(op->getContext());
-  AnalysisState state(options);
-
-  // Look for repetitive ops (loops).
-  op->walk([&](BufferizableOpInterface bufferizableOp) {
-    // Skip filtered ops.
-    if (!options.isOpAllowed(bufferizableOp.getOperation()))
-      return WalkResult::advance();
-
-    // Find all operands that are also used inside of a repetitive region of
-    // this op.
-    for (OpOperand &opOperand : bufferizableOp->getOpOperands()) {
-      Value operand = opOperand.get();
-      // Skip non-tensor operands.
-      if (!isa<TensorType>(operand.getType()))
-        continue;
-      // Skip operands that do not bufferize to memory writes.
-      if (!bufferizableOp.bufferizesToMemoryWrite(opOperand, state))
-        continue;
-
-      // Gather all uses inside repetitive regions.
-      SmallVector<OpOperand *> usesInsideRegion;
-      for (OpOperand &use : operand.getUses()) {
-        Operation *owner = use.getOwner();
-        if (!bufferizableOp->isProperAncestor(owner))
-          continue;
-        for (Region &r : bufferizableOp->getRegions()) {
-          if (r.findAncestorOpInRegion(*owner) &&
-              bufferizableOp.isRepetitiveRegion(r.getRegionNumber())) {
-            usesInsideRegion.push_back(&use);
-            break;
-          }
-        }
-      }
-      // Nothing to do if the operand is not used inside a repetitive region.
-      if (usesInsideRegion.empty())
-        continue;
-
-      // Insert a tensor copy and replace all uses inside of repetitive
-      // regions.
-      rewriter.setInsertionPoint(bufferizableOp);
-      auto tensorCopy = rewriter.create<AllocTensorOp>(
-          bufferizableOp->getLoc(), cast<TensorType>(operand.getType()),
-          /*dynamicSizes=*/ValueRange(),
-          /*copy=*/operand, /*memory_space=*/IntegerAttr());
-      for (OpOperand *use : usesInsideRegion)
-        use->set(tensorCopy);
-    }
-
-    return WalkResult::advance();
-  });
-}
-
 LogicalResult mlir::bufferization::insertTensorCopies(
     Operation *op, const OneShotBufferizationOptions &options,
     BufferizationStatistics *statistics) {
-  // Preprocessing: Resolve currently unsupported bufferization cases.
-  resolveUsesInRepetitiveRegions(op, options);
-
   OneShotAnalysisState state(op, options);
   // Run normal One-Shot Bufferize analysis or One-Shot Module Bufferize
   // analysis depending on whether function boundary bufferization is enabled or
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
index 66d7fda2230ce8d..c69701b65e20643 100644
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -160,8 +160,7 @@ func.func @matmul(
   %c16 = arith.constant 16 : index
 
   // Hoisted alloc.
-  // CHECK: %[[ALLOC:.*]] = memref.alloc() {alignment = 64 : i64} : memref<128x192xf32>
-  // CHECK: memref.copy %[[C]], %[[ALLOC]]
+  // CHECK: %[[ALLOC:.*]] = memref.alloc() {alignment = 64 : i64} : memref<8x16xf32>
 
   // CHECK: scf.for %[[I:.*]] =
   %0 = scf.for %arg3 = %c0 to %c128 step %c8 iter_args(%arg4 = %C) -> (tensor<128x192xf32>) {
@@ -173,14 +172,12 @@ func.func @matmul(
       %3 = tensor.extract_slice %B[0, %arg5] [256, 16] [1, 1] :
         tensor<256x192xf32> to tensor<256x16xf32>
 
-      // C was already replaced with a copy by preprocessing, so no copy is
-      // needed here.
-      // CHECK: %[[C_SLICE:.*]] = memref.subview %[[ALLOC]]
+      // Insert an artificial out-of-place buffer by extracting from %C instead
+      // of %arg6.
       %4 = tensor.extract_slice %C[%arg3, %arg5] [8, 16] [1, 1] :
         tensor<128x192xf32> to tensor<8x16xf32>
 
-      // linalg.fill is inplace.
-      // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
+      // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[ALLOC]]
       %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
 
       // CHECK: scf.for %[[K:.*]] =
@@ -191,7 +188,7 @@ func.func @matmul(
          tensor<256x16xf32> to tensor<32x16xf32>
 
        // linalg.matmul is inplace as well as the enclosing scf.for.
-       // CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
+       // CHECK: linalg.matmul ins({{.*}} outs(%[[ALLOC]]
        %10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>)
                            outs(%arg8 : tensor<8x16xf32>)
          -> tensor<8x16xf32>
@@ -202,7 +199,7 @@ func.func @matmul(
      // that is not in place. So we must insert a copy of the small buffer into
      // the bigger buffer.
      // CHECK: %[[T:.*]] = memref.subview %[[C]][%[[I]], %[[J]]] [8, 16] [1, 1]
-     // CHECK: memref.copy %[[C_SLICE]], %[[T]]
+     // CHECK: memref.copy %[[ALLOC]], %[[T]]
      %7 = tensor.insert_slice %6 into %arg6[%arg3, %arg5] [8, 16] [1, 1] :
        tensor<8x16xf32> into tensor<128x192xf32>
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
index 9b5c0cf048c56f5..30d0ea1a4b09ff8 100644
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -253,11 +253,10 @@ func.func @scf_execute_region_yield_non_equivalent(%i: index, %j: index) -> f32
 // CHECK-SAME: %[[t:.*]]: memref<?xf32
 // CHECK: %[[alloc:.*]] = memref.alloc(%{{.*}})
 // CHECK: memref.copy %[[t]], %[[alloc]]
-// CHECK: %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[t]])
+// CHECK: %[[for:.*]] = scf.for {{.*}} iter_args(%[[iter:.*]] = %[[alloc]])
 // CHECK-DAG: %[[alloc2:.*]] = memref.alloc(%{{.*}})
-// CHECK: memref.copy %[[alloc]], %[[alloc2]]
-// CHECK: %[[alloc2_casted:.*]] = memref.cast %[[alloc2]]
-// CHECK: scf.yield %[[alloc2_casted]]
+// CHECK: memref.copy %[[t]], %[[alloc2]]
+// CHECK: scf.yield %[[alloc2]]
 // CHECK: return %[[for]]
 func.func @scf_for_yield_non_equivalent(
     %t: tensor<?xf32>, %lb : index, %ub : index, %step : index) -> tensor<?xf32> {
@@ -606,9 +605,9 @@ func.func @scf_foreach_private_var(%t: tensor<10xf32>) -> f32 {
   // CHECK: scf.forall (%{{.*}}) in (2) {
 
-  // Load from the copy and store into the shared output.
-  // CHECK: %[[subview:.*]] = memref.subview %[[t]]
-  // CHECK: memref.load %[[t_copy]]
+  // Load from the original and store into the copy.
+  // CHECK: %[[subview:.*]] = memref.subview %[[t_copy]]
+  // CHECK: memref.load %[[t]]
   // CHECK: memref.store %{{.*}}, %[[subview]]
 
   %0 = scf.forall (%tid) in (%c2) shared_outs(%o = %t) -> tensor<10xf32> {
     %offset = arith.muli %c5, %tid : index
@@ -752,14 +751,16 @@ func.func @scf_for_yield_alias_of_non_equivalent(%sz: index) -> tensor<?xf32> {
 
   // CHECK: scf.for
   %r = scf.for %iv = %c0 to %sz step %c1 iter_args(%t = %0) -> tensor<?xf32> {
     %iv_sub = arith.subi %iv, %c1 : index
-    // CHECK: memref.subview %[[generate_copy]]
+    // CHECK: memref.subview %[[generate]]
     %ll = tensor.extract_slice %0[%iv_sub][%sz][1] : tensor<?xf32> to tensor<?xf32>
     %l = tensor.extract %ll[%c0] : tensor<?xf32>
     %double = arith.mulf %cst, %l : f32
-    // CHECK: memref.store %{{.*}}, %[[generate]]
+    // CHECK: memref.store %{{.*}}, %[[generate_copy]]
     %s = tensor.insert %double into %t[%iv] : tensor<?xf32>
     scf.yield %s : tensor<?xf32>
   }
+
+  // CHECK: return %[[generate_copy]]
   return %r : tensor<?xf32>
 }