[Mlir-commits] [mlir] 0abf513 - [mlir][bufferize] Support parallel_insert_slice in EmptyTensorElimination
Matthias Springer
llvmlistbot at llvm.org
Wed Dec 7 02:44:11 PST 2022
Author: Matthias Springer
Date: 2022-12-07T11:39:12+01:00
New Revision: 0abf513d0f3ac7f13694ea5376f669c03f8b7600
URL: https://github.com/llvm/llvm-project/commit/0abf513d0f3ac7f13694ea5376f669c03f8b7600
DIFF: https://github.com/llvm/llvm-project/commit/0abf513d0f3ac7f13694ea5376f669c03f8b7600.diff
LOG: [mlir][bufferize] Support parallel_insert_slice in EmptyTensorElimination
Differential Revision: https://reviews.llvm.org/D139431
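The elimination step anchors on the `source` operand of a tensor.parallel_insert_slice, follows the reverse use-def chain back to a tensor.empty, and replaces the empty tensor with a matching tensor.extract_slice of the destination, mirroring the existing tensor.insert_slice handling. Schematically (a sketch only; value names are illustrative and the enclosing scf.foreach_thread region shown in the new test is elided):

  // Before: %empty only feeds the parallel_insert_slice source.
  %empty = tensor.empty(%sz) : tensor<?xf32>
  %filled = linalg.fill ins(%f0 : f32) outs(%empty : tensor<?xf32>) -> tensor<?xf32>
  ...
  tensor.parallel_insert_slice %filled into %dest[42] [%sz] [1]
      : tensor<?xf32> into tensor<?xf32>

  // After empty tensor elimination: the empty tensor is replaced by a slice of
  // the destination with the same offsets/sizes/strides, so bufferization can
  // write in place instead of allocating a temporary buffer.
  %slice = tensor.extract_slice %dest[42] [%sz] [1] : tensor<?xf32> to tensor<?xf32>
  %filled = linalg.fill ins(%f0 : f32) outs(%slice : tensor<?xf32>) -> tensor<?xf32>
  ...
  tensor.parallel_insert_slice %filled into %dest[42] [%sz] [1]
      : tensor<?xf32> into tensor<?xf32>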
Added:
Modified:
mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
index b30fdcaa5c30b..7914ba2b0484d 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp
@@ -187,15 +187,14 @@ LogicalResult mlir::bufferization::eliminateEmptyTensors(
/// relation is "equivalent" (TODO: can be relaxed if needed).
/// * The reverse use-def chain has exactly one end, which is the
/// tensor::EmptyOp.
-LogicalResult
-mlir::bufferization::insertSliceAnchoredEmptyTensorEliminationStep(
+template <typename OpTy>
+static LogicalResult insertSliceLikeAnchoredEmptyTensorEliminationStep(
RewriterBase &rewriter, Operation *op, AnalysisState &state) {
return eliminateEmptyTensors(
rewriter, op, state,
/*anchorMatchFunc=*/
[&](OpOperand &operand, SmallVector<Value> &neededValues) {
- auto insertSliceOp =
- dyn_cast<tensor::InsertSliceOp>(operand.getOwner());
+ auto insertSliceOp = dyn_cast<OpTy>(operand.getOwner());
if (!insertSliceOp)
return false;
if (&operand != &insertSliceOp->getOpOperand(0) /*source*/)
@@ -214,7 +213,7 @@ mlir::bufferization::insertSliceAnchoredEmptyTensorEliminationStep(
},
/*rewriteFunc=*/
[](OpBuilder &b, Location loc, OpOperand &operand) {
- auto insertOp = cast<tensor::InsertSliceOp>(operand.getOwner());
+ auto insertOp = cast<OpTy>(operand.getOwner());
auto extractOp = b.create<tensor::ExtractSliceOp>(
loc, insertOp.getSourceType(), insertOp.getDest(),
insertOp.getMixedOffsets(), insertOp.getMixedSizes(),
@@ -223,6 +222,18 @@ mlir::bufferization::insertSliceAnchoredEmptyTensorEliminationStep(
});
}
+LogicalResult
+mlir::bufferization::insertSliceAnchoredEmptyTensorEliminationStep(
+ RewriterBase &rewriter, Operation *op, AnalysisState &state) {
+ if (failed(insertSliceLikeAnchoredEmptyTensorEliminationStep<
+ tensor::InsertSliceOp>(rewriter, op, state)))
+ return failure();
+ if (failed(insertSliceLikeAnchoredEmptyTensorEliminationStep<
+ tensor::ParallelInsertSliceOp>(rewriter, op, state)))
+ return failure();
+ return success();
+}
+
namespace {
struct EmptyTensorElimination
: public bufferization::impl::EmptyTensorEliminationBase<
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
index 73ac2e1f1f2f0..1c0860ffafbef 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir
@@ -137,3 +137,35 @@ func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> {
: tensor<1x1x128xf32> into tensor<5x6x128xf32>
return %3 : tensor<5x6x128xf32>
}
+
+// -----
+
+// CHECK: func @parallel_insert_slice(
+// CHECK-SAME: %[[FUNC_ARG:[0-9a-zA-Z]*]]: memref<?xf32>
+// CHECK-SAME: %[[sz:[0-9a-zA-Z]*]]: index
+func.func @parallel_insert_slice(
+ %t: tensor<?xf32> {bufferization.buffer_layout = affine_map<(d0) -> (d0)>, bufferization.writable = true},
+ %sz: index)
+ -> (tensor<?xf32>)
+{
+ %f0 = arith.constant 0.0: f32
+ %c512 = arith.constant 512 : index
+
+ %r1 = scf.foreach_thread (%iv) in (%c512) shared_outs(%o = %t) -> (tensor<?xf32>) {
+ // tensor.empty itself does not alloc but forwards to the insert_slice.
+ // EmptyTensorOpElimination replaces the tensor.empty with an inplace
+ // extract_slice.
+ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1]
+ %a = tensor.empty(%sz) : tensor<?xf32>
+
+ // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref<?xf32
+ %f = linalg.fill ins(%f0 : f32) outs(%a : tensor<?xf32>) -> tensor<?xf32>
+
+ // Self-copy canonicalizes away later.
+ scf.foreach_thread.perform_concurrently {
+ tensor.parallel_insert_slice %f into %o[42][%sz][1]: tensor<?xf32> into tensor<?xf32>
+ }
+ }
+
+ return %r1: tensor<?xf32>
+}