[Mlir-commits] [mlir] 3c3810e - [mlir][vector] Avoid hoisting alloca'ed temporary buffers across AutomaticAllocationScope
Nicolas Vasilache
llvmlistbot at llvm.org
Wed Feb 2 03:00:46 PST 2022
Author: Nicolas Vasilache
Date: 2022-02-02T06:00:42-05:00
New Revision: 3c3810e72e8b5d324be3c3de6faf177144653408
URL: https://github.com/llvm/llvm-project/commit/3c3810e72e8b5d324be3c3de6faf177144653408
DIFF: https://github.com/llvm/llvm-project/commit/3c3810e72e8b5d324be3c3de6faf177144653408.diff
LOG: [mlir][vector] Avoid hoisting alloca'ed temporary buffers across AutomaticAllocationScope
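This revision avoids incorrectly hoisting alloca'ed temporary buffers across an AutomaticAllocationScope boundary: the buffers are now created at the start of the enclosing AutomaticAllocationScope op rather than being hoisted past it.
In the more general case, we will probably need a ParallelScope-like interface to account for parallelism and thread-local considerations.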
Differential Revision: https://reviews.llvm.org/D118768
Added:
Modified:
mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 6cdad451e3282..499f4403d317f 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -267,15 +267,22 @@ struct BufferAllocs {
Value maskBuffer;
};
+// TODO: Parallelism and threadlocal considerations with a ParallelScope trait.
+static Operation *getAutomaticAllocationScope(Operation *op) {
+ Operation *scope =
+ op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+ assert(scope && "Expected op to be inside automatic allocation scope");
+ return scope;
+}
+
/// Allocate temporary buffers for data (vector) and mask (if present).
-/// TODO: Parallelism and threadlocal considerations.
template <typename OpTy>
static BufferAllocs allocBuffers(OpBuilder &b, OpTy xferOp) {
Location loc = xferOp.getLoc();
OpBuilder::InsertionGuard guard(b);
- Operation *scope =
- xferOp->template getParentWithTrait<OpTrait::AutomaticAllocationScope>();
- assert(scope && "Expected op to be inside automatic allocation scope");
+ Operation *scope = getAutomaticAllocationScope(xferOp);
+ assert(scope->getNumRegions() == 1 &&
+ "AutomaticAllocationScope with >1 regions");
b.setInsertionPointToStart(&scope->getRegion(0).front());
BufferAllocs result;
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
index ff3a6012f2d54..2cbc95d5d0f8c 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp
@@ -438,6 +438,14 @@ static void createFullPartialVectorTransferWrite(RewriterBase &b,
});
}
+// TODO: Parallelism and threadlocal considerations with a ParallelScope trait.
+static Operation *getAutomaticAllocationScope(Operation *op) {
+ Operation *scope =
+ op->getParentWithTrait<OpTrait::AutomaticAllocationScope>();
+ assert(scope && "Expected op to be inside automatic allocation scope");
+ return scope;
+}
+
/// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds
/// masking) fastpath and a slowpath.
///
@@ -538,12 +546,14 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer(
// Top of the function `alloc` for transient storage.
Value alloc;
{
- FuncOp funcOp = xferOp->getParentOfType<FuncOp>();
RewriterBase::InsertionGuard guard(b);
- b.setInsertionPointToStart(&funcOp.getRegion().front());
+ Operation *scope = getAutomaticAllocationScope(xferOp);
+ assert(scope->getNumRegions() == 1 &&
+ "AutomaticAllocationScope with >1 regions");
+ b.setInsertionPointToStart(&scope->getRegion(0).front());
auto shape = xferOp.getVectorType().getShape();
Type elementType = xferOp.getVectorType().getElementType();
- alloc = b.create<memref::AllocaOp>(funcOp.getLoc(),
+ alloc = b.create<memref::AllocaOp>(scope->getLoc(),
MemRefType::get(shape, elementType),
ValueRange{}, b.getI64IntegerAttr(32));
}
diff --git a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
index 7cddb46f094e1..15b70caa930f0 100644
--- a/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
+++ b/mlir/test/Conversion/VectorToSCF/vector-to-scf.mlir
@@ -481,3 +481,22 @@ func @transfer_write_strided(%A : vector<4xf32>, %B : memref<8x4xf32, affine_map
// CHECK-LABEL: transfer_write_strided(
// CHECK: scf.for
// CHECK: store
+
+// -----
+
+func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()
+
+// CHECK-LABEL: transfer_read_within_async_execute
+func @transfer_read_within_async_execute(%A : memref<2x2xf32>) -> !async.token {
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f32
+ // CHECK-NOT: alloca
+ // CHECK: async.execute
+ // CHECK: alloca
+ %token = async.execute {
+ %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<2x2xf32>, vector<2x2xf32>
+ call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
+ async.yield
+ }
+ return %token : !async.token
+}
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
index 9a10482e027a0..ace977fb1e7a3 100644
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -393,3 +393,22 @@ func @split_vector_transfer_write_strided_2d(
// LINALG: }
// LINALG: return
// LINALG: }
+
+// -----
+
+func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()
+
+// CHECK-LABEL: transfer_read_within_async_execute
+func @transfer_read_within_async_execute(%A : memref<?x?xf32>) -> !async.token {
+ %c0 = arith.constant 0 : index
+ %f0 = arith.constant 0.0 : f32
+ // CHECK-NOT: alloca
+ // CHECK: async.execute
+ // CHECK: alloca
+ %token = async.execute {
+ %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
+ call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
+ async.yield
+ }
+ return %token : !async.token
+}
More information about the Mlir-commits
mailing list