[Mlir-commits] [mlir] 0b21371 - [mlir] Support pre-existing tokens in 'gpu-async-region'
Christian Sigg
llvmlistbot at llvm.org
Wed Jun 9 23:43:56 PDT 2021
Author: Christian Sigg
Date: 2021-06-10T08:43:45+02:00
New Revision: 0b21371e1201f23e0f1de2de56ef8eed9c43d6db
URL: https://github.com/llvm/llvm-project/commit/0b21371e1201f23e0f1de2de56ef8eed9c43d6db
DIFF: https://github.com/llvm/llvm-project/commit/0b21371e1201f23e0f1de2de56ef8eed9c43d6db.diff
LOG: [mlir] Support pre-existing tokens in 'gpu-async-region'
Allow GPU ops that implement the async interface to already be async when running the GpuAsyncRegionPass.
That pass threads a 'current token' through a block of ops that implement the GPU async interface.
After this change, pre-existing async ops (those returning a !gpu.async.token) set the current token,
and pre-existing synchronous `gpu.wait` ops reset it.
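For illustration, a minimal before/after sketch of the new behavior, distilled from the test case added below (SSA value names are illustrative, not produced by the pass):

Before the pass:

  func @f() {
    %t0 = gpu.wait async                     // pre-existing async wait: sets the current token to %t0
    %0 = gpu.alloc [%t0] () : memref<7xf32>  // synchronous op with a pre-existing dependency
    gpu.wait                                 // pre-existing synchronous wait: resets the current token
    return
  }

After the pass:

  func @f() {
    %t0 = gpu.wait async
    // The current token is threaded in alongside the existing dependency, and the
    // op is rewritten to its async form, which returns a new token.
    %m, %t1 = gpu.alloc async [%t0, %t0] () : memref<7xf32>
    gpu.wait [%t1]                           // host-synchronizes; the current token is reset
    return
  }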
Reviewed By: herhut
Differential Revision: https://reviews.llvm.org/D103396
Added:
Modified:
mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
mlir/test/Dialect/GPU/async-region.mlir
Removed:
################################################################################
diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
index 68417baf1459a..e4e3d72b4a475 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -47,6 +47,15 @@ static bool hasSideEffects(Operation *op) {
struct GpuAsyncRegionPass::ThreadTokenCallback {
ThreadTokenCallback(MLIRContext &context) : builder(&context) {}
+ WalkResult operator()(Block *block) {
+ for (Operation &op : make_early_inc_range(*block)) {
+ if (failed(visit(&op)))
+ return WalkResult::interrupt();
+ }
+ return WalkResult::advance();
+ }
+
+private:
// If `op` implements the AsyncOpInterface, insert a `gpu.wait async` to
// create a current token (unless it already exists), and 'thread' that token
// through the `op` so that it executes asynchronously.
@@ -55,11 +64,15 @@ struct GpuAsyncRegionPass::ThreadTokenCallback {
// host-synchronize execution. A `!gpu.async.token` will therefore only be
// used inside of its block and GPU execution will always synchronize with
// the host at block boundaries.
- WalkResult operator()(Operation *op) {
+ LogicalResult visit(Operation *op) {
if (isa<gpu::LaunchOp>(op))
return op->emitOpError("replace with gpu.launch_func first");
- if (isa<gpu::WaitOp>(op))
- return op->emitOpError("unexpected pre-existing gpu.wait");
+ if (auto waitOp = llvm::dyn_cast<gpu::WaitOp>(op)) {
+ if (currentToken)
+ waitOp.addAsyncDependency(currentToken);
+ currentToken = waitOp.asyncToken();
+ return success();
+ }
builder.setInsertionPoint(op);
if (auto asyncOp = dyn_cast<gpu::AsyncOpInterface>(op))
return rewriteAsyncOp(asyncOp); // Replace GPU op with async version.
@@ -71,14 +84,9 @@ struct GpuAsyncRegionPass::ThreadTokenCallback {
return success();
}
-private:
// Replaces asyncOp with a clone that returns a token.
LogicalResult rewriteAsyncOp(gpu::AsyncOpInterface asyncOp) {
auto *op = asyncOp.getOperation();
- if (asyncOp.getAsyncToken())
- // TODO: Support ops that are already async.
- return op->emitOpError("is already async");
-
auto tokenType = builder.getType<gpu::AsyncTokenType>();
// If there is no current token, insert a `gpu.wait async` without
@@ -87,6 +95,11 @@ struct GpuAsyncRegionPass::ThreadTokenCallback {
currentToken = createWaitOp(op->getLoc(), tokenType, {});
asyncOp.addAsyncDependency(currentToken);
+ // Return early if op returns a token already.
+ currentToken = asyncOp.getAsyncToken();
+ if (currentToken)
+ return success();
+
// Clone the op to return a token in addition to the other results.
SmallVector<Type, 1> resultTypes;
resultTypes.reserve(1 + op->getNumResults());
@@ -315,10 +328,7 @@ struct GpuAsyncRegionPass::SingleTokenUseCallback {
// inserts the necessary synchronization (as gpu.wait ops). Assumes sequential
// execution semantics and that no GPU ops are asynchronous yet.
void GpuAsyncRegionPass::runOnFunction() {
- if (getFunction()
- .getRegion()
- .walk(ThreadTokenCallback(getContext()))
- .wasInterrupted())
+ if (getFunction()->walk(ThreadTokenCallback(getContext())).wasInterrupted())
return signalPassFailure();
// Collect gpu.wait ops that we can move out of async.execute regions.
diff --git a/mlir/test/Dialect/GPU/async-region.mlir b/mlir/test/Dialect/GPU/async-region.mlir
index 1a2206c3aa5a6..d9ba9ce338806 100644
--- a/mlir/test/Dialect/GPU/async-region.mlir
+++ b/mlir/test/Dialect/GPU/async-region.mlir
@@ -169,4 +169,24 @@ module attributes {gpu.container_module} {
}
return
}
+
+ // CHECK-LABEL:func @existing_tokens()
+ func @existing_tokens() {
+ // CHECK: %[[t0:.*]] = gpu.wait async
+ // CHECK-NOT: [{{.*}}]
+ %t0 = gpu.wait async
+ // CHECK: %[[t1:.*]] = gpu.wait async [%[[t0]], %[[t0]]]
+ %t1 = gpu.wait async [%t0]
+ // CHECK: %[[m:.*]], %[[t2:.*]] = gpu.alloc async [%[[t1]], %[[t0]]] ()
+ %0 = gpu.alloc [%t0] () : memref<7xf32>
+ // CHECK: %[[t3:.*]] = gpu.dealloc async [%[[t2]]] %[[m]]
+ %t2 = gpu.dealloc async %0 : memref<7xf32>
+ // CHECK: gpu.wait [%[[t3]]]
+ gpu.wait
+ // CHECK: gpu.wait
+ // CHECK-NOT: async
+ // CHECK-NOT: [{{.*}}]
+ gpu.wait
+ return
+ }
}