[Mlir-commits] [mlir] db1cf3d - [mlir][gpu] Add `gpu.wait` op.

Tue Oct 13 08:31:14 PDT 2020

Author: Christian Sigg
Date: 2020-10-13T17:30:59+02:00
New Revision: db1cf3d9ab33f56fcaea616baa71c6e4036beffa

URL: https://github.com/llvm/llvm-project/commit/db1cf3d9ab33f56fcaea616baa71c6e4036beffa
DIFF: https://github.com/llvm/llvm-project/commit/db1cf3d9ab33f56fcaea616baa71c6e4036beffa.diff

LOG: [mlir][gpu] Add `gpu.wait` op.

This combines two separate ops (D88972: `gpu.create_token`, D89043: `gpu.host_wait`) into one.

I do after all like the idea of combining the two ops, because it matches exactly the pattern we are
going to have in the other gpu ops that will implement the AsyncOpInterface (launch_func, copies, alloc):

If the op is async, we return a !gpu.async.token. Otherwise, we synchronize with the host and don't return a token.

The use cases for `gpu.wait async` and `gpu.wait` are further apart than those of e.g. `gpu.h2d async` and `gpu.h2d`,
but I like the consistent meaning of the `async` keyword in GPU ops.

Reviewed By: herhut

Differential Revision: https://reviews.llvm.org/D89160

Added: 
    

Modified: 
    mlir/include/mlir/Dialect/GPU/GPUOps.td
    mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
    mlir/test/Dialect/GPU/invalid.mlir
    mlir/test/Dialect/GPU/ops.mlir

Removed: 
    


################################################################################
diff  --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 229023f6ec4c..5d4443e1d8ff 100644

--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -756,4 +756,46 @@ def GPU_HostRegisterOp : GPU_Op<"host_register">,
   let verifier = [{ return success(); }];
 }
 
+def GPU_WaitOp : GPU_Op<"wait", [GPU_AsyncOpInterface]> {
+  let summary = "Wait for async gpu ops to complete.";
+  let description = [{
+    This op synchronizes the host or the device with a list of dependent ops.
+
+    If the op contains the `async` keyword, it returns a new async token which
+    is synchronized with the op arguments. This new token is merely a shortcut
+    to the argument list, and one could replace the uses of the result with the
+    arguments for the same effect. The async version of this op is primarily
+    used to make each async token have a single use during lowering and
+    thereby make forks in async execution explicit. Example usage:
+
+    ```mlir
+    %t0 = gpu.foo async : !gpu.async.token
+    %t1 = gpu.bar async : !gpu.async.token
+    %t2 = gpu.wait async [%t0, %t1]
+    // gpu.baz doesn't run until gpu.foo and gpu.bar have both completed, just
+    // as if the async dependencies were [%t0, %t1].
+    %t3 = gpu.baz async [%t2]
+    ```
+
+    If the op does not contain the `async` keyword, it does not return a new
+    async token but blocks until all ops producing the async dependency tokens
+    finished execution. All dependent memory operations are visible to the host
+    once this op completes. Example usage:
+
+    ```mlir
+    %t0 = gpu.foo async : !gpu.async.token
+    %t1 = gpu.bar async : !gpu.async.token
+    // The gpu.wait op blocks until gpu.foo and gpu.bar have completed.
+    gpu.wait [%t0, %t1]
+    ```
+  }];
+
+  let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies);
+  let results = (outs Optional<GPU_AsyncToken>:$asyncToken); // async form only
+
+  let assemblyFormat = [{
+    custom<AsyncDependencies>(type($asyncToken), $asyncDependencies) attr-dict
+  }];
+}
+
 #endif // GPU_OPS

diff  --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 063e894829df..79fe969dbe17 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -818,6 +818,46 @@ static void print(OpAsmPrinter &p, GPUModuleOp op) {
                 /*printBlockTerminators=*/false);
 }
 
+/// Parses the optional `async` keyword and dependency list used by the
+/// `gpu.wait` assembly format:
+///   (`async`)? (`[` ssa-id-list `]`)?
+///
+/// When `async` is present, `asyncTokenType` is set to the async token type so
+/// that the op gets a token result; otherwise it is left untouched. An op
+/// marked `async` without a named result is diagnosed as an error.
+static ParseResult parseAsyncDependencies(
+    OpAsmParser &parser, Type &asyncTokenType,
+    SmallVectorImpl<OpAsmParser::OperandType> &asyncDependencies) {
+  auto loc = parser.getCurrentLocation();
+  if (succeeded(parser.parseOptionalKeyword("async"))) {
+    if (parser.getNumResults() == 0)
+      return parser.emitError(loc, "needs to be named when marked 'async'");
+    asyncTokenType = parser.getBuilder().getType<AsyncTokenType>();
+  }
+  return parser.parseOperandList(asyncDependencies,
+                                 OpAsmParser::Delimiter::OptionalSquare);
+}
+
+/// Prints the counterpart of parseAsyncDependencies:
+///   (`async`)? (`[` ssa-id-list `]`)?
+///
+/// `async` is printed when `asyncTokenType` is non-null. No space is emitted
+/// after `async` when the dependency list is empty, so the printed op does not
+/// carry stray trailing whitespace.
+static void printAsyncDependencies(OpAsmPrinter &printer, Type asyncTokenType,
+                                   OperandRange asyncDependencies) {
+  if (asyncTokenType)
+    printer << "async";
+  if (asyncDependencies.empty())
+    return;
+  // Separate the list from a preceding `async` keyword only when both exist.
+  if (asyncTokenType)
+    printer << ' ';
+  printer << "[";
+  llvm::interleaveComma(asyncDependencies, printer);
+  printer << "]";
+}
+
 #include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc"
 
 #define GET_OP_CLASSES

diff  --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 739d23a59f05..cfdb06ac5702 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -435,3 +435,17 @@ module {
     } ) {gpu.kernel, sym_name = "kernel_1", type = (f32, memref<?xf32>) -> (), workgroup_attributions = 3: i64} : () -> ()
   }
 }
+
+// -----
+
+func @sync_wait_with_result() {
+  // expected-error @+1 {{cannot name an operation with no results}}
+  %t = gpu.wait // Sync form has no result to bind to %t.
+}
+
+// -----
+
+func @async_wait_without_result() {
+  // expected-error @+1 {{custom op 'gpu.wait' needs to be named when marked 'async'}}
+  gpu.wait async // Async form used without naming its result.
+}

diff  --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index 574c29088432..23cd6d5c7d0a 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -149,4 +149,21 @@ module attributes {gpu.container_module} {
     // CHECK: return {{.*}} : !gpu.async.token
     return %arg0 : !gpu.async.token
   }
+
+  func @async_wait() {
+    // CHECK-LABEL: func @async_wait
+    // CHECK: %[[t0:.*]] = gpu.wait async
+    %0 = gpu.wait async // Async form without dependencies.
+    // CHECK: %[[t1:.*]] = gpu.wait async [%[[t0]]]
+    %1 = gpu.wait async [%0] // Async form with one dependency.
+    // CHECK: %{{.*}} = gpu.wait async [%[[t0]], %[[t1]]]
+    %2 = gpu.wait async [%0, %1] // Async form with several dependencies.
+    // CHECK: gpu.wait [%[[t0]], %[[t1]]]
+    // CHECK-NOT: async
+    gpu.wait [%0, %1] // Sync form: no result token.
+    // CHECK: gpu.wait
+    // CHECK-NOT: async
+    gpu.wait // Valid, but a no-op.
+    return
+  }
 }