[Mlir-commits] [mlir] 834618a - Revert "[mlir][gpu] Allow gpu.launch_func to be async."
Mehdi Amini
llvmlistbot at llvm.org
Thu Oct 29 10:30:59 PDT 2020
Author: Mehdi Amini
Date: 2020-10-29T17:30:27Z
New Revision: 834618a2ffbdaf3f2e94025b53f49f3764e5adb0
URL: https://github.com/llvm/llvm-project/commit/834618a2ffbdaf3f2e94025b53f49f3764e5adb0
DIFF: https://github.com/llvm/llvm-project/commit/834618a2ffbdaf3f2e94025b53f49f3764e5adb0.diff
LOG: Revert "[mlir][gpu] Allow gpu.launch_func to be async."
This reverts commit ec7780ebdab480139596c3cb08ee77d7035457b3.
One of the bots is crashing in a test related to this change.
Added:
Modified:
mlir/include/mlir/Dialect/GPU/GPUOps.td
mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
mlir/test/Dialect/GPU/invalid.mlir
mlir/test/Dialect/GPU/ops.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 593b735f01c9..8ad3c9abe6cc 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -291,14 +291,12 @@ def GPU_GPUFuncOp : GPU_Op<"func", [HasParent<"GPUModuleOp">,
let parser = [{ return parseGPUFuncOp(parser, result); }];
}
-def GPU_LaunchFuncOp : GPU_Op<"launch_func",
- [GPU_AsyncOpInterface, AttrSizedOperandSegments]>,
- Arguments<(ins Variadic<GPU_AsyncToken>:$asyncDependencies,
- SymbolRefAttr:$kernel,
+def GPU_LaunchFuncOp : GPU_Op<"launch_func">,
+ Arguments<(ins SymbolRefAttr:$kernel,
Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
Variadic<AnyType>:$operands)>,
- Results<(outs Optional<GPU_AsyncToken>:$asyncToken)> {
+ Results<(outs)> {
let summary = "Launches a function as a GPU kernel";
let description = [{
@@ -310,22 +308,14 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func",
function is required to be a gpu.module. And finally, the module containing
the kernel module (which thus cannot be the top-level module) is required
to have the `gpu.container_module` attribute. The `gpu.launch_func`
- operation has a symbol attribute named `kernel` to identify the fully
+ operation has a symbol attribute named `kernel` to identify the fully
specified kernel function to launch (both the gpu.module and func).
- The `gpu.launch_func` supports async dependencies: the kernel does not start
- executing until the ops producing those async dependencies have completed.
-
- By the default, the host implicitly blocks until kernel execution has
- completed. If the `async` keyword is present, the host does not block but
- instead a `!gpu.async.token` is returned. Other async GPU ops can take this
- token as dependency.
-
- The operation requires at least the grid and block sizes along the x,y,z
- dimensions as arguments. When a lower-dimensional kernel is required,
- unused sizes must be explicitly set to `1`.
-
- The remaining operands are passed as arguments to the kernel function.
+ The operation takes at least six operands, with the first three operands
+ being grid sizes along x,y,z dimensions and the following three being block
+ sizes along x,y,z dimensions. When a lower-dimensional kernel is required,
+ unused sizes must be explicitly set to `1`. The remaining operands are
+ passed as arguments to the kernel function.
Example:
@@ -361,15 +351,11 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func",
}
}
- %t0 = gpu.wait async
gpu.launch_func
- async // (Optional) Don't block host, return token.
- [%t0] // (Optional) Execute only after %t0 has completed.
- @kernels::@kernel_1 // Kernel function.
- blocks in (%cst, %cst, %cst) // Grid size.
- threads in (%cst, %cst, %cst) // Block size.
- args(%arg0 : f32, // (Optional) Kernel arguments.
- %arg1 : memref<?xf32, 1>)
+ @kernels::@kernel_1 // Kernel function.
+ blocks in (%cst, %cst, %cst) // Grid size.
+ threads in (%cst, %cst, %cst) // Block size.
+ args(%arg0 : f32, %arg1 : memref<?xf32, 1>) // Kernel arguments.
}
```
}];
@@ -415,7 +401,6 @@ def GPU_LaunchFuncOp : GPU_Op<"launch_func",
let verifier = [{ return ::verify(*this); }];
let assemblyFormat = [{
- custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$kernel
`blocks` `in` ` ` `(`$gridSizeX`,` $gridSizeY`,` $gridSizeZ`)`
`threads` `in` ` ` `(`$blockSizeX`,` $blockSizeY`,` $blockSizeZ`)`
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 8b0f7a3829d2..9514d3e0a04b 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -438,15 +438,10 @@ void LaunchFuncOp::build(OpBuilder &builder, OperationState &result,
auto kernelSymbol = builder.getSymbolRefAttr(
kernelModule.getName(), {builder.getSymbolRefAttr(kernelFunc.getName())});
result.addAttribute(getKernelAttrName(), kernelSymbol);
- SmallVector<int32_t, 8> segmentSizes(8, 1);
- segmentSizes.front() = 0; // Initially no async dependencies.
- segmentSizes.back() = static_cast<int32_t>(kernelOperands.size());
- result.addAttribute(getOperandSegmentSizeAttr(),
- builder.getI32VectorAttr(segmentSizes));
}
unsigned LaunchFuncOp::getNumKernelOperands() {
- return getNumOperands() - asyncDependencies().size() - kNumConfigOperands;
+ return getNumOperands() - kNumConfigOperands;
}
StringRef LaunchFuncOp::getKernelModuleName() {
@@ -456,17 +451,15 @@ StringRef LaunchFuncOp::getKernelModuleName() {
StringRef LaunchFuncOp::getKernelName() { return kernel().getLeafReference(); }
Value LaunchFuncOp::getKernelOperand(unsigned i) {
- return getOperand(asyncDependencies().size() + kNumConfigOperands + i);
+ return getOperation()->getOperand(i + kNumConfigOperands);
}
KernelDim3 LaunchFuncOp::getGridSizeOperandValues() {
- auto operands = getOperands().drop_front(asyncDependencies().size());
- return KernelDim3{operands[0], operands[1], operands[2]};
+ return KernelDim3{getOperand(0), getOperand(1), getOperand(2)};
}
KernelDim3 LaunchFuncOp::getBlockSizeOperandValues() {
- auto operands = getOperands().drop_front(asyncDependencies().size());
- return KernelDim3{operands[3], operands[4], operands[5]};
+ return KernelDim3{getOperand(3), getOperand(4), getOperand(5)};
}
static LogicalResult verify(LaunchFuncOp op) {
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index 3dc5be405aac..3612b8e0dcc1 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -37,7 +37,6 @@ func @launch_requires_gpu_return(%sz : index) {
func @launch_func_too_few_operands(%sz : index) {
// expected-error at +1 {{expected 6 or more operands}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
- {operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 0, 0]> : vector<8xi32>}
: (index, index, index, index, index) -> ()
return
}
@@ -56,7 +55,6 @@ module attributes {gpu.container_module} {
func @launch_func_missing_callee_attribute(%sz : index) {
// expected-error at +1 {{'gpu.launch_func' op requires attribute 'kernel'}}
"gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz)
- {operand_segment_sizes = dense<[0, 1, 1, 1, 1, 1, 1, 0]> : vector<8xi32>}
: (index, index, index, index, index, index) -> ()
return
}
diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir
index a3b781afdfbc..e81b233abfbc 100644
--- a/mlir/test/Dialect/GPU/ops.mlir
+++ b/mlir/test/Dialect/GPU/ops.mlir
@@ -73,7 +73,6 @@ module attributes {gpu.container_module} {
%1 = "op"() : () -> (memref<?xf32, 1>)
// CHECK: %{{.*}} = constant 8
%cst = constant 8 : index
- %t0 = gpu.wait async
// CHECK: gpu.launch_func @kernels::@kernel_1 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) args(%{{.*}} : f32, %{{.*}} : memref<?xf32, 1>)
gpu.launch_func @kernels::@kernel_1 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst) args(%0 : f32, %1 : memref<?xf32, 1>)
@@ -81,9 +80,6 @@ module attributes {gpu.container_module} {
// CHECK: gpu.launch_func @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})
gpu.launch_func @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
- // CHECK: %{{.*}} = gpu.launch_func async [%{{.*}}] @kernels::@kernel_2 blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}})
- %t1 = gpu.launch_func async [%t0] @kernels::@kernel_2 blocks in (%cst, %cst, %cst) threads in (%cst, %cst, %cst)
-
return
}
More information about the Mlir-commits
mailing list