[Mlir-commits] [mlir] 8d73bee - [mlir] Add gpu async integration test.
Christian Sigg
llvmlistbot at llvm.org
Wed Feb 3 12:45:34 PST 2021
Author: Christian Sigg
Date: 2021-02-03T21:45:23+01:00
New Revision: 8d73bee4edc2b5029d20777a8d4c740ea782b691
URL: https://github.com/llvm/llvm-project/commit/8d73bee4edc2b5029d20777a8d4c740ea782b691
DIFF: https://github.com/llvm/llvm-project/commit/8d73bee4edc2b5029d20777a8d4c740ea782b691.diff
LOG: [mlir] Add gpu async integration test.
Reviewed By: herhut
Differential Revision: https://reviews.llvm.org/D94421
Added:
mlir/test/mlir-cuda-runner/async.mlir
Modified:
mlir/lib/Conversion/GPUCommon/CMakeLists.txt
mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
index 9cb6038b3020..f3c6eca87d8a 100644
--- a/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUCommon/CMakeLists.txt
@@ -29,6 +29,7 @@ add_mlir_conversion_library(MLIRGPUToGPURuntimeTransforms
${NVPTX_LIBS}
LINK_LIBS PUBLIC
+ MLIRAsyncToLLVM
MLIRGPU
MLIRIR
MLIRLLVMIR
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
index cee1d7ba20e3..5b5761ac63de 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -16,6 +16,7 @@
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "../PassDetail.h"
+#include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -293,10 +294,13 @@ class ConvertMemcpyOpToGpuRuntimeCallPattern
void GpuToLLVMConversionPass::runOnOperation() {
LLVMTypeConverter converter(&getContext());
OwningRewritePatternList patterns;
+ LLVMConversionTarget target(getContext());
+
populateStdToLLVMConversionPatterns(converter, patterns);
+ populateAsyncStructuralTypeConversionsAndLegality(&getContext(), converter,
+ patterns, target);
populateGpuToLLVMConversionPatterns(converter, patterns, gpuBinaryAnnotation);
- LLVMConversionTarget target(getContext());
if (failed(
applyPartialConversion(getOperation(), target, std::move(patterns))))
signalPassFailure();
diff --git a/mlir/test/mlir-cuda-runner/async.mlir b/mlir/test/mlir-cuda-runner/async.mlir
new file mode 100644
index 000000000000..19eb6af374c8
--- /dev/null
+++ b/mlir/test/mlir-cuda-runner/async.mlir
@@ -0,0 +1,66 @@
+// RUN: mlir-cuda-runner %s --entry-point-result=void -O0 \
+// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
+// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_async_runtime%shlibext \
+// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
+// RUN: | FileCheck %s
+
+func @main() {
+ %c0 = constant 0 : index
+ %c1 = constant 1 : index
+ %count = constant 2 : index
+
+ // initialize h0 on host
+ %h0 = alloc(%count) : memref<?xi32>
+ %h0_unranked = memref_cast %h0 : memref<?xi32> to memref<*xi32>
+ gpu.host_register %h0_unranked : memref<*xi32>
+
+ %v0 = constant 42 : i32
+ store %v0, %h0[%c0] : memref<?xi32>
+ store %v0, %h0[%c1] : memref<?xi32>
+
+ // copy h0 to b0 on device.
+ %t0, %f0 = async.execute () -> !async.value<memref<?xi32>> {
+ %b0 = gpu.alloc(%count) : memref<?xi32>
+ gpu.memcpy %b0, %h0 : memref<?xi32>, memref<?xi32>
+ async.yield %b0 : memref<?xi32>
+ }
+
+ // copy b0 to b1 and b2 (fork)
+ %t1, %f1 = async.execute [%t0] (
+ %f0 as %b0 : !async.value<memref<?xi32>>
+ ) -> !async.value<memref<?xi32>> {
+ %b1 = gpu.alloc(%count) : memref<?xi32>
+ gpu.memcpy %b1, %b0 : memref<?xi32>, memref<?xi32>
+ async.yield %b1 : memref<?xi32>
+ }
+ %t2, %f2 = async.execute [%t0] (
+ %f0 as %b0 : !async.value<memref<?xi32>>
+ ) -> !async.value<memref<?xi32>> {
+ %b2 = gpu.alloc(%count) : memref<?xi32>
+ gpu.memcpy %b2, %b0 : memref<?xi32>, memref<?xi32>
+ async.yield %b2 : memref<?xi32>
+ }
+
+ // h0 = b1 + b2 (join).
+ %t3 = async.execute [%t1, %t2] (
+ %f1 as %b1 : !async.value<memref<?xi32>>,
+ %f2 as %b2 : !async.value<memref<?xi32>>
+ ) {
+ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
+ threads(%tx, %ty, %tz) in (%block_x = %count, %block_y = %c1, %block_z = %c1) {
+ %v1 = load %b1[%tx] : memref<?xi32>
+ %v2 = load %b2[%tx] : memref<?xi32>
+ %sum = addi %v1, %v2 : i32
+ store %sum, %h0[%tx] : memref<?xi32>
+ gpu.terminator
+ }
+ async.yield
+ }
+
+ async.await %t3 : !async.token
+ // CHECK: [84, 84]
+ call @print_memref_i32(%h0_unranked) : (memref<*xi32>) -> ()
+ return
+}
+
+func private @print_memref_i32(memref<*xi32>)
diff --git a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
index 8707910e5a15..ceb30b8f0df9 100644
--- a/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
+++ b/mlir/tools/mlir-cuda-runner/mlir-cuda-runner.cpp
@@ -14,10 +14,12 @@
#include "llvm/ADT/STLExtras.h"
+#include "mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
+#include "mlir/Dialect/Async/Passes.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/GPU/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -118,7 +120,14 @@ static LogicalResult runMLIRPasses(ModuleOp m) {
kernelPm.addPass(createConvertGPUKernelToBlobPass(
translateModuleToNVVMIR, compilePtxToCubin, "nvptx64-nvidia-cuda",
"sm_35", "+ptx60", gpuBinaryAnnotation));
+ auto &funcPm = pm.nest<FuncOp>();
+ funcPm.addPass(createGpuAsyncRegionPass());
+ funcPm.addPass(createAsyncRefCountingPass());
pm.addPass(createGpuToLLVMConversionPass(gpuBinaryAnnotation));
+ pm.addPass(createAsyncToAsyncRuntimePass());
+ pm.addPass(createConvertAsyncToLLVMPass());
+ mlir::LowerToLLVMOptions lower_to_llvm_opts;
+ pm.addPass(mlir::createLowerToLLVMPass(lower_to_llvm_opts));
return pm.run(m);
}
More information about the Mlir-commits
mailing list