[Mlir-commits] [mlir] 01dc85c - [mlir][gpu] Adding gpu runtime wrapper functions for async execution.
Christian Sigg
llvmlistbot at llvm.org
Mon Oct 12 05:07:36 PDT 2020
Author: Christian Sigg
Date: 2020-10-12T14:07:27+02:00
New Revision: 01dc85c173cb72b7dc42971942eb00c0be88924f
URL: https://github.com/llvm/llvm-project/commit/01dc85c173cb72b7dc42971942eb00c0be88924f
DIFF: https://github.com/llvm/llvm-project/commit/01dc85c173cb72b7dc42971942eb00c0be88924f.diff
LOG: [mlir][gpu] Adding gpu runtime wrapper functions for async execution.
Reviewed By: herhut
Differential Revision: https://reviews.llvm.org/D89037
Added:
Modified:
mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
Removed:
################################################################################
diff --git a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
index b25c2643f52e..f7f5834e6351 100644
--- a/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
+++ b/mlir/lib/Conversion/GPUCommon/ConvertLaunchFuncToRuntimeCalls.cpp
@@ -113,10 +113,28 @@ class ConvertOpToGpuRuntimeCallPattern : public ConvertOpToLLVMPattern<OpTy> {
}};
FunctionCallBuilder streamCreateCallBuilder = {
"mgpuStreamCreate", llvmPointerType /* void *stream */, {}};
+ FunctionCallBuilder streamDestroyCallBuilder = {
+ "mgpuStreamDestroy", llvmVoidType, {llvmPointerType /* void *stream */}};
FunctionCallBuilder streamSynchronizeCallBuilder = {
"mgpuStreamSynchronize",
llvmVoidType,
{llvmPointerType /* void *stream */}};
+ FunctionCallBuilder streamWaitEventCallBuilder = {
+ "mgpuStreamWaitEvent",
+ llvmVoidType,
+ {llvmPointerType /* void *stream */, llvmPointerType /* void *event */}};
+ FunctionCallBuilder eventCreateCallBuilder = {
+ "mgpuEventCreate", llvmPointerType /* void *event */, {}};
+ FunctionCallBuilder eventDestroyCallBuilder = {
+ "mgpuEventDestroy", llvmVoidType, {llvmPointerType /* void *event */}};
+ FunctionCallBuilder eventSynchronizeCallBuilder = {
+ "mgpuEventSynchronize",
+ llvmVoidType,
+ {llvmPointerType /* void *event */}};
+ FunctionCallBuilder eventRecordCallBuilder = {
+ "mgpuEventRecord",
+ llvmVoidType,
+ {llvmPointerType /* void *event */, llvmPointerType /* void *stream */}};
FunctionCallBuilder hostRegisterCallBuilder = {
"mgpuMemHostRegisterMemRef",
llvmVoidType,
diff --git a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
index 517fc9fc18f5..a32c37d96a42 100644
--- a/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-cuda-runner/cuda-runtime-wrappers.cpp
@@ -32,6 +32,15 @@
llvm::errs() << "'" << #expr << "' failed with '" << name << "'\n"; \
}(expr)
+// Static initialization of CUDA context for device ordinal 0.
+static auto InitializeCtx = [] {
+ CUdevice device;
+ CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/0));
+ CUcontext context;
+ CUDA_REPORT_IF_ERROR(cuCtxCreate(&context, /*flags=*/0, device));
+ return 0;
+}();
+
extern "C" CUmodule mgpuModuleLoad(void *data) {
CUmodule module = nullptr;
CUDA_REPORT_IF_ERROR(cuModuleLoadData(&module, data));
@@ -63,10 +72,36 @@ extern "C" CUstream mgpuStreamCreate() {
return stream;
}
+extern "C" void mgpuStreamDestroy(CUstream stream) {
+ CUDA_REPORT_IF_ERROR(cuStreamDestroy(stream));
+}
+
extern "C" void mgpuStreamSynchronize(CUstream stream) {
CUDA_REPORT_IF_ERROR(cuStreamSynchronize(stream));
}
+extern "C" void mgpuStreamWaitEvent(CUstream stream, CUevent event) {
+ CUDA_REPORT_IF_ERROR(cuStreamWaitEvent(stream, event, /*flags=*/0));
+}
+
+extern "C" CUevent mgpuEventCreate() {
+ CUevent event = nullptr;
+ CUDA_REPORT_IF_ERROR(cuEventCreate(&event, CU_EVENT_DISABLE_TIMING));
+ return event;
+}
+
+extern "C" void mgpuEventDestroy(CUevent event) {
+ CUDA_REPORT_IF_ERROR(cuEventDestroy(event));
+}
+
+extern "C" void mgpuEventSynchronize(CUevent event) {
+ CUDA_REPORT_IF_ERROR(cuEventSynchronize(event));
+}
+
+extern "C" void mgpuEventRecord(CUevent event, CUstream stream) {
+ CUDA_REPORT_IF_ERROR(cuEventRecord(event, stream));
+}
+
/// Helper functions for writing mlir example code
// Allows to register byte array with the CUDA runtime. Helpful until we have
diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
index 9184c9fa20fa..999b80c21fbc 100644
--- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
+++ b/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
@@ -31,6 +31,15 @@
llvm::errs() << "'" << #expr << "' failed with '" << name << "'\n"; \
}(expr)
+// Static initialization of HIP context for device ordinal 0.
+static auto InitializeCtx = [] {
+ hipDevice_t device;
+ HIP_REPORT_IF_ERROR(hipDeviceGet(&device, /*ordinal=*/0));
+ hipContext_t context;
+ HIP_REPORT_IF_ERROR(hipCtxCreate(&context, /*flags=*/0, device));
+ return 0;
+}();
+
extern "C" hipModule_t mgpuModuleLoad(void *data) {
hipModule_t module = nullptr;
HIP_REPORT_IF_ERROR(hipModuleLoadData(&module, data));
@@ -58,16 +67,42 @@ extern "C" void mgpuLaunchKernel(hipFunction_t function, intptr_t gridX,
stream, params, extra));
}
-extern "C" void *mgpuStreamCreate() {
+extern "C" hipStream_t mgpuStreamCreate() {
hipStream_t stream = nullptr;
HIP_REPORT_IF_ERROR(hipStreamCreate(&stream));
return stream;
}
+extern "C" void mgpuStreamDestroy(hipStream_t stream) {
+ HIP_REPORT_IF_ERROR(hipStreamDestroy(stream));
+}
+
extern "C" void mgpuStreamSynchronize(hipStream_t stream) {
return HIP_REPORT_IF_ERROR(hipStreamSynchronize(stream));
}
+extern "C" void mgpuStreamWaitEvent(hipStream_t stream, hipEvent_t event) {
+ HIP_REPORT_IF_ERROR(hipStreamWaitEvent(stream, event, /*flags=*/0));
+}
+
+extern "C" hipEvent_t mgpuEventCreate() {
+ hipEvent_t event = nullptr;
+ HIP_REPORT_IF_ERROR(hipEventCreateWithFlags(&event, hipEventDisableTiming));
+ return event;
+}
+
+extern "C" void mgpuEventDestroy(hipEvent_t event) {
+ HIP_REPORT_IF_ERROR(hipEventDestroy(event));
+}
+
+extern "C" void mgpuEventSynchronize(hipEvent_t event) {
+ HIP_REPORT_IF_ERROR(hipEventSynchronize(event));
+}
+
+extern "C" void mgpuEventRecord(hipEvent_t event, hipStream_t stream) {
+ HIP_REPORT_IF_ERROR(hipEventRecord(event, stream));
+}
+
/// Helper functions for writing mlir example code
// Allows to register byte array with the ROCM runtime. Helpful until we have
More information about the Mlir-commits mailing list