[Mlir-commits] [mlir] b4117fe - Fix CUDA runtime wrapper for GPU mem alloc/free to async
Uday Bondhugula
llvmlistbot at llvm.org
Mon Apr 11 21:04:15 PDT 2022
Author: Uday Bondhugula
Date: 2022-04-12T09:04:02+05:30
New Revision: b4117fede20b8c649320ad37364ae208baa0d0e7
URL: https://github.com/llvm/llvm-project/commit/b4117fede20b8c649320ad37364ae208baa0d0e7
DIFF: https://github.com/llvm/llvm-project/commit/b4117fede20b8c649320ad37364ae208baa0d0e7.diff
LOG: Fix CUDA runtime wrapper for GPU mem alloc/free to async
Switch CUDA runtime wrapper for GPU mem alloc/free to async. The
semantics of the GPU dialect ops (gpu.alloc/dealloc) and of the wrappers
they are lowered to (gpu-to-llvm) were those of the async versions --
however, the wrappers were being incorrectly mapped to cuMemAlloc/cuMemFree
instead of cuMemAllocAsync/cuMemFreeAsync.
Reviewed By: csigg
Differential Revision: https://reviews.llvm.org/D123482
Added:
Modified:
mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
Removed:
################################################################################
diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index 44ed5b0cd2057..18b6c589cb91f 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -18,6 +18,12 @@
#include "cuda.h"
+// We need to know the CUDA version to determine how to map some of the runtime
+// calls below.
+#if !defined(CUDA_VERSION)
+#error "cuda.h did not define CUDA_VERSION"
+#endif
+
#ifdef _WIN32
#define MLIR_CUDA_WRAPPERS_EXPORT __declspec(dllexport)
#else
@@ -134,15 +140,28 @@ extern MLIR_CUDA_WRAPPERS_EXPORT "C" void mgpuEventRecord(CUevent event,
CUDA_REPORT_IF_ERROR(cuEventRecord(event, stream));
}
-extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream /*stream*/) {
+extern "C" void *mgpuMemAlloc(uint64_t sizeBytes, CUstream stream) {
ScopedContext scopedContext;
CUdeviceptr ptr;
+#if CUDA_VERSION >= 11020
+ // Use the async version that was available since CUDA 11.2.
+ CUDA_REPORT_IF_ERROR(cuMemAllocAsync(&ptr, sizeBytes, stream));
+#else
CUDA_REPORT_IF_ERROR(cuMemAlloc(&ptr, sizeBytes));
+ (void)stream;
+#endif
return reinterpret_cast<void *>(ptr);
}
-extern "C" void mgpuMemFree(void *ptr, CUstream /*stream*/) {
+extern "C" void mgpuMemFree(void *ptr, CUstream stream) {
+#if CUDA_VERSION >= 11020
+ // Use the async version that was available since CUDA 11.2.
+ CUDA_REPORT_IF_ERROR(
+ cuMemFreeAsync(reinterpret_cast<CUdeviceptr>(ptr), stream));
+#else
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(ptr)));
+ (void)stream;
+#endif
}
extern "C" void mgpuMemcpy(void *dst, void *src, size_t sizeBytes,
More information about the Mlir-commits
mailing list