[llvm-branch-commits] [openmp] 95f0d1e - [libomptarget] Compile with older cuda, revert D95274
Jon Chesterfield via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jan 25 08:17:16 PST 2021
Author: Jon Chesterfield
Date: 2021-01-25T16:12:56Z
New Revision: 95f0d1edafe3e52a4057768f8cde5d55faf39d16
URL: https://github.com/llvm/llvm-project/commit/95f0d1edafe3e52a4057768f8cde5d55faf39d16
DIFF: https://github.com/llvm/llvm-project/commit/95f0d1edafe3e52a4057768f8cde5d55faf39d16.diff
LOG: [libomptarget] Compile with older cuda, revert D95274
[libomptarget] Compile with older cuda, revert D95274
Fixes the regression reported in the comments on D95274.
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D95367
Added:
Modified:
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
openmp/libomptarget/plugins/cuda/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
index ad67fe95c77e..cc7bc42412f6 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp
@@ -28,26 +28,26 @@ DLWRAP(cuFuncGetAttribute, 3);
DLWRAP(cuGetErrorString, 2);
DLWRAP(cuLaunchKernel, 11);
-DLWRAP(cuMemAlloc_v2, 2);
-DLWRAP(cuMemcpyDtoDAsync_v2, 4);
+DLWRAP(cuMemAlloc, 2);
+DLWRAP(cuMemcpyDtoDAsync, 4);
-DLWRAP(cuMemcpyDtoH_v2, 3);
-DLWRAP(cuMemcpyDtoHAsync_v2, 4);
-DLWRAP(cuMemcpyHtoD_v2, 3);
-DLWRAP(cuMemcpyHtoDAsync_v2, 4);
+DLWRAP(cuMemcpyDtoH, 3);
+DLWRAP(cuMemcpyDtoHAsync, 4);
+DLWRAP(cuMemcpyHtoD, 3);
+DLWRAP(cuMemcpyHtoDAsync, 4);
-DLWRAP(cuMemFree_v2, 1);
+DLWRAP(cuMemFree, 1);
DLWRAP(cuModuleGetFunction, 3);
-DLWRAP(cuModuleGetGlobal_v2, 4);
+DLWRAP(cuModuleGetGlobal, 4);
DLWRAP(cuModuleUnload, 1);
DLWRAP(cuStreamCreate, 2);
-DLWRAP(cuStreamDestroy_v2, 1);
+DLWRAP(cuStreamDestroy, 1);
DLWRAP(cuStreamSynchronize, 1);
DLWRAP(cuCtxSetCurrent, 1);
-DLWRAP(cuDevicePrimaryCtxRelease_v2, 1);
+DLWRAP(cuDevicePrimaryCtxRelease, 1);
DLWRAP(cuDevicePrimaryCtxGetState, 3);
-DLWRAP(cuDevicePrimaryCtxSetFlags_v2, 2);
+DLWRAP(cuDevicePrimaryCtxSetFlags, 2);
DLWRAP(cuDevicePrimaryCtxRetain, 2);
DLWRAP(cuModuleLoadDataEx, 5);
diff --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
index dd579a1f7490..832c26965144 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
@@ -48,6 +48,18 @@ typedef enum CUctx_flags_enum {
CU_CTX_SCHED_MASK = 0x07,
} CUctx_flags;
+#define cuMemFree cuMemFree_v2
+#define cuMemAlloc cuMemAlloc_v2
+#define cuMemcpyDtoH cuMemcpyDtoH_v2
+#define cuMemcpyHtoD cuMemcpyHtoD_v2
+#define cuStreamDestroy cuStreamDestroy_v2
+#define cuModuleGetGlobal cuModuleGetGlobal_v2
+#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
+#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
+#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
+#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
+#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
+
CUresult cuCtxGetDevice(CUdevice *);
CUresult cuDeviceGet(CUdevice *, int);
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
@@ -60,26 +72,26 @@ CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
unsigned, unsigned, unsigned, CUstream, void **,
void **);
-CUresult cuMemAlloc_v2(CUdeviceptr *, size_t);
-CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr, CUdeviceptr, size_t, CUstream);
+CUresult cuMemAlloc(CUdeviceptr *, size_t);
+CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
-CUresult cuMemcpyDtoH_v2(void *, CUdeviceptr, size_t);
-CUresult cuMemcpyDtoHAsync_v2(void *, CUdeviceptr, size_t, CUstream);
-CUresult cuMemcpyHtoD_v2(CUdeviceptr, const void *, size_t);
-CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr, const void *, size_t, CUstream);
+CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
+CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
+CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
+CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);
-CUresult cuMemFree_v2(CUdeviceptr);
+CUresult cuMemFree(CUdeviceptr);
CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
-CUresult cuModuleGetGlobal_v2(CUdeviceptr *, size_t *, CUmodule, const char *);
+CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);
CUresult cuModuleUnload(CUmodule);
CUresult cuStreamCreate(CUstream *, unsigned);
-CUresult cuStreamDestroy_v2(CUstream);
+CUresult cuStreamDestroy(CUstream);
CUresult cuStreamSynchronize(CUstream);
CUresult cuCtxSetCurrent(CUcontext);
-CUresult cuDevicePrimaryCtxRelease_v2(CUdevice);
+CUresult cuDevicePrimaryCtxRelease(CUdevice);
CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
-CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice, unsigned);
+CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
void **);
diff --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index f83c9df920aa..e4ac1e0820e6 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -110,8 +110,8 @@ bool checkResult(CUresult Err, const char *ErrMsg) {
int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
CUstream Stream) {
- CUresult Err = cuMemcpyDtoDAsync_v2((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr,
- Size, Stream);
+ CUresult Err =
+ cuMemcpyDtoDAsync((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, Size, Stream);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from device to device. Pointers: src "
@@ -207,8 +207,8 @@ class StreamManagerTy {
for (CUstream &S : StreamPool[I]) {
if (S)
- checkResult(cuStreamDestroy_v2(S),
- "Error returned from cuStreamDestroy_v2\n");
+ checkResult(cuStreamDestroy(S),
+ "Error returned from cuStreamDestroy\n");
}
}
}
@@ -311,8 +311,8 @@ class DeviceRTLTy {
return nullptr;
CUdeviceptr DevicePtr;
- Err = cuMemAlloc_v2(&DevicePtr, Size);
- if (!checkResult(Err, "Error returned from cuMemAlloc_v2\n"))
+ Err = cuMemAlloc(&DevicePtr, Size);
+ if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
return nullptr;
return (void *)DevicePtr;
@@ -323,8 +323,8 @@ class DeviceRTLTy {
if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
return OFFLOAD_FAIL;
- Err = cuMemFree_v2((CUdeviceptr)TgtPtr);
- if (!checkResult(Err, "Error returned from cuMemFree_v2\n"))
+ Err = cuMemFree((CUdeviceptr)TgtPtr);
+ if (!checkResult(Err, "Error returned from cuMemFree\n"))
return OFFLOAD_FAIL;
return OFFLOAD_SUCCESS;
@@ -466,8 +466,8 @@ class DeviceRTLTy {
CUdevice Device;
checkResult(cuCtxGetDevice(&Device),
"Error returned from cuCtxGetDevice\n");
- checkResult(cuDevicePrimaryCtxRelease_v2(Device),
- "Error returned from cuDevicePrimaryCtxRelease_v2\n");
+ checkResult(cuDevicePrimaryCtxRelease(Device),
+ "Error returned from cuDevicePrimaryCtxRelease\n");
}
}
}
@@ -506,9 +506,8 @@ class DeviceRTLTy {
} else {
DP("The primary context is inactive, set its flags to "
"CU_CTX_SCHED_BLOCKING_SYNC\n");
- Err = cuDevicePrimaryCtxSetFlags_v2(Device, CU_CTX_SCHED_BLOCKING_SYNC);
- if (!checkResult(Err,
- "Error returned from cuDevicePrimaryCtxSetFlags_v2\n"))
+ Err = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC);
+ if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxSetFlags\n"))
return OFFLOAD_FAIL;
}
@@ -657,7 +656,7 @@ class DeviceRTLTy {
__tgt_offload_entry Entry = *E;
CUdeviceptr CUPtr;
size_t CUSize;
- Err = cuModuleGetGlobal_v2(&CUPtr, &CUSize, Module, E->name);
+ Err = cuModuleGetGlobal(&CUPtr, &CUSize, Module, E->name);
// We keep this style here because we need the name
if (Err != CUDA_SUCCESS) {
REPORT("Loading global '%s' Failed\n", E->name);
@@ -689,7 +688,7 @@ class DeviceRTLTy {
// If unified memory is present any target link or to variables
// can access host addresses directly. There is no longer a
// need for device copies.
- cuMemcpyHtoD_v2(CUPtr, E->addr, sizeof(void *));
+ cuMemcpyHtoD(CUPtr, E->addr, sizeof(void *));
DP("Copy linked variable host address (" DPxMOD
") to device address (" DPxMOD ")\n",
DPxPTR(*((void **)E->addr)), DPxPTR(CUPtr));
@@ -720,7 +719,7 @@ class DeviceRTLTy {
CUdeviceptr ExecModePtr;
size_t CUSize;
- Err = cuModuleGetGlobal_v2(&ExecModePtr, &CUSize, Module, ExecModeName);
+ Err = cuModuleGetGlobal(&ExecModePtr, &CUSize, Module, ExecModeName);
if (Err == CUDA_SUCCESS) {
if (CUSize != sizeof(int8_t)) {
DP("Loading global exec_mode '%s' - size mismatch (%zd != %zd)\n",
@@ -728,7 +727,7 @@ class DeviceRTLTy {
return nullptr;
}
- Err = cuMemcpyDtoH_v2(&ExecModeVal, ExecModePtr, CUSize);
+ Err = cuMemcpyDtoH(&ExecModeVal, ExecModePtr, CUSize);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from device to host. Pointers: "
"host = " DPxMOD ", device = " DPxMOD ", size = %zd\n",
@@ -769,7 +768,7 @@ class DeviceRTLTy {
CUdeviceptr DeviceEnvPtr;
size_t CUSize;
- Err = cuModuleGetGlobal_v2(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName);
+ Err = cuModuleGetGlobal(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName);
if (Err == CUDA_SUCCESS) {
if (CUSize != sizeof(DeviceEnv)) {
REPORT(
@@ -779,7 +778,7 @@ class DeviceRTLTy {
return nullptr;
}
- Err = cuMemcpyHtoD_v2(DeviceEnvPtr, &DeviceEnv, CUSize);
+ Err = cuMemcpyHtoD(DeviceEnvPtr, &DeviceEnv, CUSize);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from host to device. Pointers: "
"host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
@@ -817,7 +816,7 @@ class DeviceRTLTy {
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
- Err = cuMemcpyHtoDAsync_v2((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
+ Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from host to device. Pointers: host "
"= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",
@@ -839,7 +838,7 @@ class DeviceRTLTy {
CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
- Err = cuMemcpyDtoHAsync_v2(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
+ Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
if (Err != CUDA_SUCCESS) {
REPORT("Error when copying data from device to host. Pointers: host "
"= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",
More information about the llvm-branch-commits mailing list