[Openmp-commits] [openmp] 78b0630 - [libomptarget][cuda] Call v2 functions explicitly

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Sat Jan 23 12:33:29 PST 2021


Author: Jon Chesterfield
Date: 2021-01-23T20:33:13Z
New Revision: 78b0630b72a9742d62b07cef912b72f1743bfae9

URL: https://github.com/llvm/llvm-project/commit/78b0630b72a9742d62b07cef912b72f1743bfae9
DIFF: https://github.com/llvm/llvm-project/commit/78b0630b72a9742d62b07cef912b72f1743bfae9.diff

LOG: [libomptarget][cuda] Call v2 functions explicitly

[libomptarget][cuda] Call v2 functions explicitly

rtl.cpp calls functions like cuMemFree that are replaced by a macro
in cuda.h with cuMemFree_v2. This patch changes the source to use
the v2 names consistently.

See also D95104, D95155 for the idea. Alternatives are to use a mixture,
e.g. call the macro names and explicitly dlopen the _v2 names, or to keep
the current status where the symbols are replaced by macros in both files.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D95274

Added: 
    

Modified: 
    openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
    openmp/libomptarget/plugins/cuda/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
index 832c26965144..dd579a1f7490 100644
--- a/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
+++ b/openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h
@@ -48,18 +48,6 @@ typedef enum CUctx_flags_enum {
   CU_CTX_SCHED_MASK = 0x07,
 } CUctx_flags;
 
-#define cuMemFree cuMemFree_v2
-#define cuMemAlloc cuMemAlloc_v2
-#define cuMemcpyDtoH cuMemcpyDtoH_v2
-#define cuMemcpyHtoD cuMemcpyHtoD_v2
-#define cuStreamDestroy cuStreamDestroy_v2
-#define cuModuleGetGlobal cuModuleGetGlobal_v2
-#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
-#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
-#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
-#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
-#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
-
 CUresult cuCtxGetDevice(CUdevice *);
 CUresult cuDeviceGet(CUdevice *, int);
 CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);
@@ -72,26 +60,26 @@ CUresult cuLaunchKernel(CUfunction, unsigned, unsigned, unsigned, unsigned,
                         unsigned, unsigned, unsigned, CUstream, void **,
                         void **);
 
-CUresult cuMemAlloc(CUdeviceptr *, size_t);
-CUresult cuMemcpyDtoDAsync(CUdeviceptr, CUdeviceptr, size_t, CUstream);
+CUresult cuMemAlloc_v2(CUdeviceptr *, size_t);
+CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr, CUdeviceptr, size_t, CUstream);
 
-CUresult cuMemcpyDtoH(void *, CUdeviceptr, size_t);
-CUresult cuMemcpyDtoHAsync(void *, CUdeviceptr, size_t, CUstream);
-CUresult cuMemcpyHtoD(CUdeviceptr, const void *, size_t);
-CUresult cuMemcpyHtoDAsync(CUdeviceptr, const void *, size_t, CUstream);
+CUresult cuMemcpyDtoH_v2(void *, CUdeviceptr, size_t);
+CUresult cuMemcpyDtoHAsync_v2(void *, CUdeviceptr, size_t, CUstream);
+CUresult cuMemcpyHtoD_v2(CUdeviceptr, const void *, size_t);
+CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr, const void *, size_t, CUstream);
 
-CUresult cuMemFree(CUdeviceptr);
+CUresult cuMemFree_v2(CUdeviceptr);
 CUresult cuModuleGetFunction(CUfunction *, CUmodule, const char *);
-CUresult cuModuleGetGlobal(CUdeviceptr *, size_t *, CUmodule, const char *);
+CUresult cuModuleGetGlobal_v2(CUdeviceptr *, size_t *, CUmodule, const char *);
 
 CUresult cuModuleUnload(CUmodule);
 CUresult cuStreamCreate(CUstream *, unsigned);
-CUresult cuStreamDestroy(CUstream);
+CUresult cuStreamDestroy_v2(CUstream);
 CUresult cuStreamSynchronize(CUstream);
 CUresult cuCtxSetCurrent(CUcontext);
-CUresult cuDevicePrimaryCtxRelease(CUdevice);
+CUresult cuDevicePrimaryCtxRelease_v2(CUdevice);
 CUresult cuDevicePrimaryCtxGetState(CUdevice, unsigned *, int *);
-CUresult cuDevicePrimaryCtxSetFlags(CUdevice, unsigned);
+CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice, unsigned);
 CUresult cuDevicePrimaryCtxRetain(CUcontext *, CUdevice);
 CUresult cuModuleLoadDataEx(CUmodule *, const void *, unsigned, void *,
                             void **);

diff  --git a/openmp/libomptarget/plugins/cuda/src/rtl.cpp b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
index e4ac1e0820e6..f83c9df920aa 100644
--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -110,8 +110,8 @@ bool checkResult(CUresult Err, const char *ErrMsg) {
 
 int memcpyDtoD(const void *SrcPtr, void *DstPtr, int64_t Size,
                CUstream Stream) {
-  CUresult Err =
-      cuMemcpyDtoDAsync((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr, Size, Stream);
+  CUresult Err = cuMemcpyDtoDAsync_v2((CUdeviceptr)DstPtr, (CUdeviceptr)SrcPtr,
+                                      Size, Stream);
 
   if (Err != CUDA_SUCCESS) {
     REPORT("Error when copying data from device to device. Pointers: src "
@@ -207,8 +207,8 @@ class StreamManagerTy {
 
       for (CUstream &S : StreamPool[I]) {
         if (S)
-          checkResult(cuStreamDestroy(S),
-                      "Error returned from cuStreamDestroy\n");
+          checkResult(cuStreamDestroy_v2(S),
+                      "Error returned from cuStreamDestroy_v2\n");
       }
     }
   }
@@ -311,8 +311,8 @@ class DeviceRTLTy {
         return nullptr;
 
       CUdeviceptr DevicePtr;
-      Err = cuMemAlloc(&DevicePtr, Size);
-      if (!checkResult(Err, "Error returned from cuMemAlloc\n"))
+      Err = cuMemAlloc_v2(&DevicePtr, Size);
+      if (!checkResult(Err, "Error returned from cuMemAlloc_v2\n"))
         return nullptr;
 
       return (void *)DevicePtr;
@@ -323,8 +323,8 @@ class DeviceRTLTy {
       if (!checkResult(Err, "Error returned from cuCtxSetCurrent\n"))
         return OFFLOAD_FAIL;
 
-      Err = cuMemFree((CUdeviceptr)TgtPtr);
-      if (!checkResult(Err, "Error returned from cuMemFree\n"))
+      Err = cuMemFree_v2((CUdeviceptr)TgtPtr);
+      if (!checkResult(Err, "Error returned from cuMemFree_v2\n"))
         return OFFLOAD_FAIL;
 
       return OFFLOAD_SUCCESS;
@@ -466,8 +466,8 @@ class DeviceRTLTy {
         CUdevice Device;
         checkResult(cuCtxGetDevice(&Device),
                     "Error returned from cuCtxGetDevice\n");
-        checkResult(cuDevicePrimaryCtxRelease(Device),
-                    "Error returned from cuDevicePrimaryCtxRelease\n");
+        checkResult(cuDevicePrimaryCtxRelease_v2(Device),
+                    "Error returned from cuDevicePrimaryCtxRelease_v2\n");
       }
     }
   }
@@ -506,8 +506,9 @@ class DeviceRTLTy {
     } else {
       DP("The primary context is inactive, set its flags to "
          "CU_CTX_SCHED_BLOCKING_SYNC\n");
-      Err = cuDevicePrimaryCtxSetFlags(Device, CU_CTX_SCHED_BLOCKING_SYNC);
-      if (!checkResult(Err, "Error returned from cuDevicePrimaryCtxSetFlags\n"))
+      Err = cuDevicePrimaryCtxSetFlags_v2(Device, CU_CTX_SCHED_BLOCKING_SYNC);
+      if (!checkResult(Err,
+                       "Error returned from cuDevicePrimaryCtxSetFlags_v2\n"))
         return OFFLOAD_FAIL;
     }
 
@@ -656,7 +657,7 @@ class DeviceRTLTy {
         __tgt_offload_entry Entry = *E;
         CUdeviceptr CUPtr;
         size_t CUSize;
-        Err = cuModuleGetGlobal(&CUPtr, &CUSize, Module, E->name);
+        Err = cuModuleGetGlobal_v2(&CUPtr, &CUSize, Module, E->name);
         // We keep this style here because we need the name
         if (Err != CUDA_SUCCESS) {
           REPORT("Loading global '%s' Failed\n", E->name);
@@ -688,7 +689,7 @@ class DeviceRTLTy {
           // If unified memory is present any target link or to variables
           // can access host addresses directly. There is no longer a
           // need for device copies.
-          cuMemcpyHtoD(CUPtr, E->addr, sizeof(void *));
+          cuMemcpyHtoD_v2(CUPtr, E->addr, sizeof(void *));
           DP("Copy linked variable host address (" DPxMOD
              ") to device address (" DPxMOD ")\n",
              DPxPTR(*((void **)E->addr)), DPxPTR(CUPtr));
@@ -719,7 +720,7 @@ class DeviceRTLTy {
 
       CUdeviceptr ExecModePtr;
       size_t CUSize;
-      Err = cuModuleGetGlobal(&ExecModePtr, &CUSize, Module, ExecModeName);
+      Err = cuModuleGetGlobal_v2(&ExecModePtr, &CUSize, Module, ExecModeName);
       if (Err == CUDA_SUCCESS) {
         if (CUSize != sizeof(int8_t)) {
           DP("Loading global exec_mode '%s' - size mismatch (%zd != %zd)\n",
@@ -727,7 +728,7 @@ class DeviceRTLTy {
           return nullptr;
         }
 
-        Err = cuMemcpyDtoH(&ExecModeVal, ExecModePtr, CUSize);
+        Err = cuMemcpyDtoH_v2(&ExecModeVal, ExecModePtr, CUSize);
         if (Err != CUDA_SUCCESS) {
           REPORT("Error when copying data from device to host. Pointers: "
                  "host = " DPxMOD ", device = " DPxMOD ", size = %zd\n",
@@ -768,7 +769,7 @@ class DeviceRTLTy {
       CUdeviceptr DeviceEnvPtr;
       size_t CUSize;
 
-      Err = cuModuleGetGlobal(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName);
+      Err = cuModuleGetGlobal_v2(&DeviceEnvPtr, &CUSize, Module, DeviceEnvName);
       if (Err == CUDA_SUCCESS) {
         if (CUSize != sizeof(DeviceEnv)) {
           REPORT(
@@ -778,7 +779,7 @@ class DeviceRTLTy {
           return nullptr;
         }
 
-        Err = cuMemcpyHtoD(DeviceEnvPtr, &DeviceEnv, CUSize);
+        Err = cuMemcpyHtoD_v2(DeviceEnvPtr, &DeviceEnv, CUSize);
         if (Err != CUDA_SUCCESS) {
           REPORT("Error when copying data from host to device. Pointers: "
                  "host = " DPxMOD ", device = " DPxMOD ", size = %zu\n",
@@ -816,7 +817,7 @@ class DeviceRTLTy {
 
     CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
 
-    Err = cuMemcpyHtoDAsync((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
+    Err = cuMemcpyHtoDAsync_v2((CUdeviceptr)TgtPtr, HstPtr, Size, Stream);
     if (Err != CUDA_SUCCESS) {
       REPORT("Error when copying data from host to device. Pointers: host "
              "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",
@@ -838,7 +839,7 @@ class DeviceRTLTy {
 
     CUstream Stream = getStream(DeviceId, AsyncInfoPtr);
 
-    Err = cuMemcpyDtoHAsync(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
+    Err = cuMemcpyDtoHAsync_v2(HstPtr, (CUdeviceptr)TgtPtr, Size, Stream);
     if (Err != CUDA_SUCCESS) {
       REPORT("Error when copying data from device to host. Pointers: host "
              "= " DPxMOD ", device = " DPxMOD ", size = %" PRId64 "\n",


        


More information about the Openmp-commits mailing list