[Openmp-commits] [PATCH] D122764: [OpenMP][CUDA] Fix the issue that P2P memcpy doesn't work

Shilei Tian via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Wed Mar 30 15:20:25 PDT 2022


tianshilei1992 updated this revision to Diff 419278.
tianshilei1992 added a comment.

Set the context back to the source device before falling back to the D2D memcpy, in case enabling peer access fails after switching to the destination device's context.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D122764/new/

https://reviews.llvm.org/D122764

Files:
  openmp/libomptarget/plugins/cuda/src/rtl.cpp


Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1047,6 +1047,26 @@
         return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
       }
 
+      // Switch to destination context to enable peer access.
+      if (setContext(DstDevId) != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
+
+      Err = cuCtxEnablePeerAccess(DeviceData[SrcDevId].Context, 0);
+      if (Err != CUDA_SUCCESS) {
+        REPORT("Error returned from cuCtxEnablePeerAccess. src = %" PRId32
+               ", dst = %" PRId32 "\n",
+               SrcDevId, DstDevId);
+        CUDA_ERR_STRING(Err);
+        // Switch back to source context to issue D2D.
+        if (setContext(SrcDevId) != OFFLOAD_SUCCESS)
+          return OFFLOAD_FAIL;
+        return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
+      }
+
+      // Switch back to source context to issue memcpy.
+      if (setContext(SrcDevId) != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
+
       Err = cuMemcpyPeerAsync((CUdeviceptr)DstPtr, DeviceData[DstDevId].Context,
                               (CUdeviceptr)SrcPtr, DeviceData[SrcDevId].Context,
                               Size, Stream);
@@ -1464,7 +1484,7 @@
     return OFFLOAD_SUCCESS;
   }
 
-  int setContext(int DeviceId) {
+  int setContext(int DeviceId) const {
     assert(InitializedFlags[DeviceId] && "Device is not initialized");
 
     CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
@@ -1598,8 +1618,10 @@
   assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
   assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
   assert(AsyncInfo && "AsyncInfo is nullptr");
-  // NOTE: We don't need to set context for data exchange as the device contexts
-  // are passed to CUDA function directly.
+
+  if (DeviceRTL.setContext(src_dev_id) != OFFLOAD_SUCCESS)
+    return OFFLOAD_FAIL;
+
   return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size,
                                 AsyncInfo);
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D122764.419278.patch
Type: text/x-patch
Size: 2150 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20220330/2032a144/attachment-0001.bin>


More information about the Openmp-commits mailing list