[Openmp-commits] [PATCH] D122764: [OpenMP][CUDA] Fix the issue that P2P memcpy doesn't work

Shilei Tian via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Wed Mar 30 14:56:26 PDT 2022


tianshilei1992 created this revision.
Herald added subscribers: carlosgalvezp, guansong, yaxunl.
Herald added a project: All.
tianshilei1992 requested review of this revision.
Herald added a reviewer: jdoerfert.
Herald added subscribers: openmp-commits, sstefan1.
Herald added a project: OpenMP.

This patch fixes the issue that P2P memcpy doesn't work. The root cause
is that peer access has to be enabled bi-directionally, which we didn't do before.
I'm not sure if this is something new in the latest CUDA.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D122764

Files:
  openmp/libomptarget/plugins/cuda/src/rtl.cpp


Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1047,6 +1047,23 @@
         return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
       }
 
+      // Switch to destination context to enable peer access.
+      if (setContext(DstDevId) != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
+
+      Err = cuCtxEnablePeerAccess(DeviceData[SrcDevId].Context, 0);
+      if (Err != CUDA_SUCCESS) {
+        REPORT("Error returned from cuCtxEnablePeerAccess. src = %" PRId32
+               ", dst = %" PRId32 "\n",
+               SrcDevId, DstDevId);
+        CUDA_ERR_STRING(Err);
+        return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
+      }
+
+      // Switch back to source context to issue memcpy.
+      if (setContext(SrcDevId) != OFFLOAD_SUCCESS)
+        return OFFLOAD_FAIL;
+
       Err = cuMemcpyPeerAsync((CUdeviceptr)DstPtr, DeviceData[DstDevId].Context,
                               (CUdeviceptr)SrcPtr, DeviceData[SrcDevId].Context,
                               Size, Stream);
@@ -1464,7 +1481,7 @@
     return OFFLOAD_SUCCESS;
   }
 
-  int setContext(int DeviceId) {
+  int setContext(int DeviceId) const {
     assert(InitializedFlags[DeviceId] && "Device is not initialized");
 
     CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
@@ -1597,9 +1614,11 @@
                                       __tgt_async_info *AsyncInfo) {
   assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
   assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
+
   assert(AsyncInfo && "AsyncInfo is nullptr");
-  // NOTE: We don't need to set context for data exchange as the device contexts
-  // are passed to CUDA function directly.
+  if (DeviceRTL.setContext(src_dev_id) != OFFLOAD_SUCCESS)
+    return OFFLOAD_FAIL;
+
   return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size,
                                 AsyncInfo);
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D122764.419272.patch
Type: text/x-patch
Size: 2078 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20220330/5637942a/attachment.bin>


More information about the Openmp-commits mailing list