[Openmp-commits] [PATCH] D122764: [OpenMP][CUDA] Fix the issue that P2P memcpy doesn't work
Shilei Tian via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Wed Mar 30 15:20:25 PDT 2022
tianshilei1992 updated this revision to Diff 419278.
tianshilei1992 added a comment.
Set the context back to the source device before issuing the D2D memcpy, in case of any failure after switching to the destination device.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D122764/new/
https://reviews.llvm.org/D122764
Files:
openmp/libomptarget/plugins/cuda/src/rtl.cpp
Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -1047,6 +1047,26 @@
return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
}
+ // Switch to destination context to enable peer access.
+ if (setContext(DstDevId) != OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+
+ Err = cuCtxEnablePeerAccess(DeviceData[SrcDevId].Context, 0);
+ if (Err != CUDA_SUCCESS) {
+ REPORT("Error returned from cuCtxEnablePeerAccess. src = %" PRId32
+ ", dst = %" PRId32 "\n",
+ SrcDevId, DstDevId);
+ CUDA_ERR_STRING(Err);
+ // Switch back to source context to issue D2D.
+ if (setContext(SrcDevId) != OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+ return memcpyDtoD(SrcPtr, DstPtr, Size, Stream);
+ }
+
+ // Switch back to source context to issue memcpy.
+ if (setContext(SrcDevId) != OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+
Err = cuMemcpyPeerAsync((CUdeviceptr)DstPtr, DeviceData[DstDevId].Context,
(CUdeviceptr)SrcPtr, DeviceData[SrcDevId].Context,
Size, Stream);
@@ -1464,7 +1484,7 @@
return OFFLOAD_SUCCESS;
}
- int setContext(int DeviceId) {
+ int setContext(int DeviceId) const {
assert(InitializedFlags[DeviceId] && "Device is not initialized");
CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context);
@@ -1598,8 +1618,10 @@
assert(DeviceRTL.isValidDeviceId(src_dev_id) && "src_dev_id is invalid");
assert(DeviceRTL.isValidDeviceId(dst_dev_id) && "dst_dev_id is invalid");
assert(AsyncInfo && "AsyncInfo is nullptr");
- // NOTE: We don't need to set context for data exchange as the device contexts
- // are passed to CUDA function directly.
+
+ if (DeviceRTL.setContext(src_dev_id) != OFFLOAD_SUCCESS)
+ return OFFLOAD_FAIL;
+
return DeviceRTL.dataExchange(src_dev_id, src_ptr, dst_dev_id, dst_ptr, size,
AsyncInfo);
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D122764.419278.patch
Type: text/x-patch
Size: 2150 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20220330/2032a144/attachment-0001.bin>
More information about the Openmp-commits
mailing list