[PATCH] D70010: [OpenMP][Offloading] Replaced default stream with an actual per-device unblocking stream in NVPTX implementation
Shilei Tian via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 8 07:58:44 PST 2019
tianshilei1992 created this revision.
tianshilei1992 added reviewers: jdoerfert, hfinkel.
tianshilei1992 added a project: OpenMP.
Herald added subscribers: llvm-commits, guansong.
Herald added a project: LLVM.
tianshilei1992 retitled this revision from "[OpenMP][Offlloading] Replaced default stream with an actual per-device unblocking stream in NVPTX implementation" to "[OpenMP][Offloading] Replaced default stream with an actual per-device unblocking stream in NVPTX implementation".
Repository:
rL LLVM
https://reviews.llvm.org/D70010
Files:
openmp/libomptarget/plugins/cuda/src/rtl.cpp
Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -95,6 +95,7 @@
int NumberOfDevices;
std::vector<CUmodule> Modules;
std::vector<CUcontext> Contexts;
+ std::vector<CUstream> Streams;
// Device properties
std::vector<int> ThreadsPerBlock;
@@ -205,6 +206,7 @@
FuncGblEntries.resize(NumberOfDevices);
Contexts.resize(NumberOfDevices);
+ Streams.resize(NumberOfDevices);
ThreadsPerBlock.resize(NumberOfDevices);
BlocksPerGrid.resize(NumberOfDevices);
WarpSize.resize(NumberOfDevices);
@@ -253,6 +255,16 @@
CUDA_ERR_STRING(err);
}
}
+
+ // Destroy streams
+ for (auto &stream : Streams)
+ if (stream) {
+ CUresult err = cuStreamDestroy(stream);
+ if (err != CUDA_SUCCESS) {
+ DP("Error when destroying CUDA stream\n");
+ CUDA_ERR_STRING(err);
+ }
+ }
}
};
@@ -294,6 +306,22 @@
return OFFLOAD_FAIL;
}
+ // Set current context for later creating corresponding stream
+ err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
+ if (err != CUDA_SUCCESS) {
+ DP("Error when setting current CUDA context\n");
+ CUDA_ERR_STRING(err);
+ return OFFLOAD_FAIL;
+ }
+
+ //Create a stream for each device
+ err = cuStreamCreate(&DeviceInfo.Streams[device_id], CU_STREAM_NON_BLOCKING);
+ if (err != CUDA_SUCCESS) {
+ DP("Error when creating CUDA stream\n");
+ CUDA_ERR_STRING(err);
+ return OFFLOAD_FAIL;
+ }
+
// Query attributes to determine number of threads/block and blocks/grid.
int maxGridDimX;
err = cuDeviceGetAttribute(&maxGridDimX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
@@ -756,7 +784,8 @@
cudaThreadsPerBlock);
err = cuLaunchKernel(KernelInfo->Func, cudaBlocksPerGrid, 1, 1,
- cudaThreadsPerBlock, 1, 1, 0 /*bytes of shared memory*/, 0, &args[0], 0);
+ cudaThreadsPerBlock, 1, 1, 0 /*bytes of shared memory*/,
+ DeviceInfo.Streams[device_id], &args[0], 0);
if (err != CUDA_SUCCESS) {
DP("Device kernel launch failed!\n");
CUDA_ERR_STRING(err);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D70010.228459.patch
Type: text/x-patch
Size: 2259 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191108/4d9dad51/attachment.bin>
More information about the llvm-commits mailing list