[PATCH] D70010: [OpenMP][Offloading] Replaced default stream with an actual per-device unblocking stream in NVPTX implementation

Shilei Tian via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 8 07:58:44 PST 2019


tianshilei1992 created this revision.
tianshilei1992 added reviewers: jdoerfert, hfinkel.
tianshilei1992 added a project: OpenMP.
Herald added subscribers: llvm-commits, guansong.
Herald added a project: LLVM.
tianshilei1992 retitled this revision from "[OpenMP][Offlloading] Replaced default stream with an actual per-device unblocking stream in NVPTX implementation" to "[OpenMP][Offloading] Replaced default stream with an actual per-device unblocking stream in NVPTX implementation".

Repository:
  rL LLVM

https://reviews.llvm.org/D70010

Files:
  openmp/libomptarget/plugins/cuda/src/rtl.cpp


Index: openmp/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/libomptarget/plugins/cuda/src/rtl.cpp
@@ -95,6 +95,7 @@
   int NumberOfDevices;
   std::vector<CUmodule> Modules;
   std::vector<CUcontext> Contexts;
+  std::vector<CUstream> Streams;
 
   // Device properties
   std::vector<int> ThreadsPerBlock;
@@ -205,6 +206,7 @@
 
     FuncGblEntries.resize(NumberOfDevices);
     Contexts.resize(NumberOfDevices);
+    Streams.resize(NumberOfDevices);
     ThreadsPerBlock.resize(NumberOfDevices);
     BlocksPerGrid.resize(NumberOfDevices);
     WarpSize.resize(NumberOfDevices);
@@ -253,6 +255,16 @@
           CUDA_ERR_STRING(err);
         }
       }
+
+    // Destroy streams
+    for (auto &stream : Streams)
+      if (stream) {
+        CUresult err = cuStreamDestroy(stream);
+        if (err != CUDA_SUCCESS) {
+          DP("Error when destroying CUDA stream\n");
+          CUDA_ERR_STRING(err);
+        }
+      }
   }
 };
 
@@ -294,6 +306,22 @@
     return OFFLOAD_FAIL;
   }
 
+  // Make this device's context current so the stream below is created in it
+  err = cuCtxSetCurrent(DeviceInfo.Contexts[device_id]);
+  if (err != CUDA_SUCCESS) {
+    DP("Error when setting current CUDA context\n");
+    CUDA_ERR_STRING(err);
+    return OFFLOAD_FAIL;
+  }
+
+  // Create a non-blocking stream for this device
+  err = cuStreamCreate(&DeviceInfo.Streams[device_id], CU_STREAM_NON_BLOCKING);
+  if (err != CUDA_SUCCESS) {
+    DP("Error when creating CUDA stream\n");
+    CUDA_ERR_STRING(err);
+    return OFFLOAD_FAIL;
+  }
+
   // Query attributes to determine number of threads/block and blocks/grid.
   int maxGridDimX;
   err = cuDeviceGetAttribute(&maxGridDimX, CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X,
@@ -756,7 +784,8 @@
      cudaThreadsPerBlock);
 
   err = cuLaunchKernel(KernelInfo->Func, cudaBlocksPerGrid, 1, 1,
-      cudaThreadsPerBlock, 1, 1, 0 /*bytes of shared memory*/, 0, &args[0], 0);
+                       cudaThreadsPerBlock, 1, 1, 0 /*bytes of shared memory*/,
+                       DeviceInfo.Streams[device_id], &args[0], 0);
   if (err != CUDA_SUCCESS) {
     DP("Device kernel launch failed!\n");
     CUDA_ERR_STRING(err);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D70010.228459.patch
Type: text/x-patch
Size: 2259 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191108/4d9dad51/attachment.bin>


More information about the llvm-commits mailing list