[Openmp-commits] [PATCH] D42686: [libomptarget] Only use CUDA Driver API

Jonas Hahnfeld via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Tue Jan 30 08:51:42 PST 2018


This revision was automatically updated to reflect the committed changes.
Closed by commit rL323786: [libomptarget] Only use CUDA Driver API (authored by Hahnfeld).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D42686?vs=131965&id=131976#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D42686

Files:
  openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp


Index: openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
===================================================================
--- openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
+++ openmp/trunk/libomptarget/plugins/cuda/src/rtl.cpp
@@ -14,7 +14,6 @@
 #include <cassert>
 #include <cstddef>
 #include <cuda.h>
-#include <cuda_runtime_api.h>
 #include <list>
 #include <string>
 #include <vector>
@@ -280,9 +279,9 @@
   }
 
   // scan properties to determine number of threads/block and blocks/grid.
-  struct cudaDeviceProp Properties;
-  cudaError_t error = cudaGetDeviceProperties(&Properties, device_id);
-  if (error != cudaSuccess) {
+  CUdevprop Properties;
+  err = cuDeviceGetProperties(&Properties, cuDevice);
+  if (err != CUDA_SUCCESS) {
     DP("Error getting device Properties, use defaults\n");
     DeviceInfo.BlocksPerGrid[device_id] = RTLDeviceInfoTy::DefaultNumTeams;
     DeviceInfo.ThreadsPerBlock[device_id] = RTLDeviceInfoTy::DefaultNumThreads;
@@ -314,8 +313,8 @@
           RTLDeviceInfoTy::HardThreadLimit);
     }
 
-    // Get warp size
-    DeviceInfo.WarpSize[device_id] = Properties.warpSize;
+    // According to the documentation, SIMDWidth is "Warp size in threads".
+    DeviceInfo.WarpSize[device_id] = Properties.SIMDWidth;
   }
 
   // Adjust teams to the env variables
@@ -678,17 +677,16 @@
   if (err != CUDA_SUCCESS) {
     DP("Device kernel launch failed!\n");
     CUDA_ERR_STRING(err);
-    assert(err == CUDA_SUCCESS && "Unable to launch target execution!");
     return OFFLOAD_FAIL;
   }
 
   DP("Launch of entry point at " DPxMOD " successful!\n",
       DPxPTR(tgt_entry_ptr));
 
-  cudaError_t sync_error = cudaDeviceSynchronize();
-  if (sync_error != cudaSuccess) {
-  DP("Kernel execution error at " DPxMOD ", %s.\n", DPxPTR(tgt_entry_ptr),
-      cudaGetErrorString(sync_error));
+  CUresult sync_err = cuCtxSynchronize();
+  if (sync_err != CUDA_SUCCESS) {
+    DP("Kernel execution error at " DPxMOD "!\n", DPxPTR(tgt_entry_ptr));
+    CUDA_ERR_STRING(sync_err);
     return OFFLOAD_FAIL;
   } else {
     DP("Kernel execution at " DPxMOD " successful!\n", DPxPTR(tgt_entry_ptr));


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D42686.131976.patch
Type: text/x-patch
Size: 2139 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20180130/af6626c3/attachment.bin>


More information about the Openmp-commits mailing list