[polly] r311289 - [GPUJIT] Switch from Runtime API calls for managed memory to Driver API calls.
Siddharth Bhat via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 20 06:38:04 PDT 2017
Author: bollu
Date: Sun Aug 20 06:38:04 2017
New Revision: 311289
URL: http://llvm.org/viewvc/llvm-project?rev=311289&view=rev
Log:
[GPUJIT] Switch from Runtime API calls for managed memory to Driver API calls.
We now load the function pointer for `cuMemAllocManaged` dynamically, so
it should be possible to compile and link `GPUJIT` on non-CUDA systems again.
Thanks to Philipp Schaad for noticing this inconsistency.
Differential Revision: https://reviews.llvm.org/D36921
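For context, a minimal sketch of the dynamic-loading pattern the commit relies
on: the driver library is opened at run time and the symbol is resolved with
dlsym, so no link-time dependency on libcuda is created. This is illustrative
only (the helper name resolveCuMemAllocManaged is made up; GPUJIT goes through
its own getAPIHandleCUDA wrapper), and it assumes cuda.h is available for the
type declarations:

/* Sketch: resolve cuMemAllocManaged at run time via dlopen/dlsym.
 * Only libdl is needed at link time; libcuda is loaded lazily. */
#include <cuda.h>
#include <dlfcn.h>
#include <stdio.h>

typedef CUresult CUDAAPI CuMemAllocManagedFcnTy(CUdeviceptr *, size_t,
                                                unsigned int);

static CuMemAllocManagedFcnTy *resolveCuMemAllocManaged(void) {
  void *Handle = dlopen("libcuda.so", RTLD_LAZY);
  if (!Handle) {
    fprintf(stderr, "could not open libcuda.so: %s\n", dlerror());
    return NULL;
  }
  return (CuMemAllocManagedFcnTy *)dlsym(Handle, "cuMemAllocManaged");
}

int main(void) {
  CuMemAllocManagedFcnTy *Fn = resolveCuMemAllocManaged();
  printf("cuMemAllocManaged %s\n", Fn ? "resolved" : "not available");
  return Fn ? 0 : 1;
}

On a machine without the CUDA driver this program still builds and runs; it
simply reports that the symbol is unavailable, which is the behavior the
patch restores for GPUJIT.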
Modified:
polly/trunk/tools/GPURuntime/GPUJIT.c
Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=311289&r1=311288&r2=311289&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Sun Aug 20 06:38:04 2017
@@ -941,6 +941,10 @@ static void *HandleCudaRT;
typedef CUresult CUDAAPI CuMemAllocFcnTy(CUdeviceptr *, size_t);
static CuMemAllocFcnTy *CuMemAllocFcnPtr;
+typedef CUresult CUDAAPI CuMemAllocManagedFcnTy(CUdeviceptr *, size_t,
+ unsigned int);
+static CuMemAllocManagedFcnTy *CuMemAllocManagedFcnPtr;
+
typedef CUresult CUDAAPI CuLaunchKernelFcnTy(
CUfunction F, unsigned int GridDimX, unsigned int GridDimY,
unsigned int gridDimZ, unsigned int blockDimX, unsigned int BlockDimY,
@@ -1081,6 +1085,9 @@ static int initialDeviceAPIsCUDA() {
CuMemAllocFcnPtr =
(CuMemAllocFcnTy *)getAPIHandleCUDA(HandleCuda, "cuMemAlloc_v2");
+ CuMemAllocManagedFcnPtr = (CuMemAllocManagedFcnTy *)getAPIHandleCUDA(
+ HandleCuda, "cuMemAllocManaged");
+
CuMemFreeFcnPtr =
(CuMemFreeFcnTy *)getAPIHandleCUDA(HandleCuda, "cuMemFree_v2");
@@ -1445,7 +1452,7 @@ void polly_freeManaged(void *mem) {
// If not, we pass it along to the underlying allocator.
// This is a hack, and can be removed if the underlying issue is fixed.
if (isManagedPtr(mem)) {
- if (cudaFree(mem) != cudaSuccess) {
+ if (CuMemFreeFcnPtr((size_t)mem) != CUDA_SUCCESS) {
fprintf(stderr, "cudaFree failed.\n");
exit(-1);
}
@@ -1465,15 +1472,18 @@ void *polly_mallocManaged(size_t size) {
fprintf(stderr, "cudaMallocManaged called with size 0. "
"Promoting to size 1");
size = max(size, 1);
- polly_initContextCUDA();
- dump_function();
- void *a;
- if (cudaMallocManaged(&a, size, cudaMemAttachGlobal) != cudaSuccess) {
+ PollyGPUContext *_ = polly_initContextCUDA();
+ assert(_ && "polly_initContextCUDA failed");
+
+ void *newMemPtr;
+ const CUresult Res = CuMemAllocManagedFcnPtr((CUdeviceptr *)&newMemPtr, size,
+ CU_MEM_ATTACH_GLOBAL);
+ if (Res != CUDA_SUCCESS) {
fprintf(stderr, "cudaMallocManaged failed for size: %zu\n", size);
exit(-1);
}
- addManagedPtr(a);
- return a;
+ addManagedPtr(newMemPtr);
+ return newMemPtr;
}
static void freeDeviceMemoryCUDA(PollyGPUDevicePtr *Allocation) {
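As a quick reference for what the patched polly_mallocManaged/polly_freeManaged
paths now do through the driver API, here is a rough standalone sketch (not
Polly's code; context setup and error handling are reduced to a minimum):

/* Sketch: allocate and free managed memory with the CUDA driver API,
 * mirroring the cuMemAllocManaged/cuMemFree sequence used above. */
#include <cuda.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  CUdevice Dev;
  CUcontext Ctx;
  CUdeviceptr DevPtr;

  if (cuInit(0) != CUDA_SUCCESS || cuDeviceGet(&Dev, 0) != CUDA_SUCCESS ||
      cuCtxCreate(&Ctx, 0, Dev) != CUDA_SUCCESS) {
    fprintf(stderr, "failed to initialize the CUDA driver API\n");
    return EXIT_FAILURE;
  }

  /* CU_MEM_ATTACH_GLOBAL makes the allocation accessible from any stream. */
  if (cuMemAllocManaged(&DevPtr, 1024, CU_MEM_ATTACH_GLOBAL) != CUDA_SUCCESS) {
    fprintf(stderr, "cuMemAllocManaged failed\n");
    return EXIT_FAILURE;
  }

  /* Managed memory is addressable from the host as well as the device. */
  ((char *)(uintptr_t)DevPtr)[0] = 42;

  cuMemFree(DevPtr);
  cuCtxDestroy(Ctx);
  return EXIT_SUCCESS;
}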