[llvm-commits] [polly] r159725 - in /polly/trunk: CMakeLists.txt cmake/FindCUDA.cmake tools/GPURuntime/GPUJIT.c tools/GPURuntime/GPUJIT.h

Tobias Grosser grosser at fim.uni-passau.de
Wed Jul 4 14:45:04 PDT 2012


Author: grosser
Date: Wed Jul  4 16:45:03 2012
New Revision: 159725

URL: http://llvm.org/viewvc/llvm-project?rev=159725&view=rev
Log:
Replace CUDA data types with Polly's GPGPU data types.

Contributed by:  Yabin Hu  <yabin.hwu at gmail.com>

Modified:
    polly/trunk/CMakeLists.txt
    polly/trunk/cmake/FindCUDA.cmake
    polly/trunk/tools/GPURuntime/GPUJIT.c
    polly/trunk/tools/GPURuntime/GPUJIT.h

Modified: polly/trunk/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/CMakeLists.txt?rev=159725&r1=159724&r2=159725&view=diff
==============================================================================
--- polly/trunk/CMakeLists.txt (original)
+++ polly/trunk/CMakeLists.txt Wed Jul  4 16:45:03 2012
@@ -144,9 +144,6 @@
   if (SCOPLIB_FOUND)
     target_link_libraries( ${name} ${SCOPLIB_LIBRARY})
   endif(SCOPLIB_FOUND)
-  if (CUDALIB_FOUND)
-    target_link_libraries( ${name} ${CUDALIB_LIBRARY})
-  endif(CUDALIB_FOUND)
 
   if( LLVM_LINK_COMPONENTS )
     llvm_config(${name} ${LLVM_LINK_COMPONENTS})

Modified: polly/trunk/cmake/FindCUDA.cmake
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/cmake/FindCUDA.cmake?rev=159725&r1=159724&r2=159725&view=diff
==============================================================================
--- polly/trunk/cmake/FindCUDA.cmake (original)
+++ polly/trunk/cmake/FindCUDA.cmake Wed Jul  4 16:45:03 2012
@@ -8,13 +8,13 @@
 
 FIND_LIBRARY(CUDALIB_LIBRARY NAMES cuda)
 
-IF (CUDALIB_INCLUDE_DIR AND CUDALIB_LIBRARY)
+IF (CUDALIB_INCLUDE_DIR)
   SET(CUDALIB_FOUND TRUE)
-ENDIF (CUDALIB_INCLUDE_DIR AND CUDALIB_LIBRARY)
+ENDIF (CUDALIB_INCLUDE_DIR)
 
 IF (CUDALIB_FOUND)
   IF (NOT CUDA_FIND_QUIETLY)
-    MESSAGE(STATUS "Found CUDA: ${CUDALIB_LIBRARY}")
+    MESSAGE(STATUS "Found CUDA: ${CUDALIB_INCLUDE_DIR}")
   ENDIF (NOT CUDA_FIND_QUIETLY)
 ELSE (CUDALIB_FOUND)
   IF (CUDA_FIND_REQUIRED)

Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=159725&r1=159724&r2=159725&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Wed Jul  4 16:45:03 2012
@@ -12,9 +12,36 @@
 /******************************************************************************/
 
 #include "GPUJIT.h"
+#include <cuda.h>
+#include <cuda_runtime.h>
 #include <dlfcn.h>
 #include <stdio.h>
 
+/* Define Polly's GPGPU data types. */
+struct PollyGPUContextT {
+  CUcontext Cuda;
+};
+
+struct PollyGPUModuleT {
+  CUmodule Cuda;
+};
+
+struct PollyGPUFunctionT {
+  CUfunction Cuda;
+};
+
+struct PollyGPUDeviceT {
+  CUdevice Cuda;
+};
+
+struct PollyGPUDevicePtrT {
+  CUdeviceptr Cuda;
+};
+
+struct PollyGPUEventT {
+  cudaEvent_t Cuda;
+};
+
 /* Dynamic library handles for the CUDA and CUDA runtime library. */
 static void *HandleCuda;
 static void *HandleCudaRT;
@@ -218,7 +245,7 @@
   return 1;
 }
 
-void polly_initDevice(CUcontext *Context, CUdevice *Device) {
+void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) {
   int Major = 0, Minor = 0, DeviceID = 0;
   char DeviceName[256];
   int DeviceCount = 0;
@@ -242,85 +269,135 @@
   }
 
   /* We select the 1st device as default. */
-  CuDeviceGetFcnPtr(Device, 0);
+  *Device =  malloc(sizeof(PollyGPUDevice));
+  if (*Device == 0) {
+    fprintf(stdout, "Allocate memory for Polly GPU device failed.\n");
+    exit(-1);
+  }
+  CuDeviceGetFcnPtr(&((*Device)->Cuda), 0);
 
   /* Get compute capabilities and the device name. */
-  CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, *Device);
-  CuDeviceGetNameFcnPtr(DeviceName, 256, *Device);
+  CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, (*Device)->Cuda);
+  CuDeviceGetNameFcnPtr(DeviceName, 256, (*Device)->Cuda);
   fprintf(stderr, "> Running on GPU device %d : %s.\n", DeviceID, DeviceName);
 
   /* Create context on the device. */
-  CuCtxCreateFcnPtr(Context, 0, *Device);
+  *Context = malloc(sizeof(PollyGPUContext));
+  if (*Context == 0) {
+    fprintf(stdout, "Allocate memory for Polly GPU context failed.\n");
+    exit(-1);
+  }
+  CuCtxCreateFcnPtr(&((*Context)->Cuda), 0, (*Device)->Cuda);
 }
 
-void polly_getPTXModule(void *PTXBuffer, CUmodule *Module) {
-  if(CuModuleLoadDataExFcnPtr(Module, PTXBuffer, 0, 0, 0) != CUDA_SUCCESS) {
+void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module) {
+  *Module = malloc(sizeof(PollyGPUModule));
+  if (*Module == 0) {
+    fprintf(stdout, "Allocate memory for Polly GPU module failed.\n");
+    exit(-1);
+  }
+
+  if (CuModuleLoadDataExFcnPtr(&((*Module)->Cuda), PTXBuffer, 0, 0, 0)
+      != CUDA_SUCCESS) {
     fprintf(stdout, "Loading ptx assembly text failed.\n");
     exit(-1);
   }
 }
 
-void polly_getPTXKernelEntry(const char *KernelName, CUmodule *Module,
-                             CUfunction *Kernel) {
+void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
+                             PollyGPUFunction **Kernel) {
+  *Kernel = malloc(sizeof(PollyGPUFunction));
+  if (*Kernel == 0) {
+    fprintf(stdout, "Allocate memory for Polly GPU kernel failed.\n");
+    exit(-1);
+  }
+
   /* Locate the kernel entry point. */
-  if(CuModuleGetFunctionFcnPtr(Kernel, *Module, KernelName)
+  if(CuModuleGetFunctionFcnPtr(&((*Kernel)->Cuda), Module->Cuda, KernelName)
      !=  CUDA_SUCCESS) {
     fprintf(stdout, "Loading kernel function failed.\n");
     exit(-1);
   }
 }
 
-void polly_startTimerByCudaEvent(cudaEvent_t *StartTimer,
-                                 cudaEvent_t *StopTimer) {
-  CudaEventCreateFcnPtr(StartTimer);
-  CudaEventCreateFcnPtr(StopTimer);
-  CudaEventRecordFcnPtr(*StartTimer, 0);
+void polly_startTimerByCudaEvent(PollyGPUEvent **Start, PollyGPUEvent **Stop) {
+  *Start = malloc(sizeof(PollyGPUEvent));
+  if (*Start == 0) {
+    fprintf(stdout, "Allocate memory for Polly GPU start timer failed.\n");
+    exit(-1);
+  }
+  CudaEventCreateFcnPtr(&((*Start)->Cuda));
+
+  *Stop = malloc(sizeof(PollyGPUEvent));
+  if (*Stop == 0) {
+    fprintf(stdout, "Allocate memory for Polly GPU stop timer failed.\n");
+    exit(-1);
+  }
+  CudaEventCreateFcnPtr(&((*Stop)->Cuda));
+
+  /* Record the start time. */
+  CudaEventRecordFcnPtr((*Start)->Cuda, 0);
 }
 
-void polly_stopTimerByCudaEvent(cudaEvent_t *StartTimer,
-                                cudaEvent_t *StopTimer, float *ElapsedTimes) {
-  CudaEventRecordFcnPtr(*StopTimer, 0);
-  CudaEventSynchronizeFcnPtr(*StopTimer);
-  CudaEventElapsedTimeFcnPtr(ElapsedTimes, *StartTimer, *StopTimer );
-  CudaEventDestroyFcnPtr(*StartTimer);
-  CudaEventDestroyFcnPtr(*StopTimer);
+void polly_stopTimerByCudaEvent(PollyGPUEvent *Start, PollyGPUEvent *Stop,
+                                float *ElapsedTimes) {
+  /* Record the end time. */
+  CudaEventRecordFcnPtr(Stop->Cuda, 0);
+  CudaEventSynchronizeFcnPtr(Start->Cuda);
+  CudaEventSynchronizeFcnPtr(Stop->Cuda);
+  CudaEventElapsedTimeFcnPtr(ElapsedTimes, Start->Cuda, Stop->Cuda);
+  CudaEventDestroyFcnPtr(Start->Cuda);
+  CudaEventDestroyFcnPtr(Stop->Cuda);
   fprintf(stderr, "Processing time: %f (ms).\n", *ElapsedTimes);
+
+  free(Start);
+  free(Stop);
 }
 
-void polly_allocateMemoryForHostAndDevice(void **PtrHostData,
-                                          CUdeviceptr *PtrDevData,
+void polly_allocateMemoryForHostAndDevice(void **HostData,
+                                          PollyGPUDevicePtr **DevData,
                                           int MemSize) {
-  if ((*PtrHostData = (int *)malloc(MemSize)) == 0) {
+  if ((*HostData = (int *)malloc(MemSize)) == 0) {
     fprintf(stdout, "Could not allocate host memory.\n");
     exit(-1);
   }
-  CuMemAllocFcnPtr(PtrDevData, MemSize);
+
+  *DevData = malloc(sizeof(PollyGPUDevicePtr));
+  if (*DevData == 0) {
+    fprintf(stdout, "Allocate memory for GPU device memory pointer failed.\n");
+    exit(-1);
+  }
+  CuMemAllocFcnPtr(&((*DevData)->Cuda), MemSize);
 }
 
-void polly_copyFromHostToDevice(CUdeviceptr DevData, void *HostData,
+void polly_copyFromHostToDevice(PollyGPUDevicePtr *DevData, void *HostData,
                                 int MemSize) {
-  CuMemcpyHtoDFcnPtr(DevData, HostData, MemSize);
+  CUdeviceptr CuDevData = DevData->Cuda;
+  CuMemcpyHtoDFcnPtr(CuDevData, HostData, MemSize);
 }
 
-void polly_copyFromDeviceToHost(void *HostData, CUdeviceptr DevData,
+void polly_copyFromDeviceToHost(void *HostData, PollyGPUDevicePtr *DevData,
                                 int MemSize) {
-  if(CuMemcpyDtoHFcnPtr(HostData, DevData, MemSize) != CUDA_SUCCESS) {
+  if(CuMemcpyDtoHFcnPtr(HostData, DevData->Cuda, MemSize) != CUDA_SUCCESS) {
     fprintf(stdout, "Copying results from device to host memory failed.\n");
     exit(-1);
   }
 }
 
-void polly_setKernelParameters(CUfunction *Kernel, int BlockWidth,
-                               int BlockHeight, CUdeviceptr DevData) {
+void polly_setKernelParameters(PollyGPUFunction *Kernel, int BlockWidth,
+                               int BlockHeight, PollyGPUDevicePtr *DevData) {
   int ParamOffset = 0;
-  CuFuncSetBlockShapeFcnPtr(*Kernel, BlockWidth, BlockHeight, 1);
-  CuParamSetvFcnPtr(*Kernel, ParamOffset, &DevData, sizeof(DevData));
-  ParamOffset += sizeof(DevData);
-  CuParamSetSizeFcnPtr(*Kernel, ParamOffset);
+
+  CuFuncSetBlockShapeFcnPtr(Kernel->Cuda, BlockWidth, BlockHeight, 1);
+  CuParamSetvFcnPtr(Kernel->Cuda, ParamOffset, &(DevData->Cuda),
+                    sizeof(DevData->Cuda));
+  ParamOffset += sizeof(DevData->Cuda);
+  CuParamSetSizeFcnPtr(Kernel->Cuda, ParamOffset);
 }
 
-void polly_launchKernel(CUfunction *Kernel, int GridWidth, int GridHeight) {
-  if (CuLaunchGridFcnPtr(*Kernel, GridWidth, GridHeight) != CUDA_SUCCESS) {
+void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
+                        int GridHeight) {
+  if (CuLaunchGridFcnPtr(Kernel->Cuda, GridWidth, GridHeight) != CUDA_SUCCESS) {
     fprintf(stdout, "Launching CUDA kernel failed.\n");
     exit(-1);
   }
@@ -328,26 +405,32 @@
   fprintf(stdout, "CUDA kernel launched.\n");
 }
 
-void polly_cleanupGPGPUResources(void *HostData, CUdeviceptr DevData,
-                                 CUmodule *Module, CUcontext *Context) {
+void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
+                                 PollyGPUModule *Module,
+                                 PollyGPUContext *Context,
+                                 PollyGPUFunction *Kernel) {
   if (HostData) {
     free(HostData);
     HostData = 0;
   }
 
-  if (DevData) {
-    CuMemFreeFcnPtr(DevData);
-    DevData = 0;
+  if (DevData->Cuda) {
+    CuMemFreeFcnPtr(DevData->Cuda);
+    free(DevData);
+  }
+
+  if (Module->Cuda) {
+    CuModuleUnloadFcnPtr(Module->Cuda);
+    free(Module);
   }
 
-  if (*Module) {
-    CuModuleUnloadFcnPtr(*Module);
-    *Module = 0;
+  if (Context->Cuda) {
+    CuCtxDestroyFcnPtr(Context->Cuda);
+    free(Context);
   }
 
-  if (*Context) {
-    CuCtxDestroyFcnPtr(*Context);
-    *Context = 0;
+  if (Kernel) {
+    free(Kernel);
   }
 
   dlclose(HandleCuda);

Modified: polly/trunk/tools/GPURuntime/GPUJIT.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.h?rev=159725&r1=159724&r2=159725&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.h (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.h Wed Jul  4 16:45:03 2012
@@ -14,28 +14,93 @@
 #ifndef GPUJIT_H_
 #define GPUJIT_H_
 
-#include <cuda.h>
-#include <cuda_runtime.h>
+/*
+ * The following demonstrates how we can use the GPURuntime library to
+ * execute a GPU kernel.
+ *
+ * char KernelString[] = "\n\
+ *   .version 1.4\n\
+ *   .target sm_10, map_f64_to_f32\n\
+ *   .entry _Z8myKernelPi (\n\
+ *   .param .u64 __cudaparm__Z8myKernelPi_data)\n\
+ *   {\n\
+ *     .reg .u16 %rh<4>;\n\
+ *     .reg .u32 %r<5>;\n\
+ *     .reg .u64 %rd<6>;\n\
+ *     cvt.u32.u16     %r1, %tid.x;\n\
+ *     mov.u16         %rh1, %ctaid.x;\n\
+ *     mov.u16         %rh2, %ntid.x;\n\
+ *     mul.wide.u16    %r2, %rh1, %rh2;\n\
+ *     add.u32         %r3, %r1, %r2;\n\
+ *     ld.param.u64    %rd1, [__cudaparm__Z8myKernelPi_data];\n\
+ *     cvt.s64.s32     %rd2, %r3;\n\
+ *     mul.wide.s32    %rd3, %r3, 4;\n\
+ *     add.u64         %rd4, %rd1, %rd3;\n\
+ *     st.global.s32   [%rd4+0], %r3;\n\
+ *     exit;\n\
+ *   }\n\
+ * ";
+ *
+ * const char *Entry = "_Z8myKernelPi";
+ *
+ * int main() {
+ *   PollyGPUContext *Context;
+ *   PollyGPUModule *Module;
+ *   PollyGPUFunction *Kernel;
+ *   PollyGPUDevice *Device;
+ *   PollyGPUDevicePtr *DevData;
+ *   int *HostData;
+ *   PollyGPUEvent *Start;
+ *   PollyGPUEvent *Stop;
+ *   float *ElapsedTime;
+ *   int MemSize;
+ *   int BlockWidth = 16;
+ *   int BlockHeight = 16;
+ *   int GridWidth = 8;
+ *   int GridHeight = 8;
+ *
+ *   MemSize = 256*64*sizeof(int);
+ *   polly_initDevice(&Context, &Device);
+ *   polly_getPTXModule(KernelString, &Module);
+ *   polly_getPTXKernelEntry(Entry, Module, &Kernel);
+ *   polly_allocateMemoryForHostAndDevice(&HostData, &DevData, MemSize);
+ *   polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData);
+ *   polly_startTimerByCudaEvent(&Start, &Stop);
+ *   polly_launchKernel(Kernel, GridWidth, GridHeight);
+ *   polly_copyFromDeviceToHost(HostData, DevData, MemSize);
+ *   polly_stopTimerByCudaEvent(Start, Stop, ElapsedTime);
+ *   polly_cleanupGPGPUResources(HostData, DevData, Module, Context, Kernel);
+ * }
+ *
+ */
 
-void polly_initDevice(CUcontext *Context, CUdevice *Device);
-void polly_getPTXModule(void *PTXBuffer, CUmodule *Module);
-void polly_getPTXKernelEntry(const char *KernelName,
-                             CUmodule *Module,
-                             CUfunction *Kernel);
-void polly_startTimerByCudaEvent(cudaEvent_t *StartTimer,
-                                 cudaEvent_t *StopTimer);
-void polly_stopTimerByCudaEvent(cudaEvent_t *StartTimer, cudaEvent_t *StopTimer,
+typedef struct PollyGPUContextT PollyGPUContext;
+typedef struct PollyGPUModuleT PollyGPUModule;
+typedef struct PollyGPUFunctionT PollyGPUFunction;
+typedef struct PollyGPUDeviceT PollyGPUDevice;
+typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr;
+typedef struct PollyGPUEventT PollyGPUEvent;
+
+void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device);
+void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module);
+void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
+                             PollyGPUFunction **Kernel);
+void polly_startTimerByCudaEvent(PollyGPUEvent **Start, PollyGPUEvent **Stop);
+void polly_stopTimerByCudaEvent(PollyGPUEvent *Start, PollyGPUEvent *Stop,
                                 float *ElapsedTimes);
-void polly_copyFromHostToDevice(CUdeviceptr DevData, void *HostData,
+void polly_copyFromHostToDevice(PollyGPUDevicePtr *DevData, void *HostData,
                                 int MemSize);
-void polly_copyFromDeviceToHost(void *HostData, CUdeviceptr DevData,
+void polly_copyFromDeviceToHost(void *HostData, PollyGPUDevicePtr *DevData,
                                 int MemSize);
-void polly_allocateMemoryForHostAndDevice(void **PtrHostData,
-                                          CUdeviceptr *PtrDevData,
+void polly_allocateMemoryForHostAndDevice(void **HostData,
+                                          PollyGPUDevicePtr **DevData,
                                           int MemSize);
-void polly_setKernelParameters(CUfunction *Kernel, int BlockWidth,
-                               int BlockHeight, CUdeviceptr DevData);
-void polly_launchKernel(CUfunction *Kernel, int GridWidth, int GridHeight);
-void polly_cleanupGPGPUResources(void *HostData, CUdeviceptr DevData,
-                                 CUmodule *Module, CUcontext *Context);
+void polly_setKernelParameters(PollyGPUFunction *Kernel, int BlockWidth,
+                               int BlockHeight, PollyGPUDevicePtr *DevData);
+void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
+                        int GridHeight);
+void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
+                                 PollyGPUModule *Module,
+                                 PollyGPUContext *Context,
+                                 PollyGPUFunction *Kernel);
 #endif /* GPUJIT_H_ */





More information about the llvm-commits mailing list