[llvm-commits] [polly] r158304 - in /polly/trunk/tools: CMakeLists.txt GPURuntime/ GPURuntime/CMakeLists.txt GPURuntime/GPUJIT.c GPURuntime/GPUJIT.h GPURuntime/Makefile Makefile

Tobias Grosser grosser at fim.uni-passau.de
Mon Jun 11 02:25:01 PDT 2012


Author: grosser
Date: Mon Jun 11 04:25:01 2012
New Revision: 158304

URL: http://llvm.org/viewvc/llvm-project?rev=158304&view=rev
Log:
Add the runtime library for GPGPU code generation.

Contributed by: Yabin Hu <yabin.hwu at gmail.com>

Added:
    polly/trunk/tools/GPURuntime/
    polly/trunk/tools/GPURuntime/CMakeLists.txt
    polly/trunk/tools/GPURuntime/GPUJIT.c
    polly/trunk/tools/GPURuntime/GPUJIT.h
    polly/trunk/tools/GPURuntime/Makefile
      - copied, changed from r158081, polly/trunk/tools/Makefile
Modified:
    polly/trunk/tools/CMakeLists.txt
    polly/trunk/tools/Makefile

Modified: polly/trunk/tools/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/CMakeLists.txt?rev=158304&r1=158303&r2=158304&view=diff
==============================================================================
--- polly/trunk/tools/CMakeLists.txt (original)
+++ polly/trunk/tools/CMakeLists.txt Mon Jun 11 04:25:01 2012
@@ -1 +1,5 @@
+if (CUDALIB_FOUND)
+  add_subdirectory(GPURuntime)
+endif (CUDALIB_FOUND)
+
 set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)

Added: polly/trunk/tools/GPURuntime/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/CMakeLists.txt?rev=158304&view=auto
==============================================================================
--- polly/trunk/tools/GPURuntime/CMakeLists.txt (added)
+++ polly/trunk/tools/GPURuntime/CMakeLists.txt Mon Jun 11 04:25:01 2012
@@ -0,0 +1,12 @@
+set(MODULE TRUE)
+set(LLVM_NO_RTTI 1)
+
+add_polly_library(GPURuntime
+  GPUJIT.c
+  )
+
+set_target_properties(GPURuntime
+  PROPERTIES
+  LINKER_LANGUAGE C
+  PREFIX "lib"
+  )

Added: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=158304&view=auto
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (added)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Mon Jun 11 04:25:01 2012
@@ -0,0 +1,355 @@
+/********************* GPUJIT.c - GPUJIT Execution Engine *********************/
+/*                                                                            */
+/*                     The LLVM Compiler Infrastructure                       */
+/*                                                                            */
+/* This file is distributed under the University of Illinois Open Source      */
+/* License. See LICENSE.TXT for details.                                      */
+/*                                                                            */
+/******************************************************************************/
+/*                                                                            */
+/*  This file implements GPUJIT, a ptx string execution engine for GPU.       */
+/*                                                                            */
+/******************************************************************************/
+
+#include "GPUJIT.h"
+#include <dlfcn.h>
+#include <stdio.h>
+
+/* dlopen() handles for libcuda.so (driver) and libcudart.so (runtime). */
+static void *HandleCuda;
+static void *HandleCudaRT;
+
+/* Function types and pointers for the CUDA driver API, resolved via dlsym. */
+typedef CUresult CUDAAPI CuMemAllocFcnTy(CUdeviceptr *, size_t);
+static CuMemAllocFcnTy *CuMemAllocFcnPtr;
+
+typedef CUresult CUDAAPI CuFuncSetBlockShapeFcnTy(CUfunction, int, int, int);
+static CuFuncSetBlockShapeFcnTy *CuFuncSetBlockShapeFcnPtr;
+
+typedef CUresult CUDAAPI CuParamSetvFcnTy(CUfunction, int, void *,
+                                          unsigned int);
+static CuParamSetvFcnTy *CuParamSetvFcnPtr;
+
+typedef CUresult CUDAAPI CuParamSetSizeFcnTy(CUfunction, unsigned int);
+static CuParamSetSizeFcnTy *CuParamSetSizeFcnPtr;
+
+typedef CUresult CUDAAPI CuLaunchGridFcnTy(CUfunction, int, int);
+static CuLaunchGridFcnTy *CuLaunchGridFcnPtr;
+
+typedef CUresult CUDAAPI CuMemcpyDtoHFcnTy(void *, CUdeviceptr, size_t);
+static CuMemcpyDtoHFcnTy *CuMemcpyDtoHFcnPtr;
+
+typedef CUresult CUDAAPI CuMemcpyHtoDFcnTy(CUdeviceptr, const void *, size_t);
+static CuMemcpyHtoDFcnTy *CuMemcpyHtoDFcnPtr;
+
+typedef CUresult CUDAAPI CuMemFreeFcnTy(CUdeviceptr);
+static CuMemFreeFcnTy *CuMemFreeFcnPtr;
+
+typedef CUresult CUDAAPI CuModuleUnloadFcnTy(CUmodule);
+static CuModuleUnloadFcnTy *CuModuleUnloadFcnPtr;
+
+typedef CUresult CUDAAPI CuCtxDestroyFcnTy(CUcontext);
+static CuCtxDestroyFcnTy *CuCtxDestroyFcnPtr;
+
+typedef CUresult CUDAAPI CuInitFcnTy(unsigned int);
+static CuInitFcnTy *CuInitFcnPtr;
+
+typedef CUresult CUDAAPI CuDeviceGetCountFcnTy(int *);
+static CuDeviceGetCountFcnTy *CuDeviceGetCountFcnPtr;
+
+typedef CUresult CUDAAPI CuCtxCreateFcnTy(CUcontext *, unsigned int, CUdevice);
+static CuCtxCreateFcnTy *CuCtxCreateFcnPtr;
+
+typedef CUresult CUDAAPI CuDeviceGetFcnTy(CUdevice *, int);
+static CuDeviceGetFcnTy *CuDeviceGetFcnPtr;
+
+typedef CUresult CUDAAPI CuModuleLoadDataExFcnTy(CUmodule *, const void *,
+                                                 unsigned int, CUjit_option *,
+                                                 void **);
+static CuModuleLoadDataExFcnTy *CuModuleLoadDataExFcnPtr;
+
+typedef CUresult CUDAAPI CuModuleGetFunctionFcnTy(CUfunction *, CUmodule,
+                                                  const char *);
+static CuModuleGetFunctionFcnTy *CuModuleGetFunctionFcnPtr;
+
+typedef CUresult CUDAAPI CuDeviceComputeCapabilityFcnTy(int *, int *, CUdevice);
+static CuDeviceComputeCapabilityFcnTy *CuDeviceComputeCapabilityFcnPtr;
+
+typedef CUresult CUDAAPI CuDeviceGetNameFcnTy(char *, int, CUdevice);
+static CuDeviceGetNameFcnTy *CuDeviceGetNameFcnPtr;
+
+/* Function types and pointers for the CUDA runtime API, resolved via dlsym. */
+typedef cudaError_t CUDARTAPI CudaEventCreateFcnTy(cudaEvent_t *);
+static CudaEventCreateFcnTy *CudaEventCreateFcnPtr;
+
+typedef cudaError_t CUDARTAPI CudaEventRecordFcnTy(cudaEvent_t,
+                                                   cudaStream_t);
+static CudaEventRecordFcnTy *CudaEventRecordFcnPtr;
+
+typedef cudaError_t CUDARTAPI CudaEventSynchronizeFcnTy(cudaEvent_t);
+static CudaEventSynchronizeFcnTy *CudaEventSynchronizeFcnPtr;
+
+typedef cudaError_t CUDARTAPI CudaEventElapsedTimeFcnTy(float *, cudaEvent_t,
+                                                        cudaEvent_t);
+static CudaEventElapsedTimeFcnTy *CudaEventElapsedTimeFcnPtr;
+
+typedef cudaError_t CUDARTAPI CudaEventDestroyFcnTy(cudaEvent_t);
+static CudaEventDestroyFcnTy *CudaEventDestroyFcnPtr;
+
+typedef cudaError_t CUDARTAPI CudaThreadSynchronizeFcnTy(void);
+static CudaThreadSynchronizeFcnTy *CudaThreadSynchronizeFcnPtr;
+
+/* Resolve FuncName in library Handle; logs and returns 0 on failure. */
+static void *getAPIHandle(void *Handle, const char *FuncName) {
+  const char *Err;
+  void *FuncPtr;
+  dlerror(); /* Clear stale error state so the check below reflects dlsym. */
+  FuncPtr = dlsym(Handle, FuncName);
+  if ((Err = dlerror()) == 0)
+    return FuncPtr;
+  fprintf(stdout, "Loading CUDA API '%s' failed: %s.\n", FuncName, Err);
+  return 0;
+}
+
+/* Open libcuda.so and libcudart.so; returns 1 on success, 0 on failure. */
+static int initialDeviceAPILibraries(void) {
+  HandleCuda = dlopen("libcuda.so", RTLD_LAZY);
+  if (!HandleCuda) {
+    printf("Cannot open library: %s. \n", dlerror());
+    return 0;
+  }
+  HandleCudaRT = dlopen("libcudart.so", RTLD_LAZY);
+  if (HandleCudaRT)
+    return 1;
+  printf("Cannot open library: %s. \n", dlerror());
+  dlclose(HandleCuda); /* Do not keep the driver library half-initialized. */
+  HandleCuda = 0;
+  return 0;
+}
+
+/* Load both CUDA libraries and resolve every entry point used by this file.
+ * Returns 1 when every library and symbol was found, 0 otherwise (which the
+ * caller treats as fatal). */
+static int initialDeviceAPIs(void) {
+  if (initialDeviceAPILibraries() == 0)
+    return 0;
+
+  /* Get function pointer to CUDA Driver APIs.
+   *
+   * Note that compilers conforming to the ISO C standard are required to
+   * generate a warning if a conversion from a void * pointer to a function
+   * pointer is attempted as in the following statements. The warning
+   * of this kind of cast may not be emitted by clang and new versions of gcc
+   * as it is valid on POSIX 2008.
+   */
+  /* Kernel configuration and launch. */
+  CuFuncSetBlockShapeFcnPtr = (CuFuncSetBlockShapeFcnTy *)
+    getAPIHandle(HandleCuda, "cuFuncSetBlockShape");
+  CuParamSetvFcnPtr = (CuParamSetvFcnTy *)
+    getAPIHandle(HandleCuda, "cuParamSetv");
+  CuParamSetSizeFcnPtr = (CuParamSetSizeFcnTy *)
+    getAPIHandle(HandleCuda, "cuParamSetSize");
+  CuLaunchGridFcnPtr = (CuLaunchGridFcnTy *)
+    getAPIHandle(HandleCuda, "cuLaunchGrid");
+
+  /* Device memory management and transfers. */
+  CuMemAllocFcnPtr = (CuMemAllocFcnTy *)
+    getAPIHandle(HandleCuda, "cuMemAlloc_v2");
+  CuMemFreeFcnPtr = (CuMemFreeFcnTy *)
+    getAPIHandle(HandleCuda, "cuMemFree_v2");
+  CuMemcpyDtoHFcnPtr = (CuMemcpyDtoHFcnTy *)
+    getAPIHandle(HandleCuda, "cuMemcpyDtoH_v2");
+  CuMemcpyHtoDFcnPtr = (CuMemcpyHtoDFcnTy *)
+    getAPIHandle(HandleCuda, "cuMemcpyHtoD_v2");
+
+  /* Module and context teardown. */
+  CuModuleUnloadFcnPtr = (CuModuleUnloadFcnTy *)
+    getAPIHandle(HandleCuda, "cuModuleUnload");
+  CuCtxDestroyFcnPtr = (CuCtxDestroyFcnTy *)
+    getAPIHandle(HandleCuda, "cuCtxDestroy");
+
+  /* Driver initialization, device discovery and context creation. */
+  CuInitFcnPtr = (CuInitFcnTy *)
+    getAPIHandle(HandleCuda, "cuInit");
+  CuDeviceGetCountFcnPtr = (CuDeviceGetCountFcnTy *)
+    getAPIHandle(HandleCuda, "cuDeviceGetCount");
+  CuDeviceGetFcnPtr = (CuDeviceGetFcnTy *)
+    getAPIHandle(HandleCuda, "cuDeviceGet");
+  CuCtxCreateFcnPtr = (CuCtxCreateFcnTy *)
+    getAPIHandle(HandleCuda, "cuCtxCreate_v2");
+
+  /* PTX module loading and kernel lookup. */
+  CuModuleLoadDataExFcnPtr = (CuModuleLoadDataExFcnTy *)
+    getAPIHandle(HandleCuda, "cuModuleLoadDataEx");
+  CuModuleGetFunctionFcnPtr = (CuModuleGetFunctionFcnTy *)
+    getAPIHandle(HandleCuda, "cuModuleGetFunction");
+
+  /* Device queries. */
+  CuDeviceComputeCapabilityFcnPtr = (CuDeviceComputeCapabilityFcnTy *)
+    getAPIHandle(HandleCuda, "cuDeviceComputeCapability");
+  CuDeviceGetNameFcnPtr = (CuDeviceGetNameFcnTy *)
+    getAPIHandle(HandleCuda, "cuDeviceGetName");
+
+  /* Get function pointer to CUDA Runtime APIs. */
+  CudaEventCreateFcnPtr = (CudaEventCreateFcnTy *)
+    getAPIHandle(HandleCudaRT, "cudaEventCreate");
+  CudaEventRecordFcnPtr = (CudaEventRecordFcnTy *)
+    getAPIHandle(HandleCudaRT, "cudaEventRecord");
+  CudaEventSynchronizeFcnPtr = (CudaEventSynchronizeFcnTy *)
+    getAPIHandle(HandleCudaRT, "cudaEventSynchronize");
+  CudaEventElapsedTimeFcnPtr = (CudaEventElapsedTimeFcnTy *)
+    getAPIHandle(HandleCudaRT, "cudaEventElapsedTime");
+  CudaEventDestroyFcnPtr = (CudaEventDestroyFcnTy *)
+    getAPIHandle(HandleCudaRT, "cudaEventDestroy");
+  CudaThreadSynchronizeFcnPtr = (CudaThreadSynchronizeFcnTy *)
+    getAPIHandle(HandleCudaRT, "cudaThreadSynchronize");
+
+  /* getAPIHandle() returns 0 for a missing symbol; calling through such a
+   * pointer would crash, so succeed only if every lookup produced one. */
+  return CuFuncSetBlockShapeFcnPtr && CuParamSetvFcnPtr &&
+         CuParamSetSizeFcnPtr && CuLaunchGridFcnPtr && CuMemAllocFcnPtr &&
+         CuMemFreeFcnPtr && CuMemcpyDtoHFcnPtr && CuMemcpyHtoDFcnPtr &&
+         CuModuleUnloadFcnPtr && CuCtxDestroyFcnPtr && CuInitFcnPtr &&
+         CuDeviceGetCountFcnPtr && CuDeviceGetFcnPtr && CuCtxCreateFcnPtr &&
+         CuModuleLoadDataExFcnPtr && CuModuleGetFunctionFcnPtr &&
+         CuDeviceComputeCapabilityFcnPtr && CuDeviceGetNameFcnPtr &&
+         CudaEventCreateFcnPtr && CudaEventRecordFcnPtr &&
+         CudaEventSynchronizeFcnPtr && CudaEventElapsedTimeFcnPtr &&
+         CudaEventDestroyFcnPtr && CudaThreadSynchronizeFcnPtr;
+}
+
+/* Load the CUDA libraries, select device 0 and create a context on it.
+ * Fills in *Device and *Context; any failure prints a message and exits. */
+void polly_initDevice(CUcontext *Context, CUdevice *Device) {
+  int Major = 0, Minor = 0, DeviceID = 0, DeviceCount = 0;
+  char DeviceName[256] = "unknown"; /* Stays printable if the query fails. */
+
+  if (initialDeviceAPIs() == 0) {
+    fprintf(stdout, "Getting the \"handle\" for the CUDA driver API failed.\n");
+    exit(-1);
+  }
+  if (CuInitFcnPtr(0) != CUDA_SUCCESS) {
+    fprintf(stdout, "Initializing the CUDA driver API failed.\n");
+    exit(-1);
+  }
+  /* Get the number of devices that support CUDA. */
+  if (CuDeviceGetCountFcnPtr(&DeviceCount) != CUDA_SUCCESS || !DeviceCount) {
+    fprintf(stdout, "There is no device supporting CUDA.\n");
+    exit(-1);
+  }
+  /* We select the 1st device as default. */
+  if (CuDeviceGetFcnPtr(Device, DeviceID) != CUDA_SUCCESS) {
+    fprintf(stdout, "Getting the CUDA device failed.\n");
+    exit(-1);
+  }
+  /* Get compute capabilities and the device name. */
+  CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, *Device);
+  CuDeviceGetNameFcnPtr(DeviceName, sizeof(DeviceName), *Device);
+  fprintf(stderr, "> Running on GPU device %d : %s.\n", DeviceID, DeviceName);
+  /* Create context on the device. */
+  if (CuCtxCreateFcnPtr(Context, 0, *Device) != CUDA_SUCCESS) {
+    fprintf(stdout, "Creating the CUDA context failed.\n");
+    exit(-1);
+  }
+}
+
+void polly_getPTXModule(void *PTXBuffer, CUmodule *Module) {
+  if (CuModuleLoadDataExFcnPtr(Module, PTXBuffer, 0, 0, 0) == CUDA_SUCCESS)
+    return; /* PTXBuffer was loaded into *Module; anything else is fatal. */
+  fprintf(stdout, "Loading ptx assembly text failed.\n");
+  exit(-1);
+}
+
+/* Store the handle of KernelName from *Module in *Kernel; exits on failure. */
+void polly_getPTXKernelEntry(const char *KernelName, CUmodule *Module,
+                             CUfunction *Kernel) {
+  CUresult Res = CuModuleGetFunctionFcnPtr(Kernel, *Module, KernelName);
+  if (Res == CUDA_SUCCESS)
+    return;
+  fprintf(stdout, "Loading kernel function failed.\n");
+  exit(-1);
+}
+
+void polly_startTimerByCudaEvent(cudaEvent_t *StartTimer,
+                                 cudaEvent_t *StopTimer) {
+  CudaEventCreateFcnPtr(StartTimer); /* Both events are created here ... */
+  CudaEventCreateFcnPtr(StopTimer); /* ... and destroyed by polly_stopTimerByCudaEvent. */
+  CudaEventRecordFcnPtr(*StartTimer, 0); /* Record the start event; 0 is the stream argument. Results are not checked. */
+}
+
+void polly_stopTimerByCudaEvent(cudaEvent_t *StartTimer,
+                                cudaEvent_t *StopTimer, float *ElapsedTimes) {
+  CudaEventRecordFcnPtr(*StopTimer, 0); /* Record the stop event; 0 is the stream argument. */
+  CudaEventSynchronizeFcnPtr(*StopTimer); /* Synchronize on the stop event before reading the time. */
+  CudaEventElapsedTimeFcnPtr(ElapsedTimes, *StartTimer, *StopTimer ); /* Result goes to *ElapsedTimes. */
+  CudaEventDestroyFcnPtr(*StartTimer); /* Both events are destroyed, so they cannot be reused afterwards. */
+  CudaEventDestroyFcnPtr(*StopTimer);
+  fprintf(stderr, "Processing time: %f (ms).\n", *ElapsedTimes); /* Reported on stderr; API results above are not checked. */
+}
+
+/* Allocate MemSize bytes on host and device; prints and exits on failure. */
+void polly_allocateMemoryForHostAndDevice(
+    void **PtrHostData, CUdeviceptr *PtrDevData, int MemSize) {
+  *PtrHostData = malloc(MemSize);
+  if (!*PtrHostData || CuMemAllocFcnPtr(PtrDevData, MemSize) != CUDA_SUCCESS) {
+    fprintf(stdout, "Could not allocate host or device memory.\n");
+    exit(-1);
+  }
+}
+
+void polly_copyFromHostToDevice(CUdeviceptr DevData, void *HostData,
+                                int MemSize) { /* Copies HostData to DevData. */
+  CuMemcpyHtoDFcnPtr(DevData, HostData, MemSize); /* NOTE(review): result is ignored, unlike polly_copyFromDeviceToHost. */
+}
+
+void polly_copyFromDeviceToHost(void *HostData, CUdeviceptr DevData,
+                                int MemSize) {
+  if (CuMemcpyDtoHFcnPtr(HostData, DevData, MemSize) == CUDA_SUCCESS)
+    return; /* DevData was copied to HostData; anything else is fatal. */
+  fprintf(stdout, "Copying results from device to host memory failed.\n");
+  exit(-1);
+}
+
+/* Set a BlockWidth x BlockHeight x 1 block shape on *Kernel and pass DevData
+ * as its only parameter (offset 0, total size sizeof(DevData)). */
+void polly_setKernelParameters(CUfunction *Kernel, int BlockWidth,
+                               int BlockHeight, CUdeviceptr DevData) {
+  CuFuncSetBlockShapeFcnPtr(*Kernel, BlockWidth, BlockHeight, 1);
+  CuParamSetvFcnPtr(*Kernel, 0, &DevData, sizeof(DevData));
+  CuParamSetSizeFcnPtr(*Kernel, sizeof(DevData));
+}
+
+void polly_launchKernel(CUfunction *Kernel, int GridWidth, int GridHeight) {
+  if (CuLaunchGridFcnPtr(*Kernel, GridWidth, GridHeight) != CUDA_SUCCESS) { /* Launch *Kernel with the given grid size. */
+    fprintf(stdout, "Launching CUDA kernel failed.\n");
+    exit(-1); /* A failed launch is fatal. */
+  }
+  CudaThreadSynchronizeFcnPtr(); /* NOTE(review): the synchronize result is not checked. */
+  fprintf(stdout, "CUDA kernel launched.\n"); /* Printed only after the synchronize call returns. */
+}
+
+/* Release the host buffer, device buffer, PTX module, context and both CUDA
+ * libraries. HostData and DevData are passed by value, so the caller's own
+ * copies are not cleared; *Module and *Context are reset to 0. */
+void polly_cleanupGPGPUResources(void *HostData, CUdeviceptr DevData,
+                                 CUmodule *Module, CUcontext *Context) {
+  free(HostData); /* free(0) is a no-op, so no guard is needed. */
+  if (DevData)
+    CuMemFreeFcnPtr(DevData);
+
+  if (*Module) {
+    CuModuleUnloadFcnPtr(*Module);
+    *Module = 0;
+  }
+  if (*Context) {
+    CuCtxDestroyFcnPtr(*Context);
+    *Context = 0;
+  }
+
+  /* Close each library once and forget the handle so it is not reused. */
+  if (HandleCuda)
+    dlclose(HandleCuda);
+  if (HandleCudaRT)
+    dlclose(HandleCudaRT);
+  HandleCuda = HandleCudaRT = 0;
+}

Added: polly/trunk/tools/GPURuntime/GPUJIT.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.h?rev=158304&view=auto
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.h (added)
+++ polly/trunk/tools/GPURuntime/GPUJIT.h Mon Jun 11 04:25:01 2012
@@ -0,0 +1,41 @@
+/******************************************************************************/
+/*                                                                            */
+/*                     The LLVM Compiler Infrastructure                       */
+/*                                                                            */
+/* This file is distributed under the University of Illinois Open Source      */
+/* License. See LICENSE.TXT for details.                                      */
+/*                                                                            */
+/******************************************************************************/
+/*                                                                            */
+/*  This file defines GPUJIT.                                                 */
+/*                                                                            */
+/******************************************************************************/
+
+#ifndef GPUJIT_H_
+#define GPUJIT_H_
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+void polly_initDevice(CUcontext *Context, CUdevice *Device); /* Loads the CUDA libraries, selects device 0 and creates *Context. */
+void polly_getPTXModule(void *PTXBuffer, CUmodule *Module); /* Loads the PTX in PTXBuffer into *Module; exits on failure. */
+void polly_getPTXKernelEntry(const char *KernelName,
+                             CUmodule *Module,
+                             CUfunction *Kernel); /* Looks up KernelName in *Module; exits on failure. */
+void polly_startTimerByCudaEvent(cudaEvent_t *StartTimer,
+                                 cudaEvent_t *StopTimer); /* Creates both events and records *StartTimer. */
+void polly_stopTimerByCudaEvent(cudaEvent_t *StartTimer, cudaEvent_t *StopTimer,
+                                float *ElapsedTimes); /* Stores the elapsed time in *ElapsedTimes and destroys both events. */
+void polly_copyFromHostToDevice(CUdeviceptr DevData, void *HostData,
+                                int MemSize); /* The result of the copy is not checked. */
+void polly_copyFromDeviceToHost(void *HostData, CUdeviceptr DevData,
+                                int MemSize); /* Exits if the copy fails. */
+void polly_allocateMemoryForHostAndDevice(void **PtrHostData,
+                                          CUdeviceptr *PtrDevData,
+                                          int MemSize); /* Host buffer comes from malloc(); the device buffer from the driver. */
+void polly_setKernelParameters(CUfunction *Kernel, int BlockWidth,
+                               int BlockHeight, CUdeviceptr DevData); /* Block depth is fixed to 1; DevData is the only kernel parameter. */
+void polly_launchKernel(CUfunction *Kernel, int GridWidth, int GridHeight); /* Launches the grid and synchronizes; exits if the launch fails. */
+void polly_cleanupGPGPUResources(void *HostData, CUdeviceptr DevData,
+                                 CUmodule *Module, CUcontext *Context); /* Frees buffers, unloads *Module, destroys *Context, closes the CUDA libraries. */
+#endif /* GPUJIT_H_ */

Copied: polly/trunk/tools/GPURuntime/Makefile (from r158081, polly/trunk/tools/Makefile)
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/Makefile?p2=polly/trunk/tools/GPURuntime/Makefile&p1=polly/trunk/tools/Makefile&r1=158081&r2=158304&rev=158304&view=diff
==============================================================================
--- polly/trunk/tools/Makefile (original)
+++ polly/trunk/tools/GPURuntime/Makefile Mon Jun 11 04:25:01 2012
@@ -1,5 +1,4 @@
-##===- tools/Makefile --------------------------------------*- Makefile -*-===##
-#
+##===- polly/tools/GPURuntime/Makefile ---------------------*- Makefile -*-===##
 #                     The LLVM Compiler Infrastructure
 #
 # This file is distributed under the University of Illinois Open Source
@@ -7,7 +6,11 @@
 #
 ##===----------------------------------------------------------------------===##
 
-LEVEL := ..
-DIRS :=
+LEVEL = ../..
+
+LIBRARYNAME = libGPURuntime
+LOADABLE_MODULE = 1
 
+include $(LEVEL)/Makefile.config
+CPP.Flags += $(POLLY_INC)
 include $(LEVEL)/Makefile.common

Modified: polly/trunk/tools/Makefile
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/Makefile?rev=158304&r1=158303&r2=158304&view=diff
==============================================================================
--- polly/trunk/tools/Makefile (original)
+++ polly/trunk/tools/Makefile Mon Jun 11 04:25:01 2012
@@ -10,4 +10,11 @@
 LEVEL := ..
 DIRS :=
 
+include $(LEVEL)/Makefile.config
+
+# GPU Runtime Support
+ifeq ($(CUDALIB_FOUND), yes)
+ DIRS += GPURuntime
+endif
+
 include $(LEVEL)/Makefile.common





More information about the llvm-commits mailing list