[polly] r277707 - GPGPU: Cache PTX kernels

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 4 02:15:58 PDT 2016


Author: grosser
Date: Thu Aug  4 04:15:58 2016
New Revision: 277707

URL: http://llvm.org/viewvc/llvm-project?rev=277707&view=rev
Log:
GPGPU: Cache PTX kernels

We always keep a number of already compiled kernels available to avoid
costly recompilation.

Modified:
    polly/trunk/tools/GPURuntime/GPUJIT.c

Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=277707&r1=277706&r2=277707&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Thu Aug  4 04:15:58 2016
@@ -20,6 +20,7 @@
 #include <string.h>
 
 static int DebugMode;
+static int CacheMode;
 
 static void debug_print(const char *format, ...) {
   if (!DebugMode)
@@ -40,6 +41,7 @@ struct PollyGPUContextT {
 struct PollyGPUFunctionT {
   CUfunction Cuda;
   CUmodule CudaModule;
+  const char *PTXString;
 };
 
 struct PollyGPUDevicePtrT {
@@ -249,6 +251,11 @@ PollyGPUContext *polly_initContext() {
   char DeviceName[256];
   int DeviceCount = 0;
 
+  static __thread PollyGPUContext *CurrentContext = NULL;
+
+  if (CurrentContext)
+    return CurrentContext;
+
   /* Get API handles. */
   if (initialDeviceAPIs() == 0) {
     fprintf(stdout, "Getting the \"handle\" for the CUDA driver API failed.\n");
@@ -282,13 +289,41 @@ PollyGPUContext *polly_initContext() {
   }
   CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device);
 
+  CacheMode = getenv("POLLY_NOCACHE") == 0;
+
+  if (CacheMode)
+    CurrentContext = Context;
+
   return Context;
 }
 
+static void freeKernel(PollyGPUFunction *Kernel) {
+  if (Kernel->CudaModule)
+    CuModuleUnloadFcnPtr(Kernel->CudaModule);
+
+  if (Kernel)
+    free(Kernel);
+}
+
+#define KERNEL_CACHE_SIZE 10
+
 PollyGPUFunction *polly_getKernel(const char *PTXBuffer,
                                   const char *KernelName) {
   dump_function();
 
+  static __thread PollyGPUFunction *KernelCache[KERNEL_CACHE_SIZE];
+  static __thread int NextCacheItem = 0;
+
+  for (long i = 0; i < KERNEL_CACHE_SIZE; i++) {
+    // We exploit here the property that all Polly-ACC kernels are allocated
+    // as global constants, hence a pointer comparison is sufficient to
+    // determine equality.
+    if (KernelCache[i] && KernelCache[i]->PTXString == PTXBuffer) {
+      debug_print("  -> using cached kernel\n");
+      return KernelCache[i];
+    }
+  }
+
   PollyGPUFunction *Function = malloc(sizeof(PollyGPUFunction));
 
   if (Function == 0) {
@@ -361,17 +396,27 @@ PollyGPUFunction *polly_getKernel(const
 
   CuLinkDestroyFcnPtr(LState);
 
+  Function->PTXString = PTXBuffer;
+
+  if (CacheMode) {
+    if (KernelCache[NextCacheItem])
+      freeKernel(KernelCache[NextCacheItem]);
+
+    KernelCache[NextCacheItem] = Function;
+
+    NextCacheItem = (NextCacheItem + 1) % KERNEL_CACHE_SIZE;
+  }
+
   return Function;
 }
 
 void polly_freeKernel(PollyGPUFunction *Kernel) {
   dump_function();
 
-  if (Kernel->CudaModule)
-    CuModuleUnloadFcnPtr(Kernel->CudaModule);
+  if (CacheMode)
+    return;
 
-  if (Kernel)
-    free(Kernel);
+  freeKernel(Kernel);
 }
 
 void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,
@@ -448,6 +493,9 @@ void *polly_getDevicePtr(PollyGPUDeviceP
 void polly_freeContext(PollyGPUContext *Context) {
   dump_function();
 
+  if (CacheMode)
+    return;
+
   if (Context->Cuda) {
     CuCtxDestroyFcnPtr(Context->Cuda);
     free(Context);




More information about the llvm-commits mailing list