[polly] r277707 - GPGPU: Cache PTX kernels
Tobias Grosser via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 4 02:15:58 PDT 2016
Author: grosser
Date: Thu Aug 4 04:15:58 2016
New Revision: 277707
URL: http://llvm.org/viewvc/llvm-project?rev=277707&view=rev
Log:
GPGPU: Cache PTX kernels
We always keep a number of already-compiled kernels available to avoid
costly recompilation.
Modified:
polly/trunk/tools/GPURuntime/GPUJIT.c
Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=277707&r1=277706&r2=277707&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Thu Aug 4 04:15:58 2016
@@ -20,6 +20,7 @@
#include <string.h>
static int DebugMode;
+static int CacheMode;
static void debug_print(const char *format, ...) {
if (!DebugMode)
@@ -40,6 +41,7 @@ struct PollyGPUContextT {
struct PollyGPUFunctionT {
CUfunction Cuda;
CUmodule CudaModule;
+ const char *PTXString;
};
struct PollyGPUDevicePtrT {
@@ -249,6 +251,11 @@ PollyGPUContext *polly_initContext() {
char DeviceName[256];
int DeviceCount = 0;
+ static __thread PollyGPUContext *CurrentContext = NULL;
+
+ if (CurrentContext)
+ return CurrentContext;
+
/* Get API handles. */
if (initialDeviceAPIs() == 0) {
fprintf(stdout, "Getting the \"handle\" for the CUDA driver API failed.\n");
@@ -282,13 +289,41 @@ PollyGPUContext *polly_initContext() {
}
CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device);
+ CacheMode = getenv("POLLY_NOCACHE") == 0;
+
+ if (CacheMode)
+ CurrentContext = Context;
+
return Context;
}
+/* Release a kernel: unload its CUDA module (if one is set) and free the
+ * PollyGPUFunction object itself. A NULL Kernel is ignored. */
+static void freeKernel(PollyGPUFunction *Kernel) {
+  /* Test the pointer before reading any member of it; checking only after
+   * the CudaModule access would make the NULL test unreachable in practice. */
+  if (!Kernel)
+    return;
+
+  if (Kernel->CudaModule)
+    CuModuleUnloadFcnPtr(Kernel->CudaModule);
+
+  free(Kernel);
+}
+
+#define KERNEL_CACHE_SIZE 10
+
PollyGPUFunction *polly_getKernel(const char *PTXBuffer,
const char *KernelName) {
dump_function();
+ static __thread PollyGPUFunction *KernelCache[KERNEL_CACHE_SIZE];
+ static __thread int NextCacheItem = 0;
+
+ for (long i = 0; i < KERNEL_CACHE_SIZE; i++) {
+ // We exploit here the property that all Polly-ACC kernels are allocated
+ // as global constants, hence a pointer comparison is sufficient to
+ // determine equality.
+ if (KernelCache[i] && KernelCache[i]->PTXString == PTXBuffer) {
+ debug_print(" -> using cached kernel\n");
+ return KernelCache[i];
+ }
+ }
+
PollyGPUFunction *Function = malloc(sizeof(PollyGPUFunction));
if (Function == 0) {
@@ -361,17 +396,27 @@ PollyGPUFunction *polly_getKernel(const
CuLinkDestroyFcnPtr(LState);
+ Function->PTXString = PTXBuffer;
+
+ if (CacheMode) {
+ if (KernelCache[NextCacheItem])
+ freeKernel(KernelCache[NextCacheItem]);
+
+ KernelCache[NextCacheItem] = Function;
+
+ NextCacheItem = (NextCacheItem + 1) % KERNEL_CACHE_SIZE;
+ }
+
return Function;
}
void polly_freeKernel(PollyGPUFunction *Kernel) {
dump_function();
- if (Kernel->CudaModule)
- CuModuleUnloadFcnPtr(Kernel->CudaModule);
+ if (CacheMode)
+ return;
- if (Kernel)
- free(Kernel);
+ freeKernel(Kernel);
}
void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,
@@ -448,6 +493,9 @@ void *polly_getDevicePtr(PollyGPUDeviceP
void polly_freeContext(PollyGPUContext *Context) {
dump_function();
+ if (CacheMode)
+ return;
+
if (Context->Cuda) {
CuCtxDestroyFcnPtr(Context->Cuda);
free(Context);
More information about the llvm-commits
mailing list