[polly] r276623 - GPGPU: initialize GPU context and simplify the corresponding GPURuntime interface.

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 25 02:16:02 PDT 2016


Author: grosser
Date: Mon Jul 25 04:16:01 2016
New Revision: 276623

URL: http://llvm.org/viewvc/llvm-project?rev=276623&view=rev
Log:
GPGPU: initialize GPU context and simplify the corresponding GPURuntime interface.

There is no need to expose the selected device at the moment. We also pass back
pointers as return values, as this simplifies the interface.

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/double-parallel-loop.ll
    polly/trunk/tools/GPURuntime/GPUJIT.c
    polly/trunk/tools/GPURuntime/GPUJIT.h

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=276623&r1=276622&r2=276623&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Mon Jul 25 04:16:01 2016
@@ -135,6 +135,12 @@ public:
     getExprBuilder().setIDToSAI(&IDToSAI);
   }
 
+  /// Create after-run-time-check initialization code.
+  void initializeAfterRTH();
+
+  /// Finalize the generated scop.
+  virtual void finalize();
+
 private:
   /// A vector of array base pointers for which a new ScopArrayInfo was created.
   ///
@@ -142,6 +148,9 @@ private:
   /// more.
   std::vector<Value *> LocalArrays;
 
+  /// The current GPU context.
+  Value *GPUContext;
+
   /// A module containing GPU code.
   ///
   /// This pointer is only set in case we are currently generating GPU code.
@@ -256,8 +265,113 @@ private:
   /// Free the LLVM-IR module corresponding to the kernel and -- if requested --
   /// dump its IR to stderr.
   void finalizeKernelFunction();
+
+  void allocateDeviceArrays();
+
+  /// Create a call to initialize the GPU context.
+  ///
+  /// @returns A pointer to the newly initialized context.
+  Value *createCallInitContext();
+
+  /// Create a call to free the GPU context.
+  ///
+  /// @param Context A pointer to an initialized GPU context.
+  void createCallFreeContext(Value *Context);
+
+  Value *createCallAllocateMemoryForDevice(Value *Size);
 };
 
+void GPUNodeBuilder::initializeAfterRTH() {
+  GPUContext = createCallInitContext();
+  allocateDeviceArrays();
+}
+
+void GPUNodeBuilder::finalize() {
+  createCallFreeContext(GPUContext);
+  IslNodeBuilder::finalize();
+}
+
+void GPUNodeBuilder::allocateDeviceArrays() {
+  isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
+
+  for (int i = 0; i < Prog->n_array; ++i) {
+    gpu_array_info *Array = &Prog->array[i];
+    std::string DevPtrName("p_devptr_");
+    DevPtrName.append(Array->name);
+
+    Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
+
+    if (!gpu_array_is_scalar(Array)) {
+      auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
+      isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
+
+      for (unsigned int i = 1; i < Array->n_index; i++) {
+        isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
+        isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
+        Res = isl_ast_expr_mul(Res, Expr);
+      }
+
+      Value *NumElements = ExprBuilder.create(Res);
+      ArraySize = Builder.CreateMul(ArraySize, NumElements);
+    }
+
+    Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize);
+    DevPtr->setName(DevPtrName);
+  }
+
+  isl_ast_build_free(Build);
+}
+
+Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
+  const char *Name = "polly_allocateMemoryForDevice";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *F = M->getFunction(Name);
+
+  // If F is not available, declare it.
+  if (!F) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    std::vector<Type *> Args;
+    Args.push_back(Builder.getInt64Ty());
+    FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false);
+    F = Function::Create(Ty, Linkage, Name, M);
+  }
+
+  return Builder.CreateCall(F, {Size});
+}
+
+Value *GPUNodeBuilder::createCallInitContext() {
+  const char *Name = "polly_initContext";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *F = M->getFunction(Name);
+
+  // If F is not available, declare it.
+  if (!F) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    std::vector<Type *> Args;
+    FunctionType *Ty = FunctionType::get(Builder.getInt8PtrTy(), Args, false);
+    F = Function::Create(Ty, Linkage, Name, M);
+  }
+
+  return Builder.CreateCall(F, {});
+}
+
+void GPUNodeBuilder::createCallFreeContext(Value *Context) {
+  const char *Name = "polly_freeContext";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *F = M->getFunction(Name);
+
+  // If F is not available, declare it.
+  if (!F) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    std::vector<Type *> Args;
+    Args.push_back(Builder.getInt8PtrTy());
+    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+    F = Function::Create(Ty, Linkage, Name, M);
+  }
+
+  Builder.CreateCall(F, {Context});
+}
+
 /// Check if one string is a prefix of another.
 ///
 /// @param String The string in which to look for the prefix.
@@ -1325,6 +1439,8 @@ public:
     Builder.SetInsertPoint(SplitBlock->getTerminator());
     NodeBuilder.addParameters(S->getContext());
     Builder.SetInsertPoint(&*StartBlock->begin());
+
+    NodeBuilder.initializeAfterRTH();
     NodeBuilder.create(Root);
     NodeBuilder.finalize();
   }

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=276623&r1=276622&r2=276623&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Mon Jul 25 04:16:01 2016
@@ -92,6 +92,9 @@
 ; IR-NEXT:    br i1 true, label %polly.start, label %bb2
 
 ; IR: polly.start:
+; IR-NEXT:    [[GPUContext:%.*]] = call i8* @polly_initContext()
+; IR-NEXT:    %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
+; IR-NEXT:    call void @polly_freeContext(i8* [[GPUContext]])
 ; IR-NEXT:    br label %polly.exiting
 
 ; IR: polly.exiting:

Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=276623&r1=276622&r2=276623&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Mon Jul 25 04:16:01 2016
@@ -44,10 +44,6 @@ struct PollyGPUFunctionT {
   CUfunction Cuda;
 };
 
-struct PollyGPUDeviceT {
-  CUdevice Cuda;
-};
-
 struct PollyGPUDevicePtrT {
   CUdeviceptr Cuda;
 };
@@ -219,10 +215,12 @@ static int initialDeviceAPIs() {
   return 1;
 }
 
-void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device) {
+PollyGPUContext *polly_initContext() {
   DebugMode = getenv("POLLY_DEBUG") != 0;
 
   dump_function();
+  PollyGPUContext *Context;
+  CUdevice Device;
 
   int Major = 0, Minor = 0, DeviceID = 0;
   char DeviceName[256];
@@ -246,26 +244,22 @@ void polly_initDevice(PollyGPUContext **
     exit(-1);
   }
 
-  /* We select the 1st device as default. */
-  *Device = malloc(sizeof(PollyGPUDevice));
-  if (*Device == 0) {
-    fprintf(stdout, "Allocate memory for Polly GPU device failed.\n");
-    exit(-1);
-  }
-  CuDeviceGetFcnPtr(&((*Device)->Cuda), 0);
+  CuDeviceGetFcnPtr(&Device, 0);
 
   /* Get compute capabilities and the device name. */
-  CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, (*Device)->Cuda);
-  CuDeviceGetNameFcnPtr(DeviceName, 256, (*Device)->Cuda);
+  CuDeviceComputeCapabilityFcnPtr(&Major, &Minor, Device);
+  CuDeviceGetNameFcnPtr(DeviceName, 256, Device);
   debug_print("> Running on GPU device %d : %s.\n", DeviceID, DeviceName);
 
   /* Create context on the device. */
-  *Context = malloc(sizeof(PollyGPUContext));
-  if (*Context == 0) {
+  Context = (PollyGPUContext *)malloc(sizeof(PollyGPUContext));
+  if (Context == 0) {
     fprintf(stdout, "Allocate memory for Polly GPU context failed.\n");
     exit(-1);
   }
-  CuCtxCreateFcnPtr(&((*Context)->Cuda), 0, (*Device)->Cuda);
+  CuCtxCreateFcnPtr(&(Context->Cuda), 0, Device);
+
+  return Context;
 }
 
 void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module) {
@@ -347,7 +341,6 @@ void polly_launchKernel(PollyGPUFunction
 
 void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
                                  PollyGPUModule *Module,
-                                 PollyGPUContext *Context,
                                  PollyGPUFunction *Kernel) {
   dump_function();
 
@@ -365,16 +358,18 @@ void polly_cleanupGPGPUResources(void *H
     CuModuleUnloadFcnPtr(Module->Cuda);
     free(Module);
   }
+  if (Kernel) {
+    free(Kernel);
+  }
+}
+
+void polly_freeContext(PollyGPUContext *Context) {
 
   if (Context->Cuda) {
     CuCtxDestroyFcnPtr(Context->Cuda);
     free(Context);
   }
 
-  if (Kernel) {
-    free(Kernel);
-  }
-
   dlclose(HandleCuda);
   dlclose(HandleCudaRT);
 }

Modified: polly/trunk/tools/GPURuntime/GPUJIT.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.h?rev=276623&r1=276622&r2=276623&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.h (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.h Mon Jul 25 04:16:01 2016
@@ -44,10 +44,9 @@
  * const char *Entry = "_Z8myKernelPi";
  *
  * int main() {
- *   PollyGPUContext *Context;
  *   PollyGPUModule *Module;
  *   PollyGPUFunction *Kernel;
- *   PollyGPUDevice *Device;
+ *   PollyGPUContext *Context;
  *   PollyGPUDevicePtr *PtrDevData;
  *   int *HostData;
  *   int MemSize;
@@ -57,13 +56,14 @@
  *   int GridHeight = 8;
  *
  *   MemSize = 256*64*sizeof(int);
- *   polly_initDevice(&Context, &Device);
+ *   Context = polly_initContext();
  *   polly_getPTXModule(KernelString, &Module);
  *   polly_getPTXKernelEntry(Entry, Module, &Kernel);
  *   polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevData);
  *   polly_launchKernel(Kernel, GridWidth, GridHeight);
  *   polly_copyFromDeviceToHost(HostData, DevData, MemSize);
- *   polly_cleanupGPGPUResources(HostData, DevData, Module, Context, Kernel);
+ *   polly_cleanupGPGPUResources(HostData, DevData, Module, Kernel);
+ *   polly_freeContext(Context);
  * }
  *
  */
@@ -71,10 +71,9 @@
 typedef struct PollyGPUContextT PollyGPUContext;
 typedef struct PollyGPUModuleT PollyGPUModule;
 typedef struct PollyGPUFunctionT PollyGPUFunction;
-typedef struct PollyGPUDeviceT PollyGPUDevice;
 typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr;
 
-void polly_initDevice(PollyGPUContext **Context, PollyGPUDevice **Device);
+PollyGPUContext *polly_initContext();
 void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module);
 void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
                              PollyGPUFunction **Kernel);
@@ -88,6 +87,6 @@ void polly_launchKernel(PollyGPUFunction
                         int GridHeight);
 void polly_cleanupGPGPUResources(void *HostData, PollyGPUDevicePtr *DevData,
                                  PollyGPUModule *Module,
-                                 PollyGPUContext *Context,
                                  PollyGPUFunction *Kernel);
+void polly_freeContext(PollyGPUContext *Context);
 #endif /* GPUJIT_H_ */




More information about the llvm-commits mailing list