[polly] r276635 - GPGPU: Complete code to allocate and free device arrays

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 25 05:47:33 PDT 2016


Author: grosser
Date: Mon Jul 25 07:47:33 2016
New Revision: 276635

URL: http://llvm.org/viewvc/llvm-project?rev=276635&view=rev
Log:
GPGPU: Complete code to allocate and free device arrays

At the beginning of each SCoP, we allocate device arrays for all arrays
used on the GPU and we free such arrays after the SCoP has been executed.

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/double-parallel-loop.ll
    polly/trunk/tools/GPURuntime/GPUJIT.c
    polly/trunk/tools/GPURuntime/GPUJIT.h

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=276635&r1=276634&r2=276635&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Mon Jul 25 07:47:33 2016
@@ -148,6 +148,9 @@ private:
   /// more.
   std::vector<Value *> LocalArrays;
 
+  /// A list of device arrays that have been allocated.
+  std::vector<Value *> AllocatedDevArrays;
+
   /// The current GPU context.
   Value *GPUContext;
 
@@ -266,8 +269,12 @@ private:
   /// dump its IR to stderr.
   void finalizeKernelFunction();
 
+  /// Create code that allocates memory to store arrays on device.
   void allocateDeviceArrays();
 
+  /// Free all allocated device arrays.
+  void freeDeviceArrays();
+
   /// Create a call to initialize the GPU context.
   ///
   /// @returns A pointer to the newly initialized context.
@@ -278,7 +285,17 @@ private:
   /// @param Context A pointer to an initialized GPU context.
   void createCallFreeContext(Value *Context);
 
+  /// Create a call to allocate memory on the device.
+  ///
+  /// @param Size The size of memory to allocate
+  ///
+  /// @returns A pointer that identifies this allocation.
   Value *createCallAllocateMemoryForDevice(Value *Size);
+
+  /// Create a call to free a device array.
+  ///
+  /// @param Array The device array to free.
+  void createCallFreeDeviceMemory(Value *Array);
 };
 
 void GPUNodeBuilder::initializeAfterRTH() {
@@ -287,6 +304,7 @@ void GPUNodeBuilder::initializeAfterRTH(
 }
 
 void GPUNodeBuilder::finalize() {
+  freeDeviceArrays(); // free device arrays before the context is freed
   createCallFreeContext(GPUContext);
   IslNodeBuilder::finalize();
 }
@@ -296,8 +314,8 @@ void GPUNodeBuilder::allocateDeviceArray
 
   for (int i = 0; i < Prog->n_array; ++i) {
     gpu_array_info *Array = &Prog->array[i];
-    std::string DevPtrName("p_devptr_");
-    DevPtrName.append(Array->name);
+    std::string DevArrayName("p_dev_array_");
+    DevArrayName.append(Array->name);
 
     Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
 
@@ -315,13 +333,36 @@ void GPUNodeBuilder::allocateDeviceArray
       ArraySize = Builder.CreateMul(ArraySize, NumElements);
     }
 
-    Value *DevPtr = createCallAllocateMemoryForDevice(ArraySize);
-    DevPtr->setName(DevPtrName);
+    Value *DevArray = createCallAllocateMemoryForDevice(ArraySize);
+    DevArray->setName(DevArrayName);
+    AllocatedDevArrays.push_back(DevArray);
   }
 
   isl_ast_build_free(Build);
 }
 
+void GPUNodeBuilder::freeDeviceArrays() {
+  for (auto &Array : AllocatedDevArrays) // one free call per recorded array
+    createCallFreeDeviceMemory(Array);
+}
+
+void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
+  const char *Name = "polly_freeDeviceMemory"; // callee looked up by name
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *F = M->getFunction(Name);
+
+  // If M does not declare F yet, add an external declaration: void(i8 *).
+  if (!F) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    std::vector<Type *> Args;
+    Args.push_back(Builder.getInt8PtrTy()); // the only parameter
+    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+    F = Function::Create(Ty, Linkage, Name, M);
+  }
+
+  Builder.CreateCall(F, {Array}); // Array is passed as the sole argument
+}
+
 Value *GPUNodeBuilder::createCallAllocateMemoryForDevice(Value *Size) {
   const char *Name = "polly_allocateMemoryForDevice";
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=276635&r1=276634&r2=276635&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Mon Jul 25 07:47:33 2016
@@ -93,7 +93,8 @@
 
 ; IR: polly.start:
 ; IR-NEXT:    [[GPUContext:%.*]] = call i8* @polly_initContext()
-; IR-NEXT:    %p_devptr_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
+; IR-NEXT:    %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
+; IR-NEXT:    call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef_A)
 ; IR-NEXT:    call void @polly_freeContext(i8* [[GPUContext]])
 ; IR-NEXT:    br label %polly.exiting
 

Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=276635&r1=276634&r2=276635&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Mon Jul 25 07:47:33 2016
@@ -339,6 +339,32 @@ void polly_launchKernel(PollyGPUFunction
   debug_print("CUDA kernel launched.\n");
 }
 
+void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation) {
+  dump_function();
+  CuMemFreeFcnPtr((CUdeviceptr)Allocation->Cuda); /* result is ignored */
+  free(Allocation); /* then release the handle struct itself */
+}
+
+PollyGPUDevicePtr *polly_allocateMemoryForDevice(long MemSize) {
+  dump_function();
+
+  /* Handle struct that stores the device pointer; it is what gets returned. */
+  PollyGPUDevicePtr *DevData = malloc(sizeof(PollyGPUDevicePtr));
+  if (DevData == 0) {
+    fprintf(stdout, "Allocate memory for GPU device memory pointer failed.\n");
+    exit(-1);
+  }
+
+  /* Request MemSize bytes; this failure gets its own, distinct message. */
+  CUresult Res = CuMemAllocFcnPtr(&(DevData->Cuda), MemSize);
+  if (Res != CUDA_SUCCESS) {
+    fprintf(stdout, "Allocate %ld bytes of GPU device memory failed.\n",
+            MemSize);
+    exit(-1);
+  }
+  return DevData;
+}
+
 void polly_freeContext(PollyGPUContext *Context) {
   dump_function();
 

Modified: polly/trunk/tools/GPURuntime/GPUJIT.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.h?rev=276635&r1=276634&r2=276635&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.h (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.h Mon Jul 25 07:47:33 2016
@@ -47,7 +47,7 @@
  *   PollyGPUModule *Module;
  *   PollyGPUFunction *Kernel;
  *   PollyGPUContext *Context;
- *   PollyGPUDevicePtr *PtrDevData;
+ *   PollyGPUDevicePtr *DevArray;
  *   int *HostData;
  *   int MemSize;
  *   int BlockWidth = 16;
@@ -57,11 +57,13 @@
  *
  *   MemSize = 256*64*sizeof(int);
  *   Context = polly_initContext();
+ *   DevArray = polly_allocateMemoryForDevice(MemSize);
  *   polly_getPTXModule(KernelString, &Module);
  *   polly_getPTXKernelEntry(Entry, Module, &Kernel);
  *   polly_setKernelParameters(Kernel, BlockWidth, BlockHeight, DevArray);
  *   polly_launchKernel(Kernel, GridWidth, GridHeight);
  *   polly_copyFromDeviceToHost(HostData, DevArray, MemSize);
+ *   polly_freeDeviceMemory(DevArray);
  *   polly_freeContext(Context);
  * }
  *
@@ -84,5 +86,6 @@ void polly_setKernelParameters(PollyGPUF
                                int BlockHeight, PollyGPUDevicePtr *DevData);
 void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,
                         int GridHeight);
+void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation);
 void polly_freeContext(PollyGPUContext *Context);
 #endif /* GPUJIT_H_ */




More information about the llvm-commits mailing list