[polly] r276636 - GPGPU: Emit data-transfer code

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 25 05:47:39 PDT 2016


Author: grosser
Date: Mon Jul 25 07:47:39 2016
New Revision: 276636

URL: http://llvm.org/viewvc/llvm-project?rev=276636&view=rev
Log:
GPGPU: Emit data-transfer code

Also factor out getArraySize() to avoid code dupliciation and reorder some
function arguments to indicate the direction into which data is transferred.

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/lib/External/ppcg/gpu.h
    polly/trunk/test/GPGPU/double-parallel-loop.ll
    polly/trunk/tools/GPURuntime/GPUJIT.c
    polly/trunk/tools/GPURuntime/GPUJIT.h

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=276636&r1=276635&r2=276636&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Mon Jul 25 07:47:39 2016
@@ -148,8 +148,8 @@ private:
   /// more.
   std::vector<Value *> LocalArrays;
 
-  /// A list of device arrays that has been allocated.
-  std::vector<Value *> AllocatedDevArrays;
+  /// A map from ScopArrays to their corresponding device allocations.
+  std::map<ScopArrayInfo *, Value *> DeviceAllocations;
 
   /// The current GPU context.
   Value *GPUContext;
@@ -181,13 +181,22 @@ private:
   ///
   ///   - ScopStmt:      A computational statement (TODO)
   ///   - Kernel:        A GPU kernel call (TODO)
-  ///   - Data-Transfer: A GPU <-> CPU data-transfer (TODO)
+  ///   - Data-Transfer: A GPU <-> CPU data-transfer
   ///   - In-kernel synchronization
   ///   - In-kernel memory copy statement
   ///
   /// @param UserStmt The ast node to generate code for.
   virtual void createUser(__isl_take isl_ast_node *UserStmt);
 
+  enum DataDirection { HOST_TO_DEVICE, DEVICE_TO_HOST };
+
+  /// Create code for a data transfer statement
+  ///
+  /// @param TransferStmt The data transfer statement.
+  /// @param Direction The direction in which to transfer data.
+  void createDataTransfer(__isl_take isl_ast_node *TransferStmt,
+                          enum DataDirection Direction);
+
   /// Find llvm::Values referenced in GPU kernel.
   ///
   /// @param Kernel The kernel to scan for llvm::Values
@@ -202,6 +211,11 @@ private:
   /// @param KernelStmt The ast node to generate kernel code for.
   void createKernel(__isl_take isl_ast_node *KernelStmt);
 
+  /// Generate code that computes the size of an array.
+  ///
+  /// @param Array The array for which to compute a size.
+  Value *getArraySize(gpu_array_info *Array);
+
   /// Create kernel function.
   ///
   /// Create a kernel function located in a newly created module that can serve
@@ -296,6 +310,20 @@ private:
   ///
   /// @param Array The device array to free.
   void createCallFreeDeviceMemory(Value *Array);
+
+  /// Create a call to copy data from host to device.
+  ///
+  /// @param HostPtr A pointer to the host data that should be copied.
+  /// @param DevicePtr A device pointer specifying the location to copy to.
+  void createCallCopyFromHostToDevice(Value *HostPtr, Value *DevicePtr,
+                                      Value *Size);
+
+  /// Create a call to copy data from device to host.
+  ///
+  /// @param DevicePtr A pointer to the device data that should be copied.
+  /// @param HostPtr A host pointer specifying the location to copy to.
+  void createCallCopyFromDeviceToHost(Value *DevicePtr, Value *HostPtr,
+                                      Value *Size);
 };
 
 void GPUNodeBuilder::initializeAfterRTH() {
@@ -314,36 +342,22 @@ void GPUNodeBuilder::allocateDeviceArray
 
   for (int i = 0; i < Prog->n_array; ++i) {
     gpu_array_info *Array = &Prog->array[i];
+    auto *ScopArray = (ScopArrayInfo *)Array->user;
     std::string DevArrayName("p_dev_array_");
     DevArrayName.append(Array->name);
 
-    Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
-
-    if (!gpu_array_is_scalar(Array)) {
-      auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
-      isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
-
-      for (unsigned int i = 1; i < Array->n_index; i++) {
-        isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
-        isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
-        Res = isl_ast_expr_mul(Res, Expr);
-      }
-
-      Value *NumElements = ExprBuilder.create(Res);
-      ArraySize = Builder.CreateMul(ArraySize, NumElements);
-    }
-
+    Value *ArraySize = getArraySize(Array);
     Value *DevArray = createCallAllocateMemoryForDevice(ArraySize);
     DevArray->setName(DevArrayName);
-    AllocatedDevArrays.push_back(DevArray);
+    DeviceAllocations[ScopArray] = DevArray;
   }
 
   isl_ast_build_free(Build);
 }
 
 void GPUNodeBuilder::freeDeviceArrays() {
-  for (auto &Array : AllocatedDevArrays)
-    createCallFreeDeviceMemory(Array);
+  for (auto &Array : DeviceAllocations)
+    createCallFreeDeviceMemory(Array.second);
 }
 
 void GPUNodeBuilder::createCallFreeDeviceMemory(Value *Array) {
@@ -380,6 +394,48 @@ Value *GPUNodeBuilder::createCallAllocat
   return Builder.CreateCall(F, {Size});
 }
 
+void GPUNodeBuilder::createCallCopyFromHostToDevice(Value *HostData,
+                                                    Value *DeviceData,
+                                                    Value *Size) {
+  const char *Name = "polly_copyFromHostToDevice";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *F = M->getFunction(Name);
+
+  // If F is not available, declare it.
+  if (!F) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    std::vector<Type *> Args;
+    Args.push_back(Builder.getInt8PtrTy());
+    Args.push_back(Builder.getInt8PtrTy());
+    Args.push_back(Builder.getInt64Ty());
+    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+    F = Function::Create(Ty, Linkage, Name, M);
+  }
+
+  Builder.CreateCall(F, {HostData, DeviceData, Size});
+}
+
+void GPUNodeBuilder::createCallCopyFromDeviceToHost(Value *DeviceData,
+                                                    Value *HostData,
+                                                    Value *Size) {
+  const char *Name = "polly_copyFromDeviceToHost";
+  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+  Function *F = M->getFunction(Name);
+
+  // If F is not available, declare it.
+  if (!F) {
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
+    std::vector<Type *> Args;
+    Args.push_back(Builder.getInt8PtrTy());
+    Args.push_back(Builder.getInt8PtrTy());
+    Args.push_back(Builder.getInt64Ty());
+    FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Args, false);
+    F = Function::Create(Ty, Linkage, Name, M);
+  }
+
+  Builder.CreateCall(F, {DeviceData, HostData, Size});
+}
+
 Value *GPUNodeBuilder::createCallInitContext() {
   const char *Name = "polly_initContext";
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -421,6 +477,58 @@ static bool isPrefix(std::string String,
   return String.find(Prefix) == 0;
 }
 
+Value *GPUNodeBuilder::getArraySize(gpu_array_info *Array) {
+  isl_ast_build *Build = isl_ast_build_from_context(S.getContext());
+  Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size);
+
+  if (!gpu_array_is_scalar(Array)) {
+    auto OffsetDimZero = isl_pw_aff_copy(Array->bound[0]);
+    isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero);
+
+    for (unsigned int i = 1; i < Array->n_index; i++) {
+      isl_pw_aff *Bound_I = isl_pw_aff_copy(Array->bound[i]);
+      isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I);
+      Res = isl_ast_expr_mul(Res, Expr);
+    }
+
+    Value *NumElements = ExprBuilder.create(Res);
+    ArraySize = Builder.CreateMul(ArraySize, NumElements);
+  }
+  isl_ast_build_free(Build);
+  return ArraySize;
+}
+
+void GPUNodeBuilder::createDataTransfer(__isl_take isl_ast_node *TransferStmt,
+                                        enum DataDirection Direction) {
+  isl_ast_expr *Expr = isl_ast_node_user_get_expr(TransferStmt);
+  isl_ast_expr *Arg = isl_ast_expr_get_op_arg(Expr, 0);
+  isl_id *Id = isl_ast_expr_get_id(Arg);
+  auto Array = (gpu_array_info *)isl_id_get_user(Id);
+  auto ScopArray = (ScopArrayInfo *)(Array->user);
+
+  Value *Size = getArraySize(Array);
+  Value *HostPtr = ScopArray->getBasePtr();
+
+  Value *DevPtr = DeviceAllocations[ScopArray];
+
+  if (gpu_array_is_scalar(Array)) {
+    HostPtr = Builder.CreateAlloca(ScopArray->getElementType());
+    Builder.CreateStore(ScopArray->getBasePtr(), HostPtr);
+  }
+
+  HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy());
+
+  if (Direction == HOST_TO_DEVICE)
+    createCallCopyFromHostToDevice(HostPtr, DevPtr, Size);
+  else
+    createCallCopyFromDeviceToHost(DevPtr, HostPtr, Size);
+
+  isl_id_free(Id);
+  isl_ast_expr_free(Arg);
+  isl_ast_expr_free(Expr);
+  isl_ast_node_free(TransferStmt);
+}
+
 void GPUNodeBuilder::createUser(__isl_take isl_ast_node *UserStmt) {
   isl_ast_expr *Expr = isl_ast_node_user_get_expr(UserStmt);
   isl_ast_expr *StmtExpr = isl_ast_expr_get_op_arg(Expr, 0);
@@ -435,10 +543,15 @@ void GPUNodeBuilder::createUser(__isl_ta
     return;
   }
 
-  if (isPrefix(Str, "to_device") || isPrefix(Str, "from_device")) {
-    // TODO: Insert memory copies
+  if (isPrefix(Str, "to_device")) {
+    createDataTransfer(UserStmt, HOST_TO_DEVICE);
+    isl_ast_expr_free(Expr);
+    return;
+  }
+
+  if (isPrefix(Str, "from_device")) {
+    createDataTransfer(UserStmt, DEVICE_TO_HOST);
     isl_ast_expr_free(Expr);
-    isl_ast_node_free(UserStmt);
     return;
   }
 
@@ -1202,6 +1315,7 @@ public:
       PPCGArray.global = false;
       PPCGArray.linearize = false;
       PPCGArray.dep_order = nullptr;
+      PPCGArray.user = Array;
 
       setArrayBounds(PPCGArray, Array);
       i++;

Modified: polly/trunk/lib/External/ppcg/gpu.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/External/ppcg/gpu.h?rev=276636&r1=276635&r2=276636&view=diff
==============================================================================
--- polly/trunk/lib/External/ppcg/gpu.h (original)
+++ polly/trunk/lib/External/ppcg/gpu.h Mon Jul 25 07:47:39 2016
@@ -54,6 +54,8 @@ struct gpu_array_info {
 	 * It is set to NULL otherwise.
 	 */
 	isl_union_map *dep_order;
+
+        void *user;
 };
 
 /* Represents an outer array accessed by a ppcg_kernel, localized

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=276636&r1=276635&r2=276636&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Mon Jul 25 07:47:39 2016
@@ -94,6 +94,10 @@
 ; IR: polly.start:
 ; IR-NEXT:    [[GPUContext:%.*]] = call i8* @polly_initContext()
 ; IR-NEXT:    %p_dev_array_MemRef_A = call i8* @polly_allocateMemoryForDevice(i64 4194304)
+; IR-NEXT:    [[HostPtr:%.*]] = bitcast [1024 x float]* %A to i8*
+; IR-NEXT:    call void @polly_copyFromHostToDevice(i8* [[HostPtr]], i8* %p_dev_array_MemRef_A, i64 4194304)
+; IR-NEXT:    [[HostPtr2:%.*]] = bitcast [1024 x float]* %A to i8*
+; IR-NEXT:    call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_A, i8* [[HostPtr2]], i64 4194304)
 ; IR-NEXT:    call void @polly_freeDeviceMemory(i8* %p_dev_array_MemRef_A)
 ; IR-NEXT:    call void @polly_freeContext(i8* [[GPUContext]])
 ; IR-NEXT:    br label %polly.exiting

Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=276636&r1=276635&r2=276636&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Mon Jul 25 07:47:39 2016
@@ -296,16 +296,16 @@ void polly_getPTXKernelEntry(const char
   }
 }
 
-void polly_copyFromHostToDevice(PollyGPUDevicePtr *DevData, void *HostData,
-                                int MemSize) {
+void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,
+                                long MemSize) {
   dump_function();
 
   CUdeviceptr CuDevData = DevData->Cuda;
   CuMemcpyHtoDFcnPtr(CuDevData, HostData, MemSize);
 }
 
-void polly_copyFromDeviceToHost(void *HostData, PollyGPUDevicePtr *DevData,
-                                int MemSize) {
+void polly_copyFromDeviceToHost(PollyGPUDevicePtr *DevData, void *HostData,
+                                long MemSize) {
   dump_function();
 
   if (CuMemcpyDtoHFcnPtr(HostData, DevData->Cuda, MemSize) != CUDA_SUCCESS) {

Modified: polly/trunk/tools/GPURuntime/GPUJIT.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.h?rev=276636&r1=276635&r2=276636&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.h (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.h Mon Jul 25 07:47:39 2016
@@ -78,10 +78,10 @@ PollyGPUContext *polly_initContext();
 void polly_getPTXModule(void *PTXBuffer, PollyGPUModule **Module);
 void polly_getPTXKernelEntry(const char *KernelName, PollyGPUModule *Module,
                              PollyGPUFunction **Kernel);
-void polly_copyFromHostToDevice(PollyGPUDevicePtr *DevData, void *HostData,
-                                int MemSize);
-void polly_copyFromDeviceToHost(void *HostData, PollyGPUDevicePtr *DevData,
-                                int MemSize);
+void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,
+                                long MemSize);
+void polly_copyFromDeviceToHost(PollyGPUDevicePtr *DevData, void *HostData,
+                                long MemSize);
 void polly_setKernelParameters(PollyGPUFunction *Kernel, int BlockWidth,
                                int BlockHeight, PollyGPUDevicePtr *DevData);
 void polly_launchKernel(PollyGPUFunction *Kernel, int GridWidth,




More information about the llvm-commits mailing list