[polly] r302515 - [Polly][PPCGCodeGen] OpenCL now gets kernel argument size from PPCG CodeGen

Siddharth Bhat via llvm-commits llvm-commits at lists.llvm.org
Tue May 9 03:45:53 PDT 2017


Author: bollu
Date: Tue May  9 05:45:52 2017
New Revision: 302515

URL: http://llvm.org/viewvc/llvm-project?rev=302515&view=rev
Log:
[Polly][PPCGCodeGen] OpenCL now gets kernel argument size from PPCG CodeGen

Summary: PPCGCodeGeneration now attaches the size of the kernel launch parameters at the end of the parameter list. For the existing CUDA Runtime, this gets ignored, but the OpenCL Runtime knows to check for kernel-argument size at the end of the parameter list. (The resulting parameters list is twice as long. This has been accounted for in the corresponding test cases).

Reviewers: grosser, Meinersbur, bollu

Reviewed By: bollu

Subscribers: nemanjai, yaxunl, Anastasia, pollydev, llvm-commits

Tags: #polly

Differential Revision: https://reviews.llvm.org/D32961

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll
    polly/trunk/test/GPGPU/host-control-flow.ll
    polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
    polly/trunk/test/GPGPU/parametric-loop-bound.ll
    polly/trunk/tools/GPURuntime/GPUJIT.c

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=302515&r1=302514&r2=302515&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Tue May  9 05:45:52 2017
@@ -142,6 +142,14 @@ static __isl_give isl_id_to_ast_expr *po
   return RefToExpr;
 }
 
+/// Given a LLVM Type, compute its size in bytes,
+static int computeSizeInBytes(const Type *T) {
+  int bytes = T->getPrimitiveSizeInBits() / 8;
+  if (bytes == 0)
+    bytes = T->getScalarSizeInBits() / 8;
+  return bytes;
+}
+
 /// Generate code for a GPU specific isl AST.
 ///
 /// The GPUNodeBuilder augments the general existing IslNodeBuilder, which
@@ -272,6 +280,16 @@ private:
   /// @returns A tuple with thread block sizes for X, Y, and Z dimensions.
   std::tuple<Value *, Value *, Value *> getBlockSizes(ppcg_kernel *Kernel);
 
+  /// Store a specific kernel launch parameter in the array of kernel launch
+  /// parameters.
+  ///
+  /// @param Parameters The list of parameters in which to store.
+  /// @param Param      The kernel launch parameter to store.
+  /// @param Index      The index in the parameter list, at which to store the
+  ///                   parameter.
+  void insertStoreParameter(Instruction *Parameters, Instruction *Param,
+                            int Index);
+
   /// Create kernel launch parameters.
   ///
   /// @param Kernel        The kernel to create parameters for.
@@ -1192,11 +1210,21 @@ GPUNodeBuilder::getBlockSizes(ppcg_kerne
   return std::make_tuple(Sizes[0], Sizes[1], Sizes[2]);
 }
 
+void GPUNodeBuilder::insertStoreParameter(Instruction *Parameters,
+                                          Instruction *Param, int Index) {
+  Value *Slot = Builder.CreateGEP(
+      Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
+  Value *ParamTyped = Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
+  Builder.CreateStore(ParamTyped, Slot);
+}
+
 Value *
 GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
                                        SetVector<Value *> SubtreeValues) {
-  Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(),
-                                 std::distance(F->arg_begin(), F->arg_end()));
+  const int NumArgs = F->arg_size();
+  std::vector<int> ArgSizes(NumArgs);
+
+  Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), 2 * NumArgs);
 
   BasicBlock *EntryBlock =
       &Builder.GetInsertBlock()->getParent()->getEntryBlock();
@@ -1213,6 +1241,8 @@ GPUNodeBuilder::createLaunchParameters(p
     isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
     const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id);
 
+    ArgSizes[Index] = SAI->getElemSizeInBytes();
+
     Value *DevArray = nullptr;
     if (ManagedMemory) {
       DevArray = getOrCreateManagedDeviceArray(
@@ -1265,16 +1295,15 @@ GPUNodeBuilder::createLaunchParameters(p
     isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_set, i);
     Value *Val = IDToValue[Id];
     isl_id_free(Id);
+
+    ArgSizes[Index] = computeSizeInBytes(Val->getType());
+
     Instruction *Param =
         new AllocaInst(Val->getType(), AddressSpace,
                        Launch + "_param_" + std::to_string(Index),
                        EntryBlock->getTerminator());
     Builder.CreateStore(Val, Param);
-    Value *Slot = Builder.CreateGEP(
-        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
-    Value *ParamTyped =
-        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
-    Builder.CreateStore(ParamTyped, Slot);
+    insertStoreParameter(Parameters, Param, Index);
     Index++;
   }
 
@@ -1284,30 +1313,38 @@ GPUNodeBuilder::createLaunchParameters(p
     isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_param, i);
     Value *Val = IDToValue[Id];
     isl_id_free(Id);
+
+    ArgSizes[Index] = computeSizeInBytes(Val->getType());
+
     Instruction *Param =
         new AllocaInst(Val->getType(), AddressSpace,
                        Launch + "_param_" + std::to_string(Index),
                        EntryBlock->getTerminator());
     Builder.CreateStore(Val, Param);
-    Value *Slot = Builder.CreateGEP(
-        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
-    Value *ParamTyped =
-        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
-    Builder.CreateStore(ParamTyped, Slot);
+    insertStoreParameter(Parameters, Param, Index);
     Index++;
   }
 
   for (auto Val : SubtreeValues) {
+    ArgSizes[Index] = computeSizeInBytes(Val->getType());
+
     Instruction *Param =
         new AllocaInst(Val->getType(), AddressSpace,
                        Launch + "_param_" + std::to_string(Index),
                        EntryBlock->getTerminator());
     Builder.CreateStore(Val, Param);
-    Value *Slot = Builder.CreateGEP(
-        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
-    Value *ParamTyped =
-        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
-    Builder.CreateStore(ParamTyped, Slot);
+    insertStoreParameter(Parameters, Param, Index);
+    Index++;
+  }
+
+  for (int i = 0; i < NumArgs; i++) {
+    Value *Val = ConstantInt::get(Builder.getInt32Ty(), ArgSizes[i]);
+    Instruction *Param =
+        new AllocaInst(Builder.getInt32Ty(), AddressSpace,
+                       Launch + "_param_size_" + std::to_string(i),
+                       EntryBlock->getTerminator());
+    Builder.CreateStore(Val, Param);
+    insertStoreParameter(Parameters, Param, Index);
     Index++;
   }
 

Modified: polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll?rev=302515&r1=302514&r2=302515&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll (original)
+++ polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll Tue May  9 05:45:52 2017
@@ -37,18 +37,26 @@
 
 ; CHECK:       %13 = call i8* @polly_initContextCUDA()
 ; CHECK-NEXT:  %14 = bitcast i32* %A to i8*
-; CHECK-NEXT:  %15 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
+; CHECK-NEXT:  %15 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 0
 ; CHECK-NEXT:  store i8* %14, i8** %polly_launch_0_param_0
 ; CHECK-NEXT:  %16 = bitcast i8** %polly_launch_0_param_0 to i8*
 ; CHECK-NEXT:  store i8* %16, i8** %15
 ; CHECK-NEXT:  %17 = bitcast i32* %R to i8*
-; CHECK-NEXT:  %18 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
+; CHECK-NEXT:  %18 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
 ; CHECK-NEXT:  store i8* %17, i8** %polly_launch_0_param_1
 ; CHECK-NEXT:  %19 = bitcast i8** %polly_launch_0_param_1 to i8*
 ; CHECK-NEXT:  store i8* %19, i8** %18
-; CHECK-NEXT:  %20 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
-; CHECK-NEXT:  call void @polly_launchKernel(i8* %20, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
-; CHECK-NEXT:  call void @polly_freeKernel(i8* %20)
+; CHECK-NEXT:  store i32 4, i32* %polly_launch_0_param_size_0
+; CHECK-NEXT:  %20 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 2
+; CHECK-NEXT:  %21 = bitcast i32* %polly_launch_0_param_size_0 to i8*
+; CHECK-NEXT:  store i8* %21, i8** %20
+; CHECK-NEXT:  store i32 4, i32* %polly_launch_0_param_size_1
+; CHECK-NEXT:  %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3
+; CHECK-NEXT:  %23 = bitcast i32* %polly_launch_0_param_size_1 to i8*
+; CHECK-NEXT:  store i8* %23, i8** %22
+; CHECK-NEXT:  %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
+; CHECK-NEXT:  call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
+; CHECK-NEXT:  call void @polly_freeKernel(i8* %24)
 ; CHECK-NEXT:  call void @polly_synchronizeDevice()
 ; CHECK-NEXT:  call void @polly_freeContext(i8* %13)
 

Modified: polly/trunk/test/GPGPU/host-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-control-flow.ll?rev=302515&r1=302514&r2=302515&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/host-control-flow.ll (original)
+++ polly/trunk/test/GPGPU/host-control-flow.ll Tue May  9 05:45:52 2017
@@ -32,7 +32,7 @@
 ; IR-NEXT:   %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_header ]
 ; ...
 ; IR:  store i64 %polly.indvar, i64* %polly_launch_0_param_1
-; IR-NEXT:  [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
+; IR-NEXT:  [[REGA:%.+]] = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
 ; IR-NEXT:  [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
 ; IR-NEXT:  store i8* [[REGB]], i8** [[REGA]]
 ; IR: call i8* @polly_getKernel

Modified: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll?rev=302515&r1=302514&r2=302515&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll (original)
+++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll Tue May  9 05:45:52 2017
@@ -48,13 +48,13 @@
 
 
 ; IR:       [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
-; IR-NEXT:  [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_0_params, i64 0, i64 0
+; IR-NEXT:  [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
 ; IR-NEXT:  store i8* [[DEVPTR]], i8** %polly_launch_0_param_0
 ; IR-NEXT:  [[DATA:%.*]] = bitcast i8** %polly_launch_0_param_0 to i8*
 ; IR-NEXT:  store i8* [[DATA]], i8** [[SLOT]]
 
 ; IR:       [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
-; IR-NEXT:  [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_1_params, i64 0, i64 0
+; IR-NEXT:  [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_1_params, i64 0, i64 0
 ; IR-NEXT:  store i8* [[DEVPTR]], i8** %polly_launch_1_param_0
 ; IR-NEXT:  [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8*
 ; IR-NEXT:  store i8* [[DATA]], i8** [[SLOT]]

Modified: polly/trunk/test/GPGPU/parametric-loop-bound.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/parametric-loop-bound.ll?rev=302515&r1=302514&r2=302515&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/parametric-loop-bound.ll (original)
+++ polly/trunk/test/GPGPU/parametric-loop-bound.ll Tue May  9 05:45:52 2017
@@ -31,7 +31,7 @@
 ; CODE-NEXT:     Stmt_bb2(32 * b0 + t0 + 1048576 * c0);
 
 ; IR: store i64 %n, i64* %polly_launch_0_param_1
-; IR-NEXT: [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
+; IR-NEXT: [[REGA:%.+]] = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
 ; IR-NEXT: [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
 ; IR-NEXT: store i8* [[REGB]], i8** [[REGA]]
 

Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=302515&r1=302514&r2=302515&view=diff
==============================================================================
--- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
+++ polly/trunk/tools/GPURuntime/GPUJIT.c Tue May  9 05:45:52 2017
@@ -554,28 +554,12 @@ static void launchKernelCL(PollyGPUFunct
                               sizeof(cl_uint), &NumArgs, NULL);
   checkOpenCLError(Ret, "Failed to get number of kernel arguments.\n");
 
-  // TODO: Pass the size of the kernel arguments in to launchKernelCL, along
-  // with the arguments themselves. This is a dirty workaround that can be
-  // broken.
+  /* Argument sizes are stored at the end of the Parameters array. */
   for (cl_uint i = 0; i < NumArgs; i++) {
-    Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i, 8, (void *)Parameters[i]);
-    if (Ret == CL_INVALID_ARG_SIZE) {
-      Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i, 4, (void *)Parameters[i]);
-      if (Ret == CL_INVALID_ARG_SIZE) {
-        Ret =
-            clSetKernelArgFcnPtr(CLKernel->Kernel, i, 2, (void *)Parameters[i]);
-        if (Ret == CL_INVALID_ARG_SIZE) {
-          Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i, 1,
-                                     (void *)Parameters[i]);
-          checkOpenCLError(Ret, "Failed to set Kernel argument %d.\n", i);
-        }
-      }
-    }
-    if (Ret != CL_SUCCESS && Ret != CL_INVALID_ARG_SIZE) {
-      fprintf(stderr, "Failed to set Kernel argument.\n");
-      printOpenCLError(Ret);
-      exit(-1);
-    }
+    Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i,
+                               *((int *)Parameters[NumArgs + i]),
+                               (void *)Parameters[i]);
+    checkOpenCLError(Ret, "Failed to set Kernel argument %d.\n", i);
   }
 
   unsigned int GridDimZ = 1;




More information about the llvm-commits mailing list