[polly] r302515 - [Polly][PPCGCodeGen] OpenCL now gets kernel argument size from PPCG CodeGen

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Tue May 9 07:54:11 PDT 2017


Can you remove the "[Polly]" prefix before committing? It's a commit
to the Polly repository, so it does not add new information. The
distinction of repositories does not exist in Phabricator, which is
the reason for the prefix there.

When I do commits, I usually also do some normalization, but I don't
personally care:
- Put a dot at the end of the title.
- Remove "Summary:"
- Wrap after 72 characters
- Keep only the "Differential Revision" line. The other information
about the review process is already available behind the link.

This is what I found in most LLVM commits.

Thank you.

Michael

2017-05-09 12:45 GMT+02:00 Siddharth Bhat via llvm-commits
<llvm-commits at lists.llvm.org>:
> Author: bollu
> Date: Tue May  9 05:45:52 2017
> New Revision: 302515
>
> URL: http://llvm.org/viewvc/llvm-project?rev=302515&view=rev
> Log:
> [Polly][PPCGCodeGen] OpenCL now gets kernel argument size from PPCG CodeGen
>
> Summary: PPCGCodeGeneration now attaches the size of the kernel launch parameters at the end of the parameter list. For the existing CUDA Runtime, this gets ignored, but the OpenCL Runtime knows to check for kernel-argument size at the end of the parameter list. (The resulting parameters list is twice as long. This has been accounted for in the corresponding test cases).
>
> Reviewers: grosser, Meinersbur, bollu
>
> Reviewed By: bollu
>
> Subscribers: nemanjai, yaxunl, Anastasia, pollydev, llvm-commits
>
> Tags: #polly
>
> Differential Revision: https://reviews.llvm.org/D32961
>
> Modified:
>     polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
>     polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll
>     polly/trunk/test/GPGPU/host-control-flow.ll
>     polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
>     polly/trunk/test/GPGPU/parametric-loop-bound.ll
>     polly/trunk/tools/GPURuntime/GPUJIT.c
>
> Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=302515&r1=302514&r2=302515&view=diff
> ==============================================================================
> --- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
> +++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Tue May  9 05:45:52 2017
> @@ -142,6 +142,14 @@ static __isl_give isl_id_to_ast_expr *po
>    return RefToExpr;
>  }
>
> +/// Given a LLVM Type, compute its size in bytes,
> +static int computeSizeInBytes(const Type *T) {
> +  int bytes = T->getPrimitiveSizeInBits() / 8;
> +  if (bytes == 0)
> +    bytes = T->getScalarSizeInBits() / 8;
> +  return bytes;
> +}
> +
>  /// Generate code for a GPU specific isl AST.
>  ///
>  /// The GPUNodeBuilder augments the general existing IslNodeBuilder, which
> @@ -272,6 +280,16 @@ private:
>    /// @returns A tuple with thread block sizes for X, Y, and Z dimensions.
>    std::tuple<Value *, Value *, Value *> getBlockSizes(ppcg_kernel *Kernel);
>
> +  /// Store a specific kernel launch parameter in the array of kernel launch
> +  /// parameters.
> +  ///
> +  /// @param Parameters The list of parameters in which to store.
> +  /// @param Param      The kernel launch parameter to store.
> +  /// @param Index      The index in the parameter list, at which to store the
> +  ///                   parameter.
> +  void insertStoreParameter(Instruction *Parameters, Instruction *Param,
> +                            int Index);
> +
>    /// Create kernel launch parameters.
>    ///
>    /// @param Kernel        The kernel to create parameters for.
> @@ -1192,11 +1210,21 @@ GPUNodeBuilder::getBlockSizes(ppcg_kerne
>    return std::make_tuple(Sizes[0], Sizes[1], Sizes[2]);
>  }
>
> +void GPUNodeBuilder::insertStoreParameter(Instruction *Parameters,
> +                                          Instruction *Param, int Index) {
> +  Value *Slot = Builder.CreateGEP(
> +      Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
> +  Value *ParamTyped = Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
> +  Builder.CreateStore(ParamTyped, Slot);
> +}
> +
>  Value *
>  GPUNodeBuilder::createLaunchParameters(ppcg_kernel *Kernel, Function *F,
>                                         SetVector<Value *> SubtreeValues) {
> -  Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(),
> -                                 std::distance(F->arg_begin(), F->arg_end()));
> +  const int NumArgs = F->arg_size();
> +  std::vector<int> ArgSizes(NumArgs);
> +
> +  Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), 2 * NumArgs);
>
>    BasicBlock *EntryBlock =
>        &Builder.GetInsertBlock()->getParent()->getEntryBlock();
> @@ -1213,6 +1241,8 @@ GPUNodeBuilder::createLaunchParameters(p
>      isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
>      const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(Id);
>
> +    ArgSizes[Index] = SAI->getElemSizeInBytes();
> +
>      Value *DevArray = nullptr;
>      if (ManagedMemory) {
>        DevArray = getOrCreateManagedDeviceArray(
> @@ -1265,16 +1295,15 @@ GPUNodeBuilder::createLaunchParameters(p
>      isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_set, i);
>      Value *Val = IDToValue[Id];
>      isl_id_free(Id);
> +
> +    ArgSizes[Index] = computeSizeInBytes(Val->getType());
> +
>      Instruction *Param =
>          new AllocaInst(Val->getType(), AddressSpace,
>                         Launch + "_param_" + std::to_string(Index),
>                         EntryBlock->getTerminator());
>      Builder.CreateStore(Val, Param);
> -    Value *Slot = Builder.CreateGEP(
> -        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
> -    Value *ParamTyped =
> -        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
> -    Builder.CreateStore(ParamTyped, Slot);
> +    insertStoreParameter(Parameters, Param, Index);
>      Index++;
>    }
>
> @@ -1284,30 +1313,38 @@ GPUNodeBuilder::createLaunchParameters(p
>      isl_id *Id = isl_space_get_dim_id(Kernel->space, isl_dim_param, i);
>      Value *Val = IDToValue[Id];
>      isl_id_free(Id);
> +
> +    ArgSizes[Index] = computeSizeInBytes(Val->getType());
> +
>      Instruction *Param =
>          new AllocaInst(Val->getType(), AddressSpace,
>                         Launch + "_param_" + std::to_string(Index),
>                         EntryBlock->getTerminator());
>      Builder.CreateStore(Val, Param);
> -    Value *Slot = Builder.CreateGEP(
> -        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
> -    Value *ParamTyped =
> -        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
> -    Builder.CreateStore(ParamTyped, Slot);
> +    insertStoreParameter(Parameters, Param, Index);
>      Index++;
>    }
>
>    for (auto Val : SubtreeValues) {
> +    ArgSizes[Index] = computeSizeInBytes(Val->getType());
> +
>      Instruction *Param =
>          new AllocaInst(Val->getType(), AddressSpace,
>                         Launch + "_param_" + std::to_string(Index),
>                         EntryBlock->getTerminator());
>      Builder.CreateStore(Val, Param);
> -    Value *Slot = Builder.CreateGEP(
> -        Parameters, {Builder.getInt64(0), Builder.getInt64(Index)});
> -    Value *ParamTyped =
> -        Builder.CreatePointerCast(Param, Builder.getInt8PtrTy());
> -    Builder.CreateStore(ParamTyped, Slot);
> +    insertStoreParameter(Parameters, Param, Index);
> +    Index++;
> +  }
> +
> +  for (int i = 0; i < NumArgs; i++) {
> +    Value *Val = ConstantInt::get(Builder.getInt32Ty(), ArgSizes[i]);
> +    Instruction *Param =
> +        new AllocaInst(Builder.getInt32Ty(), AddressSpace,
> +                       Launch + "_param_size_" + std::to_string(i),
> +                       EntryBlock->getTerminator());
> +    Builder.CreateStore(Val, Param);
> +    insertStoreParameter(Parameters, Param, Index);
>      Index++;
>    }
>
>
> Modified: polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll
> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll?rev=302515&r1=302514&r2=302515&view=diff
> ==============================================================================
> --- polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll (original)
> +++ polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll Tue May  9 05:45:52 2017
> @@ -37,18 +37,26 @@
>
>  ; CHECK:       %13 = call i8* @polly_initContextCUDA()
>  ; CHECK-NEXT:  %14 = bitcast i32* %A to i8*
> -; CHECK-NEXT:  %15 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
> +; CHECK-NEXT:  %15 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 0
>  ; CHECK-NEXT:  store i8* %14, i8** %polly_launch_0_param_0
>  ; CHECK-NEXT:  %16 = bitcast i8** %polly_launch_0_param_0 to i8*
>  ; CHECK-NEXT:  store i8* %16, i8** %15
>  ; CHECK-NEXT:  %17 = bitcast i32* %R to i8*
> -; CHECK-NEXT:  %18 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
> +; CHECK-NEXT:  %18 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
>  ; CHECK-NEXT:  store i8* %17, i8** %polly_launch_0_param_1
>  ; CHECK-NEXT:  %19 = bitcast i8** %polly_launch_0_param_1 to i8*
>  ; CHECK-NEXT:  store i8* %19, i8** %18
> -; CHECK-NEXT:  %20 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
> -; CHECK-NEXT:  call void @polly_launchKernel(i8* %20, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
> -; CHECK-NEXT:  call void @polly_freeKernel(i8* %20)
> +; CHECK-NEXT:  store i32 4, i32* %polly_launch_0_param_size_0
> +; CHECK-NEXT:  %20 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 2
> +; CHECK-NEXT:  %21 = bitcast i32* %polly_launch_0_param_size_0 to i8*
> +; CHECK-NEXT:  store i8* %21, i8** %20
> +; CHECK-NEXT:  store i32 4, i32* %polly_launch_0_param_size_1
> +; CHECK-NEXT:  %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3
> +; CHECK-NEXT:  %23 = bitcast i32* %polly_launch_0_param_size_1 to i8*
> +; CHECK-NEXT:  store i8* %23, i8** %22
> +; CHECK-NEXT:  %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
> +; CHECK-NEXT:  call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
> +; CHECK-NEXT:  call void @polly_freeKernel(i8* %24)
>  ; CHECK-NEXT:  call void @polly_synchronizeDevice()
>  ; CHECK-NEXT:  call void @polly_freeContext(i8* %13)
>
>
> Modified: polly/trunk/test/GPGPU/host-control-flow.ll
> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-control-flow.ll?rev=302515&r1=302514&r2=302515&view=diff
> ==============================================================================
> --- polly/trunk/test/GPGPU/host-control-flow.ll (original)
> +++ polly/trunk/test/GPGPU/host-control-flow.ll Tue May  9 05:45:52 2017
> @@ -32,7 +32,7 @@
>  ; IR-NEXT:   %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_header ]
>  ; ...
>  ; IR:  store i64 %polly.indvar, i64* %polly_launch_0_param_1
> -; IR-NEXT:  [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
> +; IR-NEXT:  [[REGA:%.+]] = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
>  ; IR-NEXT:  [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
>  ; IR-NEXT:  store i8* [[REGB]], i8** [[REGA]]
>  ; IR: call i8* @polly_getKernel
>
> Modified: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll?rev=302515&r1=302514&r2=302515&view=diff
> ==============================================================================
> --- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll (original)
> +++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll Tue May  9 05:45:52 2017
> @@ -48,13 +48,13 @@
>
>
>  ; IR:       [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
> -; IR-NEXT:  [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_0_params, i64 0, i64 0
> +; IR-NEXT:  [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
>  ; IR-NEXT:  store i8* [[DEVPTR]], i8** %polly_launch_0_param_0
>  ; IR-NEXT:  [[DATA:%.*]] = bitcast i8** %polly_launch_0_param_0 to i8*
>  ; IR-NEXT:  store i8* [[DATA]], i8** [[SLOT]]
>
>  ; IR:       [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
> -; IR-NEXT:  [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_1_params, i64 0, i64 0
> +; IR-NEXT:  [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_1_params, i64 0, i64 0
>  ; IR-NEXT:  store i8* [[DEVPTR]], i8** %polly_launch_1_param_0
>  ; IR-NEXT:  [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8*
>  ; IR-NEXT:  store i8* [[DATA]], i8** [[SLOT]]
>
> Modified: polly/trunk/test/GPGPU/parametric-loop-bound.ll
> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/parametric-loop-bound.ll?rev=302515&r1=302514&r2=302515&view=diff
> ==============================================================================
> --- polly/trunk/test/GPGPU/parametric-loop-bound.ll (original)
> +++ polly/trunk/test/GPGPU/parametric-loop-bound.ll Tue May  9 05:45:52 2017
> @@ -31,7 +31,7 @@
>  ; CODE-NEXT:     Stmt_bb2(32 * b0 + t0 + 1048576 * c0);
>
>  ; IR: store i64 %n, i64* %polly_launch_0_param_1
> -; IR-NEXT: [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
> +; IR-NEXT: [[REGA:%.+]] = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
>  ; IR-NEXT: [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
>  ; IR-NEXT: store i8* [[REGB]], i8** [[REGA]]
>
>
> Modified: polly/trunk/tools/GPURuntime/GPUJIT.c
> URL: http://llvm.org/viewvc/llvm-project/polly/trunk/tools/GPURuntime/GPUJIT.c?rev=302515&r1=302514&r2=302515&view=diff
> ==============================================================================
> --- polly/trunk/tools/GPURuntime/GPUJIT.c (original)
> +++ polly/trunk/tools/GPURuntime/GPUJIT.c Tue May  9 05:45:52 2017
> @@ -554,28 +554,12 @@ static void launchKernelCL(PollyGPUFunct
>                                sizeof(cl_uint), &NumArgs, NULL);
>    checkOpenCLError(Ret, "Failed to get number of kernel arguments.\n");
>
> -  // TODO: Pass the size of the kernel arguments in to launchKernelCL, along
> -  // with the arguments themselves. This is a dirty workaround that can be
> -  // broken.
> +  /* Argument sizes are stored at the end of the Parameters array. */
>    for (cl_uint i = 0; i < NumArgs; i++) {
> -    Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i, 8, (void *)Parameters[i]);
> -    if (Ret == CL_INVALID_ARG_SIZE) {
> -      Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i, 4, (void *)Parameters[i]);
> -      if (Ret == CL_INVALID_ARG_SIZE) {
> -        Ret =
> -            clSetKernelArgFcnPtr(CLKernel->Kernel, i, 2, (void *)Parameters[i]);
> -        if (Ret == CL_INVALID_ARG_SIZE) {
> -          Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i, 1,
> -                                     (void *)Parameters[i]);
> -          checkOpenCLError(Ret, "Failed to set Kernel argument %d.\n", i);
> -        }
> -      }
> -    }
> -    if (Ret != CL_SUCCESS && Ret != CL_INVALID_ARG_SIZE) {
> -      fprintf(stderr, "Failed to set Kernel argument.\n");
> -      printOpenCLError(Ret);
> -      exit(-1);
> -    }
> +    Ret = clSetKernelArgFcnPtr(CLKernel->Kernel, i,
> +                               *((int *)Parameters[NumArgs + i]),
> +                               (void *)Parameters[i]);
> +    checkOpenCLError(Ret, "Failed to set Kernel argument %d.\n", i);
>    }
>
>    unsigned int GridDimZ = 1;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list