[polly] r311248 - [PPCG] Only add Kernel argument sizes for OpenCL, not CUDA runtime

Philipp Schaad via llvm-commits llvm-commits at lists.llvm.org
Sat Aug 19 10:04:57 PDT 2017


Author: phschaad
Date: Sat Aug 19 10:04:57 2017
New Revision: 311248

URL: http://llvm.org/viewvc/llvm-project?rev=311248&view=rev
Log:
[PPCG] Only add Kernel argument sizes for OpenCL, not CUDA runtime

Kernel argument sizes now only get appended to the kernel launch parameter list if the OpenCL runtime is selected, not if CUDA runtime is chosen.

Differential revision: D36925

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll
    polly/trunk/test/GPGPU/host-control-flow.ll
    polly/trunk/test/GPGPU/invariant-load-hoisting.ll
    polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
    polly/trunk/test/GPGPU/parametric-loop-bound.ll
    polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Sat Aug 19 10:04:57 2017
@@ -1591,7 +1591,15 @@ GPUNodeBuilder::createLaunchParameters(p
   const int NumArgs = F->arg_size();
   std::vector<int> ArgSizes(NumArgs);
 
-  Type *ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), 2 * NumArgs);
+  // If we are using the OpenCL Runtime, we need to add the kernel argument
+  // sizes to the end of the launch-parameter list, so OpenCL can determine
+  // how big the respective kernel arguments are.
+  // Here we need to reserve adequate space for that.
+  Type *ArrayTy;
+  if (Runtime == GPURuntime::OpenCL)
+    ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), 2 * NumArgs);
+  else
+    ArrayTy = ArrayType::get(Builder.getInt8PtrTy(), NumArgs);
 
   BasicBlock *EntryBlock =
       &Builder.GetInsertBlock()->getParent()->getEntryBlock();
@@ -1608,7 +1616,8 @@ GPUNodeBuilder::createLaunchParameters(p
     isl_id *Id = isl_space_get_tuple_id(Prog->array[i].space, isl_dim_set);
     const ScopArrayInfo *SAI = ScopArrayInfo::getFromId(isl::manage(Id));
 
-    ArgSizes[Index] = SAI->getElemSizeInBytes();
+    if (Runtime == GPURuntime::OpenCL)
+      ArgSizes[Index] = SAI->getElemSizeInBytes();
 
     Value *DevArray = nullptr;
     if (PollyManagedMemory) {
@@ -1663,7 +1672,8 @@ GPUNodeBuilder::createLaunchParameters(p
     Value *Val = IDToValue[Id];
     isl_id_free(Id);
 
-    ArgSizes[Index] = computeSizeInBytes(Val->getType());
+    if (Runtime == GPURuntime::OpenCL)
+      ArgSizes[Index] = computeSizeInBytes(Val->getType());
 
     Instruction *Param =
         new AllocaInst(Val->getType(), AddressSpace,
@@ -1683,7 +1693,8 @@ GPUNodeBuilder::createLaunchParameters(p
       Val = ValueMap[Val];
     isl_id_free(Id);
 
-    ArgSizes[Index] = computeSizeInBytes(Val->getType());
+    if (Runtime == GPURuntime::OpenCL)
+      ArgSizes[Index] = computeSizeInBytes(Val->getType());
 
     Instruction *Param =
         new AllocaInst(Val->getType(), AddressSpace,
@@ -1695,7 +1706,8 @@ GPUNodeBuilder::createLaunchParameters(p
   }
 
   for (auto Val : SubtreeValues) {
-    ArgSizes[Index] = computeSizeInBytes(Val->getType());
+    if (Runtime == GPURuntime::OpenCL)
+      ArgSizes[Index] = computeSizeInBytes(Val->getType());
 
     Instruction *Param =
         new AllocaInst(Val->getType(), AddressSpace,
@@ -1706,15 +1718,17 @@ GPUNodeBuilder::createLaunchParameters(p
     Index++;
   }
 
-  for (int i = 0; i < NumArgs; i++) {
-    Value *Val = ConstantInt::get(Builder.getInt32Ty(), ArgSizes[i]);
-    Instruction *Param =
-        new AllocaInst(Builder.getInt32Ty(), AddressSpace,
-                       Launch + "_param_size_" + std::to_string(i),
-                       EntryBlock->getTerminator());
-    Builder.CreateStore(Val, Param);
-    insertStoreParameter(Parameters, Param, Index);
-    Index++;
+  if (Runtime == GPURuntime::OpenCL) {
+    for (int i = 0; i < NumArgs; i++) {
+      Value *Val = ConstantInt::get(Builder.getInt32Ty(), ArgSizes[i]);
+      Instruction *Param =
+          new AllocaInst(Builder.getInt32Ty(), AddressSpace,
+                         Launch + "_param_size_" + std::to_string(i),
+                         EntryBlock->getTerminator());
+      Builder.CreateStore(Val, Param);
+      insertStoreParameter(Parameters, Param, Index);
+      Index++;
+    }
   }
 
   auto Location = EntryBlock->getTerminator();

Modified: polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll (original)
+++ polly/trunk/test/GPGPU/cuda-managed-memory-simple.ll Sat Aug 19 10:04:57 2017
@@ -38,25 +38,17 @@
 ; CHECK:       %13 = call i8* @polly_initContextCUDA()
 ; CHECK-NEXT:  %14 = bitcast i32* %A to i8*
 ; CHECK-NEXT:  %15 = bitcast i32* %R to i8*
-; CHECK-NEXT:  %16 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 0
+; CHECK-NEXT:  %16 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
 ; CHECK-NEXT:  store i8* %14, i8** %polly_launch_0_param_0
 ; CHECK-NEXT:  %17 = bitcast i8** %polly_launch_0_param_0 to i8*
 ; CHECK-NEXT:  store i8* %17, i8** %16
-; CHECK-NEXT:  %18 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
+; CHECK-NEXT:  %18 = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
 ; CHECK-NEXT:  store i8* %15, i8** %polly_launch_0_param_1
 ; CHECK-NEXT:  %19 = bitcast i8** %polly_launch_0_param_1 to i8*
 ; CHECK-NEXT:  store i8* %19, i8** %18
-; CHECK-NEXT:  store i32 4, i32* %polly_launch_0_param_size_0
-; CHECK-NEXT:  %20 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 2
-; CHECK-NEXT:  %21 = bitcast i32* %polly_launch_0_param_size_0 to i8*
-; CHECK-NEXT:  store i8* %21, i8** %20
-; CHECK-NEXT:  store i32 4, i32* %polly_launch_0_param_size_1
-; CHECK-NEXT:  %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3
-; CHECK-NEXT:  %23 = bitcast i32* %polly_launch_0_param_size_1 to i8*
-; CHECK-NEXT:  store i8* %23, i8** %22
-; CHECK-NEXT:  %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([852 x i8], [852 x i8]* @FUNC_copy_SCOP_0_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([26 x i8], [26 x i8]* @FUNC_copy_SCOP_0_KERNEL_0_name, i32 0, i32 0))
-; CHECK-NEXT:  call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
-; CHECK-NEXT:  call void @polly_freeKernel(i8* %24)
+; CHECK-NEXT:  %20 = call i8* @polly_getKernel(i8* getelementptr inbounds ([852 x i8], [852 x i8]* @FUNC_copy_SCOP_0_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([26 x i8], [26 x i8]* @FUNC_copy_SCOP_0_KERNEL_0_name, i32 0, i32 0))
+; CHECK-NEXT:  call void @polly_launchKernel(i8* %20, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
+; CHECK-NEXT:  call void @polly_freeKernel(i8* %20)
 ; CHECK-NEXT:  call void @polly_synchronizeDevice()
 ; CHECK-NEXT:  call void @polly_freeContext(i8* %13)
 

Modified: polly/trunk/test/GPGPU/host-control-flow.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/host-control-flow.ll?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/host-control-flow.ll (original)
+++ polly/trunk/test/GPGPU/host-control-flow.ll Sat Aug 19 10:04:57 2017
@@ -31,7 +31,7 @@
 ; IR-NEXT:   %polly.indvar = phi i64 [ 0, %polly.loop_preheader ], [ %polly.indvar_next, %polly.loop_header ]
 ; ...
 ; IR:  store i64 %polly.indvar, i64* %polly_launch_0_param_1
-; IR-NEXT:  [[REGA:%.+]] = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
+; IR-NEXT:  [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
 ; IR-NEXT:  [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
 ; IR-NEXT:  store i8* [[REGB]], i8** [[REGA]]
 ; IR: call i8* @polly_getKernel

Modified: polly/trunk/test/GPGPU/invariant-load-hoisting.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/invariant-load-hoisting.ll?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/invariant-load-hoisting.ll (original)
+++ polly/trunk/test/GPGPU/invariant-load-hoisting.ll Sat Aug 19 10:04:57 2017
@@ -17,8 +17,8 @@
 ; SCOP-NEXT:              [n, tmp12] -> { Stmt_for_body6[i0, i1, i2] -> MemRef_invariant[0] };
 ; SCOP-NEXT:          Execution Context: [n, tmp12] -> {  : n > 0 }
 ; SCOP-NEXT:  }
-; HOST-IR:      call void @polly_launchKernel(i8* %219, i32 %225, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
-; HOST-IR-NEXT: call void @polly_freeKernel(i8* %219)
+; HOST-IR:      call void @polly_launchKernel(i8* %209, i32 %215, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
+; HOST-IR-NEXT: call void @polly_freeKernel(i8* %209)
 
 ; KERNEL-IR: define ptx_kernel void @FUNC_f_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12, i32 %polly.preload.tmp21.merge)
 

Modified: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll (original)
+++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll Sat Aug 19 10:04:57 2017
@@ -48,13 +48,13 @@
 
 
 ; IR:       [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_A)
-; IR-NEXT:  [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 0
+; IR-NEXT:  [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_0_params, i64 0, i64 0
 ; IR-NEXT:  store i8* [[DEVPTR]], i8** %polly_launch_0_param_0
 ; IR-NEXT:  [[DATA:%.*]] = bitcast i8** %polly_launch_0_param_0 to i8*
 ; IR-NEXT:  store i8* [[DATA]], i8** [[SLOT]]
 
 ; IR:       [[DEVPTR:%.*]] = call i8* @polly_getDevicePtr(i8* %p_dev_array_MemRef_B)
-; IR-NEXT:  [[SLOT:%.*]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_1_params, i64 0, i64 0
+; IR-NEXT:  [[SLOT:%.*]] = getelementptr [1 x i8*], [1 x i8*]* %polly_launch_1_params, i64 0, i64 0
 ; IR-NEXT:  store i8* [[DEVPTR]], i8** %polly_launch_1_param_0
 ; IR-NEXT:  [[DATA:%.*]] = bitcast i8** %polly_launch_1_param_0 to i8*
 ; IR-NEXT:  store i8* [[DATA]], i8** [[SLOT]]

Modified: polly/trunk/test/GPGPU/parametric-loop-bound.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/parametric-loop-bound.ll?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/parametric-loop-bound.ll (original)
+++ polly/trunk/test/GPGPU/parametric-loop-bound.ll Sat Aug 19 10:04:57 2017
@@ -32,7 +32,7 @@
 ; CODE-NEXT:     Stmt_bb2(32 * b0 + t0 + 1048576 * c0);
 
 ; IR: store i64 %n, i64* %polly_launch_0_param_1
-; IR-NEXT: [[REGA:%.+]] = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 1
+; IR-NEXT: [[REGA:%.+]] = getelementptr [2 x i8*], [2 x i8*]* %polly_launch_0_params, i64 0, i64 1
 ; IR-NEXT: [[REGB:%.+]] = bitcast i64* %polly_launch_0_param_1 to i8*
 ; IR-NEXT: store i8* [[REGB]], i8** [[REGA]]
 

Modified: polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll?rev=311248&r1=311247&r2=311248&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll (original)
+++ polly/trunk/test/GPGPU/phi-nodes-in-kernel.ll Sat Aug 19 10:04:57 2017
@@ -48,7 +48,7 @@ target triple = "x86_64-unknown-linux-gn
 ; CODE-NEXT: if (32 * b0 + t0 <= 48)
 ; CODE-NEXT:     Stmt_for_body17(0, 32 * b0 + t0);
 
-; IR: [[REGC:%.+]] =   bitcast i32* %27 to i8*
+; IR: [[REGC:%.+]] =   bitcast i32* %23 to i8*
 ; IR-NEXT:  call void @polly_copyFromDeviceToHost(i8* %p_dev_array_MemRef_c, i8* [[REGC]], i64 196)
 
 ; KERNEL-IR: define ptx_kernel void @FUNC_kernel_dynprog_SCOP_0_KERNEL_0(i8 addrspace(1)* %MemRef_c, i32) #0 {




More information about the llvm-commits mailing list