[polly] r275953 - GPGPU: add intrinsic functions to obtain a kernel's thread and block ids

Tobias Grosser via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 19 00:32:44 PDT 2016


Author: grosser
Date: Tue Jul 19 02:32:44 2016
New Revision: 275953

URL: http://llvm.org/viewvc/llvm-project?rev=275953&view=rev
Log:
GPGPU: add intrinsic functions to obtain a kernel's thread and block ids

Modified:
    polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
    polly/trunk/test/GPGPU/double-parallel-loop.ll
    polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll

Modified: polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp?rev=275953&r1=275952&r2=275953&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/PPCGCodeGeneration.cpp Tue Jul 19 02:32:44 2016
@@ -97,6 +97,17 @@ private:
   /// The GPU program we generate code for.
   gpu_prog *Prog;
 
+  /// Deleter for std::unique_ptr<isl_id, ...>: frees the id via isl_id_free.
+  class IslIdDeleter {
+  public:
+    void operator()(__isl_take isl_id *Id) { isl_id_free(Id); }
+  };
+
+  /// A set containing all isl_ids allocated in a GPU kernel.
+  ///
+  /// By releasing this set all isl_ids will be freed.
+  std::set<std::unique_ptr<isl_id, IslIdDeleter>> KernelIDs;
+
   /// Create code for user-defined AST nodes.
   ///
   /// These AST nodes can be of type:
@@ -137,6 +148,11 @@ private:
   /// @returns The newly declared function.
   Function *createKernelFunctionDecl(ppcg_kernel *Kernel);
 
+  /// Insert intrinsic functions to obtain thread and block ids.
+  ///
+  /// @param Kernel The kernel to generate the intrinsic functions for.
+  void insertKernelIntrinsics(ppcg_kernel *Kernel);
+
   /// Finalize the generation of the kernel function.
   ///
   /// Free the LLVM-IR module corresponding to the kernel and -- if requested --
@@ -172,10 +188,12 @@ void GPUNodeBuilder::createKernel(__isl_
   assert(Kernel->tree && "Device AST of kernel node is empty");
 
   Instruction &HostInsertPoint = *Builder.GetInsertPoint();
+  IslExprBuilder::IDToValueTy HostIDs = IDToValue;
 
   createKernelFunction(Kernel);
 
   Builder.SetInsertPoint(&HostInsertPoint);
+  IDToValue = HostIDs;
 
   finalizeKernelFunction();
 }
@@ -222,6 +240,35 @@ Function *GPUNodeBuilder::createKernelFu
   return FN;
 }
 
+void GPUNodeBuilder::insertKernelIntrinsics(ppcg_kernel *Kernel) { // Emits the id reads at Builder's current insert point.
+  Intrinsic::ID IntrinsicsBID[] = {Intrinsic::nvvm_read_ptx_sreg_ctaid_x, // Block-id (ctaid) intrinsics; two entries: x, y.
+                                   Intrinsic::nvvm_read_ptx_sreg_ctaid_y};
+
+  Intrinsic::ID IntrinsicsTID[] = {Intrinsic::nvvm_read_ptx_sreg_tid_x, // Thread-id (tid) intrinsics; three entries: x, y, z.
+                                   Intrinsic::nvvm_read_ptx_sreg_tid_y,
+                                   Intrinsic::nvvm_read_ptx_sreg_tid_z};
+
+  auto addId = [this](__isl_take isl_id *Id, Intrinsic::ID Intr) mutable { // Maps Id to the i64 value read through Intr.
+    std::string Name = isl_id_get_name(Id); // The id's name becomes the name of the cast result below.
+    Module *M = Builder.GetInsertBlock()->getParent()->getParent(); // Insert block -> its function -> its module.
+    Function *IntrinsicFn = Intrinsic::getDeclaration(M, Intr);
+    Value *Val = Builder.CreateCall(IntrinsicFn, {});
+    Val = Builder.CreateIntCast(Val, Builder.getInt64Ty(), false, Name); // isSigned=false: cast to i64 as unsigned.
+    IDToValue[Id] = Val;
+    KernelIDs.insert(std::unique_ptr<isl_id, IslIdDeleter>(Id)); // KernelIDs takes ownership; Id is freed with the set.
+  };
+
+  for (int i = 0; i < Kernel->n_grid; ++i) { // NOTE(review): no check here that n_grid <= 2 (size of IntrinsicsBID).
+    isl_id *Id = isl_id_list_get_id(Kernel->block_ids, i);
+    addId(Id, IntrinsicsBID[i]);
+  }
+
+  for (int i = 0; i < Kernel->n_block; ++i) { // NOTE(review): no check here that n_block <= 3 (size of IntrinsicsTID).
+    isl_id *Id = isl_id_list_get_id(Kernel->thread_ids, i);
+    addId(Id, IntrinsicsTID[i]);
+  }
+}
+
 void GPUNodeBuilder::createKernelFunction(ppcg_kernel *Kernel) {
 
   std::string Identifier = "kernel_" + std::to_string(Kernel->id);
@@ -236,6 +283,8 @@ void GPUNodeBuilder::createKernelFunctio
   Builder.SetInsertPoint(EntryBlock);
   Builder.CreateRetVoid();
   Builder.SetInsertPoint(EntryBlock, EntryBlock->begin());
+
+  insertKernelIntrinsics(Kernel);
 }
 
 void GPUNodeBuilder::finalizeKernelFunction() {
@@ -244,6 +293,7 @@ void GPUNodeBuilder::finalizeKernelFunct
     outs() << *GPUModule << "\n";
 
   GPUModule.release();
+  KernelIDs.clear();
 }
 
 namespace {

Modified: polly/trunk/test/GPGPU/double-parallel-loop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/double-parallel-loop.ll?rev=275953&r1=275952&r2=275953&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/double-parallel-loop.ll (original)
+++ polly/trunk/test/GPGPU/double-parallel-loop.ll Tue Jul 19 02:32:44 2016
@@ -10,6 +10,10 @@
 ; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
 ; RUN: FileCheck %s -check-prefix=IR
 
+; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
+; RUN: -disable-output < %s | \
+; RUN: FileCheck %s -check-prefix=KERNEL-IR
+
 ; REQUIRES: pollyacc
 
 ; CHECK: Stmt_bb5
@@ -89,6 +93,19 @@
 ; IR: polly.exiting:
 ; IR-NEXT:    br label %polly.merge_new_and_old
 
+; KERNEL-IR: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
+; KERNEL-IR-NEXT: entry:
+; KERNEL-IR-NEXT:   %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+; KERNEL-IR-NEXT:   %b0 = zext i32 %0 to i64
+; KERNEL-IR-NEXT:   %1 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y()
+; KERNEL-IR-NEXT:   %b1 = zext i32 %1 to i64
+; KERNEL-IR-NEXT:   %2 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; KERNEL-IR-NEXT:   %t0 = zext i32 %2 to i64
+; KERNEL-IR-NEXT:   %3 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y()
+; KERNEL-IR-NEXT:   %t1 = zext i32 %3 to i64
+; KERNEL-IR-NEXT:   ret void
+; KERNEL-IR-NEXT: }
+
 ;    void double_parallel_loop(float A[][1024]) {
 ;      for (long i = 0; i < 1024; i++)
 ;        for (long j = 0; j < 1024; j++)

Modified: polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll?rev=275953&r1=275952&r2=275953&view=diff
==============================================================================
--- polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll (original)
+++ polly/trunk/test/GPGPU/kernel-params-only-some-arrays.ll Tue Jul 19 02:32:44 2016
@@ -17,6 +17,10 @@
 
 ; KERNEL: define ptx_kernel void @kernel_0(i8* %MemRef_A) {
 ; KERNEL-NEXT:   entry:
+; KERNEL-NEXT:     %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+; KERNEL-NEXT:     %b0 = zext i32 %0 to i64
+; KERNEL-NEXT:     %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; KERNEL-NEXT:     %t0 = zext i32 %1 to i64
 ; KERNEL-NEXT:     ret void
 ; KERNEL-NEXT: }
 
@@ -27,6 +31,10 @@
 
 ; KERNEL: define ptx_kernel void @kernel_1(i8* %MemRef_B) {
 ; KERNEL-NEXT:   entry:
+; KERNEL-NEXT:     %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
+; KERNEL-NEXT:     %b0 = zext i32 %0 to i64
+; KERNEL-NEXT:     %1 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; KERNEL-NEXT:     %t0 = zext i32 %1 to i64
 ; KERNEL-NEXT:     ret void
 ; KERNEL-NEXT: }
 




More information about the llvm-commits mailing list