[PATCH] D44588: [OpenMP][Clang] Pass global thread ID to outlined function

Gheorghe-Teodor Bercea via Phabricator via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 16 15:01:33 PDT 2018


gtbercea created this revision.
gtbercea added reviewers: ABataev, grokos, carlo.bertolli, caomhin.
Herald added subscribers: cfe-commits, guansong, jholewinski.

The data sharing wrapper function needs to pass a valid global thread ID to the parallel outlined function when the `parallel` directive is combined with a `for` directive.


Repository:
  rC Clang

https://reviews.llvm.org/D44588

Files:
  lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp


Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -423,14 +423,26 @@
 };
 } // anonymous namespace
 
+///
+/// NVPTX API calls.
+///
+
 /// Get the GPU warp size.
 static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
   return CGF.EmitRuntimeCall(
       llvm::Intrinsic::getDeclaration(
           &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
       "nvptx_warp_size");
 }
 
+/// Get the id of the current block on the GPU.
+static llvm::Value *getNVPTXBlockID(CodeGenFunction &CGF) {
+  return CGF.EmitRuntimeCall(
+      llvm::Intrinsic::getDeclaration(
+          &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x),
+      "nvptx_block_id");
+}
+
 /// Get the id of the current thread on the GPU.
 static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
   return CGF.EmitRuntimeCall(
@@ -521,6 +533,32 @@
                        Bld.CreateNot(Mask), "master_tid");
 }
 
+/// Get number of OMP workers for parallel region after subtracting
+/// the master warp.
+static llvm::Value *getNumWorkers(CodeGenFunction &CGF) {
+  CGBuilderTy &Bld = CGF.Builder;
+  return Bld.CreateNUWSub(getNVPTXNumThreads(CGF), Bld.getInt32(32),
+                          "num_workers");
+}
+
+/// Get thread id in team.
+/// FIXME: Remove the expensive remainder operation.
+static llvm::Value *getTeamThreadId(CodeGenFunction &CGF) {
+  CGBuilderTy &Bld = CGF.Builder;
+  // N % M = N & (M-1) if M is a power of 2. The master Id is expected to be a
+  // power of two in all cases.
+  auto *Mask = Bld.CreateNUWSub(getMasterThreadID(CGF), Bld.getInt32(1));
+  return Bld.CreateAnd(getNVPTXThreadID(CGF), Mask, "team_tid");
+}
+
+/// Get global thread id.
+static llvm::Value *getGlobalThreadId(CodeGenFunction &CGF) {
+  assert(CGF.CurFn && "No function in current CodeGenFunction.");
+  CGBuilderTy &Bld = CGF.Builder;
+  return Bld.CreateAdd(Bld.CreateMul(getNVPTXBlockID(CGF), getNumWorkers(CGF)),
+                       getTeamThreadId(CGF), "global_tid");
+}
+
 CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
     CodeGenModule &CGM, SourceLocation Loc)
     : WorkerFn(nullptr), CGFI(nullptr), Loc(Loc) {
@@ -2876,9 +2914,15 @@
   // Get the array of arguments.
   SmallVector<llvm::Value *, 8> Args;
 
-  // TODO: suppport SIMD and pass actual values
-  Args.emplace_back(
-      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+  // First argument is the global thread ID.
+  Address GlobalThreadIDAddr =
+      CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, "global_tid");
+  CGF.EmitStoreOfScalar(getGlobalThreadId(CGF), GlobalThreadIDAddr,
+                        /*Volatile=*/false,
+                        Ctx.getPointerType(Ctx.VoidPtrTy));
+  Args.emplace_back(GlobalThreadIDAddr.getPointer());
+
+  // TODO: support SIMD and pass actual value
   Args.emplace_back(
       llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44588.138777.patch
Type: text/x-patch
Size: 3073 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180316/6cd99cb6/attachment.bin>


More information about the cfe-commits mailing list