[PATCH] D44588: [OpenMP][Clang] Pass global thread ID to outlined function
Gheorghe-Teodor Bercea via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Mar 16 15:01:33 PDT 2018
gtbercea created this revision.
gtbercea added reviewers: ABataev, grokos, carlo.bertolli, caomhin.
Herald added subscribers: cfe-commits, guansong, jholewinski.
The data sharing wrapper function needs to pass a valid global thread ID to the parallel outlined function when the parallel directive is combined with a `for` directive.
Repository:
rC Clang
https://reviews.llvm.org/D44588
Files:
lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
Index: lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -423,14 +423,26 @@
};
} // anonymous namespace
+///
+/// NVPTX API calls.
+///
+
/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
return CGF.EmitRuntimeCall(
llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
"nvptx_warp_size");
}
+/// Get the id of the current block on the GPU.
+static llvm::Value *getNVPTXBlockID(CodeGenFunction &CGF) {
+ return CGF.EmitRuntimeCall(
+ llvm::Intrinsic::getDeclaration(
+ &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_ctaid_x),
+ "nvptx_block_id");
+}
+
/// Get the id of the current thread on the GPU.
static llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF) {
return CGF.EmitRuntimeCall(
@@ -521,6 +533,32 @@
Bld.CreateNot(Mask), "master_tid");
}
+/// Get number of OMP workers for parallel region after subtracting
+/// the master warp.
+static llvm::Value *getNumWorkers(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateNUWSub(getNVPTXNumThreads(CGF), Bld.getInt32(32),
+ "num_workers");
+}
+
+/// Get thread id in team.
+/// FIXME: Remove the expensive remainder operation.
+static llvm::Value *getTeamThreadId(CodeGenFunction &CGF) {
+ CGBuilderTy &Bld = CGF.Builder;
+ // N % M = N & (M-1) if M is a power of 2. The master Id is expected to be a
+ // power of two in all cases.
+ auto *Mask = Bld.CreateNUWSub(getMasterThreadID(CGF), Bld.getInt32(1));
+ return Bld.CreateAnd(getNVPTXThreadID(CGF), Mask, "team_tid");
+}
+
+/// Get global thread id.
+static llvm::Value *getGlobalThreadId(CodeGenFunction &CGF) {
+ assert(CGF.CurFn && "No function in current CodeGenFunction.");
+ CGBuilderTy &Bld = CGF.Builder;
+ return Bld.CreateAdd(Bld.CreateMul(getNVPTXBlockID(CGF), getNumWorkers(CGF)),
+ getTeamThreadId(CGF), "global_tid");
+}
+
CGOpenMPRuntimeNVPTX::WorkerFunctionState::WorkerFunctionState(
CodeGenModule &CGM, SourceLocation Loc)
: WorkerFn(nullptr), CGFI(nullptr), Loc(Loc) {
@@ -2876,9 +2914,15 @@
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
- // TODO: suppport SIMD and pass actual values
- Args.emplace_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ // First argument is the global thread ID.
+ Address GlobalThreadIDAddr =
+ CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, "global_tid");
+ CGF.EmitStoreOfScalar(getGlobalThreadId(CGF), GlobalThreadIDAddr,
+ /*Volatile=*/false,
+ Ctx.getPointerType(Ctx.VoidPtrTy));
+ Args.emplace_back(GlobalThreadIDAddr.getPointer());
+
+ // TODO: support SIMD and pass actual value
Args.emplace_back(
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44588.138777.patch
Type: text/x-patch
Size: 3073 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20180316/6cd99cb6/attachment.bin>
More information about the cfe-commits
mailing list