r327867 - [OPENMP, NVPTX] Emit correct thread id.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Mon Mar 19 10:04:08 PDT 2018
Author: abataev
Date: Mon Mar 19 10:04:07 2018
New Revision: 327867
URL: http://llvm.org/viewvc/llvm-project?rev=327867&view=rev
Log:
[OPENMP, NVPTX] Emit correct thread id.
We emitted a fake thread id for the outlined function in NVPTX codegen.
This patch adds emission of the real thread id.
Modified:
cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=327867&r1=327866&r2=327867&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Mar 19 10:04:07 2018
@@ -268,6 +268,10 @@ protected:
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee,
ArrayRef<llvm::Value *> Args = llvm::None) const;
+ /// \brief Emits address of the word in a memory where current thread id is
+ /// stored.
+ virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+
private:
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
@@ -564,10 +568,6 @@ private:
/// \return Cache variable for the specified threadprivate.
llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
- /// \brief Emits address of the word in a memory where current thread id is
- /// stored.
- virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
-
/// \brief Gets (if variable with the given name already exist) or creates
/// internal global variable with the specified Name. The created variable has
/// linkage CommonLinkage by default and is initialized by null value.
Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=327867&r1=327866&r2=327867&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Mar 19 10:04:07 2018
@@ -608,7 +608,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
CGF.EmitBlock(WorkerBB);
- emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn);
+ emitCall(CGF, WST.Loc, WST.WorkerFn);
CGF.EmitBranch(EST.ExitBB);
CGF.EmitBlock(MasterCheckBB);
@@ -831,10 +831,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoo
// Insert call to work function via shared wrapper. The shared
// wrapper takes two arguments:
// - the parallelism level;
- // - the master thread ID;
- emitOutlinedFunctionCall(CGF, WST.Loc, W,
- {Bld.getInt16(/*ParallelLevel=*/0),
- getMasterThreadID(CGF)});
+ // - the thread ID;
+ emitCall(CGF, WST.Loc, W,
+ {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
// Go to end of parallel region.
CGF.EmitBranch(TerminateBB);
@@ -1316,12 +1315,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall
if (!CGF.HaveInsertPoint())
return;
- Address ZeroAddr =
- CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
- /*Name*/ ".zero.addr");
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ /*Name*/ ".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -1350,7 +1349,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
// Force inline this outlined function at its call site.
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
- auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
+ auto &&L0ParallelGen = [this, WFn, CapturedVars](CodeGenFunction &CGF,
PrePostActionTy &) {
CGBuilderTy &Bld = CGF.Builder;
@@ -1420,17 +1419,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
auto *ThreadID = getThreadID(CGF, Loc);
llvm::Value *Args[] = {RTLoc, ThreadID};
- auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
- PrePostActionTy &) {
- auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
- PrePostActionTy &Action) {
+ auto &&SeqGen = [this, Fn, CapturedVars, Args, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto &&CodeGen = [this, Fn, CapturedVars, Loc](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
Action.Enter(CGF);
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- OutlinedFnArgs.push_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ Address ZeroAddr =
+ CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+ /*DestWidth=*/32, /*Signed=*/1),
+ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
};
@@ -1468,7 +1470,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParal
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
".zero.addr");
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
- OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -2873,14 +2875,15 @@ llvm::Function *CGOpenMPRuntimeNVPTX::cr
const auto *RD = CS.getCapturedRecordDecl();
auto CurField = RD->field_begin();
+ Address ZeroAddr = CGF.CreateMemTemp(
+ CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+ /*Name*/ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
// Get the array of arguments.
SmallVector<llvm::Value *, 8> Args;
- // TODO: suppport SIMD and pass actual values
- Args.emplace_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
- Args.emplace_back(
- llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+ Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
+ Args.emplace_back(ZeroAddr.getPointer());
CGBuilderTy &Bld = CGF.Builder;
auto CI = CS.capture_begin();
Modified: cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp?rev=327867&r1=327866&r2=327867&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Mon Mar 19 10:04:07 2018
@@ -105,7 +105,7 @@ int main (int argc, char **argv) {
// CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
-// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num(
+// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
// CK2: store i{{[0-9]+}} [[ARGC_IN]], i{{[0-9]+}}* [[ARGCADDR]],
@@ -129,7 +129,7 @@ int main (int argc, char **argv) {
// CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
// CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**,
-// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num(
+// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
// CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
More information about the cfe-commits
mailing list