r327867 - [OPENMP, NVPTX] Emit correct thread id.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Mon Mar 19 10:04:08 PDT 2018


Author: abataev
Date: Mon Mar 19 10:04:07 2018
New Revision: 327867

URL: http://llvm.org/viewvc/llvm-project?rev=327867&view=rev
Log:
[OPENMP, NVPTX] Emit correct thread id.

We emitted a fake thread id for the outlined function in NVPTX codegen.
This patch adds emission of the real thread id.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=327867&r1=327866&r2=327867&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Mar 19 10:04:07 2018
@@ -268,6 +268,10 @@ protected:
   void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee,
                 ArrayRef<llvm::Value *> Args = llvm::None) const;
 
+  /// \brief Emits address of the word in a memory where current thread id is
+  /// stored.
+  virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
+
 private:
   /// \brief Default const ident_t object used for initialization of all other
   /// ident_t objects.
@@ -564,10 +568,6 @@ private:
   /// \return Cache variable for the specified threadprivate.
   llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
 
-  /// \brief Emits address of the word in a memory where current thread id is
-  /// stored.
-  virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
-
   /// \brief Gets (if variable with the given name already exist) or creates
   /// internal global variable with the specified Name. The created variable has
   /// linkage CommonLinkage by default and is initialized by null value.

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=327867&r1=327866&r2=327867&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Mar 19 10:04:07 2018
@@ -608,7 +608,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn
   Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
 
   CGF.EmitBlock(WorkerBB);
-  emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn);
+  emitCall(CGF, WST.Loc, WST.WorkerFn);
   CGF.EmitBranch(EST.ExitBB);
 
   CGF.EmitBlock(MasterCheckBB);
@@ -831,10 +831,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoo
     // Insert call to work function via shared wrapper. The shared
     // wrapper takes two arguments:
     //   - the parallelism level;
-    //   - the master thread ID;
-    emitOutlinedFunctionCall(CGF, WST.Loc, W,
-                             {Bld.getInt16(/*ParallelLevel=*/0),
-                              getMasterThreadID(CGF)});
+    //   - the thread ID;
+    emitCall(CGF, WST.Loc, W,
+             {Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
 
     // Go to end of parallel region.
     CGF.EmitBranch(TerminateBB);
@@ -1316,12 +1315,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall
   if (!CGF.HaveInsertPoint())
     return;
 
-  Address ZeroAddr =
-      CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
-                           /*Name*/ ".zero.addr");
+  Address ZeroAddr = CGF.CreateMemTemp(
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+      /*Name*/ ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
   OutlinedFnArgs.push_back(ZeroAddr.getPointer());
   OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
   emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -1350,7 +1349,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
   // Force inline this outlined function at its call site.
   Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
 
-  auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
+  auto &&L0ParallelGen = [this, WFn, CapturedVars](CodeGenFunction &CGF,
                                                     PrePostActionTy &) {
     CGBuilderTy &Bld = CGF.Builder;
 
@@ -1420,17 +1419,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
   auto *ThreadID = getThreadID(CGF, Loc);
   llvm::Value *Args[] = {RTLoc, ThreadID};
 
-  auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
-                                                        PrePostActionTy &) {
-    auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
-                                                    PrePostActionTy &Action) {
+  auto &&SeqGen = [this, Fn, CapturedVars, Args, Loc](CodeGenFunction &CGF,
+                                                      PrePostActionTy &) {
+    auto &&CodeGen = [this, Fn, CapturedVars, Loc](CodeGenFunction &CGF,
+                                                   PrePostActionTy &Action) {
       Action.Enter(CGF);
 
       llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
-      OutlinedFnArgs.push_back(
-          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
-      OutlinedFnArgs.push_back(
-          llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+      Address ZeroAddr =
+          CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+                                /*DestWidth=*/32, /*Signed=*/1),
+                            ".zero.addr");
+      CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+      OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
+      OutlinedFnArgs.push_back(ZeroAddr.getPointer());
       OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
       emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
     };
@@ -1468,7 +1470,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParal
       CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
       ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
   OutlinedFnArgs.push_back(ZeroAddr.getPointer());
   OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
   emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
@@ -2873,14 +2875,15 @@ llvm::Function *CGOpenMPRuntimeNVPTX::cr
   const auto *RD = CS.getCapturedRecordDecl();
   auto CurField = RD->field_begin();
 
+  Address ZeroAddr = CGF.CreateMemTemp(
+      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
+      /*Name*/ ".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
   // Get the array of arguments.
   SmallVector<llvm::Value *, 8> Args;
 
-  // TODO: suppport SIMD and pass actual values
-  Args.emplace_back(
-      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
-  Args.emplace_back(
-      llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
+  Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
+  Args.emplace_back(ZeroAddr.getPointer());
 
   CGBuilderTy &Bld = CGF.Builder;
   auto CI = CS.capture_begin();

Modified: cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp?rev=327867&r1=327866&r2=327867&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Mon Mar 19 10:04:07 2018
@@ -105,7 +105,7 @@ int main (int argc, char **argv) {
 // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
 // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
 // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
-// CK2-NOT:  {{%.+}} = call i32 @__kmpc_global_thread_num(
+// CK2:  {{%.+}} = call i32 @__kmpc_global_thread_num(
 // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
 // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
 // CK2: store i{{[0-9]+}} [[ARGC_IN]], i{{[0-9]+}}* [[ARGCADDR]],
@@ -129,7 +129,7 @@ int main (int argc, char **argv) {
 // CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
 // CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
 // CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**,
-// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num(
+// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
 // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
 // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
 // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],




More information about the cfe-commits mailing list