r266722 - [OPENMP] Codegen for untied tasks.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Tue Apr 19 02:10:30 PDT 2016


Author: abataev
Date: Tue Apr 19 04:10:27 2016
New Revision: 266722

URL: http://llvm.org/viewvc/llvm-project?rev=266722&view=rev
Log:
[OPENMP] Codegen for untied tasks.

If the untied clause is present on a task construct, any thread in the team can resume the task region after a suspension. This patch adds proper code generation for untied tasks.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
    cfe/trunk/lib/Sema/SemaOpenMP.cpp
    cfe/trunk/test/OpenMP/task_codegen.cpp
    cfe/trunk/test/OpenMP/task_firstprivate_codegen.cpp
    cfe/trunk/test/OpenMP/task_private_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue Apr 19 04:10:27 2016
@@ -72,6 +72,8 @@ public:
   /// \return LValue for thread id variable. This LValue always has type int32*.
   virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
 
+  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
+
   CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
 
   OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
@@ -82,6 +84,8 @@ public:
     return Info->getKind() == CR_OpenMP;
   }
 
+  ~CGOpenMPRegionInfo() override = default;
+
 protected:
   CGOpenMPRegionKind RegionKind;
   RegionCodeGenTy CodeGen;
@@ -90,7 +94,7 @@ protected:
 };
 
 /// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
 public:
   CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                              const RegionCodeGenTy &CodeGen,
@@ -121,14 +125,62 @@ private:
 };
 
 /// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
 public:
+  class UntiedTaskActionTy : public PrePostActionTy {
+    bool Untied;
+    const VarDecl *PartIDVar;
+    const RegionCodeGenTy &UntiedCodeGen;
+    llvm::SwitchInst *UntiedSwitch = nullptr;
+
+  public:
+    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
+                       const RegionCodeGenTy &UntiedCodeGen)
+        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
+    void Enter(CodeGenFunction &CGF) override {
+      if (Untied) {
+        // Emit task switching point.
+        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+            CGF.GetAddrOfLocalVar(PartIDVar),
+            PartIDVar->getType()->castAs<PointerType>());
+        auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
+        auto *DoneBB = CGF.createBasicBlock(".untied.done.");
+        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
+        CGF.EmitBlock(DoneBB);
+        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
+        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
+                              CGF.Builder.GetInsertBlock());
+        emitUntiedSwitch(CGF);
+      }
+    }
+    void emitUntiedSwitch(CodeGenFunction &CGF) const {
+      if (Untied) {
+        auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+            CGF.GetAddrOfLocalVar(PartIDVar),
+            PartIDVar->getType()->castAs<PointerType>());
+        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
+                              PartIdLVal);
+        UntiedCodeGen(CGF);
+        CodeGenFunction::JumpDest CurPoint =
+            CGF.getJumpDestInCurrentScope(".untied.next.");
+        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
+        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
+                              CGF.Builder.GetInsertBlock());
+        CGF.EmitBranchThroughCleanup(CurPoint);
+        CGF.EmitBlock(CurPoint.getBlock());
+      }
+    }
+    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
+  };
   CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                  const VarDecl *ThreadIDVar,
                                  const RegionCodeGenTy &CodeGen,
-                                 OpenMPDirectiveKind Kind, bool HasCancel)
+                                 OpenMPDirectiveKind Kind, bool HasCancel,
+                                 const UntiedTaskActionTy &Action)
       : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
-        ThreadIDVar(ThreadIDVar) {
+        ThreadIDVar(ThreadIDVar), Action(Action) {
     assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
   }
 
@@ -142,6 +194,10 @@ public:
   /// \brief Get the name of the capture helper.
   StringRef getHelperName() const override { return ".omp_outlined."; }
 
+  void emitUntiedSwitch(CodeGenFunction &CGF) override {
+    Action.emitUntiedSwitch(CGF);
+  }
+
   static bool classof(const CGCapturedStmtInfo *Info) {
     return CGOpenMPRegionInfo::classof(Info) &&
            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
@@ -152,6 +208,8 @@ private:
   /// \brief A variable or parameter storing global thread id for OpenMP
   /// constructs.
   const VarDecl *ThreadIDVar;
+  /// Action for emitting code for untied tasks.
+  const UntiedTaskActionTy &Action;
 };
 
 /// \brief API for inlined captured statement code generation in OpenMP
@@ -210,6 +268,11 @@ public:
     llvm_unreachable("No helper name for inlined OpenMP construct");
   }
 
+  void emitUntiedSwitch(CodeGenFunction &CGF) override {
+    if (OuterRegionInfo)
+      OuterRegionInfo->emitUntiedSwitch(CGF);
+  }
+
   CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
 
   static bool classof(const CGCapturedStmtInfo *Info) {
@@ -217,6 +280,8 @@ public:
            cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
   }
 
+  ~CGOpenMPInlinedRegionInfo() override = default;
+
 private:
   /// \brief CodeGen info about outer OpenMP region.
   CodeGenFunction::CGCapturedStmtInfo *OldCSI;
@@ -228,7 +293,7 @@ private:
 /// captured fields. The name of the target region has to be unique in a given
 /// application so it is provided by the client, because only the client has
 /// the information to generate that.
-class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
 public:
   CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                            const RegionCodeGenTy &CodeGen, StringRef HelperName)
@@ -257,7 +322,7 @@ static void EmptyCodeGen(CodeGenFunction
 }
 /// \brief API for generation of expressions captured in a innermost OpenMP
 /// region.
-class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
+class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
 public:
   CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
       : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
@@ -757,16 +822,35 @@ llvm::Value *CGOpenMPRuntime::emitParall
 
 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
-    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+    bool Tied, unsigned &NumberOfParts) {
+  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
+                                              PrePostActionTy &) {
+    auto *ThreadID = getThreadID(CGF, D.getLocStart());
+    auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
+    llvm::Value *TaskArgs[] = {
+        UpLoc, ThreadID,
+        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
+                                    TaskTVar->getType()->castAs<PointerType>())
+            .getPointer()};
+    CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+  };
+  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
+                                                            UntiedCodeGen);
+  CodeGen.setAction(Action);
   assert(!ThreadIDVar->getType()->isPointerType() &&
          "thread id variable must be of type kmp_int32 for tasks");
   auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
   CodeGenFunction CGF(CGM, true);
-  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
-                                        InnermostKind,
-                                        cast<OMPTaskDirective>(D).hasCancel());
+  CGOpenMPTaskOutlinedRegionInfo CGInfo(
+      *CS, ThreadIDVar, CodeGen, InnermostKind,
+      cast<OMPTaskDirective>(D).hasCancel(), Action);
   CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
-  return CGF.GenerateCapturedStmtFunction(*CS);
+  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
+  if (!Tied)
+    NumberOfParts = Action.getNumberOfParts();
+  return Res;
 }
 
 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
@@ -1898,6 +1982,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(
       emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
       llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
+  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+    Region->emitUntiedSwitch(CGF);
 }
 
 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
@@ -2951,7 +3037,7 @@ createKmpTaskTWithPrivatesRecordDecl(Cod
 /// argument.
 /// \code
 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
-///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
+///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
 ///   tt->shareds);
 ///   return 0;
 /// }
@@ -2982,7 +3068,7 @@ emitProxyTaskFunction(CodeGenModule &CGM
   CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
 
   // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
-  // tt->task_data.shareds);
+  // tt, tt->task_data.shareds);
   auto *GtidParam = CGF.EmitLoadOfScalar(
       CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
   LValue TDBase = CGF.EmitLoadOfPointerLValue(
@@ -2995,7 +3081,7 @@ emitProxyTaskFunction(CodeGenModule &CGM
   auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
   auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
   auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
-  auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
+  auto *PartidParam = PartIdLVal.getPointer();
 
   auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
   auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
@@ -3014,7 +3100,11 @@ emitProxyTaskFunction(CodeGenModule &CGM
   }
 
   llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
-                             TaskPrivatesMap, SharedsParam};
+                             TaskPrivatesMap,
+                             CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+                                            TDBase.getAddress(), CGF.VoidPtrTy)
+                                 .getPointer(),
+                             SharedsParam};
   CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
   CGF.EmitStoreThroughLValue(
       RValue::get(CGF.Builder.getInt32(/*C=*/0)),
@@ -3154,8 +3244,8 @@ static int array_pod_sort_comparator(con
 void CGOpenMPRuntime::emitTaskCall(
     CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
     bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
-    llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
-    const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+    unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+    Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
     ArrayRef<const Expr *> PrivateCopies,
     ArrayRef<const Expr *> FirstprivateVars,
     ArrayRef<const Expr *> FirstprivateCopies,
@@ -3390,7 +3480,8 @@ void CGOpenMPRuntime::emitTaskCall(
         KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
         ArrayType::Normal, /*IndexTypeQuals=*/0);
     // kmp_depend_info[<Dependences.size()>] deps;
-    DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
+    DependenciesArray =
+        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
     for (unsigned i = 0; i < NumDependencies; ++i) {
       const Expr *E = Dependences[i].second;
       auto Addr = CGF.EmitLValue(E);
@@ -3448,8 +3539,6 @@ void CGOpenMPRuntime::emitTaskCall(
 
   // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
   // libcall.
-  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
-  // *new_task);
   // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
   // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
   // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
@@ -3467,18 +3556,25 @@ void CGOpenMPRuntime::emitTaskCall(
     DepTaskArgs[5] = CGF.Builder.getInt32(0);
     DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
   }
-  auto &&ThenCodeGen = [NumDependencies, &TaskArgs,
+  auto &&ThenCodeGen = [this, Tied, Loc, NumberOfParts, TDBase, KmpTaskTQTyRD,
+                        NumDependencies, &TaskArgs,
                         &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
-    // TODO: add check for untied tasks.
-    auto &RT = CGF.CGM.getOpenMPRuntime();
+    if (!Tied) {
+      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
+      auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
+      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
+    }
     if (NumDependencies) {
       CGF.EmitRuntimeCall(
-          RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
-          DepTaskArgs);
+          createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
     } else {
-      CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task),
+      CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
                           TaskArgs);
     }
+    // Check if parent region is untied and build return for untied task.
+    if (auto *Region =
+            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+      Region->emitUntiedSwitch(CGF);
   };
 
   llvm::Value *DepWaitTaskArgs[6];
@@ -4039,6 +4135,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(C
   llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
   // Ignore return result until untied tasks are supported.
   CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
+  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+    Region->emitUntiedSwitch(CGF);
 }
 
 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Tue Apr 19 04:10:27 2016
@@ -455,17 +455,25 @@ public:
       OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
 
   /// \brief Emits outlined function for the OpenMP task directive \a D. This
-  /// outlined function has type void(*)(kmp_int32 ThreadID, kmp_int32
-  /// PartID, struct context_vars*).
+  /// outlined function has type void(*)(kmp_int32 ThreadID, struct task_t*
+  /// TaskT).
   /// \param D OpenMP directive.
   /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+  /// \param PartIDVar Variable for partition id in the current OpenMP untied
+  /// task region.
+  /// \param TaskTVar Variable for task_t argument.
   /// \param InnermostKind Kind of innermost directive (for simple directives it
   /// is a directive itself, for combined - its innermost directive).
   /// \param CodeGen Code generation sequence for the \a D directive.
+  /// \param Tied true if task is generated for tied task, false otherwise.
+  /// \param NumberOfParts Number of parts in untied task. Ignored for tied
+  /// tasks.
   ///
   virtual llvm::Value *emitTaskOutlinedFunction(
       const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
-      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen);
+      const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+      OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+      bool Tied, unsigned &NumberOfParts);
 
   /// \brief Cleans up references to the objects in finished function.
   ///
@@ -731,6 +739,7 @@ public:
   /// \param Tied true if the task is tied (the task is tied to the thread that
   /// can suspend its task region), false - untied (the task is not tied to any
   /// thread).
+  /// \param NumberOfParts Number of parts for untied task.
   /// \param Final Contains either constant bool value, or llvm::Value * of i1
   /// type for final clause. If the value is true, the task forces all of its
   /// child tasks to become final and included tasks.
@@ -757,8 +766,8 @@ public:
   virtual void emitTaskCall(
       CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
       bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
-      llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
-      const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+      unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+      Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
       ArrayRef<const Expr *> PrivateCopies,
       ArrayRef<const Expr *> FirstprivateVars,
       ArrayRef<const Expr *> FirstprivateCopies,

Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Tue Apr 19 04:10:27 2016
@@ -2244,6 +2244,7 @@ void CodeGenFunction::EmitOMPTaskDirecti
   auto CapturedStruct = GenerateCapturedStmtArgument(*CS);
   auto *I = CS->getCapturedDecl()->param_begin();
   auto *PartId = std::next(I);
+  auto *TaskT = std::next(I, 4);
   // The first function argument for tasks is a thread id, the second one is a
   // part id (0 for tied tasks, >=0 for untied task).
   llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
@@ -2288,53 +2289,52 @@ void CodeGenFunction::EmitOMPTaskDirecti
       Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef));
     }
   }
-  auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars](
-      CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars](
+      CodeGenFunction &CGF, PrePostActionTy &Action) {
+    OMPPrivateScope Scope(CGF);
     // Set proper addresses for generated private copies.
     auto *CS = cast<CapturedStmt>(S.getAssociatedStmt());
-    {
-      OMPPrivateScope Scope(CGF);
-      if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
-        auto *CopyFn = CGF.Builder.CreateLoad(
-            CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
-        auto *PrivatesPtr = CGF.Builder.CreateLoad(
-            CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
-        // Map privates.
-        llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
-        llvm::SmallVector<llvm::Value *, 16> CallArgs;
-        CallArgs.push_back(PrivatesPtr);
-        for (auto *E : PrivateVars) {
-          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
-          Address PrivatePtr =
-              CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
-          PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
-          CallArgs.push_back(PrivatePtr.getPointer());
-        }
-        for (auto *E : FirstprivateVars) {
-          auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
-          Address PrivatePtr =
-              CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()));
-          PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
-          CallArgs.push_back(PrivatePtr.getPointer());
-        }
-        CGF.EmitRuntimeCall(CopyFn, CallArgs);
-        for (auto &&Pair : PrivatePtrs) {
-          Address Replacement(CGF.Builder.CreateLoad(Pair.second),
-                              CGF.getContext().getDeclAlign(Pair.first));
-          Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
-        }
+    if (!PrivateVars.empty() || !FirstprivateVars.empty()) {
+      auto *CopyFn = CGF.Builder.CreateLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3)));
+      auto *PrivatesPtr = CGF.Builder.CreateLoad(
+          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2)));
+      // Map privates.
+      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
+      llvm::SmallVector<llvm::Value *, 16> CallArgs;
+      CallArgs.push_back(PrivatesPtr);
+      for (auto *E : PrivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        Address PrivatePtr = CGF.CreateMemTemp(
+            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr.getPointer());
       }
-      (void)Scope.Privatize();
-      if (*PartId) {
-        // TODO: emit code for untied tasks.
+      for (auto *E : FirstprivateVars) {
+        auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+        Address PrivatePtr =
+            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
+                              ".firstpriv.ptr.addr");
+        PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr));
+        CallArgs.push_back(PrivatePtr.getPointer());
+      }
+      CGF.EmitRuntimeCall(CopyFn, CallArgs);
+      for (auto &&Pair : PrivatePtrs) {
+        Address Replacement(CGF.Builder.CreateLoad(Pair.second),
+                            CGF.getContext().getDeclAlign(Pair.first));
+        Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; });
       }
-      CGF.EmitStmt(CS->getCapturedStmt());
     }
+    (void)Scope.Privatize();
+
+    Action.Enter(CGF);
+    CGF.EmitStmt(CS->getCapturedStmt());
   };
-  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
-      S, *I, OMPD_task, CodeGen);
   // Check if we should emit tied or untied task.
   bool Tied = !S.getSingleClause<OMPUntiedClause>();
+  unsigned NumberOfParts;
+  auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
+      S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts);
   // Check if the task is final
   llvm::PointerIntPair<llvm::Value *, 1, bool> Final;
   if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
@@ -2361,9 +2361,9 @@ void CodeGenFunction::EmitOMPTaskDirecti
   }
   OMPLexicalScope Scope(*this, S);
   CGM.getOpenMPRuntime().emitTaskCall(
-      *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy,
-      CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars,
-      FirstprivateCopies, FirstprivateInits, Dependences);
+      *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn,
+      SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies,
+      FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences);
 }
 
 void CodeGenFunction::EmitOMPTaskyieldDirective(

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Tue Apr 19 04:10:27 2016
@@ -1610,12 +1610,11 @@ void Sema::ActOnOpenMPRegionStart(OpenMP
     QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI);
     Sema::CapturedParamNameType Params[] = {
         std::make_pair(".global_tid.", KmpInt32Ty),
-        std::make_pair(".part_id.", KmpInt32Ty),
-        std::make_pair(".privates.",
-                       Context.VoidPtrTy.withConst().withRestrict()),
-        std::make_pair(
-            ".copy_fn.",
-            Context.getPointerType(CopyFnType).withConst().withRestrict()),
+        std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)),
+        std::make_pair(".privates.", Context.VoidPtrTy.withConst()),
+        std::make_pair(".copy_fn.",
+                       Context.getPointerType(CopyFnType).withConst()),
+        std::make_pair(".task_t.", Context.VoidPtrTy.withConst()),
         std::make_pair(StringRef(), QualType()) // __context with shared vars
     };
     ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,

Modified: cfe/trunk/test/OpenMP/task_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/task_codegen.cpp?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/task_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/task_codegen.cpp Tue Apr 19 04:10:27 2016
@@ -230,6 +230,19 @@ int main() {
     a = 4;
     c = 5;
   }
+// CHECK: [[ORIG_TASK_PTR:%.+]] = call i8* @__kmpc_omp_task_alloc([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i32 0, i64 32, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_T]]{{.*}}*)* [[TASK_ENTRY6:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[DESTRUCTORS_REF_PTR:%.+]] = getelementptr inbounds [[KMP_TASK_T]]{{.*}}* {{%.+}}, i32 0, i32 3
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** [[DESTRUCTORS_REF_PTR]]
+// CHECK: call i32 @__kmpc_omp_task([[IDENT_T]]* @{{.+}}, i32 [[GTID]], i8* [[ORIG_TASK_PTR]])
+#pragma omp task untied
+  {
+    S s1;
+#pragma omp task
+    a = 4;
+#pragma omp taskyield
+    s1 = S();
+#pragma omp taskwait
+  }
   return a;
 }
 // CHECK: define internal i32 [[TASK_ENTRY1]](i32, [[KMP_TASK_T]]{{.*}}* noalias)
@@ -240,16 +253,42 @@ int main() {
 // CHECK: store i32 10, i32* %{{.+}}
 
 // CHECK: define internal i32 [[TASK_ENTRY2]](i32, [[KMP_TASK_T]]{{.*}}* noalias)
-// CHECK: store i32 1, i32* [[A_PTR:@.+]]
+// CHECK: store i32 1, i32* [[A_PTR]]
 
 // CHECK: define internal i32 [[TASK_ENTRY3]](i32, [[KMP_TASK_T]]{{.*}}* noalias)
-// CHECK: store i32 2, i32* [[A_PTR:@.+]]
+// CHECK: store i32 2, i32* [[A_PTR]]
 
 // CHECK: define internal i32 [[TASK_ENTRY4]](i32, [[KMP_TASK_T]]{{.*}}* noalias)
-// CHECK: store i32 3, i32* [[A_PTR:@.+]]
+// CHECK: store i32 3, i32* [[A_PTR]]
 
 // CHECK: define internal i32 [[TASK_ENTRY5]](i32, [[KMP_TASK_T]]{{.*}}* noalias)
-// CHECK: store i32 4, i32* [[A_PTR:@.+]]
+// CHECK: store i32 4, i32* [[A_PTR]]
 // CHECK: store i32 5, i32* [[C_PTR:%.+]], align 128
+
+// CHECK: define internal i32
+// CHECK: store i32 4, i32* [[A_PTR]]
+
+// CHECK: define internal i32 [[TASK_ENTRY6]](i32, [[KMP_TASK_T]]{{.*}}* noalias)
+// CHECK: switch i32 %{{.+}}, label
+// CHECK: load i32*, i32** %
+// CHECK: store i32 1, i32* %
+// CHECK: call i32 @__kmpc_omp_task(%
+
+// CHECK: call i8* @__kmpc_omp_task_alloc(
+// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %
+// CHECK: call i32 @__kmpc_omp_task(%
+// CHECK: load i32*, i32** %
+// CHECK: store i32 2, i32* %
+// CHECK: call i32 @__kmpc_omp_task(%
+
+// CHECK: call i32 @__kmpc_omp_taskyield(%
+// CHECK: load i32*, i32** %
+// CHECK: store i32 3, i32* %
+// CHECK: call i32 @__kmpc_omp_task(%
+
+// CHECK: call i32 @__kmpc_omp_taskwait(%
+// CHECK: load i32*, i32** %
+// CHECK: store i32 4, i32* %
+// CHECK: call i32 @__kmpc_omp_task(%
 #endif
 

Modified: cfe/trunk/test/OpenMP/task_firstprivate_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/task_firstprivate_codegen.cpp?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/task_firstprivate_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/task_firstprivate_codegen.cpp Tue Apr 19 04:10:27 2016
@@ -422,11 +422,11 @@ int main() {
 // CHECK: ret void
 
 // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias)
-
-// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
-// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
-// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
-// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: alloca i32*,
+// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
 // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
 // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])

Modified: cfe/trunk/test/OpenMP/task_private_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/task_private_codegen.cpp?rev=266722&r1=266721&r2=266722&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/task_private_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/task_private_codegen.cpp Tue Apr 19 04:10:27 2016
@@ -331,10 +331,11 @@ int main() {
 
 // CHECK: define internal i32 [[TASK_ENTRY]](i32, [[KMP_TASK_TMAIN_TY]]* noalias)
 
-// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
-// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
-// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
-// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: alloca i32*,
+// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
 // CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
 // CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
 // CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])




More information about the cfe-commits mailing list