r327654 - [OPENMP, NVPTX] Improve globalization of the variables captured by value.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Thu Mar 15 11:10:54 PDT 2018


Author: abataev
Date: Thu Mar 15 11:10:54 2018
New Revision: 327654

URL: http://llvm.org/viewvc/llvm-project?rev=327654&view=rev
Log:
[OPENMP, NVPTX] Improve globalization of the variables captured by value.

If the variable is captured by value and the corresponding parameter in
the outlined function escapes its declaration context, this parameter
must be globalized. To globalize it we need to get the address of the
original parameter, load the value, store it to the global address and
use this global address instead of the original.

Patch improves globalization for parallel|teams regions + functions in
declare target regions.

Added:
    cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
    cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
    cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=327654&r1=327653&r2=327654&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Thu Mar 15 11:10:54 2018
@@ -171,16 +171,23 @@ class CheckVarsEscapingDeclContext final
     : public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
   CodeGenFunction &CGF;
   llvm::SetVector<const ValueDecl *> EscapedDecls;
+  llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
   llvm::SmallPtrSet<const ValueDecl *, 4> IgnoredDecls;
   bool AllEscaped = false;
   RecordDecl *GlobalizedRD = nullptr;
   llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
 
   void markAsEscaped(const ValueDecl *VD) {
-    if (IgnoredDecls.count(VD) ||
-        (CGF.CapturedStmtInfo &&
-         CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))))
+    if (IgnoredDecls.count(VD))
       return;
+    // Variables captured by value must be globalized.
+    if (auto *CSI = CGF.CapturedStmtInfo) {
+      if (const FieldDecl *FD = CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))) {
+        if (FD->getType()->isReferenceType())
+          return;
+        EscapedParameters.insert(VD);
+      }
+    }
     EscapedDecls.insert(VD);
   }
 
@@ -385,12 +392,15 @@ public:
         Visit(Child);
   }
 
+  /// Returns the record that handles all the escaped local variables and used
+  /// instead of their original storage.
   const RecordDecl *getGlobalizedRecord() {
     if (!GlobalizedRD)
       buildRecordForGlobalizedVars();
     return GlobalizedRD;
   }
 
+  /// Returns the field in the globalized record for the escaped variable.
   const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
     assert(GlobalizedRD &&
            "Record for globalized variables must be generated already.");
@@ -400,9 +410,16 @@ public:
     return I->getSecond();
   }
 
+  /// Returns the list of the escaped local variables/parameters.
   ArrayRef<const ValueDecl *> getEscapedDecls() const {
     return EscapedDecls.getArrayRef();
   }
+
+  /// Checks if the escaped local variable is actually a parameter passed by
+  /// value.
+  const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
+    return EscapedParameters;
+  }
 };
 } // anonymous namespace
 
@@ -614,58 +631,14 @@ void CGOpenMPRuntimeNVPTX::emitGenericEn
       createNVPTXRuntimeFunction(
           OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
 
-  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
-  if (I == FunctionGlobalizedDecls.end())
-    return;
-  const RecordDecl *GlobalizedVarsRecord = I->getSecond().first;
-  QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
-
-  // Recover pointer to this function's global record. The runtime will
-  // handle the specifics of the allocation of the memory.
-  // Use actual memory size of the record including the padding
-  // for alignment purposes.
-  unsigned Alignment =
-      CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
-  unsigned GlobalRecordSize =
-      CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
-  GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
-  // TODO: allow the usage of shared memory to be controlled by
-  // the user, for now, default to global.
-  llvm::Value *GlobalRecordSizeArg[] = {
-      llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
-      CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
-  llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
-      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
-      GlobalRecordSizeArg);
-  llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
-      GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
-  FunctionToGlobalRecPtr.try_emplace(CGF.CurFn, GlobalRecValue);
-
-  // Emit the "global alloca" which is a GEP from the global declaration record
-  // using the pointer returned by the runtime.
-  LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
-  auto &Res = I->getSecond().second;
-  for (auto &Rec : Res) {
-    const FieldDecl *FD = Rec.second.first;
-    LValue VarAddr = CGF.EmitLValueForField(Base, FD);
-    Rec.second.second = VarAddr.getAddress();
-  }
+  emitGenericVarsProlog(CGF, WST.Loc);
 }
 
 void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
                                                   EntryFunctionState &EST) {
-  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
-  if (I != FunctionGlobalizedDecls.end()) {
-    if (!CGF.HaveInsertPoint())
-      return;
-    auto I = FunctionToGlobalRecPtr.find(CGF.CurFn);
-    if (I != FunctionToGlobalRecPtr.end()) {
-      llvm::Value *Args[] = {I->getSecond()};
-      CGF.EmitRuntimeCall(
-          createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
-          Args);
-    }
-  }
+  emitGenericVarsEpilog(CGF);
+  if (!CGF.HaveInsertPoint())
+    return;
 
   if (!EST.ExitBB)
     EST.ExitBB = CGF.createBasicBlock(".exit");
@@ -1207,6 +1180,24 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsC
 llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  SourceLocation Loc = D.getLocStart();
+
+  // Emit target region as a standalone region.
+  class NVPTXPrePostActionTy : public PrePostActionTy {
+    SourceLocation &Loc;
+
+  public:
+    NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+    void Enter(CodeGenFunction &CGF) override {
+      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericVarsProlog(CGF, Loc);
+    }
+    void Exit(CodeGenFunction &CGF) override {
+      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericVarsEpilog(CGF);
+    }
+  } Action(Loc);
+  CodeGen.setAction(Action);
   auto *OutlinedFun =
       cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
           D, ThreadIDVar, InnermostKind, CodeGen));
@@ -1222,7 +1213,24 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitP
 llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
     const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
     OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  SourceLocation Loc = D.getLocStart();
 
+  // Emit target region as a standalone region.
+  class NVPTXPrePostActionTy : public PrePostActionTy {
+    SourceLocation &Loc;
+
+  public:
+    NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+    void Enter(CodeGenFunction &CGF) override {
+      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericVarsProlog(CGF, Loc);
+    }
+    void Exit(CodeGenFunction &CGF) override {
+      static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+          .emitGenericVarsEpilog(CGF);
+    }
+  } Action(Loc);
+  CodeGen.setAction(Action);
   llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
       D, ThreadIDVar, InnermostKind, CodeGen);
   llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
@@ -1233,6 +1241,73 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitT
   return OutlinedFun;
 }
 
+void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
+                                                 SourceLocation Loc) {
+  CGBuilderTy &Bld = CGF.Builder;
+
+  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+  if (I == FunctionGlobalizedDecls.end())
+    return;
+  const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord;
+  QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+
+  // Recover pointer to this function's global record. The runtime will
+  // handle the specifics of the allocation of the memory.
+  // Use actual memory size of the record including the padding
+  // for alignment purposes.
+  unsigned Alignment =
+      CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+  unsigned GlobalRecordSize =
+      CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+  GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+  // TODO: allow the usage of shared memory to be controlled by
+  // the user, for now, default to global.
+  llvm::Value *GlobalRecordSizeArg[] = {
+      llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+      CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+  llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+      createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+      GlobalRecordSizeArg);
+  llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+      GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+  LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
+  I->getSecond().GlobalRecordAddr = GlobalRecValue;
+
+  // Emit the "global alloca" which is a GEP from the global declaration record
+  // using the pointer returned by the runtime.
+  for (auto &Rec : I->getSecond().LocalVarData) {
+    bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
+    llvm::Value *ParValue;
+    if (EscapedParam) {
+      const auto *VD = cast<VarDecl>(Rec.first);
+      LValue ParLVal =
+          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
+      ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
+    }
+    const FieldDecl *FD = Rec.second.first;
+    LValue VarAddr = CGF.EmitLValueForField(Base, FD);
+    Rec.second.second = VarAddr.getAddress();
+    if (EscapedParam) {
+      const auto *VD = cast<VarDecl>(Rec.first);
+      CGF.EmitStoreOfScalar(ParValue, VarAddr);
+      I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
+    }
+  }
+  I->getSecond().MappedParams->apply(CGF);
+}
+
+void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
+  const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+  if (I != FunctionGlobalizedDecls.end() && I->getSecond().GlobalRecordAddr) {
+    I->getSecond().MappedParams->restore(CGF);
+    if (!CGF.HaveInsertPoint())
+      return;
+    CGF.EmitRuntimeCall(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+        I->getSecond().GlobalRecordAddr);
+  }
+}
+
 void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
                                          const OMPExecutableDirective &D,
                                          SourceLocation Loc,
@@ -1317,7 +1392,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericPa
         llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
         CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
                               Ctx.getPointerType(Ctx.VoidPtrTy));
-        Idx++;
+        ++Idx;
       }
     }
 
@@ -2750,8 +2825,8 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedF
       continue;
     }
     llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-        NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
-                       /*AddrSpace=*/0));
+        NativeArg,
+        NativeArg->getType()->getPointerElementType()->getPointerTo());
     TargetArgs.emplace_back(
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
   }
@@ -2788,7 +2863,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::cr
   auto *Fn = llvm::Function::Create(
       CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
       OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
-  CGM.SetInternalFunctionAttributes(/*D=*/GlobalDecl(), Fn, CGFI);
+  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
   Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
 
   CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
@@ -2854,6 +2929,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
          "Function is registered already.");
   SmallVector<const ValueDecl *, 4> IgnoredDecls;
   const Stmt *Body = nullptr;
+  bool NeedToDelayGlobalization = false;
   if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     Body = FD->getBody();
   } else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
@@ -2861,6 +2937,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
   } else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
     Body = CD->getBody();
     if (CGF.CapturedStmtInfo->getKind() == CR_OpenMP) {
+      NeedToDelayGlobalization = true;
       if (const auto *CS = dyn_cast<CapturedStmt>(Body)) {
         IgnoredDecls.reserve(CS->capture_size());
         for (const auto &Capture : CS->captures())
@@ -2876,14 +2953,29 @@ void CGOpenMPRuntimeNVPTX::emitFunctionP
   const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
   if (!GlobalizedVarsRecord)
     return;
-  auto &Res =
-      FunctionGlobalizedDecls
-          .try_emplace(CGF.CurFn, GlobalizedVarsRecord, DeclToAddrMapTy())
-          .first->getSecond()
-          .second;
+  auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+  I->getSecond().MappedParams =
+      llvm::make_unique<CodeGenFunction::OMPMapVars>();
+  I->getSecond().GlobalRecord = GlobalizedVarsRecord;
+  I->getSecond().EscapedParameters.insert(
+      VarChecker.getEscapedParameters().begin(),
+      VarChecker.getEscapedParameters().end());
+  DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
   for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
     const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
-    Res.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+    Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+  }
+  if (!NeedToDelayGlobalization) {
+    emitGenericVarsProlog(CGF, D->getLocStart());
+    struct GlobalizationScope final : EHScopeStack::Cleanup {
+      GlobalizationScope() = default;
+
+      void Emit(CodeGenFunction &CGF, Flags flags) override {
+        static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+            .emitGenericVarsEpilog(CGF);
+      }
+    };
+    CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
   }
 }
 
@@ -2892,14 +2984,13 @@ Address CGOpenMPRuntimeNVPTX::getAddress
   auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
   if (I == FunctionGlobalizedDecls.end())
     return Address::invalid();
-  auto VDI = I->getSecond().second.find(VD);
-  if (VDI == I->getSecond().second.end())
+  auto VDI = I->getSecond().LocalVarData.find(VD);
+  if (VDI == I->getSecond().LocalVarData.end())
     return Address::invalid();
   return VDI->second.second;
 }
 
 void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
-  FunctionToGlobalRecPtr.erase(CGF.CurFn);
   FunctionGlobalizedDecls.erase(CGF.CurFn);
   CGOpenMPRuntime::functionFinished(CGF);
 }

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=327654&r1=327653&r2=327654&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Thu Mar 15 11:10:54 2018
@@ -61,6 +61,12 @@ private:
   /// function.
   void emitGenericEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST);
 
+  /// Helper for generic variables globalization prolog.
+  void emitGenericVarsProlog(CodeGenFunction &CGF, SourceLocation Loc);
+
+  /// Helper for generic variables globalization epilog.
+  void emitGenericVarsEpilog(CodeGenFunction &CGF);
+
   /// \brief Helper for Spmd mode target directive's entry function.
   void emitSpmdEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST,
                            const OMPExecutableDirective &D);
@@ -332,13 +338,18 @@ private:
   /// The map of local variables to their addresses in the global memory.
   using DeclToAddrMapTy = llvm::MapVector<const Decl *,
       std::pair<const FieldDecl *, Address>>;
+  /// Set of the parameters passed by value escaping OpenMP context.
+  using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
+  struct FunctionData {
+    DeclToAddrMapTy LocalVarData;
+    const RecordDecl *GlobalRecord = nullptr;
+    llvm::Value *GlobalRecordAddr = nullptr;
+    EscapedParamsTy EscapedParameters;
+    std::unique_ptr<CodeGenFunction::OMPMapVars> MappedParams;
+  };
   /// Maps the function to the list of the globalized variables with their
   /// addresses.
-  llvm::DenseMap<llvm::Function *,
-                 std::pair<const RecordDecl *, DeclToAddrMapTy>>
-      FunctionGlobalizedDecls;
-  /// Map from function to global record pointer.
-  llvm::DenseMap<llvm::Function *, llvm::Value *> FunctionToGlobalRecPtr;
+  llvm::SmallDenseMap<llvm::Function *, FunctionData> FunctionGlobalizedDecls;
 };
 
 } // CodeGen namespace.

Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=327654&r1=327653&r2=327654&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Thu Mar 15 11:10:54 2018
@@ -1262,7 +1262,7 @@ static void emitEmptyBoundParameters(Cod
 
 void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
   // Emit parallel region as a standalone region.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     OMPPrivateScope PrivateScope(CGF);
     bool Copyins = CGF.EmitOMPCopyinClause(S);
     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
@@ -1277,6 +1277,7 @@ void CodeGenFunction::EmitOMPParallelDir
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
   };
@@ -2100,7 +2101,8 @@ emitInnerParallelForWhenCombined(CodeGen
                                  const OMPLoopDirective &S,
                                  CodeGenFunction::JumpDest LoopExit) {
   auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
-                                         PrePostActionTy &) {
+                                         PrePostActionTy &Action) {
+    Action.Enter(CGF);
     bool HasCancel = false;
     if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
@@ -2682,7 +2684,8 @@ void CodeGenFunction::EmitOMPParallelFor
     const OMPParallelForDirective &S) {
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'for' directive.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
     OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                                emitDispatchForLoopBounds);
@@ -2695,7 +2698,8 @@ void CodeGenFunction::EmitOMPParallelFor
     const OMPParallelForSimdDirective &S) {
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'for' directive.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                                emitDispatchForLoopBounds);
   };
@@ -2707,7 +2711,8 @@ void CodeGenFunction::EmitOMPParallelSec
     const OMPParallelSectionsDirective &S) {
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'sections' directive.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
     CGF.EmitSections(S);
   };
   emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
@@ -4033,12 +4038,13 @@ static void emitCommonOMPTeamsDirective(
 
 void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
   // Emit teams region as a standalone region.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     OMPPrivateScope PrivateScope(CGF);
     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
   };
@@ -4099,10 +4105,11 @@ emitTargetTeamsDistributeRegion(CodeGenF
 
   // Emit teams region as a standalone region.
   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                            PrePostActionTy &) {
+                                            PrePostActionTy &Action) {
     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4144,10 +4151,11 @@ static void emitTargetTeamsDistributeSim
 
   // Emit teams region as a standalone region.
   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                            PrePostActionTy &) {
+                                            PrePostActionTy &Action) {
     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4188,10 +4196,11 @@ void CodeGenFunction::EmitOMPTeamsDistri
 
   // Emit teams region as a standalone region.
   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                            PrePostActionTy &) {
+                                            PrePostActionTy &Action) {
     OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4209,10 +4218,11 @@ void CodeGenFunction::EmitOMPTeamsDistri
 
   // Emit teams region as a standalone region.
   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                            PrePostActionTy &) {
+                                            PrePostActionTy &Action) {
     OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4231,10 +4241,11 @@ void CodeGenFunction::EmitOMPTeamsDistri
 
   // Emit teams region as a standalone region.
   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                            PrePostActionTy &) {
+                                            PrePostActionTy &Action) {
     OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
                                                     CodeGenDistribute);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4253,10 +4264,11 @@ void CodeGenFunction::EmitOMPTeamsDistri
 
   // Emit teams region as a standalone region.
   auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                            PrePostActionTy &) {
+                                            PrePostActionTy &Action) {
     OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4277,10 +4289,11 @@ static void emitTargetTeamsDistributePar
 
   // Emit teams region as a standalone region.
   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                                 PrePostActionTy &) {
+                                                 PrePostActionTy &Action) {
     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4328,10 +4341,11 @@ static void emitTargetTeamsDistributePar
 
   // Emit teams region as a standalone region.
   auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
-                                                 PrePostActionTy &) {
+                                                 PrePostActionTy &Action) {
     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
         CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
@@ -4599,12 +4613,13 @@ static void emitTargetParallelRegion(Cod
   // Get the captured statement associated with the 'parallel' region.
   auto *CS = S.getCapturedStmt(OMPD_parallel);
   Action.Enter(CGF);
-  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
     CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
     (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
     CGF.EmitOMPPrivateClause(S, PrivateScope);
     CGF.EmitOMPReductionClauseInit(S, PrivateScope);
     (void)PrivateScope.Privatize();
+    Action.Enter(CGF);
     // TODO: Add support for clauses.
     CGF.EmitStmt(CS->getCapturedStmt());
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
@@ -4643,7 +4658,8 @@ static void emitTargetParallelForRegion(
   Action.Enter(CGF);
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'for' directive.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
     CodeGenFunction::OMPCancelStackRAII CancelRegion(
         CGF, OMPD_target_parallel_for, S.hasCancel());
     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
@@ -4683,7 +4699,8 @@ emitTargetParallelForSimdRegion(CodeGenF
   Action.Enter(CGF);
   // Emit directive as a combined directive that consists of two implicit
   // directives: 'parallel' with 'for' directive.
-  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    Action.Enter(CGF);
     CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                                emitDispatchForLoopBounds);
   };

Added: cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp?rev=327654&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp (added)
+++ cfe/trunk/test/OpenMP/declare_target_codegen_globalization.cpp Thu Mar 15 11:10:54 2018
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+int foo(int &a) { return a; }
+
+int bar() {
+  int a;
+  return foo(a);
+}
+
+// CHECK: define void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+5]](i32* dereferenceable{{.*}})
+// CHECK-NOT: @__kmpc_data_sharing_push_stack
+
+int maini1() {
+  int a;
+#pragma omp target parallel map(from:a)
+  {
+    int b;
+    a = foo(b) + bar();
+  }
+  return a;
+}
+
+// parallel region
+// CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
+// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
+// CHECK: call {{.*}}[[BAR:@.*bar.*]]()
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: ret void
+
+// CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})
+// CHECK-NOT: @__kmpc_data_sharing_push_stack
+
+// CHECK: define {{.*}}[[BAR]]()
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
+// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: ret i32

Modified: cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp?rev=327654&r1=327653&r2=327654&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_teams_codegen.cpp Thu Mar 15 11:10:54 2018
@@ -36,8 +36,12 @@ int main (int argc, char **argv) {
 // CK1:  store {{.+}} 0, {{.+}},
 // CK1:  store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
 // CK1-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
-// CK1-64:  store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK1-32:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
+// CK1:  call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CK1-64:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
+// CK1-32:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
+// CK1:  [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1:  store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
+// CK1:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
 // CK1:  [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
 // CK1:  store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
 // CK1-NOT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_teams(
@@ -49,6 +53,10 @@ int main (int argc, char **argv) {
 // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
 // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
 // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
+// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
+// CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
+// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
 // CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
 // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
 // CK1: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],
@@ -104,8 +112,12 @@ int main (int argc, char **argv) {
 // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
 // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
 // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
-// CK2-64:  store i{{[0-9]+}}* [[CONV]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
-// CK2-32:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
+// CK2:  call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CK2-64:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
+// CK2-32:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
+// CK2:  [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2:  store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
+// CK2:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
 // CK2:  [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
 // CK2: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[ARGCADDR_PTR_REF]],
 // CK2-NOT:  {{.+}} = call i32 @__kmpc_push_num_teams(
@@ -121,6 +133,10 @@ int main (int argc, char **argv) {
 // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
 // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
 // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
+// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
+// CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
+// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
 // CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],
 // CK2: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}***, i{{[0-9]+}}**** [[ARGCADDR_PTR]],
 // CK2: store i{{[0-9]+}}** null, i{{[0-9]+}}*** [[ARGCADDR_PTR_REF]],




More information about the cfe-commits mailing list