r332016 - [OPENMP, NVPTX] Initial support for L2 parallelism in SPMD mode.

Thu May 10 11:32:08 PDT 2018

Author: abataev
Date: Thu May 10 11:32:08 2018
New Revision: 332016

URL: http://llvm.org/viewvc/llvm-project?rev=332016&view=rev
Log:
[OPENMP, NVPTX] Initial support for L2 parallelism in SPMD mode.

Added initial support for L2 parallelism in SPMD mode. Note, though,
that the orphaned parallel directives are not currently supported in
SPMD mode.

Modified:
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
    cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
    cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=332016&r1=332015&r2=332016&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Thu May 10 11:32:08 2018
@@ -140,13 +140,15 @@ public:
 /// to emit optimized code.
 class ExecutionModeRAII {
 private:
-  bool SavedMode;
-  bool &Mode;
+  CGOpenMPRuntimeNVPTX::ExecutionMode SavedMode;
+  CGOpenMPRuntimeNVPTX::ExecutionMode &Mode;
 
 public:
-  ExecutionModeRAII(bool &Mode, bool NewMode) : Mode(Mode) {
+  ExecutionModeRAII(CGOpenMPRuntimeNVPTX::ExecutionMode &Mode, bool IsSPMD)
+      : Mode(Mode) {
     SavedMode = Mode;
-    Mode = NewMode;
+    Mode = IsSPMD ? CGOpenMPRuntimeNVPTX::EM_SPMD
+                  : CGOpenMPRuntimeNVPTX::EM_NonSPMD;
   }
   ~ExecutionModeRAII() { Mode = SavedMode; }
 };
@@ -579,8 +581,9 @@ void CGOpenMPRuntimeNVPTX::WorkerFunctio
   WorkerFn->setDoesNotRecurse();
 }
 
-bool CGOpenMPRuntimeNVPTX::isInSpmdExecutionMode() const {
-  return IsInSPMDExecutionMode;
+CGOpenMPRuntimeNVPTX::ExecutionMode
+CGOpenMPRuntimeNVPTX::getExecutionMode() const {
+  return CurrentExecutionMode;
 }
 
 static CGOpenMPRuntimeNVPTX::DataSharingMode
@@ -589,34 +592,96 @@ getDataSharingMode(CodeGenModule &CGM) {
                                           : CGOpenMPRuntimeNVPTX::Generic;
 }
 
-/// Check for inner (nested) SPMD construct, if any
-static bool hasNestedSPMDDirective(const OMPExecutableDirective &D) {
-  const auto *CS = D.getCapturedStmt(OMPD_target);
-  const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
-  const Stmt *ChildStmt = nullptr;
+/// Checks if the \p Body is the \a CompoundStmt and returns its child statement
+/// iff there is only one.
+static const Stmt *getSingleCompoundChild(const Stmt *Body) {
   if (const auto *C = dyn_cast<CompoundStmt>(Body))
     if (C->size() == 1)
-      ChildStmt = C->body_front();
-  if (!ChildStmt)
-    return false;
+      return C->body_front();
+  return Body;
+}
+
+/// Check if the parallel directive has an 'if' clause with non-constant or
+/// false condition.
+static bool hasParallelIfClause(ASTContext &Ctx,
+                                const OMPExecutableDirective &D) {
+  for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
+    OpenMPDirectiveKind NameModifier = C->getNameModifier();
+    if (NameModifier != OMPD_parallel && NameModifier != OMPD_unknown)
+      continue;
+    const Expr *Cond = C->getCondition();
+    bool Result;
+    if (!Cond->EvaluateAsBooleanCondition(Result, Ctx) || !Result)
+      return true;
+  }
+  return false;
+}
+
+/// Check for inner (nested) SPMD construct, if any
+static bool hasNestedSPMDDirective(ASTContext &Ctx,
+                                   const OMPExecutableDirective &D) {
+  const auto *CS = D.getInnermostCapturedStmt();
+  const auto *Body = CS->getCapturedStmt()->IgnoreContainers();
+  const Stmt *ChildStmt = getSingleCompoundChild(Body);
 
   if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
     OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
-    // TODO: add further analysis for inner teams|distribute directives, if any.
     switch (D.getDirectiveKind()) {
     case OMPD_target:
-      return (isOpenMPParallelDirective(DKind) &&
-              !isOpenMPTeamsDirective(DKind) &&
-              !isOpenMPDistributeDirective(DKind)) ||
-             isOpenMPSimdDirective(DKind) ||
-             DKind == OMPD_teams_distribute_parallel_for;
+      if ((isOpenMPParallelDirective(DKind) &&
+           !hasParallelIfClause(Ctx, *NestedDir)) ||
+          isOpenMPSimdDirective(DKind))
+        return true;
+      if (DKind == OMPD_teams || DKind == OMPD_teams_distribute) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+        if (!Body)
+          return false;
+        ChildStmt = getSingleCompoundChild(Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          if ((isOpenMPParallelDirective(DKind) &&
+               !hasParallelIfClause(Ctx, *NND)) ||
+              isOpenMPSimdDirective(DKind))
+            return true;
+          if (DKind == OMPD_distribute) {
+            Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+            if (!Body)
+              return false;
+            ChildStmt = getSingleCompoundChild(Body);
+            if (!ChildStmt)
+              return false;
+            if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+              DKind = NND->getDirectiveKind();
+              return (isOpenMPParallelDirective(DKind) &&
+                      !hasParallelIfClause(Ctx, *NND)) ||
+                     isOpenMPSimdDirective(DKind);
+            }
+          }
+        }
+      }
+      return false;
     case OMPD_target_teams:
-      return (isOpenMPParallelDirective(DKind) &&
-              !isOpenMPDistributeDirective(DKind)) ||
-             isOpenMPSimdDirective(DKind) ||
-             DKind == OMPD_distribute_parallel_for;
+      if ((isOpenMPParallelDirective(DKind) &&
+           !hasParallelIfClause(Ctx, *NestedDir)) ||
+          isOpenMPSimdDirective(DKind))
+        return true;
+      if (DKind == OMPD_distribute) {
+        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers();
+        if (!Body)
+          return false;
+        ChildStmt = getSingleCompoundChild(Body);
+        if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+          DKind = NND->getDirectiveKind();
+          return (isOpenMPParallelDirective(DKind) &&
+                  !hasParallelIfClause(Ctx, *NND)) ||
+                 isOpenMPSimdDirective(DKind);
+        }
+      }
+      return false;
     case OMPD_target_teams_distribute:
-      return isOpenMPParallelDirective(DKind) || isOpenMPSimdDirective(DKind);
+      return (isOpenMPParallelDirective(DKind) &&
+              !hasParallelIfClause(Ctx, *NestedDir)) ||
+             isOpenMPSimdDirective(DKind);
     case OMPD_target_simd:
     case OMPD_target_parallel:
     case OMPD_target_parallel_for:
@@ -674,20 +739,22 @@ static bool hasNestedSPMDDirective(const
   return false;
 }
 
-static bool supportsSPMDExecutionMode(const OMPExecutableDirective &D) {
+static bool supportsSPMDExecutionMode(ASTContext &Ctx,
+                                      const OMPExecutableDirective &D) {
   OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
   switch (DirectiveKind) {
   case OMPD_target:
   case OMPD_target_teams:
   case OMPD_target_teams_distribute:
-    return hasNestedSPMDDirective(D);
-  case OMPD_target_simd:
+    return hasNestedSPMDDirective(Ctx, D);
   case OMPD_target_parallel:
   case OMPD_target_parallel_for:
   case OMPD_target_parallel_for_simd:
-  case OMPD_target_teams_distribute_simd:
   case OMPD_target_teams_distribute_parallel_for:
   case OMPD_target_teams_distribute_parallel_for_simd:
+    return !hasParallelIfClause(Ctx, D);
+  case OMPD_target_simd:
+  case OMPD_target_teams_distribute_simd:
     return true;
   case OMPD_parallel:
   case OMPD_for:
@@ -744,7 +811,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDKe
                                              llvm::Constant *&OutlinedFnID,
                                              bool IsOffloadEntry,
                                              const RegionCodeGenTy &CodeGen) {
-  ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/false);
+  ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/false);
   EntryFunctionState EST;
   WorkerFunctionState WST(CGM, D.getLocStart());
   Work.clear();
@@ -858,7 +925,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdKerne
                                           llvm::Constant *&OutlinedFnID,
                                           bool IsOffloadEntry,
                                           const RegionCodeGenTy &CodeGen) {
-  ExecutionModeRAII ModeRAII(IsInSPMDExecutionMode, /*NewMode=*/true);
+  ExecutionModeRAII ModeRAII(CurrentExecutionMode, /*IsSPMD=*/true);
   EntryFunctionState EST;
 
   // Emit target region as a standalone region.
@@ -905,11 +972,13 @@ void CGOpenMPRuntimeNVPTX::emitSpmdEntry
 
   CGF.EmitBlock(ExecuteBB);
 
+  IsInTargetMasterThreadRegion = true;
   emitGenericVarsProlog(CGF, D.getLocStart());
 }
 
 void CGOpenMPRuntimeNVPTX::emitSpmdEntryFooter(CodeGenFunction &CGF,
                                                EntryFunctionState &EST) {
+  IsInTargetMasterThreadRegion = false;
   if (!CGF.HaveInsertPoint())
     return;
 
@@ -1380,7 +1449,7 @@ void CGOpenMPRuntimeNVPTX::emitTargetOut
 
   assert(!ParentName.empty() && "Invalid target region parent name!");
 
-  bool Mode = supportsSPMDExecutionMode(D);
+  bool Mode = supportsSPMDExecutionMode(CGM.getContext(), D);
   if (Mode)
     emitSpmdKernel(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry,
                    CodeGen);
@@ -1401,8 +1470,8 @@ void CGOpenMPRuntimeNVPTX::emitProcBindC
                                               OpenMPProcBindClauseKind ProcBind,
                                               SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  // TODO: If in Spmd mode and L1 parallel emit the clause.
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      IsInTargetMasterThreadRegion)
     return;
 
   CGOpenMPRuntime::emitProcBindClause(CGF, ProcBind, Loc);
@@ -1412,8 +1481,8 @@ void CGOpenMPRuntimeNVPTX::emitNumThread
                                                 llvm::Value *NumThreads,
                                                 SourceLocation Loc) {
   // Do nothing in case of Spmd mode and L0 parallel.
-  // TODO: If in Spmd mode and L1 parallel emit the clause.
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      IsInTargetMasterThreadRegion)
     return;
 
   CGOpenMPRuntime::emitNumThreadsClause(CGF, NumThreads, Loc);
@@ -1457,7 +1526,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitP
       cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
           D, ThreadIDVar, InnermostKind, CodeGen));
   IsInTargetMasterThreadRegion = PrevIsInTargetMasterThreadRegion;
-  if (!isInSpmdExecutionMode() && !IsInParallelRegion) {
+  if (getExecutionMode() != CGOpenMPRuntimeNVPTX::EM_SPMD &&
+      !IsInParallelRegion) {
     llvm::Function *WrapperFun =
         createParallelDataSharingWrapper(OutlinedFun, D);
     WrapperFunctionsMap[OutlinedFun] = WrapperFun;
@@ -1635,7 +1705,7 @@ void CGOpenMPRuntimeNVPTX::emitParallelC
   if (!CGF.HaveInsertPoint())
     return;
 
-  if (isInSpmdExecutionMode())
+  if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD)
     emitSpmdParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
   else
     emitNonSPMDParallelCall(CGF, Loc, OutlinedFn, CapturedVars, IfCond);
@@ -1759,6 +1829,8 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDPa
       SeqGen(CGF, Action);
     } else if (IsInTargetMasterThreadRegion) {
       L0ParallelGen(CGF, Action);
+    } else if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_NonSPMD) {
+      RCG(CGF);
     } else {
       // Check for master and then parallelism:
       // if (is_master) {
@@ -1770,20 +1842,18 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDPa
       // }
       CGBuilderTy &Bld = CGF.Builder;
       llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
-      if (!isInSpmdExecutionMode()) {
-        llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
-        llvm::BasicBlock *ParallelCheckBB =
-            CGF.createBasicBlock(".parallelcheck");
-        llvm::Value *IsMaster =
-            Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
-        Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
-        CGF.EmitBlock(MasterCheckBB);
-        L0ParallelGen(CGF, Action);
-        CGF.EmitBranch(ExitBB);
-        // There is no need to emit line number for unconditional branch.
-        (void)ApplyDebugLocation::CreateEmpty(CGF);
-        CGF.EmitBlock(ParallelCheckBB);
-      }
+      llvm::BasicBlock *MasterCheckBB = CGF.createBasicBlock(".mastercheck");
+      llvm::BasicBlock *ParallelCheckBB =
+          CGF.createBasicBlock(".parallelcheck");
+      llvm::Value *IsMaster =
+          Bld.CreateICmpEQ(getNVPTXThreadID(CGF), getMasterThreadID(CGF));
+      Bld.CreateCondBr(IsMaster, MasterCheckBB, ParallelCheckBB);
+      CGF.EmitBlock(MasterCheckBB);
+      L0ParallelGen(CGF, Action);
+      CGF.EmitBranch(ExitBB);
+      // There is no need to emit line number for unconditional branch.
+      (void)ApplyDebugLocation::CreateEmpty(CGF);
+      CGF.EmitBlock(ParallelCheckBB);
       llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
       llvm::Value *ThreadID = getThreadID(CGF, Loc);
       llvm::Value *PL = CGF.EmitRuntimeCall(
@@ -1827,14 +1897,49 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParal
   // is added on Spmd target directives.
   llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
 
-  Address ZeroAddr = CGF.CreateMemTemp(
-      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
-      ".zero.addr");
+  Address ZeroAddr = CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
+                                           /*DestWidth=*/32, /*Signed=*/1),
+                                       ".zero.addr");
   CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
-  OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
-  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
-  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
-  emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  Address ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
+  auto &&CodeGen = [this, OutlinedFn, CapturedVars, Loc, ZeroAddr,
+                    ThreadIDAddr](CodeGenFunction &CGF,
+                                  PrePostActionTy &Action) {
+    Action.Enter(CGF);
+
+    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
+    OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+    emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
+  };
+  auto &&SeqGen = [this, &CodeGen, Loc](CodeGenFunction &CGF,
+                                        PrePostActionTy &) {
+
+    RegionCodeGenTy RCG(CodeGen);
+    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *ThreadID = getThreadID(CGF, Loc);
+    llvm::Value *Args[] = {RTLoc, ThreadID};
+
+    NVPTXActionTy Action(
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_serialized_parallel),
+        Args,
+        createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_end_serialized_parallel),
+        Args);
+    RCG.setAction(Action);
+    RCG(CGF);
+  };
+
+  if (IsInTargetMasterThreadRegion) {
+    RegionCodeGenTy RCG(CodeGen);
+    RCG(CGF);
+  } else {
+    // If we are not in the target region, it is definitely L2 parallelism or
+    // more, because for SPMD mode we always has L1 parallel level, sowe don't
+    // need to check for orphaned directives.
+    RegionCodeGenTy RCG(SeqGen);
+    RCG(CGF);
+  }
 }
 
 void CGOpenMPRuntimeNVPTX::emitCriticalRegion(

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=332016&r1=332015&r2=332016&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Thu May 10 11:32:08 2018
@@ -24,6 +24,16 @@ namespace clang {
 namespace CodeGen {
 
 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime {
+public:
+  /// Defines the execution mode.
+  enum ExecutionMode {
+    /// SPMD execution mode (all threads are worker threads).
+    EM_SPMD,
+    /// Non-SPMD execution mode (1 master thread, others are workers).
+    EM_NonSPMD,
+    /// Unknown execution mode (orphaned directive).
+    EM_Unknown,
+  };
 private:
   /// Parallel outlined function work for workers to execute.
   llvm::SmallVector<llvm::Function *, 16> Work;
@@ -44,7 +54,7 @@ private:
     void createWorkerFunction(CodeGenModule &CGM);
   };
 
-  bool isInSpmdExecutionMode() const;
+  ExecutionMode getExecutionMode() const;
 
   /// Emit the worker function for the current target region.
   void emitWorkerFunction(WorkerFunctionState &WST);
@@ -334,7 +344,7 @@ private:
   /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
   /// target region and used by containing directives such as 'parallel'
   /// to emit optimized code.
-  bool IsInSPMDExecutionMode = false;
+  ExecutionMode CurrentExecutionMode = EM_Unknown;
 
   /// true if we're emitting the code for the target region and next parallel
   /// region is L0 for sure.

Modified: cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp?rev=332016&r1=332015&r2=332016&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_parallel_codegen.cpp Thu May 10 11:32:08 2018
@@ -58,6 +58,7 @@ tx ftemplate(int n) {
     #pragma omp critical
     ++a;
     }
+    ++a;
   }
   return a;
 }

Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp?rev=332016&r1=332015&r2=332016&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_codegen.cpp Thu May 10 11:32:08 2018
@@ -9,8 +9,9 @@
 #define HEADER
 
 // Check that the execution mode of all 2 target regions is set to Generic Mode.
-// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1
-// CHECK-DAG: {{@__omp_offloading_.+l31}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l27}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l32}}_exec_mode = weak constant i8 1
+// CHECK-DAG: {{@__omp_offloading_.+l37}}_exec_mode = weak constant i8 0
 
 template<typename tx>
 tx ftemplate(int n) {
@@ -33,6 +34,13 @@ tx ftemplate(int n) {
     aa = 1;
   }
 
+  #pragma omp target teams
+  {
+#pragma omp parallel
+#pragma omp parallel
+    aa = 1;
+  }
+
   return a;
 }
 
@@ -44,14 +52,14 @@ int bar(int n){
   return a;
 }
 
-  // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l21}}_worker()
+  // CHECK-NOT: define {{.*}}void {{@__omp_offloading_.+template.+l22}}_worker()
 
 
 
 
 
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l27}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -86,7 +94,7 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l26]](i[[SZ:32|64]] [[A:%[^)]+]])
+  // CHECK: define {{.*}}void [[T1:@__omp_offloading_.+template.+l27]](i[[SZ:32|64]] [[A:%[^)]+]])
   // CHECK: store i[[SZ]] [[A]], i[[SZ]]* [[A_ADDR:%.+]], align
   // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[A_ADDR]] to i8*
 
@@ -137,7 +145,7 @@ int bar(int n){
 
 
 
-  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l31}}_worker()
+  // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l32}}_worker()
   // CHECK-DAG: [[OMP_EXEC_STATUS:%.+]] = alloca i8,
   // CHECK-DAG: [[OMP_WORK_FN:%.+]] = alloca i8*,
   // CHECK: store i8* null, i8** [[OMP_WORK_FN]],
@@ -172,7 +180,7 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
-  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l31]](i[[SZ:32|64]] [[AA:%[^)]+]])
+  // CHECK: define {{.*}}void [[T2:@__omp_offloading_.+template.+l32]](i[[SZ:32|64]] [[AA:%[^)]+]])
   // CHECK: store i[[SZ]] [[AA]], i[[SZ]]* [[AA_ADDR:%.+]], align
   // CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[AA_ADDR]] to i16*
 
@@ -218,5 +226,24 @@ int bar(int n){
   // CHECK: [[EXIT]]
   // CHECK: ret void
 
+// CHECK: define weak void @__omp_offloading_{{.*}}ftemplate{{.*}}_l37(
+// CHECK: call void @__kmpc_spmd_kernel_init(
+// CHECK: call i8* @__kmpc_data_sharing_push_stack(
+// CHECK-NOT: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L0:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.*}})
+// CHECK-NOT: call void @__kmpc_end_serialized_parallel(
+// CHECK: call void @__kmpc_data_sharing_pop_stack(
+// CHECK: call void @__kmpc_spmd_kernel_deinit()
+// CHECK: ret
+
+// CHECK: define internal void [[L0]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: call void @__kmpc_serialized_parallel(
+// CHECK: call void [[L1:@.+]](i32* %{{.+}}, i32* %{{.+}}, i16* %{{.+}})
+// CHECK: call void @__kmpc_end_serialized_parallel(
+// CHECK: ret void
+
+// CHECK: define internal void [[L1]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* dereferenceable
+// CHECK: store i16 1, i16* %
+// CHECK: ret void
 
 #endif

Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp?rev=332016&r1=332015&r2=332016&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp Thu May 10 11:32:08 2018
@@ -13,7 +13,7 @@ int a;
 int foo(int *a);
 
 int main(int argc, char **argv) {
-#pragma omp target teams distribute parallel for map(tofrom:a) if(parallel:argc)
+#pragma omp target teams distribute parallel for map(tofrom:a) if(target:argc) schedule(static, a)
   for (int i= 0; i < argc; ++i)
     a = foo(&i) + foo(&a) + foo(&argc);
   return 0;

Modified: cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp?rev=332016&r1=332015&r2=332016&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/target_parallel_debug_codegen.cpp Thu May 10 11:32:08 2018
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-cuda-mode -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited -fopenmp-version=45 | FileCheck %s
 // expected-no-diagnostics
 
 int main() {
@@ -11,7 +11,7 @@ int main() {
   int c[10][10][10];
 #pragma omp target parallel firstprivate(a, b) map(tofrom          \
                                                    : c) map(tofrom \
-                                                            : bb) if (a)
+                                                            : bb) if (target:a)
   {
     int &f = c[1][1][1];
     int &g = a;
@@ -54,7 +54,7 @@ int main() {
   return 0;
 }
 
-// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}}, i1 {{[^)]+}})
+// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8 addrspace(1)* noalias{{[^,]+}})
 // CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]*
 // CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^,]+}}, i8* {{[^)]+}})