r292997 - [OpenMP] Support for the num_threads-clause on 'target parallel'.

Arpith Chacko Jacob via cfe-commits cfe-commits at lists.llvm.org
Tue Jan 24 16:57:17 PST 2017


Author: arpith
Date: Tue Jan 24 18:57:16 2017
New Revision: 292997

URL: http://llvm.org/viewvc/llvm-project?rev=292997&view=rev
Log:
[OpenMP] Support for the num_threads-clause on 'target parallel'.

The num_threads-clause on the combined directive applies to the
'parallel' region of this construct. We modify the NumThreadsClause
class to capture the clause expression within the 'target' region.

The offload runtime call for 'target parallel' is changed to
__tgt_target_teams() with 1 team and the number of threads set by
this clause or a default if none.

Reviewers: ABataev
Differential Revision: https://reviews.llvm.org/D29082

Added:
    cfe/trunk/test/OpenMP/target_parallel_num_threads_codegen.cpp
Modified:
    cfe/trunk/include/clang/AST/OpenMPClause.h
    cfe/trunk/include/clang/AST/RecursiveASTVisitor.h
    cfe/trunk/lib/AST/OpenMPClause.cpp
    cfe/trunk/lib/AST/StmtProfile.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/Sema/SemaOpenMP.cpp
    cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
    cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
    cfe/trunk/test/OpenMP/target_parallel_codegen.cpp
    cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp
    cfe/trunk/tools/libclang/CIndex.cpp

Modified: cfe/trunk/include/clang/AST/OpenMPClause.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/OpenMPClause.h?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/OpenMPClause.h (original)
+++ cfe/trunk/include/clang/AST/OpenMPClause.h Tue Jan 24 18:57:16 2017
@@ -345,7 +345,7 @@ public:
 /// In this example directive '#pragma omp parallel' has simple 'num_threads'
 /// clause with number of threads '6'.
 ///
-class OMPNumThreadsClause : public OMPClause {
+class OMPNumThreadsClause : public OMPClause, public OMPClauseWithPreInit {
   friend class OMPClauseReader;
   /// \brief Location of '('.
   SourceLocation LParenLoc;
@@ -360,20 +360,29 @@ public:
   /// \brief Build 'num_threads' clause with condition \a NumThreads.
   ///
   /// \param NumThreads Number of threads for the construct.
+  /// \param HelperNumThreads Helper Number of threads for the construct.
+  /// \param CaptureRegion Innermost OpenMP region where expressions in this
+  /// clause must be captured.
   /// \param StartLoc Starting location of the clause.
   /// \param LParenLoc Location of '('.
   /// \param EndLoc Ending location of the clause.
   ///
-  OMPNumThreadsClause(Expr *NumThreads, SourceLocation StartLoc,
-                      SourceLocation LParenLoc, SourceLocation EndLoc)
-      : OMPClause(OMPC_num_threads, StartLoc, EndLoc), LParenLoc(LParenLoc),
-        NumThreads(NumThreads) {}
+  OMPNumThreadsClause(Expr *NumThreads, Stmt *HelperNumThreads,
+                      OpenMPDirectiveKind CaptureRegion,
+                      SourceLocation StartLoc, SourceLocation LParenLoc,
+                      SourceLocation EndLoc)
+      : OMPClause(OMPC_num_threads, StartLoc, EndLoc),
+        OMPClauseWithPreInit(this), LParenLoc(LParenLoc),
+        NumThreads(NumThreads) {
+    setPreInitStmt(HelperNumThreads, CaptureRegion);
+  }
 
   /// \brief Build an empty clause.
   ///
   OMPNumThreadsClause()
       : OMPClause(OMPC_num_threads, SourceLocation(), SourceLocation()),
-        LParenLoc(SourceLocation()), NumThreads(nullptr) {}
+        OMPClauseWithPreInit(this), LParenLoc(SourceLocation()),
+        NumThreads(nullptr) {}
 
   /// \brief Sets the location of '('.
   void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }

Modified: cfe/trunk/include/clang/AST/RecursiveASTVisitor.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/RecursiveASTVisitor.h?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/RecursiveASTVisitor.h (original)
+++ cfe/trunk/include/clang/AST/RecursiveASTVisitor.h Tue Jan 24 18:57:16 2017
@@ -2725,6 +2725,7 @@ bool RecursiveASTVisitor<Derived>::Visit
 template <typename Derived>
 bool
 RecursiveASTVisitor<Derived>::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
+  TRY_TO(VisitOMPClauseWithPreInit(C));
   TRY_TO(TraverseStmt(C->getNumThreads()));
   return true;
 }

Modified: cfe/trunk/lib/AST/OpenMPClause.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/OpenMPClause.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/lib/AST/OpenMPClause.cpp (original)
+++ cfe/trunk/lib/AST/OpenMPClause.cpp Tue Jan 24 18:57:16 2017
@@ -50,10 +50,11 @@ const OMPClauseWithPreInit *OMPClauseWit
     return static_cast<const OMPLinearClause *>(C);
   case OMPC_if:
     return static_cast<const OMPIfClause *>(C);
+  case OMPC_num_threads:
+    return static_cast<const OMPNumThreadsClause *>(C);
   case OMPC_default:
   case OMPC_proc_bind:
   case OMPC_final:
-  case OMPC_num_threads:
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_collapse:

Modified: cfe/trunk/lib/AST/StmtProfile.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtProfile.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/lib/AST/StmtProfile.cpp (original)
+++ cfe/trunk/lib/AST/StmtProfile.cpp Tue Jan 24 18:57:16 2017
@@ -294,6 +294,7 @@ void OMPClauseProfiler::VisitOMPFinalCla
 }
 
 void OMPClauseProfiler::VisitOMPNumThreadsClause(const OMPNumThreadsClause *C) {
+  VistOMPClauseWithPreInit(C);
   if (C->getNumThreads())
     Profiler->VisitStmt(C->getNumThreads());
 }

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Tue Jan 24 18:57:16 2017
@@ -4894,19 +4894,29 @@ static const Stmt *ignoreCompoundStmts(c
   return Body;
 }
 
-/// \brief Emit the num_teams clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no num_teams clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of teams for a target directive.  Inspect the num_teams
+/// clause associated with a teams construct combined or closely nested
+/// with the target directive.
+///
+/// Emit a team of size one for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
 static llvm::Value *
-emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
-                                     CodeGenFunction &CGF,
-                                     const OMPExecutableDirective &D) {
+emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+                               CodeGenFunction &CGF,
+                               const OMPExecutableDirective &D) {
 
   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                               "teams directive expected to be "
                                               "emitted only for the host!");
 
+  // If the target directive is combined with a parallel directive but not a
+  // teams directive, start one team.
+  if (isOpenMPParallelDirective(D.getDirectiveKind()) &&
+      !isOpenMPTeamsDirective(D.getDirectiveKind()))
+    return CGF.Builder.getInt32(1);
+
   // FIXME: For the moment we do not support combined directives with target and
   // teams, so we do not expect to get any num_teams clause in the provided
   // directive. Once we support that, this assertion can be replaced by the
@@ -4943,19 +4953,56 @@ emitNumTeamsClauseForTargetDirective(CGO
   return nullptr;
 }
 
-/// \brief Emit the thread_limit clause of an enclosed teams directive at the
-/// target region scope. If there is no teams directive associated with the
-/// target directive, or if there is no thread_limit clause associated with the
-/// enclosed teams directive, return nullptr.
+/// Emit the number of threads for a target directive.  Inspect the
+/// thread_limit clause associated with a teams construct combined or closely
+/// nested with the target directive.
+///
+/// Emit the num_threads clause for directives such as 'target parallel' that
+/// have no associated teams construct.
+///
+/// Otherwise, return nullptr.
 static llvm::Value *
-emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime,
-                                        CodeGenFunction &CGF,
-                                        const OMPExecutableDirective &D) {
+emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
+                                 CodeGenFunction &CGF,
+                                 const OMPExecutableDirective &D) {
 
   assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
                                               "teams directive expected to be "
                                               "emitted only for the host!");
 
+  auto &Bld = CGF.Builder;
+
+  //
+  // If the target directive is combined with a teams directive:
+  //   Return the value in the thread_limit clause, if any.
+  //
+  // If the target directive is combined with a parallel directive:
+  //   Return the value in the num_threads clause, if any.
+  //
+  // If both clauses are set, select the minimum of the two.
+  //
+  // If neither teams or parallel combined directives set the number of threads
+  // in a team, return 0 to denote the runtime default.
+  //
+  // If this is not a teams directive return nullptr.
+
+  if (isOpenMPParallelDirective(D.getDirectiveKind())) {
+    llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
+    llvm::Value *NumThreadsVal = nullptr;
+
+    if (const auto *NumThreadsClause =
+            D.getSingleClause<OMPNumThreadsClause>()) {
+      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
+      llvm::Value *NumThreads =
+          CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
+                             /*IgnoreResultAssign*/ true);
+      NumThreadsVal =
+          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
+    }
+
+    return NumThreadsVal ? NumThreadsVal : DefaultThreadLimitVal;
+  }
+
   // FIXME: For the moment we do not support combined directives with target and
   // teams, so we do not expect to get any thread_limit clause in the provided
   // directive. Once we support that, this assertion can be replaced by the
@@ -6041,24 +6088,50 @@ void CGOpenMPRuntime::emitTargetCall(Cod
     // Return value of the runtime offloading call.
     llvm::Value *Return;
 
-    auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D);
-    auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D);
+    auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
+    auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
 
-    // If we have NumTeams defined this means that we have an enclosed teams
-    // region. Therefore we also expect to have ThreadLimit defined. These two
-    // values should be defined in the presence of a teams directive, regardless
-    // of having any clauses associated. If the user is using teams but no
-    // clauses, these two values will be the default that should be passed to
-    // the runtime library - a 32-bit integer with the value zero.
+    // The target region is an outlined function launched by the runtime
+    // via calls __tgt_target() or __tgt_target_teams().
+    //
+    // __tgt_target() launches a target region with one team and one thread,
+    // executing a serial region.  This master thread may in turn launch
+    // more threads within its team upon encountering a parallel region,
+    // however, no additional teams can be launched on the device.
+    //
+    // __tgt_target_teams() launches a target region with one or more teams,
+    // each with one or more threads.  This call is required for target
+    // constructs such as:
+    //  'target teams'
+    //  'target' / 'teams'
+    //  'target teams distribute parallel for'
+    //  'target parallel'
+    // and so on.
+    //
+    // Note that on the host and CPU targets, the runtime implementation of
+    // these calls simply call the outlined function without forking threads.
+    // The outlined functions themselves have runtime calls to
+    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
+    // the compiler in emitTeamsCall() and emitParallelCall().
+    //
+    // In contrast, on the NVPTX target, the implementation of
+    // __tgt_target_teams() launches a GPU kernel with the requested number
+    // of teams and threads so no additional calls to the runtime are required.
     if (NumTeams) {
-      assert(ThreadLimit && "Thread limit expression should be available along "
-                            "with number of teams.");
+      // If we have NumTeams defined this means that we have an enclosed teams
+      // region. Therefore we also expect to have NumThreads defined. These two
+      // values should be defined in the presence of a teams directive,
+      // regardless of having any clauses associated. If the user is using teams
+      // but no clauses, these two values will be the default that should be
+      // passed to the runtime library - a 32-bit integer with the value zero.
+      assert(NumThreads && "Thread limit expression should be available along "
+                           "with number of teams.");
       llvm::Value *OffloadingArgs[] = {
           DeviceID,           OutlinedFnID,
           PointerNum,         Info.BasePointersArray,
           Info.PointersArray, Info.SizesArray,
           Info.MapTypesArray, NumTeams,
-          ThreadLimit};
+          NumThreads};
       Return = CGF.EmitRuntimeCall(
           RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
     } else {

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Tue Jan 24 18:57:16 2017
@@ -6635,10 +6635,9 @@ OMPClause *Sema::ActOnOpenMPSingleExprCl
 // the region in which to capture expressions associated with a clause.
 // A return value of OMPD_unknown signifies that the expression should not
 // be captured.
-static OpenMPDirectiveKind
-getOpenMPCaptureRegionForClause(OpenMPDirectiveKind DKind,
-                                OpenMPClauseKind CKind,
-                                OpenMPDirectiveKind NameModifier) {
+static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
+    OpenMPDirectiveKind DKind, OpenMPClauseKind CKind,
+    OpenMPDirectiveKind NameModifier = OMPD_unknown) {
   OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
 
   switch (CKind) {
@@ -6708,6 +6707,69 @@ getOpenMPCaptureRegionForClause(OpenMPDi
       llvm_unreachable("Unknown OpenMP directive");
     }
     break;
+  case OMPC_num_threads:
+    switch (DKind) {
+    case OMPD_target_parallel:
+      CaptureRegion = OMPD_target;
+      break;
+    case OMPD_cancel:
+    case OMPD_parallel:
+    case OMPD_parallel_sections:
+    case OMPD_parallel_for:
+    case OMPD_parallel_for_simd:
+    case OMPD_target:
+    case OMPD_target_simd:
+    case OMPD_target_parallel_for:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_teams:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_target_teams_distribute_parallel_for:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_teams_distribute_parallel_for:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_distribute_parallel_for:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_task:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_target_data:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_target_update:
+      // Do not capture num_threads-clause expressions.
+      break;
+    case OMPD_threadprivate:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_cancellation_point:
+    case OMPD_flush:
+    case OMPD_declare_reduction:
+    case OMPD_declare_simd:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_teams:
+    case OMPD_simd:
+    case OMPD_for:
+    case OMPD_for_simd:
+    case OMPD_sections:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_taskgroup:
+    case OMPD_distribute:
+    case OMPD_ordered:
+    case OMPD_atomic:
+    case OMPD_distribute_simd:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+      llvm_unreachable("Unexpected OpenMP directive with num_threads-clause");
+    case OMPD_unknown:
+      llvm_unreachable("Unknown OpenMP directive");
+    }
+    break;
   case OMPC_schedule:
   case OMPC_dist_schedule:
   case OMPC_firstprivate:
@@ -6717,7 +6779,6 @@ getOpenMPCaptureRegionForClause(OpenMPDi
   case OMPC_default:
   case OMPC_proc_bind:
   case OMPC_final:
-  case OMPC_num_threads:
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_collapse:
@@ -6887,6 +6948,8 @@ OMPClause *Sema::ActOnOpenMPNumThreadsCl
                                              SourceLocation LParenLoc,
                                              SourceLocation EndLoc) {
   Expr *ValExpr = NumThreads;
+  Stmt *HelperValStmt = nullptr;
+  OpenMPDirectiveKind CaptureRegion = OMPD_unknown;
 
   // OpenMP [2.5, Restrictions]
   //  The num_threads expression must evaluate to a positive integer value.
@@ -6894,8 +6957,16 @@ OMPClause *Sema::ActOnOpenMPNumThreadsCl
                                  /*StrictlyPositive=*/true))
     return nullptr;
 
-  return new (Context)
-      OMPNumThreadsClause(ValExpr, StartLoc, LParenLoc, EndLoc);
+  OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective();
+  CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads);
+  if (CaptureRegion != OMPD_unknown) {
+    llvm::MapVector<Expr *, DeclRefExpr *> Captures;
+    ValExpr = tryBuildCapture(*this, ValExpr, Captures).get();
+    HelperValStmt = buildPreInits(Context, Captures);
+  }
+
+  return new (Context) OMPNumThreadsClause(
+      ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc);
 }
 
 ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E,

Modified: cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderStmt.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReaderStmt.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReaderStmt.cpp Tue Jan 24 18:57:16 2017
@@ -1952,6 +1952,7 @@ void OMPClauseReader::VisitOMPFinalClaus
 }
 
 void OMPClauseReader::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
+  VisitOMPClauseWithPreInit(C);
   C->setNumThreads(Reader->Record.readSubExpr());
   C->setLParenLoc(Reader->ReadSourceLocation());
 }

Modified: cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriterStmt.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriterStmt.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriterStmt.cpp Tue Jan 24 18:57:16 2017
@@ -1818,6 +1818,7 @@ void OMPClauseWriter::VisitOMPFinalClaus
 }
 
 void OMPClauseWriter::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) {
+  VisitOMPClauseWithPreInit(C);
   Record.AddStmt(C->getNumThreads());
   Record.AddSourceLocation(C->getLParenLoc());
 }

Modified: cfe/trunk/test/OpenMP/target_parallel_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_parallel_codegen.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_parallel_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/target_parallel_codegen.cpp Tue Jan 24 18:57:16 2017
@@ -91,7 +91,7 @@ int foo(int n) {
   double cn[5][n];
   TT<long long, char> d;
 
-  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null)
+  // CHECK:       [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 1, i32 0)
   // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align 4
   // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align 4
   // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
@@ -114,7 +114,7 @@ int foo(int n) {
     a += 1;
   }
 
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
   // CHECK-DAG:   [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
@@ -140,7 +140,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10
   // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
   // CHECK:       [[IFTHEN]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
 
@@ -195,7 +195,7 @@ int foo(int n) {
   // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20
   // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
   // CHECK:       [[TRY]]
-  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0))
+  // CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0)
   // CHECK-DAG:   [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0
   // CHECK-DAG:   [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0
@@ -529,7 +529,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60
 // CHECK:       br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]]
 // CHECK:       [[TRY]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0
@@ -592,7 +592,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0
 
@@ -644,7 +644,7 @@ int bar(int n){
 // CHECK:       [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40
 // CHECK:       br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]]
 // CHECK:       [[IFTHEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0))
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0)
 // CHECK-DAG:   [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0
 // CHECK-DAG:   [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0
 

Modified: cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/target_parallel_if_codegen.cpp Tue Jan 24 18:57:16 2017
@@ -145,7 +145,7 @@ int bar(int n){
 // CHECK:       store i8 [[FB]], i8* [[CONV]], align
 // CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3,
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0)
 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
 // CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
 // CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
@@ -173,7 +173,7 @@ int bar(int n){
 // CHECK:       br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2,
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
 // CHECK:       br label {{%?}}[[END:.+]]
 //
@@ -215,7 +215,7 @@ int bar(int n){
 // CHECK:       br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0)
 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
 // CHECK:       br label {{%?}}[[END:.+]]
 //
@@ -241,7 +241,7 @@ int bar(int n){
 // CHECK:       br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]]
 //
 // CHECK:       [[IF_THEN]]
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0,
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0)
 // CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
 // CHECK:       br label {{%?}}[[END:.+]]
 //
@@ -267,7 +267,7 @@ int bar(int n){
 //
 // CHECK: define {{.*}}[[FTEMPLATE]]
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1,
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0)
 // CHECK-NEXT:  store i32 [[RET]], i32* [[RHV:%.+]], align
 // CHECK-NEXT:  [[RET2:%.+]] = load i32, i32* [[RHV]], align
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
@@ -281,7 +281,7 @@ int bar(int n){
 //
 //
 //
-// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2,
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0)
 // CHECK-NEXT:  store i32 [[RET]], i32* [[RHV:%.+]], align
 // CHECK-NEXT:  [[RET2:%.+]] = load i32, i32* [[RHV]], align
 // CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0

Added: cfe/trunk/test/OpenMP/target_parallel_num_threads_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/target_parallel_num_threads_codegen.cpp?rev=292997&view=auto
==============================================================================
--- cfe/trunk/test/OpenMP/target_parallel_num_threads_codegen.cpp (added)
+++ cfe/trunk/test/OpenMP/target_parallel_num_threads_codegen.cpp Tue Jan 24 18:57:16 2017
@@ -0,0 +1,344 @@
+// Test host codegen.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* }
+// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) }
+
+// CHECK-DAG: [[S1:%.+]] = type { double }
+// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 }
+// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* }
+// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* }
+
+// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 }
+
+// We have 6 target regions
+
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+// CHECK-DAG: @{{.*}} = private constant i8 0
+
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+// TCHECK: @{{.+}} = constant [[ENTTY]]
+
+// Check if offloading descriptor is created.
+// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]]
+// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]]
+// CHECK: [[DEVBEGIN:@.+]] = external constant i8
+// CHECK: [[DEVEND:@.+]] = external constant i8
+// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }]
+// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }
+
+// Check target registration is registered as a Ctor.
+// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }]
+
+
+template<typename tx>
+tx ftemplate(int n) {
+  tx a = 0;
+
+  #pragma omp target parallel num_threads(tx(20))
+  {
+  }
+
+  short b = 1;
+  #pragma omp target parallel num_threads(b)
+  {
+    a += b;
+  }
+
+  return a;
+}
+
+static
+int fstatic(int n) {
+
+  #pragma omp target parallel num_threads(n)
+  {
+  }
+
+  #pragma omp target parallel num_threads(32+n)
+  {
+  }
+
+  return n+1;
+}
+
+struct S1 {
+  double a;
+
+  int r1(int n){
+    int b = 1;
+
+    #pragma omp target parallel num_threads(n-b)
+    {
+      this->a = (double)b + 1.5;
+    }
+
+    #pragma omp target parallel num_threads(1024)
+    {
+      this->a = 2.5;
+    }
+
+    return (int)a;
+  }
+};
+
+// CHECK: define {{.*}}@{{.*}}bar{{.*}}
+int bar(int n){
+  int a = 0;
+
+  S1 S;
+  // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}})
+  a += S.r1(n);
+
+  // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}})
+  a += fstatic(n);
+
+  // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}})
+  a += ftemplate<int>(n);
+
+  return a;
+}
+
+
+
+//
+// CHECK: define {{.*}}[[FS1]]([[S1]]* {{%.+}}, i32 {{[^%]*}}[[PARM:%.+]])
+//
+// CHECK-DAG:   store i32 [[PARM]], i32* [[N_ADDR:%.+]], align
+// CHECK:       store i32 1, i32* [[B:%.+]], align
+// CHECK:       [[NV:%.+]] = load i32, i32* [[N_ADDR]], align
+// CHECK:       [[BV:%.+]] = load i32, i32* [[B]], align
+// CHECK:       [[SUB:%.+]] = sub nsw i32 [[NV]], [[BV]]
+// CHECK:       store i32 [[SUB]], i32* [[CAPE_ADDR:%.+]], align
+// CHECK:       [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+// CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32*
+// CHECK-64:    store i32 [[CEV]], i32* [[CONV]], align
+// CHECK-32:    store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align
+// CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
+// CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+//
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
+// CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
+// CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+//
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT1:@.+]]([[S1]]* {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]])
+// CHECK:       br label {{%?}}[[END]]
+// CHECK:       [[END]]
+//
+//
+//
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024)
+// CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
+// CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
+// CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+//
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT2:@.+]]([[S1]]* {{[^,]+}})
+// CHECK:       br label {{%?}}[[END]]
+// CHECK:       [[END]]
+//
+
+
+
+
+
+
+//
+// CHECK: define {{.*}}[[FSTATIC]](i32 {{[^%]*}}[[PARM:%.+]])
+//
+// CHECK-DAG:   store i32 [[PARM]], i32* [[N_ADDR:%.+]], align
+// CHECK:       [[NV:%.+]] = load i32, i32* [[N_ADDR]], align
+// CHECK:       store i32 [[NV]], i32* [[CAPE_ADDR:%.+]], align
+// CHECK:       [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+// CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32*
+// CHECK-64:    store i32 [[CEV]], i32* [[CONV]], align
+// CHECK-32:    store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align
+// CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
+// CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+//
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
+// CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
+// CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+//
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT3:@.+]](i[[SZ]] [[ARG]])
+// CHECK:       br label {{%?}}[[END]]
+// CHECK:       [[END]]
+//
+//
+//
+// CHECK:       [[NV:%.+]] = load i32, i32* [[N_ADDR]], align
+// CHECK:       [[ADD:%.+]] = add nsw i32 32, [[NV]]
+// CHECK:       store i32 [[ADD]], i32* [[CAPE_ADDR:%.+]], align
+// CHECK:       [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+// CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32*
+// CHECK-64:    store i32 [[CEV]], i32* [[CONV]], align
+// CHECK-32:    store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align
+// CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
+// CHECK:       [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+//
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
+// CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
+// CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+//
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT4:@.+]](i[[SZ]] [[ARG]])
+// CHECK:       br label {{%?}}[[END]]
+// CHECK:       [[END]]
+//
+
+
+
+
+
+
+//
+// CHECK: define {{.*}}[[FTEMPLATE]]
+//
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20)
+// CHECK-NEXT:  store i32 [[RET]], i32* [[RHV:%.+]], align
+// CHECK-NEXT:  [[RET2:%.+]] = load i32, i32* [[RHV]], align
+// CHECK-NEXT:  [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
+// CHECK-NEXT:  br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+//
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT5:@.+]]()
+// CHECK:       br label {{%?}}[[END]]
+//
+// CHECK:       [[END]]
+//
+//
+//
+// CHECK:       store i16 1, i16* [[B:%.+]], align
+// CHECK:       [[BV:%.+]] = load i16, i16* [[B]], align
+// CHECK:       store i16 [[BV]], i16* [[CAPE_ADDR:%.+]], align
+// CHECK:       [[CEV:%.+]] = load i16, i16* [[CAPE_ADDR]], align
+// CHECK:       [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i16*
+// CHECK:       store i16 [[CEV]], i16* [[CONV]], align
+// CHECK:       [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align
+// CHECK:       [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align
+// CHECK:       [[THREADS:%.+]] = sext i16 [[T]] to i32
+//
+// CHECK-DAG:   [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]])
+// CHECK:       store i32 [[RET]], i32* [[RHV:%.+]], align
+// CHECK:       [[RET2:%.+]] = load i32, i32* [[RHV]], align
+// CHECK:       [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0
+// CHECK:       br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]]
+//
+// CHECK:       [[FAIL]]
+// CHECK:       call void [[HVT6:@.+]](i[[SZ]] {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]])
+// CHECK:       br label {{%?}}[[END]]
+// CHECK:       [[END]]
+//
+
+
+
+
+
+
+// Check that the offloading functions are emitted and that the parallel function
+// is appropriately guarded.
+
+// CHECK:       define internal void [[HVT1]]([[S1]]* {{%.+}}, i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]])
+// CHECK-DAG:   store i[[SZ]] [[PARM2]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
+// CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32*
+// CHECK-64:    [[NT:%.+]] = load i32, i32* [[CONV]], align
+// CHECK-32:    [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+// CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2,
+//
+//
+
+
+// CHECK:       define internal void [[HVT2]]([[S1]]* {{%.+}})
+// CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1024)
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1,
+//
+//
+
+
+
+
+
+
+
+
+// CHECK:       define internal void [[HVT3]](i[[SZ]] [[PARM:%.+]])
+// CHECK-DAG:   store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
+// CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32*
+// CHECK-64:    [[NT:%.+]] = load i32, i32* [[CONV]], align
+// CHECK-32:    [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+// CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0,
+//
+//
+// CHECK:       define internal void [[HVT4]](i[[SZ]] [[PARM:%.+]])
+// CHECK-DAG:   store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
+// CHECK-64:    [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32*
+// CHECK-64:    [[NT:%.+]] = load i32, i32* [[CONV]], align
+// CHECK-32:    [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align
+// CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0,
+//
+//
+
+
+
+
+
+// CHECK:       define internal void [[HVT5]](
+// CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20)
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0,
+//
+//
+
+
+// CHECK:       define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]])
+// CHECK-DAG:   store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align
+// CHECK:       [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16*
+// CHECK:       [[T:%.+]] = load i16, i16* [[CONV]], align
+// CHECK:       [[NT:%.+]] = sext i16 [[T]] to i32
+// CHECK:       call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]])
+// CHECK:       call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2,
+//
+//
+
+
+
+#endif

Modified: cfe/trunk/tools/libclang/CIndex.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/libclang/CIndex.cpp?rev=292997&r1=292996&r2=292997&view=diff
==============================================================================
--- cfe/trunk/tools/libclang/CIndex.cpp (original)
+++ cfe/trunk/tools/libclang/CIndex.cpp Tue Jan 24 18:57:16 2017
@@ -2113,6 +2113,7 @@ void OMPClauseEnqueue::VisitOMPFinalClau
 }
 
 void OMPClauseEnqueue::VisitOMPNumThreadsClause(const OMPNumThreadsClause *C) {
+  VisitOMPClauseWithPreInit(C);
   Visitor->AddStmt(C->getNumThreads());
 }
 




More information about the cfe-commits mailing list