[clang] 14a388f - [OPENMP50]Add support for parallel master taskloop simd directive.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Wed Oct 30 07:29:43 PDT 2019


Author: Alexey Bataev
Date: 2019-10-30T10:23:33-04:00
New Revision: 14a388f43bbdfac304a1a117731ac5a27fdab61e

URL: https://github.com/llvm/llvm-project/commit/14a388f43bbdfac304a1a117731ac5a27fdab61e
DIFF: https://github.com/llvm/llvm-project/commit/14a388f43bbdfac304a1a117731ac5a27fdab61e.diff

LOG: [OPENMP50]Add support for parallel master taskloop simd directive.

Added full support for parallel master taskloop simd directive.

Added: 
    clang/test/OpenMP/parallel_master_taskloop_simd_aligned_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_ast_print.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_final_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_grainsize_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_linear_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_loop_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_misc_messages.c
    clang/test/OpenMP/parallel_master_taskloop_simd_num_tasks_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_priority_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_private_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp
    clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp

Modified: 
    clang/include/clang-c/Index.h
    clang/include/clang/AST/RecursiveASTVisitor.h
    clang/include/clang/AST/StmtOpenMP.h
    clang/include/clang/Basic/OpenMPKinds.def
    clang/include/clang/Basic/OpenMPKinds.h
    clang/include/clang/Basic/StmtNodes.td
    clang/include/clang/Sema/Sema.h
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/lib/AST/StmtOpenMP.cpp
    clang/lib/AST/StmtPrinter.cpp
    clang/lib/AST/StmtProfile.cpp
    clang/lib/Basic/OpenMPKinds.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    clang/lib/CodeGen/CGStmt.cpp
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/lib/Parse/ParseOpenMP.cpp
    clang/lib/Sema/SemaOpenMP.cpp
    clang/lib/Sema/TreeTransform.h
    clang/lib/Serialization/ASTReaderStmt.cpp
    clang/lib/Serialization/ASTWriterStmt.cpp
    clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
    clang/tools/libclang/CIndex.cpp
    clang/tools/libclang/CXCursor.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 226893505437..438ee3698a1a 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2567,8 +2567,11 @@ enum CXCursorKind {
    */
   CXCursor_OMPMasterTaskLoopSimdDirective      = 283,
 
+  /** OpenMP parallel master taskloop simd directive.
+   */
+  CXCursor_OMPParallelMasterTaskLoopSimdDirective      = 284,
 
-  CXCursor_LastStmt = CXCursor_OMPMasterTaskLoopSimdDirective,
+  CXCursor_LastStmt = CXCursor_OMPParallelMasterTaskLoopSimdDirective,
 
   /**
    * Cursor that represents the translation unit itself.

diff  --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 5b58eab95d60..86059842da65 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2814,6 +2814,9 @@ DEF_TRAVERSE_STMT(OMPMasterTaskLoopSimdDirective,
 DEF_TRAVERSE_STMT(OMPParallelMasterTaskLoopDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPParallelMasterTaskLoopSimdDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPDistributeDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 

diff  --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index ddfb3060b158..722aa509b132 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -1167,6 +1167,7 @@ class OMPLoopDirective : public OMPExecutableDirective {
            T->getStmtClass() == OMPMasterTaskLoopDirectiveClass ||
            T->getStmtClass() == OMPMasterTaskLoopSimdDirectiveClass ||
            T->getStmtClass() == OMPParallelMasterTaskLoopDirectiveClass ||
+           T->getStmtClass() == OMPParallelMasterTaskLoopSimdDirectiveClass ||
            T->getStmtClass() == OMPDistributeDirectiveClass ||
            T->getStmtClass() == OMPTargetParallelForDirectiveClass ||
            T->getStmtClass() == OMPDistributeParallelForDirectiveClass ||
@@ -3327,6 +3328,76 @@ class OMPParallelMasterTaskLoopDirective : public OMPLoopDirective {
   }
 };
 
+/// This represents '#pragma omp parallel master taskloop simd' directive.
+///
+/// \code
+/// #pragma omp parallel master taskloop simd private(a,b) grainsize(val)
+/// num_tasks(num)
+/// \endcode
+/// In this example directive '#pragma omp parallel master taskloop simd' has
+/// clauses 'private' with the variables 'a' and 'b', 'grainsize' with
+/// expression 'val' and 'num_tasks' with expression 'num'.
+///
+class OMPParallelMasterTaskLoopSimdDirective : public OMPLoopDirective {
+  friend class ASTStmtReader;
+  /// Build directive with the given start and end location.
+  ///
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending location of the directive.
+  /// \param CollapsedNum Number of collapsed nested loops.
+  /// \param NumClauses Number of clauses.
+  ///
+  OMPParallelMasterTaskLoopSimdDirective(SourceLocation StartLoc,
+                                         SourceLocation EndLoc,
+                                         unsigned CollapsedNum,
+                                         unsigned NumClauses)
+      : OMPLoopDirective(this, OMPParallelMasterTaskLoopSimdDirectiveClass,
+                         OMPD_parallel_master_taskloop_simd, StartLoc, EndLoc,
+                         CollapsedNum, NumClauses) {}
+
+  /// Build an empty directive.
+  ///
+  /// \param CollapsedNum Number of collapsed nested loops.
+  /// \param NumClauses Number of clauses.
+  ///
+  explicit OMPParallelMasterTaskLoopSimdDirective(unsigned CollapsedNum,
+                                                  unsigned NumClauses)
+      : OMPLoopDirective(this, OMPParallelMasterTaskLoopSimdDirectiveClass,
+                         OMPD_parallel_master_taskloop_simd, SourceLocation(),
+                         SourceLocation(), CollapsedNum, NumClauses) {}
+
+public:
+  /// Creates directive with a list of \p Clauses.
+  ///
+  /// \param C AST context.
+  /// \param StartLoc Starting location of the directive kind.
+  /// \param EndLoc Ending location of the directive.
+  /// \param CollapsedNum Number of collapsed loops.
+  /// \param Clauses List of clauses.
+  /// \param AssociatedStmt Statement, associated with the directive.
+  /// \param Exprs Helper expressions for CodeGen.
+  ///
+  static OMPParallelMasterTaskLoopSimdDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
+         Stmt *AssociatedStmt, const HelperExprs &Exprs);
+
+  /// Creates an empty directive with space reserved
+  /// for \a NumClauses clauses.
+  ///
+  /// \param C AST context.
+  /// \param NumClauses Number of clauses.
+  /// \param CollapsedNum Number of collapsed nested loops.
+  ///
+  static OMPParallelMasterTaskLoopSimdDirective *
+  CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
+              EmptyShell);
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPParallelMasterTaskLoopSimdDirectiveClass;
+  }
+};
+
 /// This represents '#pragma omp distribute' directive.
 ///
 /// \code

diff  --git a/clang/include/clang/Basic/OpenMPKinds.def b/clang/include/clang/Basic/OpenMPKinds.def
index ff8f07aa5def..aac688deb50f 100644
--- a/clang/include/clang/Basic/OpenMPKinds.def
+++ b/clang/include/clang/Basic/OpenMPKinds.def
@@ -101,6 +101,9 @@
 #ifndef OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE
 #  define OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(Name)
 #endif
+#ifndef OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE
+#  define OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(Name)
+#endif
 #ifndef OPENMP_CRITICAL_CLAUSE
 #  define OPENMP_CRITICAL_CLAUSE(Name)
 #endif
@@ -270,6 +273,7 @@ OPENMP_DIRECTIVE_EXT(declare_variant, "declare variant")
 OPENMP_DIRECTIVE_EXT(master_taskloop, "master taskloop")
 OPENMP_DIRECTIVE_EXT(parallel_master_taskloop, "parallel master taskloop")
 OPENMP_DIRECTIVE_EXT(master_taskloop_simd, "master taskloop simd")
+OPENMP_DIRECTIVE_EXT(parallel_master_taskloop_simd, "parallel master taskloop simd")
 
 // OpenMP clauses.
 OPENMP_CLAUSE(allocator, OMPAllocatorClause)
@@ -741,6 +745,31 @@ OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(num_threads)
 OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(proc_bind)
 OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(copyin)
 
+// Clauses allowed for OpenMP directive 'parallel master taskloop simd'.
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(if)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(shared)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(private)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(firstprivate)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(lastprivate)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(default)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(collapse)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(final)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(untied)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(mergeable)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(priority)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(grainsize)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(nogroup)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(num_tasks)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(reduction)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(allocate)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(num_threads)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(proc_bind)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(copyin)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(linear)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(aligned)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(safelen)
+OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(simdlen)
+
 // Clauses allowed for OpenMP directive 'critical'.
 OPENMP_CRITICAL_CLAUSE(hint)
 
@@ -1053,6 +1082,7 @@ OPENMP_MATCH_KIND(implementation)
 #undef OPENMP_ALLOCATE_CLAUSE
 #undef OPENMP_DECLARE_MAPPER_CLAUSE
 #undef OPENMP_TASKGROUP_CLAUSE
+#undef OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE
 #undef OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE
 #undef OPENMP_MASTER_TASKLOOP_SIMD_CLAUSE
 #undef OPENMP_MASTER_TASKLOOP_CLAUSE

diff  --git a/clang/include/clang/Basic/OpenMPKinds.h b/clang/include/clang/Basic/OpenMPKinds.h
index 4129cca0fe68..8c0171eb8229 100644
--- a/clang/include/clang/Basic/OpenMPKinds.h
+++ b/clang/include/clang/Basic/OpenMPKinds.h
@@ -269,8 +269,8 @@ bool isOpenMPPrivate(OpenMPClauseKind Kind);
 bool isOpenMPThreadPrivate(OpenMPClauseKind Kind);
 
 /// Checks if the specified directive kind is one of tasking directives - task,
-/// taskloop, taksloop simd, master taskloop, parallel master taskloop or master
-/// taskloop simd.
+/// taskloop, taskloop simd, master taskloop, parallel master taskloop, master
+/// taskloop simd, or parallel master taskloop simd.
 bool isOpenMPTaskingDirective(OpenMPDirectiveKind Kind);
 
 /// Checks if the specified directive kind is one of the composite or combined

diff  --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 18b46e6aa103..81ab16a83aed 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -247,6 +247,7 @@ def OMPTaskLoopSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPMasterTaskLoopDirective : StmtNode<OMPLoopDirective>;
 def OMPMasterTaskLoopSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPParallelMasterTaskLoopDirective : StmtNode<OMPLoopDirective>;
+def OMPParallelMasterTaskLoopSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPDistributeDirective : StmtNode<OMPLoopDirective>;
 def OMPDistributeParallelForDirective : StmtNode<OMPLoopDirective>;
 def OMPDistributeParallelForSimdDirective : StmtNode<OMPLoopDirective>;

diff  --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index a505ab18d686..60f02f5bfbb4 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -9675,6 +9675,11 @@ class Sema {
   StmtResult ActOnOpenMPParallelMasterTaskLoopDirective(
       ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
       SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA);
+  /// Called on well-formed '\#pragma omp parallel master taskloop simd' after
+  /// parsing of the associated statement.
+  StmtResult ActOnOpenMPParallelMasterTaskLoopSimdDirective(
+      ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+      SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA);
   /// Called on well-formed '\#pragma omp distribute' after parsing
   /// of the associated statement.
   StmtResult

diff  --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index f31057275479..52504976692b 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1971,6 +1971,7 @@ namespace serialization {
       STMT_OMP_MASTER_TASKLOOP_DIRECTIVE,
       STMT_OMP_MASTER_TASKLOOP_SIMD_DIRECTIVE,
       STMT_OMP_PARALLEL_MASTER_TASKLOOP_DIRECTIVE,
+      STMT_OMP_PARALLEL_MASTER_TASKLOOP_SIMD_DIRECTIVE,
       STMT_OMP_DISTRIBUTE_DIRECTIVE,
       STMT_OMP_TARGET_UPDATE_DIRECTIVE,
       STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE,

diff  --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index da1364ebffc4..a93192b4857f 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -1182,6 +1182,63 @@ OMPParallelMasterTaskLoopDirective::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPParallelMasterTaskLoopDirective(CollapsedNum, NumClauses);
 }
 
+OMPParallelMasterTaskLoopSimdDirective *
+OMPParallelMasterTaskLoopSimdDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+    const HelperExprs &Exprs) {
+  unsigned Size = llvm::alignTo(sizeof(OMPParallelMasterTaskLoopSimdDirective),
+                                alignof(OMPClause *));
+  void *Mem = C.Allocate(
+      Size + sizeof(OMPClause *) * Clauses.size() +
+      sizeof(Stmt *) *
+          numLoopChildren(CollapsedNum, OMPD_parallel_master_taskloop_simd));
+  auto *Dir = new (Mem) OMPParallelMasterTaskLoopSimdDirective(
+      StartLoc, EndLoc, CollapsedNum, Clauses.size());
+  Dir->setClauses(Clauses);
+  Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setIterationVariable(Exprs.IterationVarRef);
+  Dir->setLastIteration(Exprs.LastIteration);
+  Dir->setCalcLastIteration(Exprs.CalcLastIteration);
+  Dir->setPreCond(Exprs.PreCond);
+  Dir->setCond(Exprs.Cond);
+  Dir->setInit(Exprs.Init);
+  Dir->setInc(Exprs.Inc);
+  Dir->setIsLastIterVariable(Exprs.IL);
+  Dir->setLowerBoundVariable(Exprs.LB);
+  Dir->setUpperBoundVariable(Exprs.UB);
+  Dir->setStrideVariable(Exprs.ST);
+  Dir->setEnsureUpperBound(Exprs.EUB);
+  Dir->setNextLowerBound(Exprs.NLB);
+  Dir->setNextUpperBound(Exprs.NUB);
+  Dir->setNumIterations(Exprs.NumIterations);
+  Dir->setCounters(Exprs.Counters);
+  Dir->setPrivateCounters(Exprs.PrivateCounters);
+  Dir->setInits(Exprs.Inits);
+  Dir->setUpdates(Exprs.Updates);
+  Dir->setFinals(Exprs.Finals);
+  Dir->setDependentCounters(Exprs.DependentCounters);
+  Dir->setDependentInits(Exprs.DependentInits);
+  Dir->setFinalsConditions(Exprs.FinalsConditions);
+  Dir->setPreInits(Exprs.PreInits);
+  return Dir;
+}
+
+OMPParallelMasterTaskLoopSimdDirective *
+OMPParallelMasterTaskLoopSimdDirective::CreateEmpty(const ASTContext &C,
+                                                    unsigned NumClauses,
+                                                    unsigned CollapsedNum,
+                                                    EmptyShell) {
+  unsigned Size = llvm::alignTo(sizeof(OMPParallelMasterTaskLoopSimdDirective),
+                                alignof(OMPClause *));
+  void *Mem = C.Allocate(
+      Size + sizeof(OMPClause *) * NumClauses +
+      sizeof(Stmt *) *
+          numLoopChildren(CollapsedNum, OMPD_parallel_master_taskloop_simd));
+  return new (Mem)
+      OMPParallelMasterTaskLoopSimdDirective(CollapsedNum, NumClauses);
+}
+
 OMPDistributeDirective *OMPDistributeDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,

diff  --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 7759ff6c1389..0f92d4c367e9 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -841,6 +841,12 @@ void StmtPrinter::VisitOMPParallelMasterTaskLoopDirective(
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPParallelMasterTaskLoopSimdDirective(
+    OMPParallelMasterTaskLoopSimdDirective *Node) {
+  Indent() << "#pragma omp parallel master taskloop simd";
+  PrintOMPExecutableDirective(Node);
+}
+
 void StmtPrinter::VisitOMPDistributeDirective(OMPDistributeDirective *Node) {
   Indent() << "#pragma omp distribute";
   PrintOMPExecutableDirective(Node);

diff  --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index d1e856538932..6f266cf12949 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -937,6 +937,11 @@ void StmtProfiler::VisitOMPParallelMasterTaskLoopDirective(
   VisitOMPLoopDirective(S);
 }
 
+void StmtProfiler::VisitOMPParallelMasterTaskLoopSimdDirective(
+    const OMPParallelMasterTaskLoopSimdDirective *S) {
+  VisitOMPLoopDirective(S);
+}
+
 void StmtProfiler::VisitOMPDistributeDirective(
     const OMPDistributeDirective *S) {
   VisitOMPLoopDirective(S);

diff  --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index a52ed8caa121..75199fbceac2 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -676,6 +676,16 @@ bool clang::isAllowedClauseForDirective(OpenMPDirectiveKind DKind,
 #define OPENMP_PARALLEL_MASTER_TASKLOOP_CLAUSE(Name)                           \
   case OMPC_##Name:                                                            \
     return true;
+#include "clang/Basic/OpenMPKinds.def"
+    default:
+      break;
+    }
+    break;
+  case OMPD_parallel_master_taskloop_simd:
+    switch (CKind) {
+#define OPENMP_PARALLEL_MASTER_TASKLOOP_SIMD_CLAUSE(Name)                      \
+  case OMPC_##Name:                                                            \
+    return true;
 #include "clang/Basic/OpenMPKinds.def"
     default:
       break;
@@ -903,8 +913,9 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
          DKind == OMPD_parallel_for || DKind == OMPD_parallel_for_simd ||
          DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd ||
          DKind == OMPD_master_taskloop || DKind == OMPD_master_taskloop_simd ||
-         DKind == OMPD_parallel_master_taskloop || DKind == OMPD_distribute ||
-         DKind == OMPD_target_parallel_for ||
+         DKind == OMPD_parallel_master_taskloop ||
+         DKind == OMPD_parallel_master_taskloop_simd ||
+         DKind == OMPD_distribute || DKind == OMPD_target_parallel_for ||
          DKind == OMPD_distribute_parallel_for ||
          DKind == OMPD_distribute_parallel_for_simd ||
          DKind == OMPD_distribute_simd ||
@@ -937,7 +948,8 @@ bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
 bool clang::isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_taskloop || DKind == OMPD_taskloop_simd ||
          DKind == OMPD_master_taskloop || DKind == OMPD_master_taskloop_simd ||
-         DKind == OMPD_parallel_master_taskloop;
+         DKind == OMPD_parallel_master_taskloop ||
+         DKind == OMPD_parallel_master_taskloop_simd;
 }
 
 bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
@@ -951,7 +963,8 @@ bool clang::isOpenMPParallelDirective(OpenMPDirectiveKind DKind) {
          DKind == OMPD_teams_distribute_parallel_for_simd ||
          DKind == OMPD_target_teams_distribute_parallel_for ||
          DKind == OMPD_target_teams_distribute_parallel_for_simd ||
-         DKind == OMPD_parallel_master_taskloop;
+         DKind == OMPD_parallel_master_taskloop ||
+         DKind == OMPD_parallel_master_taskloop_simd;
 }
 
 bool clang::isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind) {
@@ -988,6 +1001,7 @@ bool clang::isOpenMPSimdDirective(OpenMPDirectiveKind DKind) {
   return DKind == OMPD_simd || DKind == OMPD_for_simd ||
          DKind == OMPD_parallel_for_simd || DKind == OMPD_taskloop_simd ||
          DKind == OMPD_master_taskloop_simd ||
+         DKind == OMPD_parallel_master_taskloop_simd ||
          DKind == OMPD_distribute_parallel_for_simd ||
          DKind == OMPD_distribute_simd || DKind == OMPD_target_simd ||
          DKind == OMPD_teams_distribute_simd ||
@@ -1094,6 +1108,7 @@ void clang::getOpenMPCaptureRegions(
     CaptureRegions.push_back(OMPD_taskloop);
     break;
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
     CaptureRegions.push_back(OMPD_parallel);
     CaptureRegions.push_back(OMPD_taskloop);
     break;

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 2a13a2a58156..514457d9e4af 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6708,6 +6708,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
   case OMPD_master_taskloop:
   case OMPD_master_taskloop_simd:
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
   case OMPD_requires:
   case OMPD_unknown:
     break;
@@ -7017,6 +7018,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
   case OMPD_master_taskloop:
   case OMPD_master_taskloop_simd:
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
   case OMPD_requires:
   case OMPD_unknown:
     break;
@@ -8791,6 +8793,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_requires:
     case OMPD_unknown:
       llvm_unreachable("Unexpected directive.");
@@ -9551,6 +9554,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_requires:
     case OMPD_unknown:
       llvm_unreachable("Unknown target directive for OpenMP device codegen.");
@@ -10169,6 +10173,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_target:
     case OMPD_target_simd:
     case OMPD_target_teams_distribute:

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index b7808e0c4f69..eab2d7be1aeb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -806,6 +806,7 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx,
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_requires:
     case OMPD_unknown:
       llvm_unreachable("Unexpected directive.");
@@ -880,6 +881,7 @@ static bool supportsSPMDExecutionMode(ASTContext &Ctx,
   case OMPD_master_taskloop:
   case OMPD_master_taskloop_simd:
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
   case OMPD_requires:
   case OMPD_unknown:
     break;
@@ -1047,6 +1049,7 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx,
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_requires:
     case OMPD_unknown:
       llvm_unreachable("Unexpected directive.");
@@ -1127,6 +1130,7 @@ static bool supportsLightweightRuntime(ASTContext &Ctx,
   case OMPD_master_taskloop:
   case OMPD_master_taskloop_simd:
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
   case OMPD_requires:
   case OMPD_unknown:
     break;

diff  --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index bb2629f89d3d..46fa29fa69bd 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -292,6 +292,10 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
     EmitOMPParallelMasterTaskLoopDirective(
         cast<OMPParallelMasterTaskLoopDirective>(*S));
     break;
+  case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
+    EmitOMPParallelMasterTaskLoopSimdDirective(
+        cast<OMPParallelMasterTaskLoopSimdDirective>(*S));
+    break;
   case Stmt::OMPDistributeDirectiveClass:
     EmitOMPDistributeDirective(cast<OMPDistributeDirective>(*S));
     break;

diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 6ece69d51daf..df4b69e49866 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -5017,8 +5017,10 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
       CGF.incrementProfileCounter(&S);
     }
 
-    if (isOpenMPSimdDirective(S.getDirectiveKind()))
+    if (isOpenMPSimdDirective(S.getDirectiveKind())) {
       CGF.EmitOMPSimdInit(S);
+      (void)CGF.EmitOMPLinearClauseInit(S);
+    }
 
     OMPPrivateScope LoopScope(CGF);
     // Emit helper vars inits.
@@ -5036,6 +5038,7 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
     mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
              LoopScope);
     CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
+    CGF.EmitOMPLinearClause(S, LoopScope);
     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
     (void)LoopScope.Privatize();
     // Emit the loop iteration variable.
@@ -5073,6 +5076,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
               CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
               (*LIP)->getType(), S.getBeginLoc())));
     }
+    CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
+      return CGF.Builder.CreateIsNotNull(
+          CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
+                               (*LIP)->getType(), S.getBeginLoc()));
+    });
   };
   auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                     IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
@@ -5147,6 +5155,22 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
                                  emitEmptyBoundParameters);
 }
 
+void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
+    const OMPParallelMasterTaskLoopSimdDirective &S) {
+  auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+    auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
+                                  PrePostActionTy &Action) {
+      Action.Enter(CGF);
+      CGF.EmitOMPTaskLoopBasedDirective(S);
+    };
+    OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
+    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
+                                            S.getBeginLoc());
+  };
+  emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
+                                 emitEmptyBoundParameters);
+}
+
 // Generate the instructions for '#pragma omp target update' directive.
 void CodeGenFunction::EmitOMPTargetUpdateDirective(
     const OMPTargetUpdateDirective &S) {

diff  --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 2c20ba4e6b65..5c3d1764fad7 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3160,6 +3160,8 @@ class CodeGenFunction : public CodeGenTypeCache {
   EmitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective &S);
   void EmitOMPParallelMasterTaskLoopDirective(
       const OMPParallelMasterTaskLoopDirective &S);
+  void EmitOMPParallelMasterTaskLoopSimdDirective(
+      const OMPParallelMasterTaskLoopSimdDirective &S);
   void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
   void EmitOMPDistributeParallelForDirective(
       const OMPDistributeParallelForDirective &S);

diff  --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 91fe10e667db..816e8882510a 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -140,7 +140,9 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) {
       {OMPD_master, OMPD_taskloop, OMPD_master_taskloop},
       {OMPD_master_taskloop, OMPD_simd, OMPD_master_taskloop_simd},
       {OMPD_parallel, OMPD_master, OMPD_parallel_master},
-      {OMPD_parallel_master, OMPD_taskloop, OMPD_parallel_master_taskloop}};
+      {OMPD_parallel_master, OMPD_taskloop, OMPD_parallel_master_taskloop},
+      {OMPD_parallel_master_taskloop, OMPD_simd,
+       OMPD_parallel_master_taskloop_simd}};
   enum { CancellationPoint = 0, DeclareReduction = 1, TargetData = 2 };
   Token Tok = P.getCurToken();
   unsigned DKind =
@@ -1511,6 +1513,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
   case OMPD_master_taskloop:
   case OMPD_master_taskloop_simd:
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
   case OMPD_distribute:
   case OMPD_end_declare_target:
   case OMPD_target_update:
@@ -1567,15 +1570,16 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
 ///         'for simd' | 'parallel for simd' | 'target' | 'target data' |
 ///         'taskgroup' | 'teams' | 'taskloop' | 'taskloop simd' | 'master
 ///         taskloop' | 'master taskloop simd' | 'parallel master taskloop' |
-///         'distribute' | 'target enter data' | 'target exit data' | 'target
-///         parallel' | 'target parallel for' | 'target update' | 'distribute
-///         parallel for' | 'distribute paralle for simd' | 'distribute simd' |
-///         'target parallel for simd' | 'target simd' | 'teams distribute' |
-///         'teams distribute simd' | 'teams distribute parallel for simd' |
-///         'teams distribute parallel for' | 'target teams' | 'target teams
-///         distribute' | 'target teams distribute parallel for' | 'target teams
-///         distribute parallel for simd' | 'target teams distribute simd'
-///         {clause} annot_pragma_openmp_end
+///         'parallel master taskloop simd' | 'distribute' | 'target enter data'
+///         | 'target exit data' | 'target parallel' | 'target parallel for' |
+///         'target update' | 'distribute parallel for' | 'distribute paralle
+///         for simd' | 'distribute simd' | 'target parallel for simd' | 'target
+///         simd' | 'teams distribute' | 'teams distribute simd' | 'teams
+///         distribute parallel for simd' | 'teams distribute parallel for' |
+///         'target teams' | 'target teams distribute' | 'target teams
+///         distribute parallel for' | 'target teams distribute parallel for
+///         simd' | 'target teams distribute simd' {clause}
+///         annot_pragma_openmp_end
 ///
 StmtResult
 Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
@@ -1753,6 +1757,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
   case OMPD_master_taskloop:
   case OMPD_master_taskloop_simd:
   case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd:
   case OMPD_distribute:
   case OMPD_distribute_parallel_for:
   case OMPD_distribute_parallel_for_simd:

diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b3b8fd655f14..595281a7c372 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -3301,7 +3301,8 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
             AlwaysInlineAttr::Keyword_forceinline));
     break;
   }
-  case OMPD_parallel_master_taskloop: {
+  case OMPD_parallel_master_taskloop:
+  case OMPD_parallel_master_taskloop_simd: {
     QualType KmpInt32Ty =
         Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1)
             .withConst();
@@ -4502,6 +4503,12 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
     AllowedNameModifiers.push_back(OMPD_taskloop);
     AllowedNameModifiers.push_back(OMPD_parallel);
     break;
+  case OMPD_parallel_master_taskloop_simd:
+    Res = ActOnOpenMPParallelMasterTaskLoopSimdDirective(
+        ClausesWithImplicit, AStmt, StartLoc, EndLoc, VarsWithInheritedDSA);
+    AllowedNameModifiers.push_back(OMPD_taskloop);
+    AllowedNameModifiers.push_back(OMPD_parallel);
+    break;
   case OMPD_distribute:
     Res = ActOnOpenMPDistributeDirective(ClausesWithImplicit, AStmt, StartLoc,
                                          EndLoc, VarsWithInheritedDSA);
@@ -9486,6 +9493,74 @@ StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopDirective(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
 
+StmtResult Sema::ActOnOpenMPParallelMasterTaskLoopSimdDirective(
+    ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
+    SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) {
+  if (!AStmt)
+    return StmtError();
+
+  assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
+  auto *CS = cast<CapturedStmt>(AStmt);
+  // 1.2.2 OpenMP Language Terminology
+  // Structured block - An executable statement with a single entry at the
+  // top and a single exit at the bottom.
+  // The point of exit cannot be a branch out of the structured block.
+  // longjmp() and throw() must not violate the entry/exit criteria.
+  CS->getCapturedDecl()->setNothrow();
+  for (int ThisCaptureLevel =
+           getOpenMPCaptureLevels(OMPD_parallel_master_taskloop_simd);
+       ThisCaptureLevel > 1; --ThisCaptureLevel) {
+    CS = cast<CapturedStmt>(CS->getCapturedStmt());
+    // 1.2.2 OpenMP Language Terminology
+    // Structured block - An executable statement with a single entry at the
+    // top and a single exit at the bottom.
+    // The point of exit cannot be a branch out of the structured block.
+    // longjmp() and throw() must not violate the entry/exit criteria.
+    CS->getCapturedDecl()->setNothrow();
+  }
+
+  OMPLoopDirective::HelperExprs B;
+  // In presence of clause 'collapse' or 'ordered' with number of loops, it will
+  // define the nested loops number.
+  unsigned NestedLoopCount = checkOpenMPLoop(
+      OMPD_parallel_master_taskloop_simd, getCollapseNumberExpr(Clauses),
+      /*OrderedLoopCountExpr=*/nullptr, CS, *this, *DSAStack,
+      VarsWithImplicitDSA, B);
+  if (NestedLoopCount == 0)
+    return StmtError();
+
+  assert((CurContext->isDependentContext() || B.builtAll()) &&
+         "omp for loop exprs were not built");
+
+  if (!CurContext->isDependentContext()) {
+    // Finalize the clauses that need pre-built expressions for CodeGen.
+    for (OMPClause *C : Clauses) {
+      if (auto *LC = dyn_cast<OMPLinearClause>(C))
+        if (FinishOpenMPLinearClause(*LC, cast<DeclRefExpr>(B.IterationVarRef),
+                                     B.NumIterations, *this, CurScope,
+                                     DSAStack))
+          return StmtError();
+    }
+  }
+
+  // OpenMP, [2.9.2 taskloop Construct, Restrictions]
+  // The grainsize clause and num_tasks clause are mutually exclusive and may
+  // not appear on the same taskloop directive.
+  if (checkGrainsizeNumTasksClauses(*this, Clauses))
+    return StmtError();
+  // OpenMP, [2.9.2 taskloop Construct, Restrictions]
+  // If a reduction clause is present on the taskloop directive, the nogroup
+  // clause must not be specified.
+  if (checkReductionClauseWithNogroup(*this, Clauses))
+    return StmtError();
+  if (checkSimdlenSafelenSpecified(*this, Clauses))
+    return StmtError();
+
+  setFunctionHasBranchProtectedScope();
+  return OMPParallelMasterTaskLoopSimdDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
+}
+
 StmtResult Sema::ActOnOpenMPDistributeDirective(
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) {
@@ -10378,6 +10453,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
       CaptureRegion = OMPD_task;
       break;
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
       if (NameModifier == OMPD_unknown || NameModifier == OMPD_taskloop)
         CaptureRegion = OMPD_parallel;
       break;
@@ -10456,6 +10532,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_distribute_parallel_for:
     case OMPD_distribute_parallel_for_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
       // Do not capture num_threads-clause expressions.
       break;
     case OMPD_target_data:
@@ -10532,6 +10609,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_target_data:
     case OMPD_target_enter_data:
     case OMPD_target_exit_data:
@@ -10602,6 +10680,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_target_data:
     case OMPD_target_enter_data:
     case OMPD_target_exit_data:
@@ -10672,6 +10751,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_target_data:
     case OMPD_target_enter_data:
     case OMPD_target_exit_data:
@@ -10746,6 +10826,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_target_data:
     case OMPD_target_enter_data:
     case OMPD_target_exit_data:
@@ -10821,6 +10902,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_master_taskloop:
     case OMPD_master_taskloop_simd:
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
     case OMPD_cancel:
     case OMPD_parallel:
     case OMPD_parallel_sections:
@@ -10870,6 +10952,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_master_taskloop_simd:
       break;
     case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
       CaptureRegion = OMPD_parallel;
       break;
     case OMPD_target_update:

diff  --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 4b3a6708717c..ad3db4e134df 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -8309,6 +8309,18 @@ StmtResult TreeTransform<Derived>::TransformOMPParallelMasterTaskLoopDirective(
   return Res;
 }
 
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPParallelMasterTaskLoopSimdDirective(
+    OMPParallelMasterTaskLoopSimdDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().StartOpenMPDSABlock(
+      OMPD_parallel_master_taskloop_simd, DirName, nullptr, D->getBeginLoc());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 template <typename Derived>
 StmtResult TreeTransform<Derived>::TransformOMPDistributeDirective(
     OMPDistributeDirective *D) {

diff  --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a275e0c30579..3fd9fff5add4 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2316,6 +2316,11 @@ void ASTStmtReader::VisitOMPParallelMasterTaskLoopDirective(
   VisitOMPLoopDirective(D);
 }
 
+void ASTStmtReader::VisitOMPParallelMasterTaskLoopSimdDirective(
+    OMPParallelMasterTaskLoopSimdDirective *D) {
+  VisitOMPLoopDirective(D);
+}
+
 void ASTStmtReader::VisitOMPDistributeDirective(OMPDistributeDirective *D) {
   VisitOMPLoopDirective(D);
 }
@@ -3131,6 +3136,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
     }
 
+    case STMT_OMP_PARALLEL_MASTER_TASKLOOP_SIMD_DIRECTIVE: {
+      unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
+      unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];
+      S = OMPParallelMasterTaskLoopSimdDirective::CreateEmpty(
+          Context, NumClauses, CollapsedNum, Empty);
+      break;
+    }
+
     case STMT_OMP_DISTRIBUTE_DIRECTIVE: {
       unsigned NumClauses = Record[ASTStmtReader::NumStmtFields];
       unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields + 1];

diff  --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index c39d4d39bcdf..6f4abc449094 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2267,6 +2267,12 @@ void ASTStmtWriter::VisitOMPParallelMasterTaskLoopDirective(
   Code = serialization::STMT_OMP_PARALLEL_MASTER_TASKLOOP_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPParallelMasterTaskLoopSimdDirective(
+    OMPParallelMasterTaskLoopSimdDirective *D) {
+  VisitOMPLoopDirective(D);
+  Code = serialization::STMT_OMP_PARALLEL_MASTER_TASKLOOP_SIMD_DIRECTIVE;
+}
+
 void ASTStmtWriter::VisitOMPDistributeDirective(OMPDistributeDirective *D) {
   VisitOMPLoopDirective(D);
   Code = serialization::STMT_OMP_DISTRIBUTE_DIRECTIVE;

diff  --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index e92e95354f5f..5c49231a7123 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1268,6 +1268,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OMPMasterTaskLoopDirectiveClass:
     case Stmt::OMPMasterTaskLoopSimdDirectiveClass:
     case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
+    case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
     case Stmt::OMPDistributeDirectiveClass:
     case Stmt::OMPDistributeParallelForDirectiveClass:
     case Stmt::OMPDistributeParallelForSimdDirectiveClass:

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_aligned_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_aligned_messages.cpp
new file mode 100644
index 000000000000..b5b079f6dc7a
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_aligned_messages.cpp
@@ -0,0 +1,205 @@
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -x c++ -std=c++11 -verify -fopenmp-simd %s -Wuninitialized
+
+struct B {
+  static int ib[20]; // expected-note 0 {{'B::ib' declared here}}
+  static constexpr int bfoo() { return 8; }
+};
+namespace X {
+  B x; // expected-note {{'x' defined here}}
+};
+constexpr int bfoo() { return 4; }
+
+int **z;
+const int C1 = 1;
+const int C2 = 2;
+void test_aligned_colons(int *&rp)
+{
+  int *B = 0;
+  #pragma omp parallel master taskloop simd aligned(B:bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{unexpected ':' in nested name specifier; did you mean '::'}}
+  #pragma omp parallel master taskloop simd aligned(B::ib:B:bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd aligned(B:B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{unexpected ':' in nested name specifier; did you mean '::'?}}
+  #pragma omp parallel master taskloop simd aligned(z:B:bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd aligned(B:B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'int **'}}
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'B'}}
+  #pragma omp parallel master taskloop simd aligned(X::x : ::z)
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'B'}}
+  #pragma omp parallel master taskloop simd aligned(B,rp,::z: X::x)
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd aligned(::z)
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{expected variable name}}
+  #pragma omp parallel master taskloop simd aligned(B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  // expected-warning@+1 {{aligned clause will be ignored because the requested alignment is not a power of 2}}
+  #pragma omp parallel master taskloop simd aligned(B::ib,B:C1+C2)
+  for (int i = 0; i < 10; ++i) ;
+}
+
+// expected-note@+1 {{'num' defined here}}
+template<int L, class T, class N> T test_template(T* arr, N num) {
+  N i;
+  T sum = (T)0;
+  T ind2 = - num * L;
+  // Negative number is passed as L.
+  // expected-error@+1 {{argument to 'aligned' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd aligned(arr:L)
+  for (i = 0; i < num; ++i) {
+    T cur = arr[(int)ind2];
+    ind2 += L;
+    sum += cur;
+  }
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}}
+  #pragma omp parallel master taskloop simd aligned(num:4)
+  for (i = 0; i < num; ++i);
+  return T();
+}
+
+template<int LEN> int test_warn() {
+  int *ind2 = 0;
+  // expected-error@+1 {{argument to 'aligned' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd aligned(ind2:LEN)
+  for (int i = 0; i < 100; i++) {
+    ind2 += LEN;
+  }
+  return 0;
+}
+
+struct S1; // expected-note 2 {{declared here}}
+extern S1 a; // expected-note {{'a' declared here}}
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+};
+const S2 b; // expected-note 1 {{'b' defined here}}
+const S2 ba[5];
+class S3 {
+  int a;
+public:
+  S3():a(0) { }
+};
+const S3 ca[5];
+class S4 {
+  int a;
+  S4();
+public:
+  S4(int v):a(v) { }
+};
+class S5 {
+  int a;
+  S5():a(0) {}
+public:
+  S5(int v):a(v) { }
+};
+
+S3 h; // expected-note 2 {{'h' defined here}}
+#pragma omp threadprivate(h)
+
+template<class I, class C> int foomain(I argc, C **argv) {
+  I e(argc);
+  I g(argc);
+  int i; // expected-note {{'i' defined here}}
+  // expected-note@+1 {{declared here}}
+  int &j = i;
+  #pragma omp parallel master taskloop simd aligned // expected-error {{expected '(' after 'aligned'}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned () // expected-error {{expected expression}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (I k = 0; k < argc; ++k) ++k;
+// FIXME: Should argc really be a pointer?
+  #pragma omp parallel master taskloop simd aligned (*argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argc : 5) // expected-warning {{aligned clause will be ignored because the requested alignment is not a power of 2}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (S1) // expected-error {{'S1' does not refer to a value}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argv[1]) // expected-error {{expected variable name}}
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned(e, g)
+  for (I k = 0; k < argc; ++k) ++k;
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S3'}}
+  #pragma omp parallel master taskloop simd aligned(h)
+  for (I k = 0; k < argc; ++k) ++k;
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}}
+  #pragma omp parallel master taskloop simd aligned(i)
+  for (I k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel
+  {
+    int *v = 0;
+    I i;
+    #pragma omp parallel master taskloop simd aligned(v:16)
+    for (I k = 0; k < argc; ++k) { i = k; v += 2; }
+  }
+  float *f;
+  #pragma omp parallel master taskloop simd aligned(f)
+  for (I k = 0; k < argc; ++k) ++k;
+  int v = 0;
+  // expected-note@+2 {{initializer of 'j' is not a constant expression}}
+  // expected-error@+1 {{expression is not an integral constant expression}}
+  #pragma omp parallel master taskloop simd aligned(f:j)
+  for (I k = 0; k < argc; ++k) { ++k; v += j; }
+  #pragma omp parallel master taskloop simd aligned(f)
+  for (I k = 0; k < argc; ++k) ++k;
+  return 0;
+}
+
+// expected-note@+1 2 {{'argc' defined here}}
+int main(int argc, char **argv) {
+  double darr[100];
+  // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}}
+  test_template<-4>(darr, 4);
+  test_warn<4>(); // ok
+  // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}}
+  test_warn<0>();
+
+  int i;
+  int &j = i;
+  #pragma omp parallel master taskloop simd aligned // expected-error {{expected '(' after 'aligned'}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned () // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argv // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}}
+  #pragma omp parallel master taskloop simd aligned (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'int'}}
+  #pragma omp parallel master taskloop simd aligned (argc)
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+2 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S1'}}
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S2'}}
+#pragma omp parallel master taskloop simd aligned(a, b)
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd aligned (argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+1 {{argument of aligned clause should be array, pointer, reference to array or reference to pointer, not 'S3'}}
+  #pragma omp parallel master taskloop simd aligned(h)
+  for (int k = 0; k < argc; ++k) ++k;
+  int *pargc = &argc;
+  // expected-note@+1 {{in instantiation of function template specialization 'foomain<int *, char>' requested here}}
+  foomain<int*,char>(pargc,argv);
+  return 0;
+}
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_ast_print.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_ast_print.cpp
new file mode 100644
index 000000000000..e09ca86b5320
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_ast_print.cpp
@@ -0,0 +1,83 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=45 -ast-print %s | FileCheck %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -x c++ -std=c++11 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=45 -std=c++11 -include-pch %t -fsyntax-only -verify %s -ast-print | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+void foo() {}
+
+template <class T, int N>
+T tmain(T argc) {
+  T b = argc, c, d, e, f, g;
+  static T a;
+// CHECK: static T a;
+#pragma omp taskgroup allocate(d) task_reduction(+: d)
+#pragma omp parallel master taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+:g) allocate(g) simdlen(8)
+  // CHECK-NEXT: #pragma omp taskgroup allocate(d) task_reduction(+: d)
+  // CHECK-NEXT: #pragma omp parallel master taskloop simd if(taskloop: argc > N) default(shared) untied priority(N) grainsize(N) reduction(+: g) allocate(g) simdlen(8){{$}}
+  for (int i = 0; i < 2; ++i)
+    a = 2;
+// CHECK-NEXT: for (int i = 0; i < 2; ++i)
+// CHECK-NEXT: a = 2;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(argc, b), firstprivate(c, d), lastprivate(d, f) collapse(N) shared(g) if (c) final(d) mergeable priority(f) nogroup num_tasks(N) safelen(8)
+  for (int i = 0; i < 2; ++i)
+    for (int j = 0; j < 2; ++j)
+      for (int j = 0; j < 2; ++j)
+        for (int j = 0; j < 2; ++j)
+          for (int j = 0; j < 2; ++j)
+  for (int i = 0; i < 2; ++i)
+    for (int j = 0; j < 2; ++j)
+      for (int j = 0; j < 2; ++j)
+        for (int j = 0; j < 2; ++j)
+          for (int j = 0; j < 2; ++j)
+            foo();
+  // CHECK-NEXT: #pragma omp parallel
+  // CHECK-NEXT: #pragma omp parallel master taskloop simd private(argc,b) firstprivate(c,d) lastprivate(d,f) collapse(N) shared(g) if(c) final(d) mergeable priority(f) nogroup num_tasks(N) safelen(8)
+  // CHECK-NEXT: for (int i = 0; i < 2; ++i)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int i = 0; i < 2; ++i)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: for (int j = 0; j < 2; ++j)
+  // CHECK-NEXT: foo();
+  return T();
+}
+
+// CHECK-LABEL: int main(int argc, char **argv) {
+int main(int argc, char **argv) {
+  int b = argc, c, d, e, f, g;
+  static int a;
+// CHECK: static int a;
+#pragma omp taskgroup task_reduction(+: d)
+#pragma omp parallel master taskloop simd if(parallel: a) default(none) shared(a, b, argc) final(b) priority(5) num_tasks(argc) reduction(*: g) aligned(argv: 8) linear(c:b)
+  // CHECK-NEXT: #pragma omp taskgroup task_reduction(+: d)
+  // CHECK-NEXT: #pragma omp parallel master taskloop simd if(parallel: a) default(none) shared(a,b,argc) final(b) priority(5) num_tasks(argc) reduction(*: g) aligned(argv: 8) linear(c: b)
+  for (int i = 0; i < 2; ++i)
+    a = 2;
+// CHECK-NEXT: for (int i = 0; i < 2; ++i)
+// CHECK-NEXT: a = 2;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(argc, b), firstprivate(argv, c), lastprivate(d, f) collapse(2) shared(g) if(argc) mergeable priority(argc) grainsize(argc) reduction(max: a, e)
+  for (int i = 0; i < 10; ++i)
+    for (int j = 0; j < 10; ++j)
+      foo();
+  // CHECK-NEXT: #pragma omp parallel
+  // CHECK-NEXT: #pragma omp parallel master taskloop simd private(argc,b) firstprivate(argv,c) lastprivate(d,f) collapse(2) shared(g) if(argc) mergeable priority(argc) grainsize(argc) reduction(max: a,e)
+  // CHECK-NEXT: for (int i = 0; i < 10; ++i)
+  // CHECK-NEXT: for (int j = 0; j < 10; ++j)
+  // CHECK-NEXT: foo();
+  return (tmain<int, 5>(argc) + tmain<char, 1>(argv[0][0]));
+}
+
+#endif

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
new file mode 100644
index 000000000000..d72c86ab2107
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp
@@ -0,0 +1,250 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s
+
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp-simd -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK-LABEL: @main
+int main(int argc, char **argv) {
+// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* [[DEFLOC:@.+]])
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* [[OMP_OUTLINED1:@.+]] to void (i32*, i32*, ...)*), i64 [[PRIORITY:%.+]])
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* [[OMP_OUTLINED2:@.+]] to void (i32*, i32*, ...)*), i64 [[GRAINSIZE:%.+]])
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i8***, i64, i64)* [[OMP_OUTLINED3:@.+]] to void (i32*, i32*, ...)*), i32* [[I:%.+]], i32* [[ARGC:%.+]], i8*** [[ARGV:%.+]], i64 [[COND:%.+]], i64 [[NUM_TASKS:%.+]])
+// CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK: call void [[OMP_OUTLINED3]](i32* %{{.+}}, i32* %{{.+}}, i32* [[I]], i32* [[ARGC]], i8*** [[ARGV]], i64 [[COND]], i64 [[NUM_TASKS]])
+// CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+
+
+// CHECK: define internal void [[OMP_OUTLINED1]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}})
+// CHECK: [[PRIO_ADDR:%.+]] = bitcast i64* %{{.+}} to i32*
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK: [[PRIO:%.+]] = load i32, i32* [[PRIO_ADDR]],
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: [[PRIO_ADDR:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4
+// CHECK: [[PRIO_ADDR_CAST:%.+]] = bitcast %{{.+}}* [[PRIO_ADDR]] to i32*
+// CHECK: store i32 [[PRIO]], i32* [[PRIO_ADDR_CAST]],
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
+// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+
+// CHECK: define internal i32 [[TASK1]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %
+// CHECK: store i32 %
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#pragma omp parallel master taskloop simd priority(argc) safelen(8)
+  for (int i = 0; i < 10; ++i)
+    ;
+// CHECK: define internal void [[OMP_OUTLINED2]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}})
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 80, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 9, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+
+// CHECK: define internal i32 [[TASK2]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],{{.*}}!llvm.access.group
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],{{.*}}!llvm.access.group
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#pragma omp parallel master taskloop simd nogroup grainsize(argc) simdlen(16)
+  for (int i = 0; i < 10; ++i)
+    ;
+// CHECK: define internal void [[OMP_OUTLINED3]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}, i8*** dereferenceable(8) %{{.+}}, i64 %{{.+}}, i64 %{{.+}})
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]])
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// CHECK: call void @__kmpc_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 88, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: [[COND_VAL:%.+]] = load i8, i8* %{{.+}},
+// CHECK: [[COND_BOOL:%.+]] = trunc i8 [[COND_VAL]] to i1
+// CHECK: [[IF_INT:%.+]] = sext i1 [[COND_BOOL]] to i32
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 [[NUM_TASKS]], i8* bitcast (void ([[TDP_TY]]*, [[TDP_TY]]*, i32)* [[TASK_DUP:@.+]] to i8*))
+// CHECK: call void @__kmpc_end_taskgroup(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  call {{.*}}void @__kmpc_end_master(%struct.ident_t* [[DEFLOC]], i32 [[GTID]])
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+// CHECK: define internal i32 [[TASK3]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: ret i32 0
+
+  int i;
+#pragma omp parallel master taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(argc) lastprivate(i) aligned(argv:8)
+  for (i = 0; i < argc; ++i)
+  for (int j = argc; j < argv[argc][argc]; ++j)
+    ;
+}
+
+// CHECK-LABEL: @_ZN1SC2Ei
+struct S {
+  int a;
+  S(int c) {
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* [[DEFLOC]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, i32*, i64)* [[OMP_OUTLINED4:@.+]] to void (i32*, i32*, ...)*), %struct.S* %{{.+}}, i32* %{{.+}}, i64 %{{.+}})
+
+// CHECK: define internal void [[OMP_OUTLINED4]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, %struct.S* %{{.+}}, i32* dereferenceable(4) %{{.+}}, i64 %{{.+}})
+// CHECK: [[CONV:%.+]] = bitcast i64* %{{.+}} to i8*
+// CHECK: [[CONDI8:%.+]] = load i8, i8* [[CONV]],
+// CHECK: [[COND:%.+]] = trunc i8 [[CONDI8]] to i1
+// CHECK: [[IS_FINAL:%.+]] = select i1 [[COND:%.+]], i32 2, i32 0
+// CHECK: [[FLAGS:%.+]] = or i32 [[IS_FINAL]], 1
+// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* [[DEFLOC]], i32 [[GTID:%.+]], i32 [[FLAGS]], i64 80, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
+// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5
+// CHECK: store i64 0, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
+// CHECK: store i64 %{{.+}}, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 7
+// CHECK: store i64 1, i64* [[ST]],
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: call void @__kmpc_taskloop(%struct.ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 2, i64 4, i8* null)
+#pragma omp parallel master taskloop simd shared(c) num_tasks(4) final(c)
+    for (a = 0; a < c; ++a)
+      ;
+  }
+} s(1);
+
+// CHECK: define internal i32 [[TASK4]](
+// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 5
+// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]],
+// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6
+// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]],
+// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7
+// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
+// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 8
+// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]],
+// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]],
+// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]],
+// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]],
+// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]],
+// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]],
+// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32
+// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]],
+// CHECK: br label
+// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]],
+// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64
+// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]],
+// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]]
+// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}}
+// CHECK: load i32, i32* %{{.*}}!llvm.access.group
+// CHECK: store i32 %{{.*}}!llvm.access.group
+// CHECK: load i32, i32* %
+// CHECK: add nsw i32 %{{.+}}, 1
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label %
+// CHECK: ret i32 0
+
+#endif

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp
new file mode 100644
index 000000000000..667c39ad4299
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_collapse_messages.cpp
@@ -0,0 +1,110 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
+
+void foo() {
+}
+
+#if __cplusplus >= 201103L
+// expected-note@+2 4 {{declared here}}
+#endif
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, typename S, int N, int ST> // expected-note {{declared here}}
+T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+  #pragma omp parallel master taskloop simd collapse // expected-error {{expected '(' after 'collapse'}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse () // expected-error {{expected expression}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
+  // expected-error@+2 2 {{expression is not an integral constant expression}}
+  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  #pragma omp parallel master taskloop simd collapse (argc
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+1 2 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd collapse (ST // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse (1)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse ((ST > 0) ? 1 + ST : 2) // expected-note 2 {{as specified in 'collapse' clause}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST]; // expected-error 2 {{expected 2 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'collapse' clause}}
+  // expected-error@+5 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  // expected-error@+4 2 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  #pragma omp parallel master taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse (S) // expected-error {{'S' does not refer to a value}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#if __cplusplus <= 199711L
+  // expected-error@+4 2 {{expression is not an integral constant expression}}
+#else
+  // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
+#endif
+  #pragma omp parallel master taskloop simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse (1)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse (N) // expected-error {{argument to 'collapse' clause must be a strictly positive integer value}}
+  for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd collapse (2) // expected-note {{as specified in 'collapse' clause}}
+  foo(); // expected-error {{expected 2 for loops after '#pragma omp parallel master taskloop simd'}}
+  return argc;
+}
+
+int main(int argc, char **argv) {
+  #pragma omp parallel master taskloop simd collapse // expected-error {{expected '(' after 'collapse'}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd collapse ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd collapse () // expected-error {{expected expression}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd collapse (4 // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{as specified in 'collapse' clause}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+  #pragma omp parallel master taskloop simd collapse (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}  expected-note {{as specified in 'collapse' clause}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4]; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+  // expected-error@+4 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  #pragma omp parallel master taskloop simd collapse (foobool(1) > 0 ? 1 : 2)
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+6 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'collapse' clause}}
+  // expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd collapse (foobool(argc)), collapse (true), collapse (-5) 
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd collapse (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+#if __cplusplus <= 199711L
+  // expected-error@+4 {{expression is not an integral constant expression}}
+#else
+  // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
+#endif
+  #pragma omp parallel master taskloop simd collapse (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+3 {{statement after '#pragma omp parallel master taskloop simd' must be a for loop}}
+  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, -1, -2>' requested here}}
+  #pragma omp parallel master taskloop simd collapse(collapse(tmain<int, char, -1, -2>(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}}
+  foo();
+  #pragma omp parallel master taskloop simd collapse (2) // expected-note {{as specified in 'collapse' clause}}
+  foo(); // expected-error {{expected 2 for loops after '#pragma omp parallel master taskloop simd'}}
+  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, 1, 0>' requested here}}
+  return tmain<int, char, 1, 0>(argc, argv);
+}
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_final_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_final_messages.cpp
new file mode 100644
index 000000000000..10b74784b057
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_final_messages.cpp
@@ -0,0 +1,94 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, class S> // expected-note {{declared here}}
+int tmain(T argc, S **argv) {
+  T z;
+#pragma omp parallel master taskloop simd final // expected-error {{expected '(' after 'final'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final() // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc > 0 ? argv[1] : argv[2] + z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(foobool(argc)), final(true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'final' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(S) // expected-error {{'S' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argv[1] = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc)
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  int z;
+#pragma omp parallel master taskloop simd final // expected-error {{expected '(' after 'final'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final() // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc > 0 ? argv[1] : argv[2] - z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(foobool(argc)), final(true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'final' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argv[1] = 2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd final(if (tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return tmain(argc, argv);
+}

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
new file mode 100644
index 000000000000..80897ff1fcfb
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp
@@ -0,0 +1,515 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#ifndef ARRAY
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S { // Test type with a user-provided copy constructor and destructor, so copying it requires constructor calls.
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  S(const S &s, T t = T()) : f(s.f + t) {} // Usable as the copy constructor; the defaulted second argument is added to the copied value.
+  operator T() { return T(); } // Conversion ignores f and yields a value-initialized T.
+  ~S() {}
+};
+
+volatile double g;
+
+// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* }
+// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double }
+// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32]
+// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type {{.*}}{ [2 x i32]*, i32, {{.*}}[2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}
+// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 }
+// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] }
+// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] }
+template <typename T>
+T tmain() { // Template counterpart of the plain main() variant, which instantiates it as tmain<int>().
+  S<T> ttt;
+  S<T> test(ttt);
+  T t_var __attribute__((aligned(128))) = T(); // Over-aligned: the tmain checks match "align 128" on its alloca, load and store.
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> var(3);
+#pragma omp parallel master taskloop simd firstprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 10; ++i) { // s_arr and var are each named twice in the clause above.
+    vec[0] = t_var;
+    s_arr[0] = var;
+  }
+  return T();
+}
+
+int main() { // Body is chosen by the -DLAMBDA / -DBLOCKS macros from the RUN lines; with neither, the plain variant after #else is compiled.
+  static int sivar; // Function-local static; named in the firstprivate clause of every variant below.
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+// LAMBDA:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(
+// LAMBDA-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// LAMBDA-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// LAMBDA:       [[THEN]]
+// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// LAMBDA: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* [[G_ADDR_REF]]
+// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]]
+
+// LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// LAMBDA: [[SIVAR_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_ADDR_REF]]
+// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]]
+
+// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
+// LAMBDA:  call {{.*}}void @__kmpc_end_master(
+// LAMBDA-NEXT:  br label {{%?}}[[EXIT]]
+// LAMBDA:       [[EXIT]]
+// LAMBDA: ret
+#pragma omp parallel master taskloop simd firstprivate(g, sivar)
+  for (int i = 0; i < 10; ++i) {
+    // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]])
+    // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+    // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+    // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+    // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+    // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+    // LAMBDA: store double* %{{.+}}, double** %{{.+}},
+    // LAMBDA: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1)
+    g = 1;
+    sivar = 11;
+    // LAMBDA: store double 1.0{{.+}}, double* %{{.+}},
+    // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}},
+    // LAMBDA: call void [[INNER_LAMBDA]](%
+    // LAMBDA: ret
+    [&]() {
+      g = 2;
+      sivar = 22;
+    }();
+  }
+  }();
+  return 0;
+#elif defined(BLOCKS)
+  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS-LABEL: @main
+  // BLOCKS: call void {{%.+}}(i8
+  ^{
+  // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+  // BLOCKS:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(
+  // BLOCKS-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+  // BLOCKS-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+  // BLOCKS:       [[THEN]]
+  // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+  // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+  // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+  // BLOCKS: [[G_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 0
+  // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* [[G_ADDR_REF]]
+  // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]]
+
+  // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+  // BLOCKS: [[SIVAR_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+  // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SIVAR_ADDR_REF]]
+  // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]]
+  // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
+  // BLOCKS:  call {{.*}}void @__kmpc_end_master(
+  // BLOCKS-NEXT:  br label {{%?}}[[EXIT]]
+  // BLOCKS:       [[EXIT]]
+  // BLOCKS: ret
+#pragma omp parallel master taskloop simd firstprivate(g, sivar)
+  for (int i = 0; i < 10; ++i) { // NOTE(review): one negative pattern below references ISVAR, which looks like a typo for SIVAR - confirm before relying on it.
+    // BLOCKS: define {{.+}} void {{@.+}}(i8*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store double 2.0{{.+}}, double*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS-NOT: [[ISVAR]]{{[[^:word:]]}}
+    // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}*
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: ret
+
+    // BLOCKS: store double* %{{.+}}, double** %{{.+}},
+    // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}},
+    // BLOCKS: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1)
+    g = 1;
+    sivar = 11;
+    // BLOCKS: store double 1.0{{.+}}, double* %{{.+}},
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}},
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: call void {{%.+}}(i8
+    ^{
+      g = 2;
+      sivar = 22;
+    }();
+  }
+  }();
+  return 0;
+#else
+  S<double> ttt;
+  S<double> test(ttt);
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<double> s_arr[] = {1, 2};
+  S<double> var(3);
+#pragma omp parallel master taskloop simd firstprivate(var, t_var, s_arr, vec, s_arr, var, sivar)
+  for (int i = 0; i < 10; ++i) { // var and s_arr are each named twice in the clause above.
+    vec[0] = t_var;
+    s_arr[0] = var;
+    sivar = 33;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0,
+// CHECK: define i{{[0-9]+}} @main()
+// CHECK: alloca [[S_DOUBLE_TY]],
+// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]],
+
+// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]],
+
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// Store original variables in capture struct.
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store [2 x i32]* %{{.+}}, [2 x i32]** [[VEC_REF]],
+// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[T_VAR_VAL:%.+]] = load i32, i32* %{{.+}},
+// CHECK: store i32 [[T_VAR_VAL]], i32* [[T_VAR_REF]],
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* %{{.+}}, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]],
+// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]],
+// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 5
+// CHECK: [[SIVAR_VAL:%.+]] = load i32, i32* %{{.+}},
+// CHECK: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_REF]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         [[KMP_TASK_T_TY]] task_data;
+//         [[KMP_TASK_MAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]*
+
+// Fill kmp_task_t->shareds by copying from original capture argument.
+// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
+// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
+
+// Initialize kmp_task_t->privates from the original variables (plain stores for simple types, memcpy for arrays of simple types, copy constructors for classes).
+// Also copy address of private copy to the corresponding shareds reference.
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_MAIN_TY]]*
+
+// Constructors for s_arr and var.
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3
+// CHECK: load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_ADDR_REF]],
+// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]],
+// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 4
+// CHECK: [[VAR_REF:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[VAR_ADDR_REF]],
+// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}[[VAR_REF]],
+
+// t_var;
+// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_ADDR_REF]],
+// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]],
+
+// vec;
+// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
+// sivar;
+// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4
+// CHECK: [[SIVAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 5
+// CHECK: [[SIVAR:%.+]] = load i{{.+}}, i{{.+}}* [[SIVAR_ADDR_REF]],
+// CHECK: store i32 [[SIVAR]], i32* [[PRIVATE_SIVAR_REF]],
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
+
+// Start task.
+// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
+// CHECK:  call {{.*}}void @__kmpc_end_master(
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias %0, [[S_DOUBLE_TY]]** noalias %1, i32** noalias %2, [2 x [[S_DOUBLE_TY]]]** noalias %3, [2 x i32]** noalias %4, i32** noalias %5)
+// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]**
+// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0
+// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}},
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]],
+// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1
+// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}},
+// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]],
+// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2
+// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}},
+// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]],
+// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3
+// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
+// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]],
+// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4
+// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}},
+// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]],
+// CHECK: ret void
+
+// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1)
+
+// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*,
+// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*,
+// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*,
+// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
+// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
+
+// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]])
+
+// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]],
+// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
+// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]],
+// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
+// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]],
+
+// Privates actually are used.
+// CHECK-DAG: [[PRIV_VAR]]
+// CHECK-DAG: [[PRIV_T_VAR]]
+// CHECK-DAG: [[PRIV_S_ARR]]
+// CHECK-DAG: [[PRIV_VEC]]
+// CHECK-DAG: [[PRIV_SIVAR]]
+
+// CHECK: ret
+
+// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* %0, [[KMP_TASK_MAIN_TY]]* %1, i32 %2)
+// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1
+// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2
+// CHECK: br i1 %
+
+// CHECK: phi [[S_DOUBLE_TY]]*
+// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]*
+// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i32 1
+// CHECK: icmp eq [[S_DOUBLE_TY]]* %
+// CHECK: br i1 %
+
+// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1
+// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]*
+// CHECK: ret void
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32 %{{.+}}, [[KMP_TASK_MAIN_TY]]* noalias %{{.+}})
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+// CHECK: alloca [[S_INT_TY]],
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]],
+
+// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]],
+
+// Store original variables in capture struct.
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store [2 x i32]* %{{.+}}, [2 x i32]** [[VEC_REF]],
+// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: store i32* %{{.+}}, i32** [[T_VAR_REF]],
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: store [2 x [[S_INT_TY]]]* %{{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_REF]],
+// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: store [[S_INT_TY]]* %{{.+}}, [[S_INT_TY]]** [[VAR_REF]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         [[KMP_TASK_T_TY]] task_data;
+//         [[KMP_TASK_TMAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]*
+
+// Fill kmp_task_t->shareds by copying from original capture argument.
+// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
+// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
+
+// Initialize kmp_task_t->privates from the original variables (plain stores for simple types, memcpy for arrays of simple types, copy constructors for classes).
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[SHAREDS:%.+]] = bitcast i8* [[SHAREDS_REF]] to [[CAP_TMAIN_TY]]*
+
+// t_var;
+// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[T_VAR_REF:%.+]] = load i{{.+}}*, i{{.+}}** [[T_VAR_ADDR_REF]],
+// CHECK: [[T_VAR:%.+]] = load i{{.+}}, i{{.+}}* [[T_VAR_REF]], align 128
+// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], align 128
+
+// vec;
+// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: [[VEC_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 0
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
+// Constructors for s_arr and var.
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]],
+// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[VAR_ADDR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* [[SHAREDS]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]],
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
+
+// Start task.
+// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*))
+
+// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias %{{.+}}, i32** noalias %{{.+}}, [2 x i32]** noalias %{{.+}}, [2 x [[S_INT_TY]]]** noalias %{{.+}}, [[S_INT_TY]]** noalias %{{.+}})
+// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]**
+// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0
+// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}},
+// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]],
+// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1
+// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
+// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]],
+// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2
+// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}},
+// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]],
+// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3
+// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}},
+// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]],
+// CHECK: ret void
+
+// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1)
+// CHECK: alloca i32*,
+// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
+// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
+// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])
+// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
+// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
+// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]],
+// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]],
+
+// Privates actually are used.
+// CHECK-DAG: [[PRIV_VAR]]
+// CHECK-DAG: [[PRIV_T_VAR]]
+// CHECK-DAG: [[PRIV_S_ARR]]
+// CHECK-DAG: [[PRIV_VEC]]
+
+// CHECK: ret
+
+// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* %0, [[KMP_TASK_TMAIN_TY]]* %1, i32 %2)
+// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2
+// CHECK: br i1 %
+
+// CHECK: phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]*
+// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i32 1
+// CHECK: icmp eq [[S_INT_TY]]* %
+// CHECK: br i1 %
+
+// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3
+// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]*
+// CHECK: ret void
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+#endif
+#else
+// ARRAY-LABEL: array_func
+struct St { // Two-int aggregate with user-provided constructors and destructor.
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &) {} // Copy constructor has no member initializers, so a and b are not copied from the source.
+  ~St() {}
+};
+
+void array_func(int n, float a[n], St s[2]) { // a and s use array parameter syntax; the stores matched below take float** and %struct.St** operands.
+// ARRAY: call i8* @__kmpc_omp_task_alloc(
+// ARRAY: call void @__kmpc_taskloop(
+// ARRAY: store float** %{{.+}}, float*** %{{.+}},
+// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}},
+#pragma omp parallel master taskloop simd firstprivate(a, s)
+  for (int i = 0; i < 10; ++i)
+    ; // Empty loop body: only the firstprivate handling of a and s is exercised.
+}
+#endif
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_messages.cpp
new file mode 100644
index 000000000000..aa411e285a5f
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_messages.cpp
@@ -0,0 +1,335 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+
+typedef void **omp_allocator_handle_t;
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+void xxx(int argc) {
+  int fp; // expected-note {{initialize the variable 'fp' to silence this warning}}
+#pragma omp parallel master taskloop simd firstprivate(fp) // expected-warning {{variable 'fp' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ;
+}
+
+struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
+extern S1 a;
+class S2 {
+  mutable int a;
+
+public:
+  S2() : a(0) {}
+  S2(const S2 &s2) : a(s2.a) {}
+  static float S2s;
+  static const float S2sc;
+};
+const float S2::S2sc = 0;
+const S2 b;
+const S2 ba[5];
+class S3 {
+  int a;
+  S3 &operator=(const S3 &s3);
+
+public:
+  S3() : a(0) {} // expected-note 2 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}}
+  S3(S3 &s3) : a(s3.a) {} // expected-note 2 {{candidate constructor not viable: 1st argument ('const S3') would lose const qualifier}}
+};
+const S3 c;
+const S3 ca[5];
+extern const int f;
+class S4 {
+  int a;
+  S4();
+  S4(const S4 &s4); // expected-note 2 {{implicitly declared private here}}
+
+public:
+  S4(int v) : a(v) {}
+};
+class S5 {
+  int a;
+  S5(const S5 &s5) : a(s5.a) {} // expected-note 4 {{implicitly declared private here}}
+
+public:
+  S5() : a(0) {}
+  S5(int v) : a(v) {}
+};
+class S6 {
+  int a;
+  S6() : a(0) {}
+
+public:
+  S6(const S6 &s6) : a(s6.a) {}
+  S6(int v) : a(v) {}
+};
+
+S3 h;
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class I, class C>
+int foomain(int argc, char **argv) {
+  I e(4);
+  C g(5);
+  int i, z;
+  int &j = i;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate // expected-error {{expected '(' after 'firstprivate'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate() // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd allocate(omp_thread_mem_alloc: argc) firstprivate(argc) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'parallel master taskloop simd' directive}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(a, b) // expected-error {{firstprivate variable with incomplete type 'S1'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(z, e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(h) // expected-error {{threadprivate or thread local variable cannot be firstprivate}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+  {
+    int v = 0;
+    int i;
+#pragma omp parallel master taskloop simd firstprivate(i)
+    for (int k = 0; k < argc; ++k) {
+      i = k;
+      v += i;
+    }
+  }
+#pragma omp parallel shared(i)
+#pragma omp parallel private(i)
+#pragma omp parallel master taskloop simd firstprivate(j)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel private(i)
+#pragma omp parallel master taskloop simd firstprivate(i) // expected-note 2 {{defined as firstprivate}}
+  for (i = 0; i < argc; ++i) // expected-error 2 {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be firstprivate, predetermined as linear}}
+    foo();
+#pragma omp parallel reduction(+ : i)  // expected-note {{defined as reduction}}
+#pragma omp parallel master taskloop simd firstprivate(i) // expected-note {{defined as firstprivate}} expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be firstprivate, predetermined as linear}}
+    foo();
+  return 0;
+}
+
+void bar(S4 a[2]) {
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(a)
+  for (int i = 0; i < 2; ++i)
+    foo();
+}
+
+namespace A {
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x;
+}
+
+int main(int argc, char **argv) {
+  const int d = 5;
+  const int da[5] = {0};
+  S4 e(4);
+  S5 g(5);
+  S3 m;
+  S6 n(2);
+  int i;
+  int &j = i;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate // expected-error {{expected '(' after 'firstprivate'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate() // expected-error {{expected expression}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(S1) // expected-error {{'S1' does not refer to a value}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(a, b, c, d, f) // expected-error {{firstprivate variable with incomplete type 'S1'}} expected-error {{no matching constructor for initialization of 'S3'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(argv[1]) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(2 * 2) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(ba) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(ca) // expected-error {{no matching constructor for initialization of 'S3'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(da) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+  int xa;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(xa) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(S2::S2s) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(S2::S2sc) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd safelen(5)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(m) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(h) // expected-error {{threadprivate or thread local variable cannot be firstprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(xa), firstprivate(xa) // expected-error {{private variable cannot be firstprivate}} expected-note {{defined as private}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(i) // expected-note {{defined as firstprivate}}
+  for (i = 0; i < argc; ++i)    // expected-error {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be firstprivate, predetermined as linear}}
+    foo();
+#pragma omp parallel shared(xa)
+#pragma omp parallel master taskloop simd firstprivate(xa) // OK: may be firstprivate
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(j)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(g) firstprivate(g) // expected-error {{calling a private constructor of class 'S5'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(n) firstprivate(n) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+  {
+    int v = 0;
+    int i;
+#pragma omp parallel master taskloop simd firstprivate(i)
+    for (int k = 0; k < argc; ++k) {
+      i = k;
+      v += i;
+    }
+  }
+#pragma omp parallel private(i)
+#pragma omp parallel master taskloop simd firstprivate(i) // expected-note {{defined as firstprivate}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be firstprivate, predetermined as linear}}
+    foo();
+#pragma omp parallel reduction(+ : i) // expected-note {{defined as reduction}}
+#pragma omp parallel master taskloop simd firstprivate(i) //expected-error {{argument of a reduction clause of a parallel construct must not appear in a firstprivate clause on a task construct}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel master taskloop simd firstprivate(i) //expected-note {{defined as firstprivate}}
+  for (i = 0; i < argc; ++i) // expected-error {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be firstprivate, predetermined as linear}}
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be firstprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+  static int si;
+#pragma omp parallel master taskloop simd firstprivate(si) // OK
+  for (i = 0; i < argc; ++i)
+    si = i + 1;
+
+  return foomain<S4, S5>(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain<S4, S5>' requested here}}
+}
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_grainsize_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_grainsize_messages.cpp
new file mode 100644
index 000000000000..318d5b7e3230
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_grainsize_messages.cpp
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, class S> // expected-note {{declared here}}
+int tmain(T argc, S **argv) {
+  T z;
+  #pragma omp parallel master taskloop simd grainsize // expected-error {{expected '(' after 'grainsize'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize () // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc > 0 ? argv[1][0] : argv[2][argc] + z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (foobool(argc)), grainsize (true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'grainsize' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (S) // expected-error {{'S' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(0) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(-1) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(argc) num_tasks(argc) // expected-error {{'num_tasks' and 'grainsize' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'grainsize' clause is specified here}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  int z;
+  #pragma omp parallel master taskloop simd grainsize // expected-error {{expected '(' after 'grainsize'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize () // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc > 0 ? argv[1][0] : argv[2][argc] + z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (foobool(argc)), grainsize (true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'grainsize' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(0)  // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(-1) // expected-error {{argument to 'grainsize' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd grainsize(argc) num_tasks(argc) // expected-error {{'num_tasks' and 'grainsize' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'grainsize' clause is specified here}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return tmain(argc, argv);
+}

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
new file mode 100644
index 000000000000..9014ce378281
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp
@@ -0,0 +1,527 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLOOP -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LOOP %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLOOP -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#if !defined(ARRAY) && !defined(LOOP)
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {}
+  S(const S &s, T t = T()) : f(s.f + t) {}
+  operator T() { return T(); }
+  ~S() {}
+};
+
+volatile double g;
+
+// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* }
+// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double }
+// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32]
+// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]*, i{{[0-9]+}}* }
+// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 }
+// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x i32]*, i32*, [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* }
+// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] }
+// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] }
+template <typename T>
+T tmain() {
+  S<T> ttt;
+  S<T> test;
+  T t_var __attribute__((aligned(128))) = T();
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> var(3);
+#pragma omp parallel master taskloop simd lastprivate(t_var, vec, s_arr, s_arr, var, var)
+  for (int i = 0; i < 10; ++i) {
+    vec[0] = t_var;
+    s_arr[0] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0,
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+  // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+
+// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
+// LAMBDA: ret
+#pragma omp parallel master taskloop simd lastprivate(g, sivar)
+  for (int i = 0; i < 10; ++i) {
+    // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]])
+    // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+    // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+    // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+    // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+    // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+    // LAMBDA: store double* %{{.+}}, double** %{{.+}},
+    // LAMBDA: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1)
+    g = 1;
+    sivar = 11;
+    // LAMBDA: store double 1.0{{.+}}, double* %{{.+}},{{.*}}!llvm.access.group
+    // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}},{{.*}}!llvm.access.group
+    // LAMBDA: call void [[INNER_LAMBDA]](%
+    // LAMBDA: icmp ne i32 %{{.+}}, 0
+    // LAMBDA: br i1
+    // LAMBDA: load double, double* %{{.*}}
+    // LAMBDA: store volatile double %{{.*}}
+    // LAMBDA: load i32, i32* %{{.*}}
+    // LAMBDA: store i32 %{{.*}}
+    // LAMBDA: ret
+    [&]() {
+      g = 2;
+      sivar = 22;
+    }();
+  }
+  }();
+  return 0;
+#elif defined(BLOCKS)
+  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS-LABEL: @main
+  // BLOCKS: call void {{%.+}}(i8
+  ^{
+  // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+  // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+  // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+  // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY:%[^*]+]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
+  // BLOCKS: ret
+#pragma omp parallel master taskloop simd lastprivate(g, sivar)
+  for (int i = 0; i < 10; ++i) {
+    // BLOCKS: define {{.+}} void {{@.+}}(i8*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store double 2.0{{.+}}, double*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}*
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: ret
+
+    // BLOCKS: store double* %{{.+}}, double** %{{.+}},
+    // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}},
+    // BLOCKS: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1)
+    g = 1;
+    sivar = 11;
+    // BLOCKS: store double 1.0{{.+}}, double* %{{.+}},{{.*}}!llvm.access.group
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}},{{.*}}!llvm.access.group
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: call void {{%.+}}(i8
+    // BLOCKS: icmp ne i32 %{{.+}}, 0
+    // BLOCKS: br i1
+    // BLOCKS: load double, double* %
+    // BLOCKS: store volatile double %
+    // BLOCKS: load i32, i32* %
+    // BLOCKS: store i32 %
+    ^{
+      g = 2;
+      sivar = 22;
+    }();
+  }
+  }();
+  return 0;
+#else
+  S<double> ttt;
+  S<double> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<double> s_arr[] = {1, 2};
+  S<double> var(3);
+#pragma omp parallel master taskloop simd lastprivate(var, t_var, s_arr, vec, s_arr, var, sivar)
+  for (int i = 0; i < 10; ++i) {
+    vec[0] = t_var;
+    s_arr[0] = var;
+    sivar = 33;
+  }
+  return tmain<int>();
+#endif
+}
+
+// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0,
+// CHECK: define i{{[0-9]+}} @main()
+// CHECK: alloca [[S_DOUBLE_TY]],
+// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]],
+
+// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]])
+
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// Store original variables in capture struct.
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store [2 x i32]* [[VEC_ADDR:%.+]], [2 x i32]** [[VEC_REF]],
+// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: store i32* [[T_VAR_ADDR:%.+]], i32** [[T_VAR_REF]],
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[S_ARR_ADDR:%.+]], [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]],
+// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: store [[S_DOUBLE_TY]]* [[VAR_ADDR:%.+]], [[S_DOUBLE_TY]]** [[VAR_REF]],
+// CHECK: [[SIVAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 4
+// CHECK: store i{{[0-9]+}}* [[SIVAR:%.+]], i{{[0-9]+}}** [[SIVAR_REF]],
+
+// Allocate task.
+// Returns struct [[KMP_TASK_MAIN_TY]] {
+//         [[KMP_TASK_T_TY]] task_data;
+//         [[PRIVATES_MAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC:%.+]], i32 [[GTID:%.+]], i32 9, i64 120, i64 40, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]*
+
+// Fill kmp_task_t->shareds by copying from original capture argument.
+// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
+// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 40, i1 false)
+
+// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
+// Also copy address of private copy to the corresponding shareds reference.
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+
+// Constructors for s_arr and var.
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%[^,]+]])
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
+
+// t_var;
+// vec;
+// sivar;
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
+
+// Start task.
+// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
+// CHECK:  call {{.*}}void @__kmpc_end_master(
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias %0, [[S_DOUBLE_TY]]** noalias %1, i32** noalias %2, [2 x [[S_DOUBLE_TY]]]** noalias %3, [2 x i32]** noalias %4, i32** noalias %5)
+// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]**
+// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0
+// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}},
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]],
+// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1
+// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}},
+// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]],
+// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2
+// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}},
+// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]],
+// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3
+// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
+// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]],
+// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4
+// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}},
+// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]],
+// CHECK: ret void
+
+// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1)
+
+// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*,
+// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*,
+// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*,
+// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
+// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
+
+// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]])
+
+// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]],
+// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
+// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]],
+// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
+// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]],
+
+// Privates are actually used.
+// CHECK-DAG: [[PRIV_VAR]]
+// CHECK-DAG: [[PRIV_T_VAR]]
+// CHECK-DAG: [[PRIV_S_ARR]]
+// CHECK-DAG: [[PRIV_VEC]]
+// CHECK-DAG: [[PRIV_SIVAR]]
+
+// CHECK:     icmp ne i32 %{{.+}}, 0
+// CHECK-NEXT: br i1
+// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
+// CHECK: bitcast [[S_DOUBLE_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
+// CHECK: load i32, i32* %
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %
+// CHECK: phi [[S_DOUBLE_TY]]*
+// CHECK: phi [[S_DOUBLE_TY]]*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
+// CHECK: icmp eq [[S_DOUBLE_TY]]* %
+// CHECK-NEXT: br i1
+// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
+// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
+// CHECK: load i32, i32* %
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: br label
+// CHECK: ret
+
+// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* %0, [[KMP_TASK_MAIN_TY]]* %1, i32 %2)
+// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 8
+// CHECK: load i32, i32* %
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1
+// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2
+// CHECK: br label %
+
+// CHECK: phi [[S_DOUBLE_TY]]*
+// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]*
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1
+// CHECK: icmp eq [[S_DOUBLE_TY]]* %
+// CHECK: br i1 %
+
+// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1
+// CHECK: call {{.*}} [[S_DOUBLE_TY_CONSTR]]([[S_DOUBLE_TY]]*
+// CHECK: ret void
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: call {{.*}} @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call {{.*}} @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+// CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: alloca [[S_INT_TY]],
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]],
+
+// CHECK: call {{.*}} [[S_INT_TY_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+
+// Store original variables in capture struct.
+// CHECK: [[VEC_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: store [2 x i32]* [[VEC_ADDR:%.+]], [2 x i32]** [[VEC_REF]],
+// CHECK: [[T_VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: store i32* [[T_VAR_ADDR:%.+]], i32** [[T_VAR_REF]],
+// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_ADDR:%.+]], [2 x [[S_INT_TY]]]** [[S_ARR_REF]],
+// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 3
+// CHECK: store [[S_INT_TY]]* [[VAR_ADDR:%.+]], [[S_INT_TY]]** [[VAR_REF]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         [[KMP_TASK_T_TY]] task_data;
+//         [[PRIVATES_TMAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID:%.+]], i32 9, i64 256, i64 32, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]*
+
+// Fill kmp_task_t->shareds by copying from original capture argument.
+// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]],
+// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 32, i1 false)
+
+// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+
+// t_var;
+// vec;
+
+// Constructors for s_arr and var.
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%[^,]+]])
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
+
+// Start task.
+// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*))
+
+// No destructors must be called for private copies of s_arr and var.
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+
+// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias %0, i32** noalias %1, [2 x i32]** noalias %2, [2 x [[S_INT_TY]]]** noalias %3, [[S_INT_TY]]** noalias %4)
+// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]**
+// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0
+// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}},
+// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]],
+// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1
+// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
+// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]],
+// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2
+// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}},
+// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]],
+// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3
+// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}},
+// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]],
+// CHECK: ret void
+
+// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1)
+// CHECK: alloca i32*,
+// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
+// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
+// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])
+// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
+// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
+// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]],
+// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]],
+
+// Privates are actually used.
+// CHECK-DAG: [[PRIV_VAR]]
+// CHECK-DAG: [[PRIV_T_VAR]]
+// CHECK-DAG: [[PRIV_S_ARR]]
+// CHECK-DAG: [[PRIV_VEC]]
+
+// CHECK:     icmp ne i32 %{{.+}}, 0
+// CHECK-NEXT: br i1
+// CHECK: load i32, i32* %
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
+// CHECK: bitcast [2 x i32]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %
+// CHECK: phi [[S_INT_TY]]*
+// CHECK: phi [[S_INT_TY]]*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
+// CHECK: icmp eq [[S_INT_TY]]* %
+// CHECK-NEXT: br i1
+// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
+// CHECK: bitcast [[S_INT_TY]]* %{{.+}} to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align {{[0-9]+}} %
+// CHECK: br label
+// CHECK: ret
+
+// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* %0, [[KMP_TASK_TMAIN_TY]]* %1, i32 %2)
+// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* %{{.+}}, i32 0, i32 8
+// CHECK: load i32, i32* %
+// CHECK: store i32 %{{.+}}, i32* %
+// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2
+// CHECK: br label %
+
+// CHECK: phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]*
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1
+// CHECK: icmp eq [[S_INT_TY]]* %
+// CHECK: br i1 %
+
+// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3
+// CHECK: call {{.*}} [[S_INT_TY_CONSTR]]([[S_INT_TY]]*
+// CHECK: ret void
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+#endif
+#elif defined(ARRAY)
+// ARRAY-LABEL: array_func
+struct St { // class type with user-provided constructors and destructor; element type of array_func's parameter s
+  int a, b;
+  St() : a(0), b(0) {}
+  St(const St &) {} // copy constructor does not initialize a or b
+  ~St() {}
+};
+
+void array_func(int n, float a[n], St s[2]) { // both array parameters appear in the lastprivate clause below
+// ARRAY: call i8* @__kmpc_omp_task_alloc(
+// ARRAY: call void @__kmpc_taskloop(
+// ARRAY: store float** %{{.+}}, float*** %{{.+}},
+// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}},
+// ARRAY: icmp ne i32 %{{.+}}, 0
+// ARRAY: store float* %{{.+}}, float** %{{.+}},
+// ARRAY: store %struct.St* %{{.+}}, %struct.St** %{{.+}},
+#pragma omp parallel master taskloop simd lastprivate(a, s)
+  for (int i = 0; i < 10; ++i)
+    ; // empty loop body
+}
+#else
+
+// LOOP-LABEL: loop
+void loop() { // directive with two variables in a linear clause
+// LOOP: call i8* @__kmpc_omp_task_alloc(
+// LOOP: call void @__kmpc_taskloop(
+  int i, j; // i is the iteration variable; j is only incremented in the body
+#pragma omp parallel master taskloop simd linear(i, j)
+  for (i = 0; i < 10; ++i)
+    ++j;
+}
+#endif
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_messages.cpp
new file mode 100644
index 000000000000..38e083e68b2a
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_messages.cpp
@@ -0,0 +1,299 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+
+typedef void **omp_allocator_handle_t; // allocator handle type declared locally; this test file includes no headers
+extern const omp_allocator_handle_t omp_default_mem_alloc; // named in the allocate clause checks in foomain
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc; // named in the allocate clause that triggers the 'thread' trait warning in foomain
+
+void foo() { // empty helper called from the loop bodies in bar and main
+}
+
+bool foobool(int argc) { // returns argc converted to bool
+  return argc; // implicit int-to-bool conversion: true when argc is non-zero
+}
+
+struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
+extern S1 a; // object of the incomplete type S1, named in the lastprivate(a, ...) checks
+class S2 {
+  mutable int a; // mutable member; the const S2 objects b and ba carry no const-qualified diagnostic annotation in main
+
+public:
+  S2() : a(0) {}
+  S2(S2 &s2) : a(s2.a) {} // copy constructor taking a non-const reference
+  const S2 &operator =(const S2&) const; // const-qualified assignment, declared only
+  S2 &operator =(const S2&); // non-const assignment, declared only
+  static float S2s; // expected-note {{static data member is predetermined as shared}}
+  static const float S2sc; // expected-note {{'S2sc' declared here}}
+};
+const float S2::S2sc = 0; // out-of-class definition of the static const member
+const S2 b; // const object of a class with a mutable member
+const S2 ba[5]; // const array of the same class, named in lastprivate(ba) in main
+class S3 { // copy assignment is private; main names S3 objects in lastprivate to trigger the access error
+  int a;
+  S3 &operator=(const S3 &s3); // expected-note 2 {{implicitly declared private here}}
+
+public:
+  S3() : a(0) {}
+  S3(S3 &s3) : a(s3.a) {} // copy constructor taking a non-const reference
+};
+const S3 c;         // expected-note {{'c' defined here}}
+const S3 ca[5];     // expected-note {{'ca' defined here}}
+extern const int f; // expected-note {{'f' declared here}}
+class S4 { // default and copy constructors are private; only S4(int) is public
+  int a;
+  S4();             // expected-note 3 {{implicitly declared private here}}
+  S4(const S4 &s4); // private copy constructor, declared only
+
+public:
+  S4(int v) : a(v) {}
+};
+class S5 { // private default constructor, public copy and int constructors
+  int a;
+  S5() : a(0) {} // expected-note {{implicitly declared private here}}
+
+public:
+  S5(const S5 &s5) : a(s5.a) {}
+  S5(int v) : a(v) {}
+};
+class S6 { // same member layout as S5 but without a diagnostic annotation on the private constructor
+  int a;
+  S6() : a(0) {} // private default constructor; main only names an S6 object with both lastprivate and firstprivate
+
+public:
+  S6(const S6 &s6) : a(s6.a) {}
+  S6(int v) : a(v) {}
+};
+
+S3 h; // global made threadprivate by the pragma on the next line
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class I, class C> // C is not referenced in the body
+int foomain(int argc, char **argv) { // template copy of the clause checks; main instantiates it as foomain<S4, S5>
+  I e(4);
+  I g(5);
+  int i, z;
+  int &j = i; // reference to i, named in lastprivate(j) further down
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate // expected-error {{expected '(' after 'lastprivate'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate() // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(a, b) // expected-error {{lastprivate variable with incomplete type 'S1'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(z, e, g) // expected-error 2 {{calling a private constructor of class 'S4'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(h) // expected-error {{threadprivate or thread local variable cannot be lastprivate}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+  {
+    int v = 0;
+    int i; // block-local i that hides the function-level i inside this region
+#pragma omp parallel master taskloop simd allocate(omp_thread_mem_alloc: i) lastprivate(i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'parallel master taskloop simd' directive}}
+    for (int k = 0; k < argc; ++k) {
+      i = k;
+      v += i;
+    }
+  }
+#pragma omp parallel shared(i)
+#pragma omp parallel private(i)
+#pragma omp parallel master taskloop simd lastprivate(j)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+  return 0;
+}
+
+void bar(S4 a[2]) { // array parameter named in lastprivate; no diagnostics are annotated in this function
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(a)
+  for (int i = 0; i < 2; ++i)
+    foo();
+}
+
+namespace A {
+double x; // made threadprivate by the pragma on the next line
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x; // makes A::x nameable as B::x, which main uses in a lastprivate clause
+}
+
+int main(int argc, char **argv) { // non-template lastprivate clause checks; ends by instantiating foomain<S4, S5>
+  const int d = 5;       // expected-note {{'d' defined here}}
+  const int da[5] = {0}; // expected-note {{'da' defined here}}
+  S4 e(4);
+  S5 g(5);
+  S3 m;
+  S6 n(2);
+  int i, z;
+  int &j = i; // reference to i, named in lastprivate(j) further down
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate // expected-error {{expected '(' after 'lastprivate'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate() // expected-error {{expected expression}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argc, z)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(S1) // expected-error {{'S1' does not refer to a value}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(a, b, c, d, f) // expected-error {{lastprivate variable with incomplete type 'S1'}} expected-error 1 {{const-qualified variable without mutable fields cannot be lastprivate}} expected-error 2 {{const-qualified variable cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(argv[1]) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(2 * 2) // expected-error {{expected variable name}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(ba)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(ca) // expected-error {{const-qualified variable without mutable fields cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(da) // expected-error {{const-qualified variable cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+  int xa;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(xa) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(S2::S2s) // expected-error {{shared variable cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(S2::S2sc) // expected-error {{const-qualified variable cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd safelen(5)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(h) // expected-error {{threadprivate or thread local variable cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(B::x) // expected-error {{threadprivate or thread local variable cannot be lastprivate}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(xa), lastprivate(xa) // expected-error {{private variable cannot be lastprivate}} expected-note {{defined as private}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd linear(i)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel private(xa)
+#pragma omp parallel master taskloop simd lastprivate(xa)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel reduction(+ : xa)
+#pragma omp parallel master taskloop simd lastprivate(xa)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(j)
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd firstprivate(m) lastprivate(m) // expected-error {{'operator=' is a private member of 'S3'}}
+  for (i = 0; i < argc; ++i)
+    foo();
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(n) firstprivate(n) // OK
+  for (i = 0; i < argc; ++i)
+    foo();
+  static int si; // function-local static, named in the lastprivate clause below
+#pragma omp parallel master taskloop simd lastprivate(si) // OK
+  for (i = 0; i < argc; ++i)
+    si = i + 1;
+  return foomain<S4, S5>(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain<S4, S5>' requested here}}
+}

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_linear_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_linear_messages.cpp
new file mode 100644
index 000000000000..ede683fa3729
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_linear_messages.cpp
@@ -0,0 +1,268 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+
+typedef void **omp_allocator_handle_t;
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+void xxx(int argc) {
+  int i, lin, step; // expected-note {{initialize the variable 'lin' to silence this warning}} expected-note {{initialize the variable 'step' to silence this warning}}
+#pragma omp parallel master taskloop simd linear(i, lin : step) // expected-warning {{variable 'lin' is uninitialized when used here}} expected-warning {{variable 'step' is uninitialized when used here}}
+  for (i = 0; i < 10; ++i)
+    ;
+}
+
+namespace X {
+  int x;
+};
+
+struct B {
+  static int ib; // expected-note {{'B::ib' declared here}}
+  static int bfoo() { return 8; }
+};
+
+int bfoo() { return 4; }
+
+int z;
+const int C1 = 1;
+const int C2 = 2;
+void test_linear_colons()
+{
+  int B = 0;
+  #pragma omp parallel master taskloop simd linear(B:bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{unexpected ':' in nested name specifier; did you mean '::'}}
+  #pragma omp parallel master taskloop simd linear(B::ib:B:bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{use of undeclared identifier 'ib'; did you mean 'B::ib'}}
+  #pragma omp parallel master taskloop simd linear(B:ib)
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{unexpected ':' in nested name specifier; did you mean '::'?}}
+  #pragma omp parallel master taskloop simd linear(z:B:ib)
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd linear(B:B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd linear(X::x : ::z)
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd linear(B,::z, X::x)
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd linear(::z)
+  for (int i = 0; i < 10; ++i) ;
+  // expected-error@+1 {{expected variable name}}
+  #pragma omp parallel master taskloop simd linear(B::bfoo())
+  for (int i = 0; i < 10; ++i) ;
+  #pragma omp parallel master taskloop simd linear(B::ib,B:C1+C2)
+  for (int i = 0; i < 10; ++i) ;
+}
+
+template<int L, class T, class N> T test_template(T* arr, N num) {
+  N i;
+  T sum = (T)0;
+  T ind2 = - num * L; // expected-note {{'ind2' defined here}}
+  // expected-error@+1 {{argument of a linear clause should be of integral or pointer type}}
+#pragma omp parallel master taskloop simd linear(ind2:L)
+  for (i = 0; i < num; ++i) {
+    T cur = arr[(int)ind2];
+    ind2 += L;
+    sum += cur;
+  }
+  return T();
+}
+
+template<int LEN> int test_warn() {
+  int ind2 = 0;
+  // expected-warning@+1 {{zero linear step (ind2 should probably be const)}}
+  #pragma omp parallel master taskloop simd linear(ind2:LEN)
+  for (int i = 0; i < 100; i++) {
+    ind2 += LEN;
+  }
+  return ind2;
+}
+
+struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
+extern S1 a;
+class S2 {
+  mutable int a;
+public:
+  S2():a(0) { }
+};
+const S2 b; // expected-note 2 {{'b' defined here}}
+const S2 ba[5];
+class S3 {
+  int a;
+public:
+  S3():a(0) { }
+};
+const S3 ca[5];
+class S4 {
+  int a;
+  S4();
+public:
+  S4(int v):a(v) { }
+};
+class S5 {
+  int a;
+  S5():a(0) {}
+public:
+  S5(int v):a(v) { }
+};
+
+S3 h;
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template<class I, class C> int foomain(I argc, C **argv) {
+  I e(4);
+  I g(5);
+  int i, z;
+  int &j = i;
+  #pragma omp parallel master taskloop simd linear // expected-error {{expected '(' after 'linear'}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (val // expected-error {{use of undeclared identifier 'val'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (uval( // expected-error {{expected expression}} expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (ref() // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (foo() // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear () // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (val argc // expected-error {{use of undeclared identifier 'val'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (val(argc, // expected-error {{expected expression}} expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc : 5) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+2 {{linear variable with incomplete type 'S1'}}
+  // expected-error@+1 {{argument of a linear clause should be of integral or pointer type, not 'S2'}}
+  #pragma omp parallel master taskloop simd linear (val(a, b):B::ib)
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(ref(e, g)) // expected-error 2 {{variable of non-reference type 'int' can be used only with 'val' modifier, but used with 'ref'}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(h, z) // expected-error {{threadprivate or thread local variable cannot be linear}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(uval(i)) // expected-error {{variable of non-reference type 'int' can be used only with 'val' modifier, but used with 'uval'}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel
+  {
+    int v = 0;
+    int i;
+    #pragma omp parallel master taskloop simd allocate(omp_thread_mem_alloc: v) linear(v:i) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'parallel master taskloop simd' directive}}
+    for (int k = 0; k < argc; ++k) { i = k; v += i; }
+  }
+  #pragma omp parallel master taskloop simd linear(ref(j))
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(uval(j))
+  for (int k = 0; k < argc; ++k) ++k;
+  int v = 0;
+  #pragma omp parallel master taskloop simd linear(v:j)
+  for (int k = 0; k < argc; ++k) { ++k; v += j; }
+  #pragma omp parallel master taskloop simd linear(i)
+  for (int k = 0; k < argc; ++k) ++k;
+  return 0;
+}
+
+namespace A {
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace C {
+using A::x;
+}
+
+void linear_modifiers(int argc) {
+  int &f = argc;
+  #pragma omp parallel master taskloop simd linear(f)
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(val(f))
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(uval(f))
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(ref(f))
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(foo(f)) // expected-error {{expected one of 'ref', val' or 'uval' modifiers}}
+  for (int k = 0; k < argc; ++k) ++k;
+}
+
+int f;
+int main(int argc, char **argv) {
+  double darr[100];
+  // expected-note@+1 {{in instantiation of function template specialization 'test_template<-4, double, int>' requested here}}
+  test_template<-4>(darr, 4);
+  // expected-note@+1 {{in instantiation of function template specialization 'test_warn<0>' requested here}}
+  test_warn<0>();
+
+  S4 e(4); // expected-note {{'e' defined here}}
+  S5 g(5); // expected-note {{'g' defined here}}
+  int i, z;
+  int &j = i;
+  #pragma omp parallel master taskloop simd linear(f) linear(f) // expected-error {{linear variable cannot be linear}} expected-note {{defined as linear}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear // expected-error {{expected '(' after 'linear'}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear () // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (val // expected-error {{use of undeclared identifier 'val'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (ref()) // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (foo()) // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argc, z)
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+2 {{linear variable with incomplete type 'S1'}}
+  // expected-error@+1 {{argument of a linear clause should be of integral or pointer type, not 'S2'}}
+  #pragma omp parallel master taskloop simd linear(a, b)
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear (argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k) ++k;
+  // expected-error@+2 {{argument of a linear clause should be of integral or pointer type, not 'S4'}}
+  // expected-error@+1 {{argument of a linear clause should be of integral or pointer type, not 'S5'}}
+  #pragma omp parallel master taskloop simd linear(val(e, g))
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(h, C::x) // expected-error 2 {{threadprivate or thread local variable cannot be linear}}
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel
+  {
+    int i;
+    #pragma omp parallel master taskloop simd linear(val(i))
+    for (int k = 0; k < argc; ++k) ++k;
+    #pragma omp parallel master taskloop simd linear(uval(i) : 4) // expected-error {{variable of non-reference type 'int' can be used only with 'val' modifier, but used with 'uval'}}
+    for (int k = 0; k < argc; ++k) { ++k; i += 4; }
+  }
+  #pragma omp parallel master taskloop simd linear(ref(j))
+  for (int k = 0; k < argc; ++k) ++k;
+  #pragma omp parallel master taskloop simd linear(i)
+  for (int k = 0; k < argc; ++k) ++k;
+
+  foomain<int,char>(argc,argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
+  return 0;
+}
+

diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_loop_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_loop_messages.cpp
new file mode 100644
index 000000000000..71b458dc2329
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_loop_messages.cpp
@@ -0,0 +1,736 @@
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp4 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp5 %s -Wuninitialized
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp4 %s -Wuninitialized
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -fexceptions -fcxx-exceptions -verify=expected,omp5 %s -Wuninitialized
+
+class S {
+  int a;
+  S() : a(0) {}
+
+public:
+  S(int v) : a(v) {}
+  S(const S &s) : a(s.a) {}
+};
+
+static int sii;
+// expected-note@+1 {{defined as threadprivate or thread local}}
+#pragma omp threadprivate(sii)
+static int globalii;
+
+// Currently, we cannot use "0" for global register variables.
+// register int reg0 __asm__("0");
+int reg0;
+
+int test_iteration_spaces() {
+  const int N = 100;
+  float a[N], b[N], c[N];
+  int ii, jj, kk;
+  float fii;
+  double dii;
+  register int reg; // expected-warning {{'register' storage class specifier is deprecated}}
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; i += 1) {
+    c[i] = a[i] + b[i];
+  }
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (char i = 0; i < 10; i++) {
+    c[i] = a[i] + b[i];
+  }
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (char i = 0; i < 10; i += '\1') {
+    c[i] = a[i] + b[i];
+  }
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (long long i = 0; i < 10; i++) {
+    c[i] = a[i] + b[i];
+  }
+#pragma omp parallel
+// expected-error@+2 {{expression must have integral or unscoped enumeration type, not 'double'}}
+#pragma omp parallel master taskloop simd
+  for (long long i = 0; i < 10; i += 1.5) {
+    c[i] = a[i] + b[i];
+  }
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (long long i = 0; i < 'z'; i += 1u) {
+    c[i] = a[i] + b[i];
+  }
+#pragma omp parallel
+// expected-error@+2 {{variable must be of integer or random access iterator type}}
+#pragma omp parallel master taskloop simd
+  for (float fi = 0; fi < 10.0; fi++) {
+    c[(int)fi] = a[(int)fi] + b[(int)fi];
+  }
+#pragma omp parallel
+// expected-error@+2 {{variable must be of integer or random access iterator type}}
+#pragma omp parallel master taskloop simd
+  for (double fi = 0; fi < 10.0; fi++) {
+    c[(int)fi] = a[(int)fi] + b[(int)fi];
+  }
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (int &ref = ii; ref < 10; ref++) {
+  }
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (int i; i < 10; i++)
+    c[i] = a[i];
+
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0, j = 0; i < 10; ++i)
+    c[i] = a[i];
+
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (; ii < 10; ++ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-warning@+3 {{expression result unused}}
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (ii + 1; ii < 10; ++ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (c[ii] = 0; ii < 10; ++ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// Ok to skip parenthesises.
+#pragma omp parallel master taskloop simd
+  for (((ii)) = 0; ii < 10; ++ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i; i++)
+    c[i] = a[i];
+
+#pragma omp parallel
+// omp4-error@+3 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+3 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'i'}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0; jj < kk; ii++)
+    c[i] = a[i];
+
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0; !!i; i++)
+    c[i] = a[i];
+
+// Ok
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i != 1; i++)
+    c[i] = a[i];
+
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'i'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0;; i++)
+    c[i] = a[i];
+
+#pragma omp parallel
+// Ok.
+#pragma omp parallel master taskloop simd
+  for (int i = 11; i > 10; i--)
+    c[i] = a[i];
+
+#pragma omp parallel
+// Ok.
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; ++i)
+    c[i] = a[i];
+
+#pragma omp parallel
+// Ok.
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ++ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ++jj)
+    c[ii] = a[jj];
+
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ++++ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// Ok but undefined behavior (in general, cannot check that incr
+// is really loop-invariant).
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii = ii + ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{expression must have integral or unscoped enumeration type, not 'float'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii = ii + 1.0f)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// Ok - step was converted to integer type.
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii = ii + (int)1.1f)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; jj = ii + 2)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-warning@+3 {{relational comparison result unused}}
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii<10; jj> kk + 2)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10;)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-warning@+3 {{expression result unused}}
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; !ii)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii ? ++ii : ++jj)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'ii'}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii = ii < 10)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii = ii + 0)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; ii = ii + (int)(0.8 - 0.45))
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; (ii) < 10; ii -= 25)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; (ii < 10); ii -= 0)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii > 10; (ii += 0))
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; ii < 10; (ii) = (1 - 1) + (ii))
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for ((ii = 0); ii > 10; (ii -= 0))
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'ii' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (ii = 0; (ii < 10); (ii -= 0))
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-note@+2  {{defined as firstprivate}}
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be firstprivate, predetermined as linear}}
+#pragma omp parallel master taskloop simd firstprivate(ii)
+  for (ii = 0; ii < 10; ii++)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+// expected-error@+1 {{unexpected OpenMP clause 'in_reduction' in directive '#pragma omp parallel master taskloop simd'}}
+#pragma omp parallel master taskloop simd in_reduction(+:ii)
+  for (ii = 0; ii < 10; ii++)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+#pragma omp parallel master taskloop simd linear(ii)
+  for (ii = 0; ii < 10; ii++)
+    c[ii] = a[ii];
+
+#pragma omp parallel
+  {
+// expected-error@+2 {{loop iteration variable in the associated loop of 'omp parallel master taskloop simd' directive may not be threadprivate or thread local, predetermined as linear}}
+#pragma omp parallel master taskloop simd
+    for (sii = 0; sii < 10; sii += 1)
+      c[sii] = a[sii];
+  }
+
+#pragma omp parallel
+  {
+#pragma omp parallel master taskloop simd
+    for (reg0 = 0; reg0 < 10; reg0 += 1)
+      c[reg0] = a[reg0];
+  }
+
+#pragma omp parallel
+  {
+#pragma omp parallel master taskloop simd
+    for (reg = 0; reg < 10; reg += 1)
+      c[reg] = a[reg];
+  }
+
+#pragma omp parallel
+  {
+#pragma omp parallel master taskloop simd
+    for (globalii = 0; globalii < 10; globalii += 1)
+      c[globalii] = a[globalii];
+  }
+
+#pragma omp parallel
+  {
+#pragma omp parallel master taskloop simd collapse(2)
+    for (ii = 0; ii < 10; ii += 1)
+    for (globalii = 0; globalii < 10; globalii += 1)
+      c[globalii] += a[globalii] + ii;
+  }
+
+#pragma omp parallel
+// omp4-error@+2 {{statement after '#pragma omp parallel master taskloop simd' must be a for loop}}
+#pragma omp parallel master taskloop simd
+  for (auto &item : a) {
+    item = item + 1;
+  }
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'i' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (unsigned i = 9; i < 10; i--) {
+    c[i] = a[i] + b[i];
+  }
+
+  int(*lb)[4] = nullptr;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (int(*p)[4] = lb; p < lb + 8; ++p) {
+  }
+
+#pragma omp parallel
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (int a{0}; a < 10; ++a) {
+  }
+
+  return 0;
+}
+
+// Iterators allowed in openmp for-loops.
+namespace std {
+struct random_access_iterator_tag {};
+template <class Iter>
+struct iterator_traits {
+  typedef typename Iter::difference_type difference_type;
+  typedef typename Iter::iterator_category iterator_category;
+};
+template <class Iter>
+typename iterator_traits<Iter>::difference_type
+distance(Iter first, Iter last) { return first - last; }
+}
+class Iter0 {
+public:
+  Iter0() {}
+  Iter0(const Iter0 &) {}
+  Iter0 operator++() { return *this; }
+  Iter0 operator--() { return *this; }
+  bool operator<(Iter0 a) { return true; }
+};
+// expected-note@+2 {{candidate function not viable: no known conversion from 'GoodIter' to 'Iter0' for 1st argument}}
+// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'Iter0' for 1st argument}}
+int operator-(Iter0 a, Iter0 b) { return 0; }
+class Iter1 {
+public:
+  Iter1(float f = 0.0f, double d = 0.0) {}
+  Iter1(const Iter1 &) {}
+  Iter1 operator++() { return *this; }
+  Iter1 operator--() { return *this; }
+  bool operator<(Iter1 a) { return true; }
+  bool operator>=(Iter1 a) { return false; }
+};
+class GoodIter {
+public:
+  GoodIter() {}
+  GoodIter(const GoodIter &) {}
+  GoodIter(int fst, int snd) {}
+  GoodIter &operator=(const GoodIter &that) { return *this; }
+  GoodIter &operator=(const Iter0 &that) { return *this; }
+  GoodIter &operator+=(int x) { return *this; }
+  GoodIter &operator-=(int x) { return *this; }
+  explicit GoodIter(void *) {}
+  GoodIter operator++() { return *this; }
+  GoodIter operator--() { return *this; }
+  bool operator!() { return true; }
+  bool operator<(GoodIter a) { return true; }
+  bool operator<=(GoodIter a) { return true; }
+  bool operator>=(GoodIter a) { return false; }
+  typedef int difference_type;
+  typedef std::random_access_iterator_tag iterator_category;
+};
+// expected-note@+2 {{candidate function not viable: no known conversion from 'const Iter0' to 'GoodIter' for 2nd argument}}
+// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'GoodIter' for 1st argument}}
+int operator-(GoodIter a, GoodIter b) { return 0; }
+// expected-note@+1 3 {{candidate function not viable: requires single argument 'a', but 2 arguments were provided}}
+GoodIter operator-(GoodIter a) { return a; }
+// expected-note@+2 {{candidate function not viable: no known conversion from 'const Iter0' to 'int' for 2nd argument}}
+// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'GoodIter' for 1st argument}}
+GoodIter operator-(GoodIter a, int v) { return GoodIter(); }
+// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter0' to 'GoodIter' for 1st argument}}
+GoodIter operator+(GoodIter a, int v) { return GoodIter(); }
+// expected-note@+2 {{candidate function not viable: no known conversion from 'GoodIter' to 'int' for 1st argument}}
+// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter1' to 'int' for 1st argument}}
+GoodIter operator-(int v, GoodIter a) { return GoodIter(); }
+// expected-note@+1 2 {{candidate function not viable: no known conversion from 'Iter0' to 'int' for 1st argument}}
+GoodIter operator+(int v, GoodIter a) { return GoodIter(); }
+
+int test_with_random_access_iterator() {
+  GoodIter begin, end;
+  Iter0 begin0, end0;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I < end; ++I)
+    ++I;
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter &I = begin; I < end; ++I)
+    ++I;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I >= end; --I)
+    ++I;
+#pragma omp parallel
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I(begin); I < end; ++I)
+    ++I;
+#pragma omp parallel
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I(nullptr); I < end; ++I)
+    ++I;
+#pragma omp parallel
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I(0); I < end; ++I)
+    ++I;
+#pragma omp parallel
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I(1, 2); I < end; ++I)
+    ++I;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (begin = GoodIter(0); begin < end; ++begin)
+    ++begin;
+// expected-error@+4 {{invalid operands to binary expression ('GoodIter' and 'const Iter0')}}
+// expected-error@+3 {{could not calculate number of iterations calling 'operator-' with upper and lower loop bounds}}
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (begin = begin0; begin < end; ++begin)
+    ++begin;
+#pragma omp parallel
+// expected-error@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (++begin; begin < end; ++begin)
+    ++begin;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (begin = end; begin < end; ++begin)
+    ++begin;
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'I'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'I'}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I - I; ++I)
+    ++I;
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'I'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'I'}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; begin < end; ++I)
+    ++I;
+#pragma omp parallel
+// omp4-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', or '>=') of loop variable 'I'}} omp5-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'I'}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; !I; ++I)
+    ++I;
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I >= end; I = I + 1)
+    ++I;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I >= end; I = I - 1)
+    ++I;
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'I'}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I >= end; I = -I)
+    ++I;
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I >= end; I = 2 + I)
+    ++I;
+#pragma omp parallel
+// expected-error@+2 {{increment clause of OpenMP for loop must perform simple addition or subtraction on loop variable 'I'}}
+#pragma omp parallel master taskloop simd
+  for (GoodIter I = begin; I >= end; I = 2 - I)
+    ++I;
+// In the following example, we cannot update the loop variable using '+='
+// expected-error@+3 {{invalid operands to binary expression ('Iter0' and 'int')}}
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (Iter0 I = begin0; I < end0; ++I)
+    ++I;
+#pragma omp parallel
+// Initializer is constructor without params.
+// expected-error@+3 {{invalid operands to binary expression ('Iter0' and 'int')}}
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (Iter0 I; I < end0; ++I)
+    ++I;
+  Iter1 begin1, end1;
+// expected-error@+4 {{invalid operands to binary expression ('Iter1' and 'Iter1')}}
+// expected-error@+3 {{could not calculate number of iterations calling 'operator-' with upper and lower loop bounds}}
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (Iter1 I = begin1; I < end1; ++I)
+    ++I;
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (Iter1 I = begin1; I >= end1; ++I)
+    ++I;
+#pragma omp parallel
+// expected-error@+5 {{invalid operands to binary expression ('Iter1' and 'float')}}
+// expected-error@+4 {{could not calculate number of iterations calling 'operator-' with upper and lower loop bounds}}
+// Initializer is constructor with all default params.
+// expected-warning@+2 {{initialization clause of OpenMP for loop is not in canonical form ('var = init' or 'T var = init')}}
+#pragma omp parallel master taskloop simd
+  for (Iter1 I; I < end1; ++I) {
+  }
+  return 0;
+}
+
+template <typename IT, int ST>
+class TC {
+public:
+  int dotest_lt(IT begin, IT end) {
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'I' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+    for (IT I = begin; I < end; I = I + ST) {
+      ++I;
+    }
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be positive due to this condition}}
+// expected-error@+2 {{increment expression must cause 'I' to increase on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+    for (IT I = begin; I <= end; I += ST) {
+      ++I;
+    }
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+    for (IT I = begin; I < end; ++I) {
+      ++I;
+    }
+  }
+
+  static IT step() {
+    return IT(ST);
+  }
+};
+template <typename IT, int ST = 0>
+int dotest_gt(IT begin, IT end) {
+#pragma omp parallel
+// expected-note@+3 2 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (IT I = begin; I >= end; I = I + ST) {
+    ++I;
+  }
+#pragma omp parallel
+// expected-note@+3 2 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (IT I = begin; I >= end; I += ST) {
+    ++I;
+  }
+
+#pragma omp parallel
+// expected-note@+3 {{loop step is expected to be negative due to this condition}}
+// expected-error@+2 {{increment expression must cause 'I' to decrease on each iteration of OpenMP for loop}}
+#pragma omp parallel master taskloop simd
+  for (IT I = begin; I >= end; ++I) {
+    ++I;
+  }
+
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (IT I = begin; I < end; I += TC<int, ST>::step()) {
+    ++I;
+  }
+}
+
+void test_with_template() {
+  GoodIter begin, end;
+  TC<GoodIter, 100> t1;
+  TC<GoodIter, -100> t2;
+  t1.dotest_lt(begin, end);
+  t2.dotest_lt(begin, end);         // expected-note {{in instantiation of member function 'TC<GoodIter, -100>::dotest_lt' requested here}}
+  dotest_gt(begin, end);            // expected-note {{in instantiation of function template specialization 'dotest_gt<GoodIter, 0>' requested here}}
+  dotest_gt<unsigned, 10>(0, 100);  // expected-note {{in instantiation of function template specialization 'dotest_gt<unsigned int, 10>' requested here}}
+}
+
+void test_loop_break() {
+  const int N = 100;
+  float a[N], b[N], c[N];
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; i++) {
+    c[i] = a[i] + b[i];
+    for (int j = 0; j < 10; ++j) {
+      if (a[i] > b[j])
+        break; // OK in nested loop
+    }
+    switch (i) {
+    case 1:
+      b[i]++;
+      break;
+    default:
+      break;
+    }
+    if (c[i] > 10)
+      break; // expected-error {{'break' statement cannot be used in OpenMP for loop}}
+
+    if (c[i] > 11)
+      break; // expected-error {{'break' statement cannot be used in OpenMP for loop}}
+  }
+
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; i++) {
+    for (int j = 0; j < 10; j++) {
+      c[i] = a[i] + b[i];
+      if (c[i] > 10) {
+        if (c[i] < 20) {
+          break; // OK
+        }
+      }
+    }
+  }
+}
+
+void test_loop_eh() {
+  const int N = 100;
+  float a[N], b[N], c[N];
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; i++) {
+    c[i] = a[i] + b[i];
+    try { // expected-error {{'try' statement cannot be used in OpenMP simd region}}
+      for (int j = 0; j < 10; ++j) {
+        if (a[i] > b[j])
+          throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
+      }
+      throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
+    } catch (float f) {
+      if (f > 0.1)
+        throw a[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
+      return; // expected-error {{cannot return from OpenMP region}}
+    }
+    switch (i) {
+    case 1:
+      b[i]++;
+      break;
+    default:
+      break;
+    }
+    for (int j = 0; j < 10; j++) {
+      if (c[i] > 10)
+        throw c[i]; // expected-error {{'throw' statement cannot be used in OpenMP simd region}}
+    }
+  }
+  if (c[9] > 10)
+    throw c[9]; // OK
+
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; ++i) {
+    struct S {
+      void g() { throw 0; }
+    };
+  }
+}
+
+void test_loop_firstprivate_lastprivate() {
+  S s(4);
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(s) firstprivate(s)
+  for (int i = 0; i < 16; ++i)
+    ;
+}
+

diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_misc_messages.c b/clang/test/OpenMP/parallel_master_taskloop_simd_misc_messages.c
new file mode 100644
index 000000000000..c6756b88580a
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_misc_messages.c
@@ -0,0 +1,384 @@
+// RUN: %clang_cc1 -fsyntax-only -fopenmp -triple x86_64-unknown-unknown -verify %s -Wuninitialized
+
+// RUN: %clang_cc1 -fsyntax-only -fopenmp-simd -triple x86_64-unknown-unknown -verify %s -Wuninitialized
+
+void xxx(int argc) {
+  int x; // expected-note {{initialize the variable 'x' to silence this warning}}
+#pragma omp parallel master taskloop simd
+  for (int i = 0; i < 10; ++i)
+    argc = x; // expected-warning {{variable 'x' is uninitialized when used here}}
+}
+
+// expected-error@+1 {{unexpected OpenMP directive '#pragma omp parallel master taskloop simd'}}
+#pragma omp parallel master taskloop simd
+
+// expected-error@+1 {{unexpected OpenMP directive '#pragma omp parallel master taskloop simd'}}
+#pragma omp parallel master taskloop simd foo
+
+void test_no_clause() {
+  int i;
+#pragma omp parallel master taskloop simd
+  for (i = 0; i < 16; ++i)
+    ;
+
+// expected-error@+2 {{statement after '#pragma omp parallel master taskloop simd' must be a for loop}}
+#pragma omp parallel master taskloop simd
+  ++i;
+}
+
+void test_branch_protected_scope() {
+  int i = 0;
+L1:
+  ++i;
+
+  int x[24];
+
+#pragma omp parallel
+#pragma omp parallel master taskloop simd
+  for (i = 0; i < 16; ++i) {
+    if (i == 5)
+      goto L1; // expected-error {{use of undeclared label 'L1'}}
+    else if (i == 6)
+      return; // expected-error {{cannot return from OpenMP region}}
+    else if (i == 7)
+      goto L2;
+    else if (i == 8) {
+    L2:
+      x[i]++;
+    }
+  }
+
+  if (x[0] == 0)
+    goto L2; // expected-error {{use of undeclared label 'L2'}}
+  else if (x[1] == 1)
+    goto L1;
+}
+
+void test_invalid_clause() {
+  int i, a;
+// expected-warning@+1 {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+#pragma omp parallel master taskloop simd foo bar
+  for (i = 0; i < 16; ++i)
+    ;
+// expected-error@+1 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'nogroup' clause}}
+#pragma omp parallel master taskloop simd nogroup nogroup
+  for (i = 0; i < 16; ++i)
+    ;
+// expected-error@+1 {{unexpected OpenMP clause 'in_reduction' in directive '#pragma omp parallel master taskloop simd'}}
+#pragma omp parallel master taskloop simd in_reduction(+:a)
+  for (i = 0; i < 16; ++i)
+    ;
+}
+
+void test_non_identifiers() {
+  int i, x;
+
+#pragma omp parallel
+// expected-warning@+1 {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+#pragma omp parallel master taskloop simd;
+  for (i = 0; i < 16; ++i)
+    ;
+// expected-warning@+2 {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+#pragma omp parallel
+#pragma omp parallel master taskloop simd linear(x);
+  for (i = 0; i < 16; ++i)
+    ;
+
+#pragma omp parallel
+// expected-warning@+1 {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+#pragma omp parallel master taskloop simd private(x);
+  for (i = 0; i < 16; ++i)
+    ;
+
+#pragma omp parallel
+// expected-warning@+1 {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+#pragma omp parallel master taskloop simd, private(x);
+  for (i = 0; i < 16; ++i)
+    ;
+}
+
+extern int foo();
+
+void test_collapse() {
+  int i;
+#pragma omp parallel
+// expected-error@+1 {{expected '('}}
+#pragma omp parallel master taskloop simd collapse
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+#pragma omp parallel master taskloop simd collapse(
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd collapse()
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+#pragma omp parallel master taskloop simd collapse(,
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}  expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+#pragma omp parallel master taskloop simd collapse(, )
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-warning@+2 {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+// expected-error@+1 {{expected '('}}
+#pragma omp parallel master taskloop simd collapse 4)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}}
+// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}}
+// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4,
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}}
+// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4, )
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+// expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4)
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}}
+// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4 4)
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}}
+// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4, , 4)
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+#pragma omp parallel master taskloop simd collapse(4)
+  for (int i1 = 0; i1 < 16; ++i1)
+    for (int i2 = 0; i2 < 16; ++i2)
+      for (int i3 = 0; i3 < 16; ++i3)
+        for (int i4 = 0; i4 < 16; ++i4)
+          foo();
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}}
+// expected-note@+1 {{to match this '('}} expected-note@+1 {{as specified in 'collapse' clause}}
+#pragma omp parallel master taskloop simd collapse(4, 8)
+  for (i = 0; i < 16; ++i)
+    ; // expected-error {{expected 4 for loops after '#pragma omp parallel master taskloop simd', but found only 1}}
+#pragma omp parallel
+// expected-error@+1 {{expression is not an integer constant expression}}
+#pragma omp parallel master taskloop simd collapse(2.5)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expression is not an integer constant expression}}
+#pragma omp parallel master taskloop simd collapse(foo())
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
+#pragma omp parallel master taskloop simd collapse(-5)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
+#pragma omp parallel master taskloop simd collapse(0)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{argument to 'collapse' clause must be a strictly positive integer value}}
+#pragma omp parallel master taskloop simd collapse(5 - 5)
+  for (i = 0; i < 16; ++i)
+    ;
+}
+
+void test_private() {
+  int i;
+#pragma omp parallel
+// expected-error@+2 {{expected expression}}
+// expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+#pragma omp parallel master taskloop simd private(
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{expected expression}}
+#pragma omp parallel master taskloop simd private(,
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 2 {{expected expression}}
+#pragma omp parallel master taskloop simd private(, )
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd private()
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd private(int)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected variable name}}
+#pragma omp parallel master taskloop simd private(0)
+  for (i = 0; i < 16; ++i)
+    ;
+
+  int x, y, z;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(x)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(x, y)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(x, y, z)
+  for (i = 0; i < 16; ++i) {
+    x = y * i + z;
+  }
+}
+
+void test_lastprivate() {
+  int i;
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd lastprivate(
+  for (i = 0; i < 16; ++i)
+    ;
+
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{expected expression}}
+#pragma omp parallel master taskloop simd lastprivate(,
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 2 {{expected expression}}
+#pragma omp parallel master taskloop simd lastprivate(, )
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd lastprivate()
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd lastprivate(int)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected variable name}}
+#pragma omp parallel master taskloop simd lastprivate(0)
+  for (i = 0; i < 16; ++i)
+    ;
+
+  int x, y, z;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(x)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(x, y)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(x, y, z)
+  for (i = 0; i < 16; ++i)
+    ;
+}
+
+void test_firstprivate() {
+  int i;
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd firstprivate(
+  for (i = 0; i < 16; ++i)
+    ;
+
+#pragma omp parallel
+// expected-error@+2 {{expected ')'}} expected-note@+2 {{to match this '('}}
+// expected-error@+1 2 {{expected expression}}
+#pragma omp parallel master taskloop simd firstprivate(,
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 2 {{expected expression}}
+#pragma omp parallel master taskloop simd firstprivate(, )
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd firstprivate()
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected expression}}
+#pragma omp parallel master taskloop simd firstprivate(int)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+// expected-error@+1 {{expected variable name}}
+#pragma omp parallel master taskloop simd firstprivate(0)
+  for (i = 0; i < 16; ++i)
+    ;
+
+  int x, y, z;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(x) firstprivate(x)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(x, y) firstprivate(x, y)
+  for (i = 0; i < 16; ++i)
+    ;
+#pragma omp parallel
+#pragma omp parallel master taskloop simd lastprivate(x, y, z) firstprivate(x, y, z)
+  for (i = 0; i < 16; ++i)
+    ;
+}
+
+void test_loop_messages() {
+  float a[100], b[100], c[100];
+#pragma omp parallel
+// expected-error@+2 {{variable must be of integer or pointer type}}
+#pragma omp parallel master taskloop simd
+  for (float fi = 0; fi < 10.0; fi++) {
+    c[(int)fi] = a[(int)fi] + b[(int)fi];
+  }
+#pragma omp parallel
+// expected-error@+2 {{variable must be of integer or pointer type}}
+#pragma omp parallel master taskloop simd
+  for (double fi = 0; fi < 10.0; fi++) {
+    c[(int)fi] = a[(int)fi] + b[(int)fi];
+  }
+
+  // expected-warning@+2 {{OpenMP loop iteration variable cannot have more than 64 bits size and will be narrowed}}
+  #pragma omp parallel master taskloop simd
+  for (__int128 ii = 0; ii < 10; ii++) {
+    c[ii] = a[ii] + b[ii];
+  }
+}
+

diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_num_tasks_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_num_tasks_messages.cpp
new file mode 100644
index 000000000000..fbdc10aaf21c
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_num_tasks_messages.cpp
@@ -0,0 +1,103 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized
+
+void foo() {
+}
+
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, class S> // expected-note {{declared here}}
+int tmain(T argc, S **argv) {
+  T z;
+  #pragma omp parallel master taskloop simd num_tasks // expected-error {{expected '(' after 'num_tasks'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks () // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc > 0 ? argv[1][0] : argv[2][argc] + z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (foobool(argc)), num_tasks (true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'num_tasks' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (S) // expected-error {{'S' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(0) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(-1) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(argc) grainsize(argc) // expected-error {{'grainsize' and 'num_tasks' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'num_tasks' clause is specified here}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  int z;
+  #pragma omp parallel master taskloop simd num_tasks // expected-error {{expected '(' after 'num_tasks'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks () // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc > 0 ? argv[1][0] : argv[2][argc] - z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (foobool(argc)), num_tasks (true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'num_tasks' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(0)  // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(-1) // expected-error {{argument to 'num_tasks' clause must be a strictly positive integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd num_tasks(argc) grainsize(argc) // expected-error {{'grainsize' and 'num_tasks' clause are mutually exclusive and may not appear on the same directive}} expected-note {{'num_tasks' clause is specified here}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return tmain(argc, argv);
+}

diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_priority_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_priority_messages.cpp
new file mode 100644
index 000000000000..70ab0bfd744b
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_priority_messages.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 %s -Wuninitialized
+
+void foo() { // Intentionally empty; called as the body of each loop below.
+}
+
+bool foobool(int argc) { // Helper whose call is used as a 'priority' clause argument below.
+  return argc; // Implicit int-to-bool conversion: true for any non-zero argc.
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, class S> // expected-note {{declared here}}
+int tmain(T argc, S **argv) {
+  T z;
+  #pragma omp parallel master taskloop simd priority // expected-error {{expected '(' after 'priority'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority () // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc > 0 ? argv[1][0] : argv[2][argc] + z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (foobool(argc)), priority (true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'priority' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (S) // expected-error {{'S' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority(0)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority(-1) // expected-error {{argument to 'priority' clause must be a non-negative integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return 0;
+}
+
+int main(int argc, char **argv) {
+  int z;
+  #pragma omp parallel master taskloop simd priority // expected-error {{expected '(' after 'priority'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority () // expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc > 0 ? argv[1][0] : argv[2][argc] - z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (foobool(argc)), priority (true) // expected-error {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'priority' clause}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (argc argc) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority (1 0) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority(if(tmain(argc, argv) // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority(0)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  #pragma omp parallel master taskloop simd priority(-1) // expected-error {{argument to 'priority' clause must be a non-negative integer value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return tmain(argc, argv);
+}

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp
new file mode 100644
index 000000000000..d3aa734ae609
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_private_codegen.cpp
@@ -0,0 +1,418 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=LAMBDA %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=BLOCKS %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck -check-prefix=ARRAY %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+#ifndef ARRAY
+#ifndef HEADER
+#define HEADER
+
+template <class T> // Class type with user-provided constructors and destructor.
+struct S {
+  T f;
+  S(T a) : f(a) {}
+  S() : f() {} // Default constructor; the FileCheck patterns below expect calls to it for private copies.
+  operator T() { return T(); } // Yields a value-initialized T; does not read f.
+  ~S() {} // User-provided destructor; the FileCheck patterns below expect calls to it.
+};
+
+volatile double g;
+
+// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* }
+// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double }
+// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { i8 }
+// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32]
+// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] }
+// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 }
+// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { i8 }
+// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] }
+// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] }
+template <typename T>
+T tmain() { // Template counterpart of main(); instantiated as tmain<int>() from main().
+  S<T> test;
+  T t_var __attribute__((aligned(128))) = T(); // Over-aligned (128 bytes) scalar.
+  T vec[] = {1, 2};
+  S<T> s_arr[] = {1, 2};
+  S<T> var(3);
+#pragma omp parallel master taskloop simd private(t_var, vec, s_arr, s_arr, var, var) // s_arr and var are each listed twice.
+  for (int i = 0; i < 10; ++i) {
+    vec[0] = t_var; // Reads and writes only variables named in the private clause.
+    s_arr[0] = var;
+  }
+  return T();
+}
+
+int main() {
+  static int sivar;
+#ifdef LAMBDA
+  // LAMBDA: [[G:@.+]] = global double
+  // LAMBDA-LABEL: @main
+  // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]](
+  [&]() {
+  // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+  // LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
+// LAMBDA: ret
+#pragma omp parallel master taskloop simd private(g, sivar)
+  for (int i = 0; i < 10; ++i) {
+    // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG_PTR:%.+]])
+    // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+    // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+    // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+    // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+    // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+    // LAMBDA: [[SIVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+    // LAMBDA: [[SIVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SIVAR_PTR_REF]]
+    // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SIVAR_REF]]
+
+    // LAMBDA: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1)
+    g = 1;
+    sivar = 2;
+    // LAMBDA: store double 1.0{{.+}}, double* %{{.+}},
+    // LAMBDA: store i{{[0-9]+}} 2, i{{[0-9]+}}* %{{.+}},
+    // LAMBDA: call void [[INNER_LAMBDA]](%
+    // LAMBDA: ret
+    [&]() {
+      g = 2;
+      sivar = 3;
+    }();
+  }
+  }();
+  return 0;
+#elif defined(BLOCKS)
+  // BLOCKS: [[G:@.+]] = global double
+  // BLOCKS-LABEL: @main
+  // BLOCKS: call void {{%.+}}(i8
+  ^{
+  // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8*
+  // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+  // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1
+  // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
+  // BLOCKS: ret
+#pragma omp parallel master taskloop simd private(g, sivar)
+  for (int i = 0; i < 10; ++i) {
+    // BLOCKS: define {{.+}} void {{@.+}}(i8*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store double 2.0{{.+}}, double*
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: store i{{[0-9]+}} 4, i{{[0-9]+}}*
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: ret
+
+    // BLOCKS: define internal i32 [[TASK_ENTRY]](i32 %0, %{{.+}}* noalias %1)
+    g = 1;
+    sivar = 3;
+    // BLOCKS: store double 1.0{{.+}}, double* %{{.+}},
+    // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
+    // BLOCKS: store i{{[0-9]+}} 3, i{{[0-9]+}}* %{{.+}},
+    // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}}
+    // BLOCKS: call void {{%.+}}(i8
+    ^{
+      g = 2;
+      sivar = 4;
+    }();
+  }
+  }();
+  return 0;
+#else
+  S<double> test;
+  int t_var = 0;
+  int vec[] = {1, 2};
+  S<double> s_arr[] = {1, 2};
+  S<double> var(3);
+#pragma omp parallel master taskloop simd private(var, t_var, s_arr, vec, s_arr, var, sivar)
+  for (int i = 0; i < 10; ++i) {
+    vec[0] = t_var;
+    s_arr[0] = var;
+    sivar = 8;
+  }
+#pragma omp task
+  g+=1;
+  return tmain<int>();
+#endif
+}
+
+// CHECK: define i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]],
+
+// CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR:@.+]]([[S_DOUBLE_TY]]* [[TEST]])
+
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]]
+// CHECK:       [[THEN]]
+// Do not store original variables in capture struct.
+// CHECK-NOT: getelementptr inbounds [[CAP_MAIN_TY]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         [[KMP_TASK_T_TY]] task_data;
+//         [[PRIVATES_MAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC:%.+]], i32 [[GTID:%.+]], i32 9, i64 120, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]*
+
+// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
+// Also copy address of private copy to the corresponding shareds reference.
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+
+// Constructors for s_arr and var.
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: call void [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* [[S_ARR_CUR:%.+]])
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: call void [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF:%.+]])
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
+
+// Start task.
+// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*))
+// CHECK:  call {{.*}}void @__kmpc_end_master(
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+// No destructors must be called for private copies of s_arr and var.
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+//
+
+// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias %0, [[S_DOUBLE_TY]]** noalias %1, i32** noalias %2, [2 x [[S_DOUBLE_TY]]]** noalias %3, [2 x i32]** noalias %4, i32** noalias %5)
+// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]**
+// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0
+// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}},
+// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]],
+// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1
+// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}},
+// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]],
+// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2
+// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}},
+// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]],
+// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3
+// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
+// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]],
+// CHECK: ret void
+
+// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1)
+
+// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*,
+// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*,
+// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*,
+// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
+// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
+// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]])
+// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]],
+// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
+// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]],
+// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
+// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]],
+
+// Privates are actually used.
+// CHECK-DAG: [[PRIV_VAR]]
+// CHECK-DAG: [[PRIV_T_VAR]]
+// CHECK-DAG: [[PRIV_S_ARR]]
+// CHECK-DAG: [[PRIV_VEC]]
+// CHECK-DAG: [[PRIV_SIVAR]]
+
+// CHECK: ret
+
+// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* %0, [[KMP_TASK_MAIN_TY]]* %1, i32 %2)
+// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1
+// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2
+// CHECK: br label %
+
+// CHECK: phi [[S_DOUBLE_TY]]*
+// CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]*
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 1
+// CHECK: icmp eq [[S_DOUBLE_TY]]* %
+// CHECK: br i1 %
+
+// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1
+// CHECK: call {{.*}} [[S_DOUBLE_TY_DEF_CONSTR]]([[S_DOUBLE_TY]]*
+// CHECK: ret void
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_MAIN_TY]]* noalias %1)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1
+// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32],
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]],
+
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+
+// Do not store original variables in capture struct.
+// CHECK-NOT: getelementptr inbounds [[CAP_TMAIN_TY]],
+
+// Allocate task.
+// Returns struct kmp_task_t {
+//         [[KMP_TASK_T_TY]] task_data;
+//         [[PRIVATES_TMAIN_TY]] privates;
+//       };
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc([[LOC]], i32 [[GTID:%.+]], i32 9, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*))
+// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]*
+
+// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+
+// Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes).
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+
+// Constructors for s_arr and var.
+// s_arr;
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_CUR:%.+]])
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1
+// CHECK: icmp eq
+// CHECK: br i1
+
+// var;
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[PRIVATE_VAR_REF:%.+]])
+
+// Provide pointer to destructor function, which will destroy private variables at the end of the task.
+// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3
+// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)**
+// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]],
+
+// Start task.
+// CHECK: call void @__kmpc_taskloop([[LOC]], i32 [[GTID]], i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*))
+
+// No destructors must be called for private copies of s_arr and var.
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK-NOT: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+//
+
+// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias %0, i32** noalias %1, [2 x i32]** noalias %2, [2 x [[S_INT_TY]]]** noalias %3, [[S_INT_TY]]** noalias %4)
+// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]**
+// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0
+// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}},
+// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]],
+// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1
+// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}},
+// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]],
+// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2
+// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}},
+// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]],
+// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3
+// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}},
+// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]],
+// CHECK: ret void
+
+// CHECK: define internal i32 [[TASK_ENTRY]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1)
+
+// CHECK: alloca i32*,
+// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*,
+// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*,
+// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]],
+// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]],
+// CHECK: call void (i8*, ...) [[MAP_FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]])
+// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]],
+// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]],
+// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]],
+// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]],
+
+// Privates are actually used.
+// CHECK-DAG: [[PRIV_VAR]]
+// CHECK-DAG: [[PRIV_T_VAR]]
+// CHECK-DAG: [[PRIV_S_ARR]]
+// CHECK-DAG: [[PRIV_VEC]]
+
+// CHECK: ret
+
+// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* %0, [[KMP_TASK_TMAIN_TY]]* %1, i32 %2)
+// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2
+// CHECK: br label %
+
+// CHECK: phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]*
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 1
+// CHECK: icmp eq [[S_INT_TY]]* %
+// CHECK: br i1 %
+
+// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]*
+// CHECK: ret void
+
+// CHECK: define internal i32 [[DESTRUCTORS]](i32 %0, [[KMP_TASK_TMAIN_TY]]* noalias %1)
+// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2
+// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3
+// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* [[PRIVATE_VAR_REF]])
+// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0
+// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2
+// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1
+// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* [[PRIVATE_S_ARR_ELEM_REF]])
+// CHECK: icmp eq
+// CHECK: br i1
+// CHECK: ret i32
+
+#endif
+#else
+// ARRAY-LABEL: array_func
+struct St { // Element type of the 's' array parameter of array_func.
+  int a, b;
+  St() : a(0), b(0) {}
+  St &operator=(const St &) { return *this; }; // Assignment leaves *this unchanged.
+  ~St() {} // User-provided destructor.
+};
+
+void array_func(int n, float a[n], St s[2]) { // Array parameters 'a' and 's' are named in the private clause below.
+// ARRAY: call i8* @__kmpc_omp_task_alloc(
+// ARRAY: call void @__kmpc_taskloop(
+// ARRAY: store float** %{{.+}}, float*** %{{.+}},
+// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}},
+#pragma omp parallel master taskloop simd private(a, s)
+  for (int i = 0; i < 10; ++i)
+    ; // Empty loop body.
+}
+#endif
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_private_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_private_messages.cpp
new file mode 100644
index 000000000000..eb1b5c7a1eb9
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_private_messages.cpp
@@ -0,0 +1,259 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+
+typedef void **omp_allocator_handle_t; // Allocator handle type and names declared locally; this file's allocate clauses reference only omp_default_mem_alloc and omp_thread_mem_alloc.
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+void foo() { // Empty function; called from the loop body in bar().
+}
+
+bool foobool(int argc) { // Returns argc implicitly converted to bool.
+  return argc;
+}
+
+struct S1; // expected-note 2 {{declared here}} expected-note 2 {{forward declaration of 'S1'}}
+extern S1 a; // S1 is only forward-declared in this file, so a has an incomplete type where the private clauses name it.
+class S2 { // Class with a single mutable int member and a public default constructor.
+  mutable int a;
+
+public:
+  S2() : a(0) {}
+};
+const S2 b; // Named in the private(a, b) clauses of foomain and main.
+const S2 ba[5]; // Not referenced anywhere else in this test file.
+class S3 { // Class with one private int member and a public default constructor; type of ca and of the threadprivate h.
+  int a;
+
+public:
+  S3() : a(0) {}
+};
+const S3 ca[5]; // Not referenced anywhere else in this test file.
+class S4 { // Default constructor is private and only declared; the sole public constructor takes an int.
+  int a;
+  S4(); // expected-note {{implicitly declared private here}}
+
+public:
+  S4(int v) : a(v) { // Names member a twice in private clauses (as a and as this->a); the directive line carries no diagnostic annotation.
+#pragma omp parallel master taskloop simd private(a) private(this->a)
+    for (int k = 0; k < v; ++k)
+      ++this->a;
+  }
+};
+class S5 { // Default constructor is private; the int constructor and operator= are public.
+  int a;
+  S5() : a(0) {} // expected-note {{implicitly declared private here}}
+
+public:
+  S5(int v) : a(v) {}
+  S5 &operator=(S5 &s) { // Privatizes own member a twice plus s.a; the single annotated error is presumably for s.a, a member of another object.
+#pragma omp parallel master taskloop simd private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
+    for (int k = 0; k < s.a; ++k)
+      ++s.a;
+    return *this;
+  }
+};
+
+template <typename T>
+class S6 { // Class template with a public member a of type T; main() instantiates it as S6<float>.
+public:
+  T a;
+
+  S6() : a(0) {}
+  S6(T v) : a(v) { // Member a is privatized twice and also named in an allocate clause using omp_thread_mem_alloc.
+#pragma omp parallel master taskloop simd private(a) private(this->a) allocate(omp_thread_mem_alloc: a) // expected-warning {{allocator with the 'thread' trait access has unspecified behavior on 'parallel master taskloop simd' directive}}
+    for (int k = 0; k < v; ++k)
+      ++this->a;
+  }
+  S6 &operator=(S6 &s) { // Same clause shape as S5::operator=, here inside a template; main() triggers its instantiation via s6 = s6_0.
+#pragma omp parallel master taskloop simd private(a) private(this->a) private(s.a) // expected-error {{expected variable name or data member of current class}}
+    for (int k = 0; k < s.a; ++k)
+      ++s.a;
+    return *this;
+  }
+};
+
+template <typename T>
+class S7 : public T { // Derives from T and also holds a T member named a; main() instantiates it as S7<S6<float> >.
+  T a;
+  S7() : a(0) {}
+
+public:
+  S7(T v) : a(v) { // private() names own member a twice and the base-class member T::a; the directive line carries no diagnostic annotation.
+#pragma omp parallel master taskloop simd private(a) private(this->a) private(T::a)
+    for (int k = 0; k < a.a; ++k)
+      ++this->a.a;
+  }
+  S7 &operator=(S7 &s) { // s.a and s.T::a both go through another object; the annotation below counts two errors.
+#pragma omp parallel master taskloop simd private(a) private(this->a) private(s.a) private(s.T::a) // expected-error 2 {{expected variable name or data member of current class}}
+    for (int k = 0; k < s.a.a; ++k)
+      ++s.a.a;
+    return *this;
+  }
+};
+
+S3 h; // Made threadprivate on the next line, then named in private(h) in both foomain and main.
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class I, class C>
+int foomain(I argc, C **argv) { // Template variant of the private-clause checks; main() instantiates it as foomain<int, char>.
+  I e(4);
+  I g(5);
+  int i, z;
+  int &j = i; // Reference to i; named in private(j) further down.
+#pragma omp parallel master taskloop simd private // expected-error {{expected '(' after 'private'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private() // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(a, b) // expected-error {{private variable with incomplete type 'S1'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(e, g, z) // e and g have the template type I here; no diagnostic is annotated.
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(h) // expected-error {{threadprivate or thread local variable cannot be private}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd shared(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+  {
+    int v = 0;
+    int i; // Block-local i, distinct from the function-level i declared above.
+#pragma omp parallel master taskloop simd private(i)
+    for (int k = 0; k < argc; ++k) {
+      i = k;
+      v += i;
+    }
+  }
+#pragma omp parallel shared(i)
+#pragma omp parallel private(i)
+#pragma omp parallel master taskloop simd private(j) // Innermost of three stacked directives; j is the reference to i.
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+  return 0;
+}
+
+void bar(S4 a[2]) { // Privatizes a parameter declared with array-of-S4 syntax inside a nested parallel region; no diagnostic is annotated.
+#pragma omp parallel
+#pragma omp parallel master taskloop simd private(a)
+  for (int i = 0; i < 2; ++i)
+    foo();
+}
+
+namespace A { // Holds x, which is made threadprivate two lines below.
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x; // Lets main() name the same variable as B::x in a private clause.
+}
+
+int main(int argc, char **argv) { // Non-template private-clause checks; also the point that instantiates S6<float>, S7<S6<float> > and foomain<int, char>.
+  S4 e(4);
+  S5 g(5);
+  S6<float> s6(0.0) , s6_0(1.0); // expected-note {{in instantiation of member function 'S6<float>::S6' requested here}}
+  S7<S6<float> > s7(0.0) , s7_0(1.0);
+  int i, z;
+  int &j = i; // Reference to i; named in private(j) further down.
+#pragma omp parallel master taskloop simd private // expected-error {{expected '(' after 'private'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private() // expected-error {{expected expression}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argc)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(S1) // expected-error {{'S1' does not refer to a value}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(a, b) // expected-error {{private variable with incomplete type 'S1'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(argv[1]) // expected-error {{expected variable name}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{calling a private constructor of class 'S5'}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(h) // expected-error {{threadprivate or thread local variable cannot be private}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(B::x) // expected-error {{threadprivate or thread local variable cannot be private}}
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd shared(i)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel
+  {
+    int i; // Block-local i, distinct from the function-level i declared above.
+#pragma omp parallel master taskloop simd private(i)
+    for (int k = 0; k < argc; ++k)
+      ++k;
+  }
+#pragma omp parallel shared(i)
+#pragma omp parallel private(i)
+#pragma omp parallel master taskloop simd private(j) // Innermost of three stacked directives; j is the reference to i.
+  for (int k = 0; k < argc; ++k)
+    ++k;
+#pragma omp parallel master taskloop simd private(i, z)
+  for (int k = 0; k < argc; ++k)
+    ++k;
+  static int si; // Function-local static, privatized on the next directive.
+#pragma omp parallel master taskloop simd private(si) // OK
+  for(int k = 0; k < argc; ++k)
+    si = k + 1;
+
+  s6 = s6_0; // expected-note {{in instantiation of member function 'S6<float>::operator=' requested here}}
+  s7 = s7_0; // expected-note {{in instantiation of member function 'S7<S6<float> >::operator=' requested here}}
+  return foomain(argc, argv); // expected-note {{in instantiation of function template specialization 'foomain<int, char>' requested here}}
+}
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
new file mode 100644
index 000000000000..a2c85fd724ee
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_codegen.cpp
@@ -0,0 +1,234 @@
+// RUN: %clang_cc1 -fopenmp -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck %s
+
+// RUN: %clang_cc1 -fopenmp-simd -x c++ %s -verify -debug-info-kind=limited -emit-llvm -o - -triple powerpc64le-unknown-linux-gnu -std=c++98 | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+
+struct S { // Element type of the array c, whose section c[:n] is a reduction item in main().
+  float a;
+  S() : a(0.0f) {}
+  ~S() {} // User-provided destructor with an empty body.
+};
+
+#pragma omp declare reduction(+:S:omp_out.a += omp_in.a) initializer(omp_priv = omp_orig) // User-defined '+' reduction for S; the combiner adds the a fields and the initializer copies omp_orig into omp_priv.
+
+float g; // Bound to the reference d in main(), where d is a reduction item.
+
+int a; // Threadprivate global; main() declares a local array that is also named a.
+#pragma omp threadprivate(a)
+int main (int argc, char *argv[]) // Sets up the operands and runs one loop under the combined directive with four reduction items.
+{
+int   i, n;
+float a[100], b[100], sum, e[argc + 100]; // Local a hides the global a; e has a runtime-sized bound that depends on argc.
+S c[100];
+float &d = g; // Reference to the global g.
+
+/* Some initializations */
+n = 100;
+for (i=0; i < n; i++)
+  a[i] = b[i] = i * 1.0;
+sum = 0.0;
+
+#pragma omp parallel master taskloop simd reduction(+:sum, c[:n], d, e) // The four items are a scalar, an array section, a reference, and the runtime-sized array.
+  for (i=0; i < n; i++) {
+    sum = sum + (a[i] * b[i]);
+    c[i].a = i*i;
+    d += i*i;
+    e[i] = i;
+  }
+
+}
+
+// CHECK-LABEL: @main(
+// CHECK:    [[RETVAL:%.*]] = alloca i32,
+// CHECK:    [[ARGC_ADDR:%.*]] = alloca i32,
+// CHECK:    [[ARGV_ADDR:%.*]] = alloca i8**,
+// CHECK:    [[I:%.*]] = alloca i32,
+// CHECK:    [[N:%.*]] = alloca i32,
+// CHECK:    [[A:%.*]] = alloca [100 x float],
+// CHECK:    [[B:%.*]] = alloca [100 x float],
+// CHECK:    [[SUM:%.*]] = alloca float,
+// CHECK:    [[SAVED_STACK:%.*]] = alloca i8*,
+// CHECK:    [[C:%.*]] = alloca [100 x %struct.S],
+// CHECK:    [[D:%.*]] = alloca float*,
+// CHECK:    store i32 0, i32* [[RETVAL]],
+// CHECK:    store i32 [[ARGC:%.*]], i32* [[ARGC_ADDR]],
+// CHECK:    store i8** [[ARGV:%.*]], i8*** [[ARGV_ADDR]],
+// CHECK:    [[TMP1:%.*]] = load i32, i32* [[ARGC_ADDR]],
+// CHECK:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 100
+// CHECK:    [[TMP2:%.*]] = zext i32 [[ADD]] to i64
+// CHECK:    [[VLA:%.+]] = alloca float, i64 %
+
+// CHECK:    [[SUM_ADDR:%.*]] = alloca float*,
+// CHECK:    [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]],
+// CHECK:    [[DOTRD_INPUT_:%.*]] = alloca [4 x %struct.kmp_task_red_input_t],
+// CHECK:    alloca i32,
+// CHECK:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32,
+// CHECK:    [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32,
+// CHECK:       [[RES:%.+]] = call {{.*}}i32 @__kmpc_master(
+// CHECK-NEXT:  [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0
+// CHECK-NEXT:  br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:[^,]+]]
+// CHECK:       [[THEN]]
+// CHECK:    call void @__kmpc_taskgroup(%struct.ident_t*
+// CHECK-DAG:    [[TMP21:%.*]] = bitcast float* %{{.+}} to i8*
+// CHECK-DAG:    store i8* [[TMP21]], i8** [[TMP20:%[^,]+]],
+// CHECK-DAG:    [[TMP20]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T:%.+]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP22:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 1
+// CHECK-DAG:    store i64 4, i64* [[TMP22]],
+// CHECK-DAG:    [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 2
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT1:.+]] to i8*), i8** [[TMP23]],
+// CHECK-DAG:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 3
+// CHECK-DAG:    store i8* null, i8** [[TMP24]],
+// CHECK-DAG:    [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 4
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB1:.+]] to i8*), i8** [[TMP25]],
+// CHECK-DAG:    [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_]], i32 0, i32 5
+// CHECK-DAG:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to i8*
+// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP27]], i8 0, i64 4, i1 false)
+// CHECK-DAG:    [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C:%.+]], i64 0, i64 0
+// CHECK-DAG:    [[LB_ADD_LEN:%.*]] = add nsw i64 -1, %
+// CHECK-DAG:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [100 x %struct.S], [100 x %struct.S]* [[C]], i64 0, i64 [[LB_ADD_LEN]]
+// CHECK-DAG:    [[TMP31:%.*]] = bitcast %struct.S* [[ARRAYIDX5]] to i8*
+// CHECK-DAG:    store i8* [[TMP31]], i8** [[TMP28:%[^,]+]],
+// CHECK-DAG:    [[TMP28]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP32:%.*]] = ptrtoint %struct.S* [[ARRAYIDX6]] to i64
+// CHECK-DAG:    [[TMP33:%.*]] = ptrtoint %struct.S* [[ARRAYIDX5]] to i64
+// CHECK-DAG:    [[TMP34:%.*]] = sub i64 [[TMP32]], [[TMP33]]
+// CHECK-DAG:    [[TMP35:%.*]] = sdiv exact i64 [[TMP34]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK-DAG:    [[TMP36:%.*]] = add nuw i64 [[TMP35]], 1
+// CHECK-DAG:    [[TMP37:%.*]] = mul nuw i64 [[TMP36]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK-DAG:    store i64 [[TMP37]], i64* [[TMP38:%[^,]+]],
+// CHECK-DAG:    [[TMP38]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 1
+// CHECK-DAG:    [[TMP39:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 2
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT2:.+]] to i8*), i8** [[TMP39]],
+// CHECK-DAG:    [[TMP40:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 3
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_FINI2:.+]] to i8*), i8** [[TMP40]],
+// CHECK-DAG:    [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 4
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB2:.+]] to i8*), i8** [[TMP41]],
+// CHECK-DAG:    [[TMP42:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_4]], i32 0, i32 5
+// CHECK-DAG:    store i32 1, i32* [[TMP42]],
+// CHECK-DAG:    [[TMP44:%.*]] = load float*, float** [[D:%.+]],
+// CHECK-DAG:    [[TMP45:%.*]] = bitcast float* [[TMP44]] to i8*
+// CHECK-DAG:    store i8* [[TMP45]], i8** [[TMP43:%[^,]+]],
+// CHECK-DAG:    [[TMP43]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 1
+// CHECK-DAG:    store i64 4, i64* [[TMP46]],
+// CHECK-DAG:    [[TMP47:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 2
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT3:.+]] to i8*), i8** [[TMP47]],
+// CHECK-DAG:    [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 3
+// CHECK-DAG:    store i8* null, i8** [[TMP48]],
+// CHECK-DAG:    [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 4
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB3:.+]] to i8*), i8** [[TMP49]],
+// CHECK-DAG:    [[TMP50:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_7]], i32 0, i32 5
+// CHECK-DAG:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to i8*
+// CHECK-DAG:    call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP51]], i8 0, i64 4, i1 false)
+// CHECK-DAG:    [[TMP53:%.*]] = bitcast float* [[VLA:%.+]] to i8*
+// CHECK-DAG:    store i8* [[TMP53]], i8** [[TMP52:%[^,]+]],
+// CHECK-DAG:    [[TMP52]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8:%.+]], i32 0, i32 0
+// CHECK-DAG:    [[TMP54:%.*]] = mul nuw i64 [[TMP2:%.+]], 4
+// CHECK-DAG:    [[TMP55:%.*]] = udiv exact i64 [[TMP54]], ptrtoint (float* getelementptr (float, float* null, i32 1) to i64)
+// CHECK-DAG:    store i64 [[TMP54]], i64* [[TMP56:%[^,]+]],
+// CHECK-DAG:    [[TMP56]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 1
+// CHECK-DAG:    [[TMP57:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 2
+// CHECK-DAG:    store i8* bitcast (void (i8*)* @[[RED_INIT4:.+]] to i8*), i8** [[TMP57]],
+// CHECK-DAG:    [[TMP58:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 3
+// CHECK-DAG:    store i8* null, i8** [[TMP58]],
+// CHECK-DAG:    [[TMP59:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 4
+// CHECK-DAG:    store i8* bitcast (void (i8*, i8*)* @[[RED_COMB4:.+]] to i8*), i8** [[TMP59]],
+// CHECK-DAG:    [[TMP60:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_RED_INPUT_T]], %struct.kmp_task_red_input_t* [[DOTRD_INPUT_GEP_8]], i32 0, i32 5
+// CHECK-DAG:    store i32 1, i32* [[TMP60]],
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_4]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_7]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK-DAG:    [[DOTRD_INPUT_GEP_8]] = getelementptr inbounds [4 x %struct.kmp_task_red_input_t], [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]], i64 0, i64
+// CHECK:    [[TMP61:%.*]] = bitcast [4 x %struct.kmp_task_red_input_t]* [[DOTRD_INPUT_]] to i8*
+// CHECK:    [[TMP62:%.*]] = call i8* @__kmpc_task_reduction_init(i32 [[TMP0:%.+]], i32 4, i8* [[TMP61]])
+// CHECK:    [[TMP63:%.*]] = load i32, i32* [[N:%.+]],
+// CHECK:    store i32 [[TMP63]], i32* [[DOTCAPTURE_EXPR_]],
+// CHECK:    [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]],
+// CHECK:    [[SUB:%.*]] = sub nsw i32 [[TMP64]], 0
+// CHECK:    [[SUB10:%.*]] = sub nsw i32 [[SUB]], 1
+// CHECK:    [[ADD11:%.*]] = add nsw i32 [[SUB10]], 1
+// CHECK:    [[DIV:%.*]] = sdiv i32 [[ADD11]], 1
+// CHECK:    [[SUB12:%.*]] = sub nsw i32 [[DIV]], 1
+// CHECK:    store i32 [[SUB12]], i32* [[DOTCAPTURE_EXPR_9]],
+// CHECK:    [[TMP65:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i32 1, i64 888, i64 72, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @[[TASK:.+]] to i32 (i32, i8*)*))
+// CHECK:    call void @__kmpc_taskloop(%struct.ident_t* %{{.+}}, i32 [[TMP0]], i8* [[TMP65]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null)
+// CHECK:    call void @__kmpc_end_taskgroup(%struct.ident_t*
+// CHECK:  call {{.*}}void @__kmpc_end_master(
+// CHECK-NEXT:  br label {{%?}}[[EXIT]]
+// CHECK:       [[EXIT]]
+
+// CHECK: define internal void @[[RED_INIT1]](i8* %0)
+// CHECK: store float 0.000000e+00, float* %
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_COMB1]](i8* %0, i8* %1)
+// CHECK: fadd float %
+// CHECK: store float %{{.+}}, float* %
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_INIT2]](i8* %0)
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: [[ORIG_PTR_ADDR:%.+]] = call i8* @__kmpc_threadprivate_cached(
+// CHECK: [[ORIG_PTR_REF:%.+]] = bitcast i8* [[ORIG_PTR_ADDR]] to i8**
+// CHECK: load i8*, i8** [[ORIG_PTR_REF]],
+// CHECK: call void [[OMP_INIT1:@.+]](
+// CHECK: ret void
+
+// CHECK: define internal void [[OMP_COMB1:@.+]](%struct.S* noalias %0, %struct.S* noalias %1)
+// CHECK: fadd float %
+
+// CHECK: define internal void [[OMP_INIT1]](%struct.S* noalias %0, %struct.S* noalias %1)
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(
+
+// CHECK: define internal void @[[RED_FINI2]](i8* %0)
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call void @
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_COMB2]](i8* %0, i8* %1)
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call void [[OMP_COMB1]](
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_INIT3]](i8* %0)
+// CHECK: store float 0.000000e+00, float* %
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_COMB3]](i8* %0, i8* %1)
+// CHECK: fadd float %
+// CHECK: store float %{{.+}}, float* %
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_INIT4]](i8* %0)
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: store float 0.000000e+00, float* %
+// CHECK: ret void
+
+// CHECK: define internal void @[[RED_COMB4]](i8* %0, i8* %1)
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: fadd float %
+// CHECK: store float %{{.+}}, float* %
+// CHECK: ret void
+
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+
+// CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: !
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_FINI2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB2]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT3]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB3]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT4]]"
+// CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB4]]"

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp
new file mode 100644
index 000000000000..9fb65f37072d
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_reduction_messages.cpp
@@ -0,0 +1,352 @@
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 150 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 150 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 -ferror-limit 150 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 -ferror-limit 150 -o - %s -Wuninitialized
+
+typedef void **omp_allocator_handle_t; // Allocator handle type and names declared locally; tmain references omp_default_mem_alloc in an allocate clause.
+extern const omp_allocator_handle_t omp_default_mem_alloc;
+extern const omp_allocator_handle_t omp_large_cap_mem_alloc;
+extern const omp_allocator_handle_t omp_const_mem_alloc;
+extern const omp_allocator_handle_t omp_high_bw_mem_alloc;
+extern const omp_allocator_handle_t omp_low_lat_mem_alloc;
+extern const omp_allocator_handle_t omp_cgroup_mem_alloc;
+extern const omp_allocator_handle_t omp_pteam_mem_alloc;
+extern const omp_allocator_handle_t omp_thread_mem_alloc;
+
+void xxx(int argc) { // Names the uninitialized local fp as a reduction item; the next two lines carry the matching note and warning annotations.
+  int fp; // expected-note {{initialize the variable 'fp' to silence this warning}}
+#pragma omp parallel master taskloop simd reduction(+:fp) // expected-warning {{variable 'fp' is uninitialized when used here}}
+  for (int i = 0; i < 10; ++i)
+    ; // Empty loop body.
+}
+
+void foo() { // Empty function; used as the loop body of the directives in this file.
+}
+
+bool foobool(int argc) { // Returns argc implicitly converted to bool.
+  return argc;
+}
+
+void foobar(int &ref) { // The reduction item is the reference parameter itself; the directive line carries no diagnostic annotation.
+#pragma omp parallel master taskloop simd reduction(+:ref)
+  for (int i = 0; i < 10; ++i)
+    foo();
+}
+
+struct S1; // expected-note {{declared here}} expected-note 4 {{forward declaration of 'S1'}}
+extern S1 a; // Declared with the forward-declared type S1; tmain lists a among its reduction items.
+class S2 { // operator+ is private; the constructors and both static data members are public.
+  mutable int a;
+  S2 &operator+(const S2 &arg) { return (*this); } // expected-note 3 {{implicitly declared private here}}
+
+public:
+  S2() : a(0) {}
+  S2(S2 &s2) : a(s2.a) {} // Copy constructor taking a non-const reference.
+  static float S2s; // expected-note 2 {{static data member is predetermined as shared}}
+  static const float S2sc; // expected-note 2 {{'S2sc' declared here}}
+};
+const float S2::S2sc = 0; // Out-of-class definition of the const static member declared in S2.
+S2 b;                     // expected-note 3 {{'b' defined here}}
+const S2 ba[5];           // expected-note 2 {{'ba' defined here}}
+class S3 { // Has a public int member b and a public member operator+ that returns its argument by value.
+  int a;
+
+public:
+  int b;
+  S3() : a(0) {}
+  S3(const S3 &s3) : a(s3.a) {}
+  S3 operator+(const S3 &arg1) { return arg1; }
+};
+int operator+(const S3 &arg1, const S3 &arg2) { return 5; } // Free operator+ for two const S3 operands; ignores both and returns 5.
+S3 c;               // expected-note 3 {{'c' defined here}}
+const S3 ca[5];     // expected-note 2 {{'ca' defined here}}
+extern const int f; // expected-note 4 {{'f' declared here}}
+class S4 { // Default constructor, copy constructor and operator+ are all private; only the int constructor is public.
+  int a;
+  S4(); // expected-note {{implicitly declared private here}}
+  S4(const S4 &s4);
+  S4 &operator+(const S4 &arg) { return (*this); }
+
+public:
+  S4(int v) : a(v) {}
+};
+S4 &operator&=(S4 &arg1, S4 &arg2) { return arg1; } // Free operator&= for S4; returns its first argument unchanged.
+class S5 { // Member a is a 32-bit bit-field; default constructor, copy constructor and operator+ are private.
+  int a:32;
+  S5() : a(0) {} // expected-note {{implicitly declared private here}}
+  S5(const S5 &s5) : a(s5.a) {}
+  S5 &operator+(const S5 &arg); // Declared only; no definition appears in the visible part of this file.
+
+public:
+  S5(int v) : a(v) {}
+};
+class S6 { // expected-note 3 {{candidate function (the implicit copy assignment operator) not viable: no known conversion from 'int' to 'const S6' for 1st argument}}
+#if __cplusplus >= 201103L // C++11 or later
+// expected-note@-2 3 {{candidate function (the implicit move assignment operator) not viable}}
+#endif
+  int a;
+
+public:
+  S6() : a(6) {}
+  operator int() { return 6; }
+} o;
+
+struct S7 { // Member a is a bit-field; the constructor names it as a reduction item, which the annotation below marks as an error.
+  int a: 32;
+  S7() {
+#pragma omp parallel master taskloop simd reduction(+:a) // expected-error {{expected addressable reduction item for the task-based directives}}
+    for (int i = 0; i < 10; ++i)
+      ++a;
+  }
+};
+
+S3 h, k; // h is made threadprivate on the next line; k is bound to the reference p inside tmain.
+#pragma omp threadprivate(h) // expected-note 2 {{defined as threadprivate or thread local}}
+
+template <class T>       // expected-note {{declared here}}
+T tmain(T argc) {
+  const T d = T();       // expected-note 4 {{'d' defined here}}
+  const T da[5] = {T()}; // expected-note 2 {{'da' defined here}}
+  T qa[5] = {T()};
+  T i, z;
+  T &j = i;                        // expected-note 4 {{'j' defined here}}
+  S3 &p = k;                       // expected-note 2 {{'p' defined here}}
+  const T &r = da[(int)i];         // expected-note 2 {{'r' defined here}}
+  T &q = qa[(int)i];               // expected-note 2 {{'q' defined here}}
+  T fl;
+#pragma omp parallel master taskloop simd reduction // expected-error {{expected '(' after 'reduction'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(& : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(|| : argc ? i : argc) // expected-error 2 {{expected variable name, array element or array section}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(foo : argc) //expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'float'}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max' or declare reduction for type 'int'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(&& : argc) allocate , allocate(, allocate(omp_default , allocate(omp_default_mem_alloc, allocate(omp_default_mem_alloc:, allocate(omp_default_mem_alloc: argc, allocate(omp_default_mem_alloc: argv), allocate(argv) // expected-error {{expected '(' after 'allocate'}} expected-error 2 {{expected expression}} expected-error 2 {{expected ')'}} expected-error {{use of undeclared identifier 'omp_default'}} expected-note 2 {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(^ : T) // expected-error {{'T' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : z, a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 3 {{const-qualified variable cannot be reduction}} expected-error 2 {{'operator+' is a private member of 'S2'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 4 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 3 {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(max : h.b) // expected-error {{expected variable name, array element or array section}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : ba) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(* : ca) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(- : da) // expected-error {{const-qualified variable cannot be reduction}} expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : h, k) // expected-error {{threadprivate or thread local variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : o) // expected-error 2 {{no viable overloaded '='}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd private(i), reduction(+ : j), reduction(+ : q) // expected-error 4 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel private(k)
+#pragma omp parallel master taskloop simd reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : p), reduction(+ : p) // expected-error 2 {{variable can appear only once in OpenMP 'reduction' clause}} expected-note 2 {{previously referenced here}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : r) // expected-error 2 {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel shared(i)
+#pragma omp parallel reduction(min : i)
+#pragma omp parallel master taskloop simd reduction(max : j) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel private(fl)
+#pragma omp parallel master taskloop simd reduction(+ : fl) allocate(omp_thread_mem_alloc: fl) // expected-warning 2 {{allocator with the 'thread' trait access has unspecified behavior on 'parallel master taskloop simd' directive}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel reduction(* : fl)
+#pragma omp parallel master taskloop simd reduction(+ : fl)
+  for (int i = 0; i < 10; ++i)
+    foo();
+
+  return T();
+}
+
+namespace A {
+double x;
+#pragma omp threadprivate(x) // expected-note {{defined as threadprivate or thread local}}
+}
+namespace B {
+using A::x;
+}
+
+int main(int argc, char **argv) {
+  const int d = 5;       // expected-note 2 {{'d' defined here}}
+  const int da[5] = {0}; // expected-note {{'da' defined here}}
+  int qa[5] = {0};
+  S4 e(4);
+  S5 g(5);
+  int i, z;
+  int &j = i;                      // expected-note 2 {{'j' defined here}}
+  S3 &p = k;                       // expected-note 2 {{'p' defined here}}
+  const int &r = da[i];            // expected-note {{'r' defined here}}
+  int &q = qa[i];                  // expected-note {{'q' defined here}}
+  float fl;
+#pragma omp parallel master taskloop simd reduction // expected-error {{expected '(' after 'reduction'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction + // expected-error {{expected '(' after 'reduction'}} expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction( // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(- // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction() // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(*) // expected-warning {{missing ':' after reduction identifier - ignoring}} expected-error {{expected expression}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(\) // expected-error {{expected unqualified-id}} expected-warning {{missing ':' after reduction identifier - ignoring}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(foo : argc // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-error {{incorrect reduction identifier, expected one of '+', '-', '*', '&', '|', '^', '&&', '||', 'min' or 'max'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(| : argc, // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(|| : argc > 0 ? argv[1] : argv[2]) // expected-error {{expected variable name, array element or array section}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(~ : argc) // expected-error {{expected unqualified-id}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(&& : argc, z)
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(^ : S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{const-qualified variable cannot be reduction}} expected-error {{'operator+' is a private member of 'S2'}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(min : a, b, c, d, f) // expected-error {{a reduction list item with incomplete type 'S1'}} expected-error 2 {{arguments of OpenMP clause 'reduction' for 'min' or 'max' must be of arithmetic type}} expected-error 2 {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(max : h.b) // expected-error {{expected variable name, array element or array section}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : ba) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(* : ca) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(- : da) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(^ : fl) // expected-error {{invalid operands to binary expression ('float' and 'float')}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(&& : S2::S2s) // expected-error {{shared variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(&& : S2::S2sc) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(& : e, g) // expected-error {{calling a private constructor of class 'S4'}} expected-error {{invalid operands to binary expression ('S4' and 'S4')}} expected-error {{calling a private constructor of class 'S5'}} expected-error {{invalid operands to binary expression ('S5' and 'S5')}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : h, k, B::x) // expected-error 2 {{threadprivate or thread local variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : o) // expected-error {{no viable overloaded '='}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd private(i), reduction(+ : j), reduction(+ : q) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel private(k)
+#pragma omp parallel master taskloop simd reduction(+ : p), reduction(+ : p) // expected-error 2 {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : p), reduction(+ : p) // expected-error {{variable can appear only once in OpenMP 'reduction' clause}} expected-note {{previously referenced here}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel master taskloop simd reduction(+ : r) // expected-error {{const-qualified variable cannot be reduction}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel shared(i)
+#pragma omp parallel reduction(min : i)
+#pragma omp parallel master taskloop simd reduction(max : j) // expected-error {{argument of OpenMP clause 'reduction' must reference the same object in all threads}}
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel private(fl)
+#pragma omp parallel master taskloop simd reduction(+ : fl)
+  for (int i = 0; i < 10; ++i)
+    foo();
+#pragma omp parallel reduction(* : fl)
+#pragma omp parallel master taskloop simd reduction(+ : fl)
+  for (int i = 0; i < 10; ++i)
+    foo();
+  static int m;
+#pragma omp parallel master taskloop simd reduction(+ : m) // OK
+  for (int i = 0; i < 10; ++i)
+    m++;
+#pragma omp parallel master taskloop simd nogroup reduction(+ : m) // expected-error {{'reduction' clause cannot be used with 'nogroup' clause}}
+  for (int i = 0; i < 10; ++i)
+    m++;
+
+  return tmain(argc) + tmain(fl); // expected-note {{in instantiation of function template specialization 'tmain<int>' requested here}} expected-note {{in instantiation of function template specialization 'tmain<float>' requested here}}
+}

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp
new file mode 100644
index 000000000000..2a860b767e25
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_safelen_messages.cpp
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
+
+void foo() {
+}
+
+#if __cplusplus >= 201103L
+// expected-note@+2 4 {{declared here}}
+#endif
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, typename S, int N, int ST> // expected-note {{declared here}}
+T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+  #pragma omp parallel master taskloop simd safelen // expected-error {{expected '(' after 'safelen'}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen () // expected-error {{expected expression}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
+  // expected-error@+2 2 {{expression is not an integral constant expression}}
+  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  #pragma omp parallel master taskloop simd safelen (argc 
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd safelen (ST // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen (1)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen ((ST > 0) ? 1 + ST : 2)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'safelen' clause}}
+  // expected-error@+5 {{argument to 'safelen' clause must be a strictly positive integer value}}
+  // expected-error@+4 2 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  #pragma omp parallel master taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen (S) // expected-error {{'S' does not refer to a value}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#if __cplusplus <= 199711L
+  // expected-error@+4 2 {{expression is not an integral constant expression}}
+#else
+  // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
+#endif
+  #pragma omp parallel master taskloop simd safelen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen (4)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd safelen (N) // expected-error {{argument to 'safelen' clause must be a strictly positive integer value}}
+  for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  return argc;
+}
+
+int main(int argc, char **argv) {
+  #pragma omp parallel master taskloop simd safelen // expected-error {{expected '(' after 'safelen'}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd safelen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd safelen () // expected-error {{expected expression}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd safelen (4 // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd safelen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+4 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  #pragma omp parallel master taskloop simd safelen (foobool(1) > 0 ? 1 : 2)
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+6 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'safelen' clause}}
+  // expected-error@+1 {{argument to 'safelen' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd safelen (foobool(argc)), safelen (true), safelen (-5) 
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd safelen (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+#if __cplusplus <= 199711L
+  // expected-error@+4 {{expression is not an integral constant expression}}
+#else
+  // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
+#endif
+  #pragma omp parallel master taskloop simd safelen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+3 {{statement after '#pragma omp parallel master taskloop simd' must be a for loop}}
+  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, -1, -2>' requested here}}
+  #pragma omp parallel master taskloop simd safelen(safelen(tmain<int, char, -1, -2>(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}}
+  foo();
+  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, 12, 4>' requested here}}
+  return tmain<int, char, 12, 4>(argc, argv);
+}
+

diff  --git a/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp
new file mode 100644
index 000000000000..9899ce380a46
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_taskloop_simd_simdlen_messages.cpp
@@ -0,0 +1,106 @@
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++98 %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp -std=c++11 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++98 %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd -std=c++11 %s -Wuninitialized
+
+void foo() {
+}
+
+#if __cplusplus >= 201103L
+// expected-note@+2 4 {{declared here}}
+#endif
+bool foobool(int argc) {
+  return argc;
+}
+
+struct S1; // expected-note {{declared here}}
+
+template <class T, typename S, int N, int ST> // expected-note {{declared here}}
+T tmain(T argc, S **argv) { //expected-note 2 {{declared here}}
+  #pragma omp parallel master taskloop simd simdlen // expected-error {{expected '(' after 'simdlen'}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen () // expected-error {{expected expression}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+3 {{expected ')'}} expected-note@+3 {{to match this '('}}
+  // expected-error@+2 2 {{expression is not an integral constant expression}}
+  // expected-note@+1 2 {{read of non-const variable 'argc' is not allowed in a constant expression}}
+  #pragma omp parallel master taskloop simd simdlen (argc
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd simdlen (ST // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen (1)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen ((ST > 0) ? 1 + ST : 2)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  // expected-error@+6 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'simdlen' clause}}
+  // expected-error@+5 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+  // expected-error@+4 2 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+2 2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  #pragma omp parallel master taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen (S) // expected-error {{'S' does not refer to a value}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+#if __cplusplus <= 199711L
+  // expected-error@+4 2 {{expression is not an integral constant expression}}
+#else
+  // expected-error@+2 2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
+#endif
+  #pragma omp parallel master taskloop simd simdlen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen (4)
+  for (int i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  #pragma omp parallel master taskloop simd simdlen (N) // expected-error {{argument to 'simdlen' clause must be a strictly positive integer value}}
+  for (T i = ST; i < N; i++) argv[0][i] = argv[0][i] - argv[0][i-ST];
+  return argc;
+}
+
+int main(int argc, char **argv) {
+  #pragma omp parallel master taskloop simd simdlen // expected-error {{expected '(' after 'simdlen'}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd simdlen ( // expected-error {{expected expression}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd simdlen () // expected-error {{expected expression}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd simdlen (4 // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd simdlen (2+2)) // expected-warning {{extra tokens at the end of '#pragma omp parallel master taskloop simd' are ignored}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+4 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+2 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  #pragma omp parallel master taskloop simd simdlen (foobool(1) > 0 ? 1 : 2)
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+6 {{expression is not an integral constant expression}}
+#if __cplusplus >= 201103L
+  // expected-note@+4 {{non-constexpr function 'foobool' cannot be used in a constant expression}}
+#endif
+  // expected-error@+2 2 {{directive '#pragma omp parallel master taskloop simd' cannot contain more than one 'simdlen' clause}}
+  // expected-error@+1 {{argument to 'simdlen' clause must be a strictly positive integer value}}
+  #pragma omp parallel master taskloop simd simdlen (foobool(argc)), simdlen (true), simdlen (-5) 
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  #pragma omp parallel master taskloop simd simdlen (S1) // expected-error {{'S1' does not refer to a value}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+#if __cplusplus <= 199711L
+  // expected-error@+4 {{expression is not an integral constant expression}}
+#else
+  // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'char *'}}
+#endif
+  #pragma omp parallel master taskloop simd simdlen (argv[1]=2) // expected-error {{expected ')'}} expected-note {{to match this '('}}
+  for (int i = 4; i < 12; i++) argv[0][i] = argv[0][i] - argv[0][i-4];
+  // expected-error@+3 {{statement after '#pragma omp parallel master taskloop simd' must be a for loop}}
+  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, -1, -2>' requested here}}
+  #pragma omp parallel master taskloop simd simdlen(simdlen(tmain<int, char, -1, -2>(argc, argv) // expected-error 2 {{expected ')'}} expected-note 2 {{to match this '('}}
+  foo();
+  // expected-note@+1 {{in instantiation of function template specialization 'tmain<int, char, 12, 4>' requested here}}
+  return tmain<int, char, 12, 4>(argc, argv);
+}
+

diff  --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 5df02d51d036..48e8b6151581 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2052,6 +2052,8 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void> {
   VisitOMPMasterTaskLoopSimdDirective(const OMPMasterTaskLoopSimdDirective *D);
   void VisitOMPParallelMasterTaskLoopDirective(
       const OMPParallelMasterTaskLoopDirective *D);
+  void VisitOMPParallelMasterTaskLoopSimdDirective(
+      const OMPParallelMasterTaskLoopSimdDirective *D);
   void VisitOMPDistributeDirective(const OMPDistributeDirective *D);
   void VisitOMPDistributeParallelForDirective(
       const OMPDistributeParallelForDirective *D);
@@ -2911,6 +2913,11 @@ void EnqueueVisitor::VisitOMPParallelMasterTaskLoopDirective(
   VisitOMPLoopDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPParallelMasterTaskLoopSimdDirective(
+    const OMPParallelMasterTaskLoopSimdDirective *D) {
+  VisitOMPLoopDirective(D);
+}
+
 void EnqueueVisitor::VisitOMPDistributeDirective(
     const OMPDistributeDirective *D) {
   VisitOMPLoopDirective(D);
@@ -5489,6 +5496,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
     return cxstring::createRef("OMPMasterTaskLoopSimdDirective");
   case CXCursor_OMPParallelMasterTaskLoopDirective:
     return cxstring::createRef("OMPParallelMasterTaskLoopDirective");
+  case CXCursor_OMPParallelMasterTaskLoopSimdDirective:
+    return cxstring::createRef("OMPParallelMasterTaskLoopSimdDirective");
   case CXCursor_OMPDistributeDirective:
     return cxstring::createRef("OMPDistributeDirective");
   case CXCursor_OMPDistributeParallelForDirective:

diff  --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index d4210dee24db..750872f37c23 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -682,6 +682,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::OMPParallelMasterTaskLoopDirectiveClass:
     K = CXCursor_OMPParallelMasterTaskLoopDirective;
     break;
+  case Stmt::OMPParallelMasterTaskLoopSimdDirectiveClass:
+    K = CXCursor_OMPParallelMasterTaskLoopSimdDirective;
+    break;
   case Stmt::OMPDistributeDirectiveClass:
     K = CXCursor_OMPDistributeDirective;
     break;


        


More information about the cfe-commits mailing list