[clang] 8c2f4e0 - [OPENMP50]Codegen for reduction clauses with 'task' modifier.

Alexey Bataev via cfe-commits cfe-commits at lists.llvm.org
Fri May 1 08:45:59 PDT 2020


Author: Alexey Bataev
Date: 2020-05-01T11:40:27-04:00
New Revision: 8c2f4e0e855cd41c412cb1c824960a8adf938b8f

URL: https://github.com/llvm/llvm-project/commit/8c2f4e0e855cd41c412cb1c824960a8adf938b8f
DIFF: https://github.com/llvm/llvm-project/commit/8c2f4e0e855cd41c412cb1c824960a8adf938b8f.diff

LOG: [OPENMP50]Codegen for reduction clauses with 'task' modifier.

Summary:
Added codegen for reduction clause with task modifier.
```
  #pragma omp ... reduction(task, +: a)
  {
  #pragma omp ... in_reduction(+: a)
  }
```
is translated into something like this:
```
  #pragma omp ... reduction(+:a)
  {
    struct red_input_t {
      void *reduce_shar;
      void *reduce_orig;
      size_t reduce_size;
      void *reduce_init;
      void *reduce_fini;
      void *reduce_comb;
      unsigned flags;
    } r_var;
    r_var.reduce_shar = &a;
    r_var.reduce_orig = &original a;
    r_var.reduce_size = sizeof(a);
    r_var.reduce_init = [](void* l,void*){return *(int*)l=0;};
    r_var.reduce_fini = nullptr;
    r_var.reduce_comb = [](void* l,void* r){return *(int*)l += *(int)r;};
    void *tg = __kmpc_taskred_modifier_init(<loc_addr>,<gtid>,
      <flag - 0 for parallel, 1 for worksharing>,
      <1 - number of reduction elements>,
      &r_var);
    {
    #pragma omp ... in_reduction(+: a) firstprivate(tg)
    ...
    }
    __kmpc_task_reduction_modifier_fini(<loc_addr>,<gtid>,
      <flag - 0 for parallel, 1 for worksharing>);
  }
```

Reviewers: jdoerfert

Subscribers: yaxunl, guansong, jfb, cfe-commits, caomhin

Tags: #clang

Differential Revision: https://reviews.llvm.org/D79034

Added: 
    clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/for_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_reduction_task_codegen.cpp
    clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
    clang/test/OpenMP/sections_reduction_task_codegen.cpp
    clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
    clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
    clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp

Modified: 
    clang/include/clang/AST/StmtOpenMP.h
    clang/lib/AST/StmtOpenMP.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.h
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/lib/Sema/SemaOpenMP.cpp
    clang/lib/Serialization/ASTReaderStmt.cpp
    clang/lib/Serialization/ASTWriterStmt.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 7fb63cf9ae3d..bd87eafc9034 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -356,6 +356,9 @@ class OMPExecutableDirective : public Stmt {
 ///
 class OMPParallelDirective : public OMPExecutableDirective {
   friend class ASTStmtReader;
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if the construct has inner cancel directive.
   bool HasCancel;
 
@@ -381,6 +384,9 @@ class OMPParallelDirective : public OMPExecutableDirective {
                                SourceLocation(), NumClauses, 1),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -392,11 +398,14 @@ class OMPParallelDirective : public OMPExecutableDirective {
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement associated with the directive.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPParallelDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place for \a N clauses.
   ///
@@ -406,6 +415,10 @@ class OMPParallelDirective : public OMPExecutableDirective {
   static OMPParallelDirective *CreateEmpty(const ASTContext &C,
                                            unsigned NumClauses, EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -1258,7 +1271,9 @@ class OMPSimdDirective : public OMPLoopDirective {
 ///
 class OMPForDirective : public OMPLoopDirective {
   friend class ASTStmtReader;
-
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if current directive has inner cancel directive.
   bool HasCancel;
 
@@ -1286,6 +1301,9 @@ class OMPForDirective : public OMPLoopDirective {
                          NumClauses),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -1299,13 +1317,15 @@ class OMPForDirective : public OMPLoopDirective {
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if current directive has inner cancel directive.
   ///
   static OMPForDirective *Create(const ASTContext &C, SourceLocation StartLoc,
                                  SourceLocation EndLoc, unsigned CollapsedNum,
                                  ArrayRef<OMPClause *> Clauses,
                                  Stmt *AssociatedStmt, const HelperExprs &Exprs,
-                                 bool HasCancel);
+                                 Expr *TaskRedRef, bool HasCancel);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -1317,6 +1337,10 @@ class OMPForDirective : public OMPLoopDirective {
   static OMPForDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
                                       unsigned CollapsedNum, EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -1403,6 +1427,9 @@ class OMPForSimdDirective : public OMPLoopDirective {
 class OMPSectionsDirective : public OMPExecutableDirective {
   friend class ASTStmtReader;
 
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if current directive has inner cancel directive.
   bool HasCancel;
 
@@ -1429,6 +1456,9 @@ class OMPSectionsDirective : public OMPExecutableDirective {
                                SourceLocation(), NumClauses, 1),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -1440,11 +1470,14 @@ class OMPSectionsDirective : public OMPExecutableDirective {
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if current directive has inner directive.
   ///
   static OMPSectionsDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses
   /// clauses.
@@ -1455,6 +1488,10 @@ class OMPSectionsDirective : public OMPExecutableDirective {
   static OMPSectionsDirective *CreateEmpty(const ASTContext &C,
                                            unsigned NumClauses, EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -1715,6 +1752,9 @@ class OMPCriticalDirective : public OMPExecutableDirective {
 class OMPParallelForDirective : public OMPLoopDirective {
   friend class ASTStmtReader;
 
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if current region has inner cancel directive.
   bool HasCancel;
 
@@ -1743,6 +1783,9 @@ class OMPParallelForDirective : public OMPLoopDirective {
                          SourceLocation(), CollapsedNum, NumClauses),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -1756,12 +1799,15 @@ class OMPParallelForDirective : public OMPLoopDirective {
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if current directive has inner cancel directive.
   ///
   static OMPParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -1775,6 +1821,10 @@ class OMPParallelForDirective : public OMPLoopDirective {
                                               unsigned CollapsedNum,
                                               EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -1863,6 +1913,10 @@ class OMPParallelForSimdDirective : public OMPLoopDirective {
 class OMPParallelMasterDirective : public OMPExecutableDirective {
   friend class ASTStmtReader;
 
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
+
   OMPParallelMasterDirective(SourceLocation StartLoc, SourceLocation EndLoc,
                              unsigned NumClauses)
       : OMPExecutableDirective(this, OMPParallelMasterDirectiveClass,
@@ -1875,6 +1929,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
                                SourceLocation(), SourceLocation(), NumClauses,
                                1) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
 public:
   /// Creates directive with a list of \a Clauses.
   ///
@@ -1883,10 +1940,12 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   ///
   static OMPParallelMasterDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef);
 
   /// Creates an empty directive with the place for \a NumClauses
   /// clauses.
@@ -1897,6 +1956,10 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
   static OMPParallelMasterDirective *
   CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == OMPParallelMasterDirectiveClass;
   }
@@ -1914,6 +1977,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
 class OMPParallelSectionsDirective : public OMPExecutableDirective {
   friend class ASTStmtReader;
 
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if current directive has inner cancel directive.
   bool HasCancel;
 
@@ -1941,6 +2007,9 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
                                1),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -1952,11 +2021,14 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if current directive has inner cancel directive.
   ///
   static OMPParallelSectionsDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses
   /// clauses.
@@ -1967,6 +2039,10 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
   static OMPParallelSectionsDirective *
   CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -2805,6 +2881,9 @@ class OMPTargetExitDataDirective : public OMPExecutableDirective {
 ///
 class OMPTargetParallelDirective : public OMPExecutableDirective {
   friend class ASTStmtReader;
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if the construct has inner cancel directive.
   bool HasCancel = false;
 
@@ -2830,6 +2909,8 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
                                SourceLocation(), SourceLocation(), NumClauses,
                                /*NumChildren=*/1) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -2841,11 +2922,14 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
   /// \param EndLoc Ending Location of the directive.
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPTargetParallelDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses
   /// clauses.
@@ -2856,6 +2940,10 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
   static OMPTargetParallelDirective *
   CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -2876,6 +2964,9 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
 class OMPTargetParallelForDirective : public OMPLoopDirective {
   friend class ASTStmtReader;
 
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if current region has inner cancel directive.
   bool HasCancel;
 
@@ -2905,6 +2996,9 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
                          SourceLocation(), CollapsedNum, NumClauses),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -2918,12 +3012,15 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if current directive has inner cancel directive.
   ///
   static OMPTargetParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -2937,6 +3034,10 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
                                                     unsigned CollapsedNum,
                                                     EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -3709,6 +3810,9 @@ class OMPTargetUpdateDirective : public OMPExecutableDirective {
 ///
 class OMPDistributeParallelForDirective : public OMPLoopDirective {
   friend class ASTStmtReader;
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if the construct has inner cancel directive.
   bool HasCancel = false;
 
@@ -3740,6 +3844,9 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
                          NumClauses),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -3753,12 +3860,15 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPDistributeParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -3772,6 +3882,10 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
                                                         unsigned CollapsedNum,
                                                         EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -4274,6 +4388,9 @@ class OMPTeamsDistributeParallelForSimdDirective final
 ///
 class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
   friend class ASTStmtReader;
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if the construct has inner cancel directive.
   bool HasCancel = false;
 
@@ -4306,6 +4423,9 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
                          NumClauses),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -4319,12 +4439,15 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPTeamsDistributeParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses clauses.
   ///
@@ -4336,6 +4459,10 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
   CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
               EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 
@@ -4483,6 +4610,9 @@ class OMPTargetTeamsDistributeDirective final : public OMPLoopDirective {
 class OMPTargetTeamsDistributeParallelForDirective final
     : public OMPLoopDirective {
   friend class ASTStmtReader;
+  /// Special reference expression for handling task reduction. Used to store
+  /// the taskgroup descriptor returned by the runtime functions.
+  Expr *TaskRedRef = nullptr;
   /// true if the construct has inner cancel directive.
   bool HasCancel = false;
 
@@ -4516,6 +4646,9 @@ class OMPTargetTeamsDistributeParallelForDirective final
             SourceLocation(), SourceLocation(), CollapsedNum, NumClauses),
         HasCancel(false) {}
 
+  /// Sets special task reduction descriptor.
+  void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
   /// Set cancel state.
   void setHasCancel(bool Has) { HasCancel = Has; }
 
@@ -4529,12 +4662,15 @@ class OMPTargetTeamsDistributeParallelForDirective final
   /// \param Clauses List of clauses.
   /// \param AssociatedStmt Statement, associated with the directive.
   /// \param Exprs Helper expressions for CodeGen.
+  /// \param TaskRedRef Task reduction special reference expression to handle
+  /// taskgroup descriptor.
   /// \param HasCancel true if this directive has inner cancel directive.
   ///
   static OMPTargetTeamsDistributeParallelForDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+         bool HasCancel);
 
   /// Creates an empty directive with the place for \a NumClauses clauses.
   ///
@@ -4546,6 +4682,10 @@ class OMPTargetTeamsDistributeParallelForDirective final
   CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
               EmptyShell);
 
+  /// Returns special task reduction reference expression.
+  Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+  const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
   /// Return true if current directive has inner cancel directive.
   bool hasCancel() const { return HasCancel; }
 

diff  --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 57cfbc505d95..788fac789270 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -161,7 +161,8 @@ void OMPLoopDirective::setFinalsConditions(ArrayRef<Expr *> A) {
 
 OMPParallelDirective *OMPParallelDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+    bool HasCancel) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPParallelDirective), alignof(OMPClause *));
   void *Mem =
@@ -170,6 +171,7 @@ OMPParallelDirective *OMPParallelDirective::Create(
       new (Mem) OMPParallelDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
   Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -227,11 +229,10 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses);
 }
 
-OMPForDirective *
-OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
-                        SourceLocation EndLoc, unsigned CollapsedNum,
-                        ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-                        const HelperExprs &Exprs, bool HasCancel) {
+OMPForDirective *OMPForDirective::Create(
+    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+    unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   unsigned Size = llvm::alignTo(sizeof(OMPForDirective), alignof(OMPClause *));
   void *Mem =
       C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
@@ -264,6 +265,7 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
   Dir->setDependentInits(Exprs.DependentInits);
   Dir->setFinalsConditions(Exprs.FinalsConditions);
   Dir->setPreInits(Exprs.PreInits);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -334,7 +336,8 @@ OMPForSimdDirective *OMPForSimdDirective::CreateEmpty(const ASTContext &C,
 
 OMPSectionsDirective *OMPSectionsDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+    bool HasCancel) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPSectionsDirective), alignof(OMPClause *));
   void *Mem =
@@ -343,6 +346,7 @@ OMPSectionsDirective *OMPSectionsDirective::Create(
       new (Mem) OMPSectionsDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
   Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -449,7 +453,7 @@ OMPCriticalDirective *OMPCriticalDirective::CreateEmpty(const ASTContext &C,
 OMPParallelForDirective *OMPParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, bool HasCancel) {
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPParallelForDirective), alignof(OMPClause *));
   void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
@@ -483,6 +487,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
   Dir->setDependentInits(Exprs.DependentInits);
   Dir->setFinalsConditions(Exprs.FinalsConditions);
   Dir->setPreInits(Exprs.PreInits);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -552,7 +557,7 @@ OMPParallelForSimdDirective::CreateEmpty(const ASTContext &C,
 
 OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPParallelMasterDirective), alignof(OMPClause *));
   void *Mem =
@@ -561,6 +566,7 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
       new (Mem) OMPParallelMasterDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
   Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   return Dir;
 }
 
@@ -576,7 +582,8 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::CreateEmpty(const ASTCon
 
 OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+    bool HasCancel) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPParallelSectionsDirective), alignof(OMPClause *));
   void *Mem =
@@ -585,6 +592,7 @@ OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
       new (Mem) OMPParallelSectionsDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
   Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -887,7 +895,8 @@ OMPTargetDirective *OMPTargetDirective::CreateEmpty(const ASTContext &C,
 
 OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+    ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+    bool HasCancel) {
   unsigned Size =
       llvm::alignTo(sizeof(OMPTargetParallelDirective), alignof(OMPClause *));
   void *Mem =
@@ -896,6 +905,7 @@ OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
       new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size());
   Dir->setClauses(Clauses);
   Dir->setAssociatedStmt(AssociatedStmt);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -913,7 +923,7 @@ OMPTargetParallelDirective::CreateEmpty(const ASTContext &C,
 OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, bool HasCancel) {
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective),
                                 alignof(OMPClause *));
   void *Mem = C.Allocate(
@@ -947,6 +957,7 @@ OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
   Dir->setDependentInits(Exprs.DependentInits);
   Dir->setFinalsConditions(Exprs.FinalsConditions);
   Dir->setPreInits(Exprs.PreInits);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
   return Dir;
 }
@@ -1457,7 +1468,7 @@ OMPTargetUpdateDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
 OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, bool HasCancel) {
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
                                 alignof(OMPClause *));
   void *Mem = C.Allocate(
@@ -1506,6 +1517,7 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
   Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
   Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1932,7 +1944,7 @@ OMPTeamsDistributeParallelForDirective *
 OMPTeamsDistributeParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, bool HasCancel) {
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective),
                             alignof(OMPClause *));
   void *Mem = C.Allocate(
@@ -1981,6 +1993,7 @@ OMPTeamsDistributeParallelForDirective::Create(
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
   Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
   Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -2084,7 +2097,7 @@ OMPTargetTeamsDistributeParallelForDirective *
 OMPTargetTeamsDistributeParallelForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, bool HasCancel) {
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   auto Size =
       llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective),
                     alignof(OMPClause *));
@@ -2135,6 +2148,7 @@ OMPTargetTeamsDistributeParallelForDirective::Create(
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
   Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
   Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+  Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->HasCancel = HasCancel;
   return Dir;
 }

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e7525937416e..307750ccfec9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -710,6 +710,12 @@ enum OpenMPRTLFunction {
   // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
   // *d);
   OMPRTL__kmpc_task_reduction_get_th_data,
+  // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+  // is_ws, int num, void *data);
+  OMPRTL__kmpc_taskred_modifier_init,
+  // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
+  // int is_ws);
+  OMPRTL__kmpc_task_reduction_modifier_fini,
   // Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
   OMPRTL__kmpc_alloc,
   // Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
@@ -1020,26 +1026,25 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
   if (!PrivateType->isVariablyModifiedType()) {
     Sizes.emplace_back(
-        CGF.getTypeSize(
-            SharedAddresses[N].first.getType().getNonReferenceType()),
+        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
         nullptr);
     return;
   }
   llvm::Value *Size;
   llvm::Value *SizeInChars;
-  auto *ElemType = cast<llvm::PointerType>(
-                       SharedAddresses[N].first.getPointer(CGF)->getType())
-                       ->getElementType();
+  auto *ElemType =
+      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
+          ->getElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
-    Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
-                                     SharedAddresses[N].first.getPointer(CGF));
+    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
+                                     OrigAddresses[N].first.getPointer(CGF));
     Size = CGF.Builder.CreateNUWAdd(
         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
     SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
   } else {
-    SizeInChars = CGF.getTypeSize(
-        SharedAddresses[N].first.getType().getNonReferenceType());
+    SizeInChars =
+        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
     Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
   }
   Sizes.emplace_back(SizeInChars, Size);
@@ -2347,6 +2352,28 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
         FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
     break;
   }
+  case OMPRTL__kmpc_taskred_modifier_init: {
+    // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+    // is_ws, int num_data, void *data);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
+                                CGM.IntTy, CGM.VoidPtrTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy,
+                                      /*Name=*/"__kmpc_taskred_modifier_init");
+    break;
+  }
+  case OMPRTL__kmpc_task_reduction_modifier_fini: {
+    // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
+    // int is_ws);
+    llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
+    auto *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(
+        FnTy,
+        /*Name=*/"__kmpc_task_reduction_modifier_fini");
+    break;
+  }
   case OMPRTL__kmpc_alloc: {
     // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
     // al); omp_allocator_handle_t type is void *.
@@ -6784,7 +6811,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
       RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
   // kmp_task_red_input_t .rd_input.[Size];
   Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
-  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars,
+  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                        Data.ReductionCopies, Data.ReductionOps);
   for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
     // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
@@ -6848,6 +6875,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
       CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                  FlagsLVal.getType());
   }
+  if (Data.IsReductionWithTaskMod) {
+    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+    // is_ws, int num, void *data);
+    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+                                                  CGM.IntTy, /*isSigned=*/true);
+    llvm::Value *Args[] = {
+        IdentTLoc, GTid,
+        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
+                               /*isSigned=*/true),
+        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
+    return CGF.EmitRuntimeCall(
+        createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
+  }
   // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
   llvm::Value *Args[] = {
       CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
@@ -6859,6 +6902,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
                              Args);
 }
 
+void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+                                            SourceLocation Loc,
+                                            bool IsWorksharingReduction) {
+  // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+  // is_ws, int num, void *data);
+  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+                                                CGM.IntTy, /*isSigned=*/true);
+  llvm::Value *Args[] = {IdentTLoc, GTid,
+                         llvm::ConstantInt::get(CGM.IntTy,
+                                                IsWorksharingReduction ? 1 : 0,
+                                                /*isSigned=*/true)};
+  (void)CGF.EmitRuntimeCall(
+      createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
+}
+
 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                               SourceLocation Loc,
                                               ReductionCodeGen &RCG,
@@ -12364,6 +12423,12 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
   llvm_unreachable("Not supported in SIMD-only mode");
 }
 
+void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+                                                SourceLocation Loc,
+                                                bool IsWorksharingReduction) {
+  llvm_unreachable("Not supported in SIMD-only mode");
+}
+
 void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                   SourceLocation Loc,
                                                   ReductionCodeGen &RCG,

diff  --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 830b1edc61f4..7a6a06aaf4a6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -100,6 +100,7 @@ struct OMPTaskDataTy final {
   SmallVector<const Expr *, 4> LastprivateVars;
   SmallVector<const Expr *, 4> LastprivateCopies;
   SmallVector<const Expr *, 4> ReductionVars;
+  SmallVector<const Expr *, 4> ReductionOrigs;
   SmallVector<const Expr *, 4> ReductionCopies;
   SmallVector<const Expr *, 4> ReductionOps;
   struct DependData {
@@ -118,6 +119,8 @@ struct OMPTaskDataTy final {
   unsigned NumberOfParts = 0;
   bool Tied = true;
   bool Nogroup = false;
+  bool IsReductionWithTaskMod = false;
+  bool IsWorksharingReduction = false;
 };
 
 /// Class intended to support codegen of all kind of the reduction clauses.
@@ -1418,18 +1421,34 @@ class CGOpenMPRuntime {
   /// should be emitted for reduction:
   /// \code
   ///
-  /// _task_red_item_t red_data[n];
+  /// _taskred_item_t red_data[n];
   /// ...
-  /// red_data[i].shar = &origs[i];
+  /// red_data[i].shar = &shareds[i];
+  /// red_data[i].orig = &origs[i];
   /// red_data[i].size = sizeof(origs[i]);
   /// red_data[i].f_init = (void*)RedInit<i>;
   /// red_data[i].f_fini = (void*)RedDest<i>;
   /// red_data[i].f_comb = (void*)RedOp<i>;
   /// red_data[i].flags = <Flag_i>;
   /// ...
-  /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+  /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
   /// \endcode
+  /// For reduction clause with task modifier it emits the next call:
+  /// \code
   ///
+  /// _taskred_item_t red_data[n];
+  /// ...
+  /// red_data[i].shar = &shareds[i];
+  /// red_data[i].orig = &origs[i];
+  /// red_data[i].size = sizeof(origs[i]);
+  /// red_data[i].f_init = (void*)RedInit<i>;
+  /// red_data[i].f_fini = (void*)RedDest<i>;
+  /// red_data[i].f_comb = (void*)RedOp<i>;
+  /// red_data[i].flags = <Flag_i>;
+  /// ...
+  /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+  /// red_data);
+  /// \endcode
   /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
   /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
   /// \param Data Additional data for task generation like tiedness, final
@@ -1440,6 +1459,13 @@ class CGOpenMPRuntime {
                                              ArrayRef<const Expr *> RHSExprs,
                                              const OMPTaskDataTy &Data);
 
+  /// Emits the following code for reduction clause with task modifier:
+  /// \code
+  /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+  /// \endcode
+  virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+                                     bool IsWorksharingReduction);
+
   /// Required to resolve existing problems in the runtime. Emits threadprivate
   /// variables to store the size of the VLAs/array sections for
   /// initializer/combiner/finalizer functions.
@@ -2192,18 +2218,34 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
   /// should be emitted for reduction:
   /// \code
   ///
-  /// _task_red_item_t red_data[n];
+  /// _taskred_item_t red_data[n];
   /// ...
-  /// red_data[i].shar = &origs[i];
+  /// red_data[i].shar = &shareds[i];
+  /// red_data[i].orig = &origs[i];
   /// red_data[i].size = sizeof(origs[i]);
   /// red_data[i].f_init = (void*)RedInit<i>;
   /// red_data[i].f_fini = (void*)RedDest<i>;
   /// red_data[i].f_comb = (void*)RedOp<i>;
   /// red_data[i].flags = <Flag_i>;
   /// ...
-  /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+  /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
   /// \endcode
+  /// For reduction clause with task modifier it emits the next call:
+  /// \code
   ///
+  /// _taskred_item_t red_data[n];
+  /// ...
+  /// red_data[i].shar = &shareds[i];
+  /// red_data[i].orig = &origs[i];
+  /// red_data[i].size = sizeof(origs[i]);
+  /// red_data[i].f_init = (void*)RedInit<i>;
+  /// red_data[i].f_fini = (void*)RedDest<i>;
+  /// red_data[i].f_comb = (void*)RedOp<i>;
+  /// red_data[i].flags = <Flag_i>;
+  /// ...
+  /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+  /// red_data);
+  /// \endcode
   /// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
   /// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
   /// \param Data Additional data for task generation like tiedness, final
@@ -2213,6 +2255,13 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
                                      ArrayRef<const Expr *> RHSExprs,
                                      const OMPTaskDataTy &Data) override;
 
+  /// Emits the following code for reduction clause with task modifier:
+  /// \code
+  /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+  /// \endcode
+  void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+                             bool IsWorksharingReduction) override;
+
   /// Required to resolve existing problems in the runtime. Emits threadprivate
   /// variables to store the size of the VLAs/array sections for
   /// initializer/combiner/finalizer functions + emits threadprivate variable to

diff  --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 19bbcb83b8db..02075be36dd5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1169,21 +1169,23 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
   SmallVector<const Expr *, 4> ReductionOps;
   SmallVector<const Expr *, 4> LHSs;
   SmallVector<const Expr *, 4> RHSs;
+  OMPTaskDataTy Data;
+  SmallVector<const Expr *, 4> TaskLHSs;
+  SmallVector<const Expr *, 4> TaskRHSs;
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
-    auto IPriv = C->privates().begin();
-    auto IRed = C->reduction_ops().begin();
-    auto ILHS = C->lhs_exprs().begin();
-    auto IRHS = C->rhs_exprs().begin();
-    for (const Expr *Ref : C->varlists()) {
-      Shareds.emplace_back(Ref);
-      Privates.emplace_back(*IPriv);
-      ReductionOps.emplace_back(*IRed);
-      LHSs.emplace_back(*ILHS);
-      RHSs.emplace_back(*IRHS);
-      std::advance(IPriv, 1);
-      std::advance(IRed, 1);
-      std::advance(ILHS, 1);
-      std::advance(IRHS, 1);
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    if (C->getModifier() == OMPC_REDUCTION_task) {
+      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
+      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+      Data.ReductionOps.append(C->reduction_ops().begin(),
+                               C->reduction_ops().end());
+      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
     }
   }
   ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
@@ -1261,6 +1263,117 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
     ++IPriv;
     ++Count;
   }
+  if (!Data.ReductionVars.empty()) {
+    Data.IsReductionWithTaskMod = true;
+    Data.IsWorksharingReduction =
+        isOpenMPWorksharingDirective(D.getDirectiveKind());
+    llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
+        *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
+    const Expr *TaskRedRef = nullptr;
+    switch (D.getDirectiveKind()) {
+    case OMPD_parallel:
+      TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_for:
+      TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_sections:
+      TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_parallel_for:
+      TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_parallel_master:
+      TaskRedRef =
+          cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_parallel_sections:
+      TaskRedRef =
+          cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_target_parallel:
+      TaskRedRef =
+          cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_target_parallel_for:
+      TaskRedRef =
+          cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_distribute_parallel_for:
+      TaskRedRef =
+          cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
+      break;
+    case OMPD_teams_distribute_parallel_for:
+      TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
+                       .getTaskReductionRefExpr();
+      break;
+    case OMPD_target_teams_distribute_parallel_for:
+      TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
+                       .getTaskReductionRefExpr();
+      break;
+    case OMPD_simd:
+    case OMPD_for_simd:
+    case OMPD_section:
+    case OMPD_single:
+    case OMPD_master:
+    case OMPD_critical:
+    case OMPD_parallel_for_simd:
+    case OMPD_task:
+    case OMPD_taskyield:
+    case OMPD_barrier:
+    case OMPD_taskwait:
+    case OMPD_taskgroup:
+    case OMPD_flush:
+    case OMPD_depobj:
+    case OMPD_scan:
+    case OMPD_ordered:
+    case OMPD_atomic:
+    case OMPD_teams:
+    case OMPD_target:
+    case OMPD_cancellation_point:
+    case OMPD_cancel:
+    case OMPD_target_data:
+    case OMPD_target_enter_data:
+    case OMPD_target_exit_data:
+    case OMPD_taskloop:
+    case OMPD_taskloop_simd:
+    case OMPD_master_taskloop:
+    case OMPD_master_taskloop_simd:
+    case OMPD_parallel_master_taskloop:
+    case OMPD_parallel_master_taskloop_simd:
+    case OMPD_distribute:
+    case OMPD_target_update:
+    case OMPD_distribute_parallel_for_simd:
+    case OMPD_distribute_simd:
+    case OMPD_target_parallel_for_simd:
+    case OMPD_target_simd:
+    case OMPD_teams_distribute:
+    case OMPD_teams_distribute_simd:
+    case OMPD_teams_distribute_parallel_for_simd:
+    case OMPD_target_teams:
+    case OMPD_target_teams_distribute:
+    case OMPD_target_teams_distribute_parallel_for_simd:
+    case OMPD_target_teams_distribute_simd:
+    case OMPD_declare_target:
+    case OMPD_end_declare_target:
+    case OMPD_threadprivate:
+    case OMPD_allocate:
+    case OMPD_declare_reduction:
+    case OMPD_declare_mapper:
+    case OMPD_declare_simd:
+    case OMPD_requires:
+    case OMPD_declare_variant:
+    case OMPD_begin_declare_variant:
+    case OMPD_end_declare_variant:
+    case OMPD_unknown:
+      llvm_unreachable("Enexpected directive with task reductions.");
+    }
+
+    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
+    EmitVarDecl(*VD);
+    EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
+                      /*Volatile=*/false, TaskRedRef->getType());
+  }
 }
 
 void CodeGenFunction::EmitOMPReductionClauseFinal(
@@ -1272,14 +1385,22 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
   llvm::SmallVector<const Expr *, 8> RHSExprs;
   llvm::SmallVector<const Expr *, 8> ReductionOps;
   bool HasAtLeastOneReduction = false;
+  bool IsReductionWithTaskMod = false;
   for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
     HasAtLeastOneReduction = true;
     Privates.append(C->privates().begin(), C->privates().end());
     LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
     RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
     ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+    IsReductionWithTaskMod =
+        IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
   }
   if (HasAtLeastOneReduction) {
+    if (IsReductionWithTaskMod) {
+      CGM.getOpenMPRuntime().emitTaskReductionFini(
+          *this, D.getBeginLoc(),
+          isOpenMPWorksharingDirective(D.getDirectiveKind()));
+    }
     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
                       isOpenMPParallelDirective(D.getDirectiveKind()) ||
                       ReductionKind == OMPD_simd;
@@ -3382,21 +3503,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
   SmallVector<const Expr *, 4> LHSs;
   SmallVector<const Expr *, 4> RHSs;
   for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
-    auto IPriv = C->privates().begin();
-    auto IRed = C->reduction_ops().begin();
-    auto ILHS = C->lhs_exprs().begin();
-    auto IRHS = C->rhs_exprs().begin();
-    for (const Expr *Ref : C->varlists()) {
-      Data.ReductionVars.emplace_back(Ref);
-      Data.ReductionCopies.emplace_back(*IPriv);
-      Data.ReductionOps.emplace_back(*IRed);
-      LHSs.emplace_back(*ILHS);
-      RHSs.emplace_back(*IRHS);
-      std::advance(IPriv, 1);
-      std::advance(IRed, 1);
-      std::advance(ILHS, 1);
-      std::advance(IRHS, 1);
-    }
+    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+    Data.ReductionOps.append(C->reduction_ops().begin(),
+                             C->reduction_ops().end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
   }
   Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
       *this, S.getBeginLoc(), LHSs, RHSs, Data);
@@ -3776,21 +3889,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
       SmallVector<const Expr *, 4> RHSs;
       OMPTaskDataTy Data;
       for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
-        auto IPriv = C->privates().begin();
-        auto IRed = C->reduction_ops().begin();
-        auto ILHS = C->lhs_exprs().begin();
-        auto IRHS = C->rhs_exprs().begin();
-        for (const Expr *Ref : C->varlists()) {
-          Data.ReductionVars.emplace_back(Ref);
-          Data.ReductionCopies.emplace_back(*IPriv);
-          Data.ReductionOps.emplace_back(*IRed);
-          LHSs.emplace_back(*ILHS);
-          RHSs.emplace_back(*IRHS);
-          std::advance(IPriv, 1);
-          std::advance(IRed, 1);
-          std::advance(ILHS, 1);
-          std::advance(IRHS, 1);
-        }
+        Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+        Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+        Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+        Data.ReductionOps.append(C->reduction_ops().begin(),
+                                 C->reduction_ops().end());
+        LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+        RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
       }
       llvm::Value *ReductionDesc =
           CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),

diff  --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 7364af70a856..82027b0e8d8a 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -519,11 +519,15 @@ class DSAStackTy {
   getTopMostTaskgroupReductionData(const ValueDecl *D, SourceRange &SR,
                                    const Expr *&ReductionRef,
                                    Expr *&TaskgroupDescriptor) const;
-  /// Return reduction reference expression for the current taskgroup.
+  /// Return reduction reference expression for the current taskgroup or
+  /// parallel/worksharing directives with task reductions.
   Expr *getTaskgroupReductionRef() const {
-    assert(getTopOfStack().Directive == OMPD_taskgroup &&
-           "taskgroup reference expression requested for non taskgroup "
-           "directive.");
+    assert((getTopOfStack().Directive == OMPD_taskgroup ||
+            ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+              isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+             !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
+           "taskgroup reference expression requested for non taskgroup or "
+           "parallel/worksharing directive.");
     return getTopOfStack().TaskgroupReductionRef;
   }
   /// Checks if the given \p VD declaration is actually a taskgroup reduction
@@ -1351,7 +1355,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
       "Additional reduction info may be specified only for reduction items.");
   ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
   assert(ReductionData.ReductionRange.isInvalid() &&
-         getTopOfStack().Directive == OMPD_taskgroup &&
+         (getTopOfStack().Directive == OMPD_taskgroup ||
+          ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+            isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+           !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
          "Additional reduction info may be specified only once for reduction "
          "items.");
   ReductionData.set(BOK, SR);
@@ -1374,7 +1381,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
       "Additional reduction info may be specified only for reduction items.");
   ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
   assert(ReductionData.ReductionRange.isInvalid() &&
-         getTopOfStack().Directive == OMPD_taskgroup &&
+         (getTopOfStack().Directive == OMPD_taskgroup ||
+          ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+            isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+           !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
          "Additional reduction info may be specified only once for reduction "
          "items.");
   ReductionData.set(ReductionRef, SR);
@@ -1395,7 +1405,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
   for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
     const DSAInfo &Data = I->SharingMap.lookup(D);
-    if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
+    if (Data.Attributes != OMPC_reduction ||
+        Data.Modifier != OMPC_REDUCTION_task)
       continue;
     const ReductionData &ReductionData = I->ReductionMap.lookup(D);
     if (!ReductionData.ReductionOp ||
@@ -1407,8 +1418,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
                                        "expression for the descriptor is not "
                                        "set.");
     TaskgroupDescriptor = I->TaskgroupReductionRef;
-    return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
-                      Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
+    return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
+                      Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
   }
   return DSAVarData();
 }
@@ -1420,7 +1431,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
   assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
   for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
     const DSAInfo &Data = I->SharingMap.lookup(D);
-    if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
+    if (Data.Attributes != OMPC_reduction ||
+        Data.Modifier != OMPC_REDUCTION_task)
       continue;
     const ReductionData &ReductionData = I->ReductionMap.lookup(D);
     if (!ReductionData.ReductionOp ||
@@ -1432,8 +1444,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
                                        "expression for the descriptor is not "
                                        "set.");
     TaskgroupDescriptor = I->TaskgroupReductionRef;
-    return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
-                      Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
+    return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
+                      Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
   }
   return DSAVarData();
 }
@@ -2229,7 +2241,12 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level,
           // Consider taskgroup reduction descriptor variable a private
           // to avoid possible capture in the region.
           (DSAStack->hasExplicitDirective(
-               [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; },
+               [](OpenMPDirectiveKind K) {
+                 return K == OMPD_taskgroup ||
+                        ((isOpenMPParallelDirective(K) ||
+                          isOpenMPWorksharingDirective(K)) &&
+                         !isOpenMPSimdDirective(K));
+               },
                Level) &&
            DSAStack->isTaskgroupReductionRef(D, Level)))
              ? OMPC_private
@@ -4193,7 +4210,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
   SmallVector<const OMPClauseWithPreInit *, 4> PICs;
   // This is required for proper codegen.
   for (OMPClause *Clause : Clauses) {
-    if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
+    if (!LangOpts.OpenMPSimd &&
+        isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
         Clause->getClauseKind() == OMPC_in_reduction) {
       // Capture taskgroup task_reduction descriptors inside the tasking regions
       // with the corresponding in_reduction items.
@@ -6137,6 +6155,7 @@ StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
   setFunctionHasBranchProtectedScope();
 
   return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                      DSAStack->getTaskgroupReductionRef(),
                                       DSAStack->isCancelRegion());
 }
 
@@ -8620,8 +8639,9 @@ Sema::ActOnOpenMPForDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
   }
 
   setFunctionHasBranchProtectedScope();
-  return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
-                                 Clauses, AStmt, B, DSAStack->isCancelRegion());
+  return OMPForDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPForSimdDirective(
@@ -8698,6 +8718,7 @@ StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
   setFunctionHasBranchProtectedScope();
 
   return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                      DSAStack->getTaskgroupReductionRef(),
                                       DSAStack->isCancelRegion());
 }
 
@@ -8858,9 +8879,9 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
   }
 
   setFunctionHasBranchProtectedScope();
-  return OMPParallelForDirective::Create(Context, StartLoc, EndLoc,
-                                         NestedLoopCount, Clauses, AStmt, B,
-                                         DSAStack->isCancelRegion());
+  return OMPParallelForDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
@@ -8924,8 +8945,9 @@ Sema::ActOnOpenMPParallelMasterDirective(ArrayRef<OMPClause *> Clauses,
 
   setFunctionHasBranchProtectedScope();
 
-  return OMPParallelMasterDirective::Create(Context, StartLoc, EndLoc, Clauses,
-                                            AStmt);
+  return OMPParallelMasterDirective::Create(
+      Context, StartLoc, EndLoc, Clauses, AStmt,
+      DSAStack->getTaskgroupReductionRef());
 }
 
 StmtResult
@@ -8964,7 +8986,8 @@ Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef<OMPClause *> Clauses,
   setFunctionHasBranchProtectedScope();
 
   return OMPParallelSectionsDirective::Create(
-      Context, StartLoc, EndLoc, Clauses, AStmt, DSAStack->isCancelRegion());
+      Context, StartLoc, EndLoc, Clauses, AStmt,
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 /// detach and mergeable clauses are mutially exclusive, check for it.
@@ -10016,8 +10039,9 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef<OMPClause *> Clauses,
 
   setFunctionHasBranchProtectedScope();
 
-  return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses,
-                                            AStmt, DSAStack->isCancelRegion());
+  return OMPTargetParallelDirective::Create(
+      Context, StartLoc, EndLoc, Clauses, AStmt,
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
@@ -10069,9 +10093,9 @@ StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
   }
 
   setFunctionHasBranchProtectedScope();
-  return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc,
-                                               NestedLoopCount, Clauses, AStmt,
-                                               B, DSAStack->isCancelRegion());
+  return OMPTargetParallelForDirective::Create(
+      Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 /// Check for existence of a map clause in the list of clauses.
@@ -10683,7 +10707,7 @@ StmtResult Sema::ActOnOpenMPDistributeParallelForDirective(
   setFunctionHasBranchProtectedScope();
   return OMPDistributeParallelForDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
-      DSAStack->isCancelRegion());
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective(
@@ -11124,7 +11148,7 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForDirective(
 
   return OMPTeamsDistributeParallelForDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
-      DSAStack->isCancelRegion());
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef<OMPClause *> Clauses,
@@ -11253,7 +11277,7 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective(
   setFunctionHasBranchProtectedScope();
   return OMPTargetTeamsDistributeParallelForDirective::Create(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
-      DSAStack->isCancelRegion());
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
 }
 
 StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
@@ -15103,9 +15127,17 @@ static bool actOnOMPReductionKindClause(
     }
     // All reduction items are still marked as reduction (to do not increase
     // code base size).
-    Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref,
-                  RD.RedModifier);
-    if (CurrDir == OMPD_taskgroup) {
+    unsigned Modifier = RD.RedModifier;
+    // Consider task_reductions as reductions with task modifier. Required for
+    // correct analysis of in_reduction clauses.
+    if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction)
+      Modifier = OMPC_REDUCTION_task;
+    Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier);
+    if (Modifier == OMPC_REDUCTION_task &&
+        (CurrDir == OMPD_taskgroup ||
+         ((isOpenMPParallelDirective(CurrDir) ||
+           isOpenMPWorksharingDirective(CurrDir)) &&
+          !isOpenMPSimdDirective(CurrDir)))) {
       if (DeclareReductionRef.isUsable())
         Stack->addTaskgroupReductionData(D, ReductionIdRange,
                                          DeclareReductionRef.get());

diff  --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 2c91af31ee14..ea21d5e33c2e 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2308,6 +2308,7 @@ void ASTStmtReader::VisitOMPParallelDirective(OMPParallelDirective *D) {
   // The NumClauses field was read in ReadStmtFromStream.
   Record.skipInts(1);
   VisitOMPExecutableDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2317,6 +2318,7 @@ void ASTStmtReader::VisitOMPSimdDirective(OMPSimdDirective *D) {
 
 void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2329,6 +2331,7 @@ void ASTStmtReader::VisitOMPSectionsDirective(OMPSectionsDirective *D) {
   // The NumClauses field was read in ReadStmtFromStream.
   Record.skipInts(1);
   VisitOMPExecutableDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2360,6 +2363,7 @@ void ASTStmtReader::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
 
 void ASTStmtReader::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2374,6 +2378,7 @@ void ASTStmtReader::VisitOMPParallelMasterDirective(
   // The NumClauses field was read in ReadStmtFromStream.
   Record.skipInts(1);
   VisitOMPExecutableDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
 }
 
 void ASTStmtReader::VisitOMPParallelSectionsDirective(
@@ -2382,6 +2387,7 @@ void ASTStmtReader::VisitOMPParallelSectionsDirective(
   // The NumClauses field was read in ReadStmtFromStream.
   Record.skipInts(1);
   VisitOMPExecutableDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2489,12 +2495,14 @@ void ASTStmtReader::VisitOMPTargetParallelDirective(
   VisitStmt(D);
   Record.skipInts(1);
   VisitOMPExecutableDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readBool());
 }
 
 void ASTStmtReader::VisitOMPTargetParallelForDirective(
     OMPTargetParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2564,6 +2572,7 @@ void ASTStmtReader::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
 void ASTStmtReader::VisitOMPDistributeParallelForDirective(
     OMPDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2604,6 +2613,7 @@ void ASTStmtReader::VisitOMPTeamsDistributeParallelForSimdDirective(
 void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective(
     OMPTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 
@@ -2622,6 +2632,7 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeDirective(
 void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective(
     OMPTargetTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  D->setTaskReductionRefExpr(Record.readSubExpr());
   D->setHasCancel(Record.readInt());
 }
 

diff  --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index fc96cffcb2ec..7c450bccc709 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2195,6 +2195,7 @@ void ASTStmtWriter::VisitOMPParallelDirective(OMPParallelDirective *D) {
   VisitStmt(D);
   Record.push_back(D->getNumClauses());
   VisitOMPExecutableDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_PARALLEL_DIRECTIVE;
 }
@@ -2206,6 +2207,7 @@ void ASTStmtWriter::VisitOMPSimdDirective(OMPSimdDirective *D) {
 
 void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_FOR_DIRECTIVE;
 }
@@ -2219,6 +2221,7 @@ void ASTStmtWriter::VisitOMPSectionsDirective(OMPSectionsDirective *D) {
   VisitStmt(D);
   Record.push_back(D->getNumClauses());
   VisitOMPExecutableDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_SECTIONS_DIRECTIVE;
 }
@@ -2253,6 +2256,7 @@ void ASTStmtWriter::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
 
 void ASTStmtWriter::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_PARALLEL_FOR_DIRECTIVE;
 }
@@ -2268,6 +2272,7 @@ void ASTStmtWriter::VisitOMPParallelMasterDirective(
   VisitStmt(D);
   Record.push_back(D->getNumClauses());
   VisitOMPExecutableDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Code = serialization::STMT_OMP_PARALLEL_MASTER_DIRECTIVE;
 }
 
@@ -2276,6 +2281,7 @@ void ASTStmtWriter::VisitOMPParallelSectionsDirective(
   VisitStmt(D);
   Record.push_back(D->getNumClauses());
   VisitOMPExecutableDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE;
 }
@@ -2336,6 +2342,7 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective(
   VisitStmt(D);
   Record.push_back(D->getNumClauses());
   VisitOMPExecutableDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.writeBool(D->hasCancel());
   Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE;
 }
@@ -2343,6 +2350,7 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective(
 void ASTStmtWriter::VisitOMPTargetParallelForDirective(
     OMPTargetParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE;
 }
@@ -2476,6 +2484,7 @@ void ASTStmtWriter::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
 void ASTStmtWriter::VisitOMPDistributeParallelForDirective(
     OMPDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
 }
@@ -2524,6 +2533,7 @@ void ASTStmtWriter::VisitOMPTeamsDistributeParallelForSimdDirective(
 void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective(
     OMPTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
 }
@@ -2544,6 +2554,7 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeDirective(
 void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective(
     OMPTargetTeamsDistributeParallelForDirective *D) {
   VisitOMPLoopDirective(D);
+  Record.AddStmt(D->getTaskReductionRefExpr());
   Record.push_back(D->hasCancel() ? 1 : 0);
   Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
 }

diff  --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..971e9be8534b
--- /dev/null
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target teams
+#pragma omp distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+  for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..ea8fc55d9cb2
--- /dev/null
+++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel
+#pragma omp for reduction(task, +: argc, argv[0:10][0:argc])
+  for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..b4f4f83ec955
--- /dev/null
+++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel for reduction(task, +: argc, argv[0:10][0:argc])
+  for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..0f8366fa95e3
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel master reduction(task, +: argc, argv[0:10][0:argc])
+  {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..5e04aa8c1ec2
--- /dev/null
+++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel reduction(task, +: argc, argv[0:10][0:argc])
+  {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..867eb45a1332
--- /dev/null
+++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel sections reduction(task, +: argc, argv[0:10][0:argc])
+  {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..be67a2a17400
--- /dev/null
+++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel
+#pragma omp sections reduction(task, +: argc, argv[0:10][0:argc])
+  {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..5c5ea6b90d52
--- /dev/null
+++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target parallel for reduction(task, +: argc, argv[0:10][0:argc])
+  for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..2fc49d44c1e9
--- /dev/null
+++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target parallel reduction(task, +: argc, argv[0:10][0:argc])
+  {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..06c0f8744e8c
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+  for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif

diff  --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..194999f8cbb0
--- /dev/null
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target
+#pragma omp teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+  for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+    ;
+  }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif


        


More information about the cfe-commits mailing list