[clang] 8c2f4e0 - [OPENMP50]Codegen for reduction clauses with 'task' modifier.
Alexey Bataev via cfe-commits
cfe-commits at lists.llvm.org
Fri May 1 08:45:59 PDT 2020
Author: Alexey Bataev
Date: 2020-05-01T11:40:27-04:00
New Revision: 8c2f4e0e855cd41c412cb1c824960a8adf938b8f
URL: https://github.com/llvm/llvm-project/commit/8c2f4e0e855cd41c412cb1c824960a8adf938b8f
DIFF: https://github.com/llvm/llvm-project/commit/8c2f4e0e855cd41c412cb1c824960a8adf938b8f.diff
LOG: [OPENMP50]Codegen for reduction clauses with 'task' modifier.
Summary:
Added codegen for reduction clause with task modifier.
```
#pragma omp ... reduction(task, +: a)
{
#pragma omp ... in_reduction(+: a)
}
```
is translated into something like this:
```
#pragma omp ... reduction(+:a)
{
struct red_input_t {
void *reduce_shar;
void *reduce_orig;
size_t reduce_size;
void *reduce_init;
void *reduce_fini;
void *reduce_comb;
unsigned flags;
} r_var;
r_var.reduce_shar = &a;
r_var.reduce_orig = &original a;
r_var.reduce_size = sizeof(a);
r_var.reduce_init = [](void* l,void*){return *(int*)l=0;};
r_var.reduce_fini = nullptr;
r_var.reduce_comb = [](void* l,void* r){return *(int*)l += *(int)r;};
void *tg = __kmpc_taskred_modifier_init(<loc_addr>,<gtid>,
<flag - 0 for parallel, 1 for worksharing>,
<1 - number of reduction elements>,
&r_var);
{
#pragma omp ... in_reduction(+: a) firstprivate(tg)
...
}
__kmpc_task_reduction_modifier_fini(<loc_addr>,<gtid>,
<flag - 0 for parallel, 1 for worksharing>);
}
```
Reviewers: jdoerfert
Subscribers: yaxunl, guansong, jfb, cfe-commits, caomhin
Tags: #clang
Differential Revision: https://reviews.llvm.org/D79034
Added:
clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/for_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_reduction_task_codegen.cpp
clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
clang/test/OpenMP/sections_reduction_task_codegen.cpp
clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
Modified:
clang/include/clang/AST/StmtOpenMP.h
clang/lib/AST/StmtOpenMP.cpp
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/CodeGen/CGOpenMPRuntime.h
clang/lib/CodeGen/CGStmtOpenMP.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/lib/Serialization/ASTReaderStmt.cpp
clang/lib/Serialization/ASTWriterStmt.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 7fb63cf9ae3d..bd87eafc9034 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -356,6 +356,9 @@ class OMPExecutableDirective : public Stmt {
///
class OMPParallelDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel;
@@ -381,6 +384,9 @@ class OMPParallelDirective : public OMPExecutableDirective {
SourceLocation(), NumClauses, 1),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -392,11 +398,14 @@ class OMPParallelDirective : public OMPExecutableDirective {
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPParallelDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a N clauses.
///
@@ -406,6 +415,10 @@ class OMPParallelDirective : public OMPExecutableDirective {
static OMPParallelDirective *CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -1258,7 +1271,9 @@ class OMPSimdDirective : public OMPLoopDirective {
///
class OMPForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
-
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
@@ -1286,6 +1301,9 @@ class OMPForDirective : public OMPLoopDirective {
NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -1299,13 +1317,15 @@ class OMPForDirective : public OMPLoopDirective {
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPForDirective *Create(const ASTContext &C, SourceLocation StartLoc,
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs,
- bool HasCancel);
+ Expr *TaskRedRef, bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -1317,6 +1337,10 @@ class OMPForDirective : public OMPLoopDirective {
static OMPForDirective *CreateEmpty(const ASTContext &C, unsigned NumClauses,
unsigned CollapsedNum, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -1403,6 +1427,9 @@ class OMPForSimdDirective : public OMPLoopDirective {
class OMPSectionsDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
@@ -1429,6 +1456,9 @@ class OMPSectionsDirective : public OMPExecutableDirective {
SourceLocation(), NumClauses, 1),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -1440,11 +1470,14 @@ class OMPSectionsDirective : public OMPExecutableDirective {
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner directive.
///
static OMPSectionsDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@@ -1455,6 +1488,10 @@ class OMPSectionsDirective : public OMPExecutableDirective {
static OMPSectionsDirective *CreateEmpty(const ASTContext &C,
unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -1715,6 +1752,9 @@ class OMPCriticalDirective : public OMPExecutableDirective {
class OMPParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current region has inner cancel directive.
bool HasCancel;
@@ -1743,6 +1783,9 @@ class OMPParallelForDirective : public OMPLoopDirective {
SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -1756,12 +1799,15 @@ class OMPParallelForDirective : public OMPLoopDirective {
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -1775,6 +1821,10 @@ class OMPParallelForDirective : public OMPLoopDirective {
unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -1863,6 +1913,10 @@ class OMPParallelForSimdDirective : public OMPLoopDirective {
class OMPParallelMasterDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
+
OMPParallelMasterDirective(SourceLocation StartLoc, SourceLocation EndLoc,
unsigned NumClauses)
: OMPExecutableDirective(this, OMPParallelMasterDirectiveClass,
@@ -1875,6 +1929,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
SourceLocation(), SourceLocation(), NumClauses,
1) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
public:
/// Creates directive with a list of \a Clauses.
///
@@ -1883,10 +1940,12 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
///
static OMPParallelMasterDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@@ -1897,6 +1956,10 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
static OMPParallelMasterDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
static bool classof(const Stmt *T) {
return T->getStmtClass() == OMPParallelMasterDirectiveClass;
}
@@ -1914,6 +1977,9 @@ class OMPParallelMasterDirective : public OMPExecutableDirective {
class OMPParallelSectionsDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current directive has inner cancel directive.
bool HasCancel;
@@ -1941,6 +2007,9 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
1),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -1952,11 +2021,14 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPParallelSectionsDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@@ -1967,6 +2039,10 @@ class OMPParallelSectionsDirective : public OMPExecutableDirective {
static OMPParallelSectionsDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -2805,6 +2881,9 @@ class OMPTargetExitDataDirective : public OMPExecutableDirective {
///
class OMPTargetParallelDirective : public OMPExecutableDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@@ -2830,6 +2909,8 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
SourceLocation(), SourceLocation(), NumClauses,
/*NumChildren=*/1) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -2841,11 +2922,14 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
/// \param EndLoc Ending Location of the directive.
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTargetParallelDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel);
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses
/// clauses.
@@ -2856,6 +2940,10 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
static OMPTargetParallelDirective *
CreateEmpty(const ASTContext &C, unsigned NumClauses, EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -2876,6 +2964,9 @@ class OMPTargetParallelDirective : public OMPExecutableDirective {
class OMPTargetParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if current region has inner cancel directive.
bool HasCancel;
@@ -2905,6 +2996,9 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -2918,12 +3012,15 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if current directive has inner cancel directive.
///
static OMPTargetParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -2937,6 +3034,10 @@ class OMPTargetParallelForDirective : public OMPLoopDirective {
unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -3709,6 +3810,9 @@ class OMPTargetUpdateDirective : public OMPExecutableDirective {
///
class OMPDistributeParallelForDirective : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@@ -3740,6 +3844,9 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -3753,12 +3860,15 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -3772,6 +3882,10 @@ class OMPDistributeParallelForDirective : public OMPLoopDirective {
unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -4274,6 +4388,9 @@ class OMPTeamsDistributeParallelForSimdDirective final
///
class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@@ -4306,6 +4423,9 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -4319,12 +4439,15 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTeamsDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
@@ -4336,6 +4459,10 @@ class OMPTeamsDistributeParallelForDirective final : public OMPLoopDirective {
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
@@ -4483,6 +4610,9 @@ class OMPTargetTeamsDistributeDirective final : public OMPLoopDirective {
class OMPTargetTeamsDistributeParallelForDirective final
: public OMPLoopDirective {
friend class ASTStmtReader;
+ /// Special reference expression for handling task reduction. Used to store
+ /// the taskgroup descriptor returned by the runtime functions.
+ Expr *TaskRedRef = nullptr;
/// true if the construct has inner cancel directive.
bool HasCancel = false;
@@ -4516,6 +4646,9 @@ class OMPTargetTeamsDistributeParallelForDirective final
SourceLocation(), SourceLocation(), CollapsedNum, NumClauses),
HasCancel(false) {}
+ /// Sets special task reduction descriptor.
+ void setTaskReductionRefExpr(Expr *E) { TaskRedRef = E; }
+
/// Set cancel state.
void setHasCancel(bool Has) { HasCancel = Has; }
@@ -4529,12 +4662,15 @@ class OMPTargetTeamsDistributeParallelForDirective final
/// \param Clauses List of clauses.
/// \param AssociatedStmt Statement, associated with the directive.
/// \param Exprs Helper expressions for CodeGen.
+ /// \param TaskRedRef Task reduction special reference expression to handle
+ /// taskgroup descriptor.
/// \param HasCancel true if this directive has inner cancel directive.
///
static OMPTargetTeamsDistributeParallelForDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs, bool HasCancel);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs, Expr *TaskRedRef,
+ bool HasCancel);
/// Creates an empty directive with the place for \a NumClauses clauses.
///
@@ -4546,6 +4682,10 @@ class OMPTargetTeamsDistributeParallelForDirective final
CreateEmpty(const ASTContext &C, unsigned NumClauses, unsigned CollapsedNum,
EmptyShell);
+ /// Returns special task reduction reference expression.
+ Expr *getTaskReductionRefExpr() { return TaskRedRef; }
+ const Expr *getTaskReductionRefExpr() const { return TaskRedRef; }
+
/// Return true if current directive has inner cancel directive.
bool hasCancel() const { return HasCancel; }
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 57cfbc505d95..788fac789270 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -161,7 +161,8 @@ void OMPLoopDirective::setFinalsConditions(ArrayRef<Expr *> A) {
OMPParallelDirective *OMPParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelDirective), alignof(OMPClause *));
void *Mem =
@@ -170,6 +171,7 @@ OMPParallelDirective *OMPParallelDirective::Create(
new (Mem) OMPParallelDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -227,11 +229,10 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C,
return new (Mem) OMPSimdDirective(CollapsedNum, NumClauses);
}
-OMPForDirective *
-OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation EndLoc, unsigned CollapsedNum,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+OMPForDirective *OMPForDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPForDirective), alignof(OMPClause *));
void *Mem =
C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
@@ -264,6 +265,7 @@ OMPForDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -334,7 +336,8 @@ OMPForSimdDirective *OMPForSimdDirective::CreateEmpty(const ASTContext &C,
OMPSectionsDirective *OMPSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPSectionsDirective), alignof(OMPClause *));
void *Mem =
@@ -343,6 +346,7 @@ OMPSectionsDirective *OMPSectionsDirective::Create(
new (Mem) OMPSectionsDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -449,7 +453,7 @@ OMPCriticalDirective *OMPCriticalDirective::CreateEmpty(const ASTContext &C,
OMPParallelForDirective *OMPParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelForDirective), alignof(OMPClause *));
void *Mem = C.Allocate(Size + sizeof(OMPClause *) * Clauses.size() +
@@ -483,6 +487,7 @@ OMPParallelForDirective *OMPParallelForDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -552,7 +557,7 @@ OMPParallelForSimdDirective::CreateEmpty(const ASTContext &C,
OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelMasterDirective), alignof(OMPClause *));
void *Mem =
@@ -561,6 +566,7 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::Create(
new (Mem) OMPParallelMasterDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
return Dir;
}
@@ -576,7 +582,8 @@ OMPParallelMasterDirective *OMPParallelMasterDirective::CreateEmpty(const ASTCon
OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPParallelSectionsDirective), alignof(OMPClause *));
void *Mem =
@@ -585,6 +592,7 @@ OMPParallelSectionsDirective *OMPParallelSectionsDirective::Create(
new (Mem) OMPParallelSectionsDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -887,7 +895,8 @@ OMPTargetDirective *OMPTargetDirective::CreateEmpty(const ASTContext &C,
OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, bool HasCancel) {
+ ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt, Expr *TaskRedRef,
+ bool HasCancel) {
unsigned Size =
llvm::alignTo(sizeof(OMPTargetParallelDirective), alignof(OMPClause *));
void *Mem =
@@ -896,6 +905,7 @@ OMPTargetParallelDirective *OMPTargetParallelDirective::Create(
new (Mem) OMPTargetParallelDirective(StartLoc, EndLoc, Clauses.size());
Dir->setClauses(Clauses);
Dir->setAssociatedStmt(AssociatedStmt);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -913,7 +923,7 @@ OMPTargetParallelDirective::CreateEmpty(const ASTContext &C,
OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPTargetParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
@@ -947,6 +957,7 @@ OMPTargetParallelForDirective *OMPTargetParallelForDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
return Dir;
}
@@ -1457,7 +1468,7 @@ OMPTargetUpdateDirective::CreateEmpty(const ASTContext &C, unsigned NumClauses,
OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
unsigned Size = llvm::alignTo(sizeof(OMPDistributeParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
@@ -1506,6 +1517,7 @@ OMPDistributeParallelForDirective *OMPDistributeParallelForDirective::Create(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
@@ -1932,7 +1944,7 @@ OMPTeamsDistributeParallelForDirective *
OMPTeamsDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
auto Size = llvm::alignTo(sizeof(OMPTeamsDistributeParallelForDirective),
alignof(OMPClause *));
void *Mem = C.Allocate(
@@ -1981,6 +1993,7 @@ OMPTeamsDistributeParallelForDirective::Create(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
@@ -2084,7 +2097,7 @@ OMPTargetTeamsDistributeParallelForDirective *
OMPTargetTeamsDistributeParallelForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
auto Size =
llvm::alignTo(sizeof(OMPTargetTeamsDistributeParallelForDirective),
alignof(OMPClause *));
@@ -2135,6 +2148,7 @@ OMPTargetTeamsDistributeParallelForDirective::Create(
Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
+ Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->HasCancel = HasCancel;
return Dir;
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e7525937416e..307750ccfec9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -710,6 +710,12 @@ enum OpenMPRTLFunction {
// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
// *d);
OMPRTL__kmpc_task_reduction_get_th_data,
+ // Call to void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ OMPRTL__kmpc_taskred_modifier_init,
+ // Call to void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
+ // int is_ws);
+ OMPRTL__kmpc_task_reduction_modifier_fini,
// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
OMPRTL__kmpc_alloc,
// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
@@ -1020,26 +1026,25 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
if (!PrivateType->isVariablyModifiedType()) {
Sizes.emplace_back(
- CGF.getTypeSize(
- SharedAddresses[N].first.getType().getNonReferenceType()),
+ CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
nullptr);
return;
}
llvm::Value *Size;
llvm::Value *SizeInChars;
- auto *ElemType = cast<llvm::PointerType>(
- SharedAddresses[N].first.getPointer(CGF)->getType())
- ->getElementType();
+ auto *ElemType =
+ cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
+ ->getElementType();
auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
if (AsArraySection) {
- Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(CGF),
- SharedAddresses[N].first.getPointer(CGF));
+ Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
+ OrigAddresses[N].first.getPointer(CGF));
Size = CGF.Builder.CreateNUWAdd(
Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
} else {
- SizeInChars = CGF.getTypeSize(
- SharedAddresses[N].first.getType().getNonReferenceType());
+ SizeInChars =
+ CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
}
Sizes.emplace_back(SizeInChars, Size);
@@ -2347,6 +2352,28 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_taskred_modifier_init: {
+ // Build void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num_data, void *data);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy,
+ CGM.IntTy, CGM.VoidPtrTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy,
+ /*Name=*/"__kmpc_taskred_modifier_init");
+ break;
+ }
+ case OMPRTL__kmpc_task_reduction_modifier_fini: {
+ // Build void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
+ // int is_ws);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.IntTy, CGM.IntTy};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(
+ FnTy,
+ /*Name=*/"__kmpc_task_reduction_modifier_fini");
+ break;
+ }
case OMPRTL__kmpc_alloc: {
// Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
// al); omp_allocator_handle_t type is void *.
@@ -6784,7 +6811,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_task_red_input_t .rd_input.[Size];
Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
- ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionVars,
+ ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
Data.ReductionCopies, Data.ReductionOps);
for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
// kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
@@ -6848,6 +6875,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
FlagsLVal.getType());
}
+ if (Data.IsReductionWithTaskMod) {
+ // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy, /*isSigned=*/true);
+ llvm::Value *Args[] = {
+ IdentTLoc, GTid,
+ llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
+ /*isSigned=*/true),
+ llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TaskRedInput.getPointer(), CGM.VoidPtrTy)};
+ return CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_taskred_modifier_init), Args);
+ }
// Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
llvm::Value *Args[] = {
CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
@@ -6859,6 +6902,22 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
Args);
}
+void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ bool IsWorksharingReduction) {
+ // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
+ // is_ws, int num, void *data);
+ llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
+ CGM.IntTy, /*isSigned=*/true);
+ llvm::Value *Args[] = {IdentTLoc, GTid,
+ llvm::ConstantInt::get(CGM.IntTy,
+ IsWorksharingReduction ? 1 : 0,
+ /*isSigned=*/true)};
+ (void)CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_task_reduction_modifier_fini), Args);
+}
+
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,
@@ -12364,6 +12423,12 @@ llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
llvm_unreachable("Not supported in SIMD-only mode");
}
+void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ bool IsWorksharingReduction) {
+ llvm_unreachable("Not supported in SIMD-only mode");
+}
+
void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
SourceLocation Loc,
ReductionCodeGen &RCG,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 830b1edc61f4..7a6a06aaf4a6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -100,6 +100,7 @@ struct OMPTaskDataTy final {
SmallVector<const Expr *, 4> LastprivateVars;
SmallVector<const Expr *, 4> LastprivateCopies;
SmallVector<const Expr *, 4> ReductionVars;
+ SmallVector<const Expr *, 4> ReductionOrigs;
SmallVector<const Expr *, 4> ReductionCopies;
SmallVector<const Expr *, 4> ReductionOps;
struct DependData {
@@ -118,6 +119,8 @@ struct OMPTaskDataTy final {
unsigned NumberOfParts = 0;
bool Tied = true;
bool Nogroup = false;
+ bool IsReductionWithTaskMod = false;
+ bool IsWorksharingReduction = false;
};
/// Class intended to support codegen of all kind of the reduction clauses.
@@ -1418,18 +1421,34 @@ class CGOpenMPRuntime {
/// should be emitted for reduction:
/// \code
///
- /// _task_red_item_t red_data[n];
+ /// _taskred_item_t red_data[n];
/// ...
- /// red_data[i].shar = &origs[i];
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
- /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
+ /// For reduction clause with task modifier it emits the next call:
+ /// \code
///
+ /// _taskred_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+ /// red_data);
+ /// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
@@ -1440,6 +1459,13 @@ class CGOpenMPRuntime {
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data);
+ /// Emits the following code for reduction clause with task modifier:
+ /// \code
+ /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+ /// \endcode
+ virtual void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+ bool IsWorksharingReduction);
+
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions.
@@ -2192,18 +2218,34 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
/// should be emitted for reduction:
/// \code
///
- /// _task_red_item_t red_data[n];
+ /// _taskred_item_t red_data[n];
/// ...
- /// red_data[i].shar = &origs[i];
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
/// red_data[i].size = sizeof(origs[i]);
/// red_data[i].f_init = (void*)RedInit<i>;
/// red_data[i].f_fini = (void*)RedDest<i>;
/// red_data[i].f_comb = (void*)RedOp<i>;
/// red_data[i].flags = <Flag_i>;
/// ...
- /// void* tg1 = __kmpc_task_reduction_init(gtid, n, red_data);
+ /// void* tg1 = __kmpc_taskred_init(gtid, n, red_data);
/// \endcode
+ /// For reduction clause with task modifier it emits the next call:
+ /// \code
///
+ /// _taskred_item_t red_data[n];
+ /// ...
+ /// red_data[i].shar = &shareds[i];
+ /// red_data[i].orig = &origs[i];
+ /// red_data[i].size = sizeof(origs[i]);
+ /// red_data[i].f_init = (void*)RedInit<i>;
+ /// red_data[i].f_fini = (void*)RedDest<i>;
+ /// red_data[i].f_comb = (void*)RedOp<i>;
+ /// red_data[i].flags = <Flag_i>;
+ /// ...
+ /// void* tg1 = __kmpc_taskred_modifier_init(loc, gtid, is_worksharing, n,
+ /// red_data);
+ /// \endcode
/// \param LHSExprs List of LHS in \a Data.ReductionOps reduction operations.
/// \param RHSExprs List of RHS in \a Data.ReductionOps reduction operations.
/// \param Data Additional data for task generation like tiedness, final
@@ -2213,6 +2255,13 @@ class CGOpenMPSIMDRuntime final : public CGOpenMPRuntime {
ArrayRef<const Expr *> RHSExprs,
const OMPTaskDataTy &Data) override;
+ /// Emits the following code for reduction clause with task modifier:
+ /// \code
+ /// __kmpc_task_reduction_modifier_fini(loc, gtid, is_worksharing);
+ /// \endcode
+ void emitTaskReductionFini(CodeGenFunction &CGF, SourceLocation Loc,
+ bool IsWorksharingReduction) override;
+
/// Required to resolve existing problems in the runtime. Emits threadprivate
/// variables to store the size of the VLAs/array sections for
/// initializer/combiner/finalizer functions + emits threadprivate variable to
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 19bbcb83b8db..02075be36dd5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1169,21 +1169,23 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
+ OMPTaskDataTy Data;
+ SmallVector<const Expr *, 4> TaskLHSs;
+ SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Shareds.emplace_back(Ref);
- Privates.emplace_back(*IPriv);
- ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ if (C->getModifier() == OMPC_REDUCTION_task) {
+ Data.ReductionVars.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
@@ -1261,6 +1263,117 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
++IPriv;
++Count;
}
+ if (!Data.ReductionVars.empty()) {
+ Data.IsReductionWithTaskMod = true;
+ Data.IsWorksharingReduction =
+ isOpenMPWorksharingDirective(D.getDirectiveKind());
+ llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
+ const Expr *TaskRedRef = nullptr;
+ switch (D.getDirectiveKind()) {
+ case OMPD_parallel:
+ TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_for:
+ TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_sections:
+ TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_for:
+ TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_master:
+ TaskRedRef =
+ cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_sections:
+ TaskRedRef =
+ cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel:
+ TaskRedRef =
+ cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel_for:
+ TaskRedRef =
+ cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_distribute_parallel_for:
+ TaskRedRef =
+ cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_target_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_simd:
+ case OMPD_for_simd:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_parallel_for_simd:
+ case OMPD_task:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
+ case OMPD_ordered:
+ case OMPD_atomic:
+ case OMPD_teams:
+ case OMPD_target:
+ case OMPD_cancellation_point:
+ case OMPD_cancel:
+ case OMPD_target_data:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
+ case OMPD_distribute:
+ case OMPD_target_update:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_declare_simd:
+ case OMPD_requires:
+ case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
+ case OMPD_unknown:
+ llvm_unreachable("Enexpected directive with task reductions.");
+ }
+
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
+ EmitVarDecl(*VD);
+ EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
+ /*Volatile=*/false, TaskRedRef->getType());
+ }
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
@@ -1272,14 +1385,22 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
+ bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ IsReductionWithTaskMod =
+ IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
+ if (IsReductionWithTaskMod) {
+ CGM.getOpenMPRuntime().emitTaskReductionFini(
+ *this, D.getBeginLoc(),
+ isOpenMPWorksharingDirective(D.getDirectiveKind()));
+ }
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
@@ -3382,21 +3503,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, S.getBeginLoc(), LHSs, RHSs, Data);
@@ -3776,21 +3889,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
llvm::Value *ReductionDesc =
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 7364af70a856..82027b0e8d8a 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -519,11 +519,15 @@ class DSAStackTy {
getTopMostTaskgroupReductionData(const ValueDecl *D, SourceRange &SR,
const Expr *&ReductionRef,
Expr *&TaskgroupDescriptor) const;
- /// Return reduction reference expression for the current taskgroup.
+ /// Return reduction reference expression for the current taskgroup or
+ /// parallel/worksharing directives with task reductions.
Expr *getTaskgroupReductionRef() const {
- assert(getTopOfStack().Directive == OMPD_taskgroup &&
- "taskgroup reference expression requested for non taskgroup "
- "directive.");
+ assert((getTopOfStack().Directive == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+ isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+ !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
+ "taskgroup reference expression requested for non taskgroup or "
+ "parallel/worksharing directive.");
return getTopOfStack().TaskgroupReductionRef;
}
/// Checks if the given \p VD declaration is actually a taskgroup reduction
@@ -1351,7 +1355,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
"Additional reduction info may be specified only for reduction items.");
ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
assert(ReductionData.ReductionRange.isInvalid() &&
- getTopOfStack().Directive == OMPD_taskgroup &&
+ (getTopOfStack().Directive == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+ isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+ !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"Additional reduction info may be specified only once for reduction "
"items.");
ReductionData.set(BOK, SR);
@@ -1374,7 +1381,10 @@ void DSAStackTy::addTaskgroupReductionData(const ValueDecl *D, SourceRange SR,
"Additional reduction info may be specified only for reduction items.");
ReductionData &ReductionData = getTopOfStack().ReductionMap[D];
assert(ReductionData.ReductionRange.isInvalid() &&
- getTopOfStack().Directive == OMPD_taskgroup &&
+ (getTopOfStack().Directive == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(getTopOfStack().Directive) ||
+ isOpenMPWorksharingDirective(getTopOfStack().Directive)) &&
+ !isOpenMPSimdDirective(getTopOfStack().Directive))) &&
"Additional reduction info may be specified only once for reduction "
"items.");
ReductionData.set(ReductionRef, SR);
@@ -1395,7 +1405,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
const DSAInfo &Data = I->SharingMap.lookup(D);
- if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
+ if (Data.Attributes != OMPC_reduction ||
+ Data.Modifier != OMPC_REDUCTION_task)
continue;
const ReductionData &ReductionData = I->ReductionMap.lookup(D);
if (!ReductionData.ReductionOp ||
@@ -1407,8 +1418,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
- return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
- Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
+ return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
+ Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
}
return DSAVarData();
}
@@ -1420,7 +1431,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
assert(!isStackEmpty() && "Data-sharing attributes stack is empty.");
for (const_iterator I = begin() + 1, E = end(); I != E; ++I) {
const DSAInfo &Data = I->SharingMap.lookup(D);
- if (Data.Attributes != OMPC_reduction || I->Directive != OMPD_taskgroup)
+ if (Data.Attributes != OMPC_reduction ||
+ Data.Modifier != OMPC_REDUCTION_task)
continue;
const ReductionData &ReductionData = I->ReductionMap.lookup(D);
if (!ReductionData.ReductionOp ||
@@ -1432,8 +1444,8 @@ const DSAStackTy::DSAVarData DSAStackTy::getTopMostTaskgroupReductionData(
"expression for the descriptor is not "
"set.");
TaskgroupDescriptor = I->TaskgroupReductionRef;
- return DSAVarData(OMPD_taskgroup, OMPC_reduction, Data.RefExpr.getPointer(),
- Data.PrivateCopy, I->DefaultAttrLoc, /*Modifier=*/0);
+ return DSAVarData(I->Directive, OMPC_reduction, Data.RefExpr.getPointer(),
+ Data.PrivateCopy, I->DefaultAttrLoc, OMPC_REDUCTION_task);
}
return DSAVarData();
}
@@ -2229,7 +2241,12 @@ OpenMPClauseKind Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level,
// Consider taskgroup reduction descriptor variable a private
// to avoid possible capture in the region.
(DSAStack->hasExplicitDirective(
- [](OpenMPDirectiveKind K) { return K == OMPD_taskgroup; },
+ [](OpenMPDirectiveKind K) {
+ return K == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(K) ||
+ isOpenMPWorksharingDirective(K)) &&
+ !isOpenMPSimdDirective(K));
+ },
Level) &&
DSAStack->isTaskgroupReductionRef(D, Level)))
? OMPC_private
@@ -4193,7 +4210,8 @@ StmtResult Sema::ActOnOpenMPRegionEnd(StmtResult S,
SmallVector<const OMPClauseWithPreInit *, 4> PICs;
// This is required for proper codegen.
for (OMPClause *Clause : Clauses) {
- if (isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
+ if (!LangOpts.OpenMPSimd &&
+ isOpenMPTaskingDirective(DSAStack->getCurrentDirective()) &&
Clause->getClauseKind() == OMPC_in_reduction) {
// Capture taskgroup task_reduction descriptors inside the tasking regions
// with the corresponding in_reduction items.
@@ -6137,6 +6155,7 @@ StmtResult Sema::ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPParallelDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(),
DSAStack->isCancelRegion());
}
@@ -8620,8 +8639,9 @@ Sema::ActOnOpenMPForDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
}
setFunctionHasBranchProtectedScope();
- return OMPForDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
- Clauses, AStmt, B, DSAStack->isCancelRegion());
+ return OMPForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPForSimdDirective(
@@ -8698,6 +8718,7 @@ StmtResult Sema::ActOnOpenMPSectionsDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPSectionsDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(),
DSAStack->isCancelRegion());
}
@@ -8858,9 +8879,9 @@ StmtResult Sema::ActOnOpenMPParallelForDirective(
}
setFunctionHasBranchProtectedScope();
- return OMPParallelForDirective::Create(Context, StartLoc, EndLoc,
- NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ return OMPParallelForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPParallelForSimdDirective(
@@ -8924,8 +8945,9 @@ Sema::ActOnOpenMPParallelMasterDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
- return OMPParallelMasterDirective::Create(Context, StartLoc, EndLoc, Clauses,
- AStmt);
+ return OMPParallelMasterDirective::Create(
+ Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef());
}
StmtResult
@@ -8964,7 +8986,8 @@ Sema::ActOnOpenMPParallelSectionsDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
return OMPParallelSectionsDirective::Create(
- Context, StartLoc, EndLoc, Clauses, AStmt, DSAStack->isCancelRegion());
+ Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
/// detach and mergeable clauses are mutially exclusive, check for it.
@@ -10016,8 +10039,9 @@ Sema::ActOnOpenMPTargetParallelDirective(ArrayRef<OMPClause *> Clauses,
setFunctionHasBranchProtectedScope();
- return OMPTargetParallelDirective::Create(Context, StartLoc, EndLoc, Clauses,
- AStmt, DSAStack->isCancelRegion());
+ return OMPTargetParallelDirective::Create(
+ Context, StartLoc, EndLoc, Clauses, AStmt,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
@@ -10069,9 +10093,9 @@ StmtResult Sema::ActOnOpenMPTargetParallelForDirective(
}
setFunctionHasBranchProtectedScope();
- return OMPTargetParallelForDirective::Create(Context, StartLoc, EndLoc,
- NestedLoopCount, Clauses, AStmt,
- B, DSAStack->isCancelRegion());
+ return OMPTargetParallelForDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
/// Check for existence of a map clause in the list of clauses.
@@ -10683,7 +10707,7 @@ StmtResult Sema::ActOnOpenMPDistributeParallelForDirective(
setFunctionHasBranchProtectedScope();
return OMPDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPDistributeParallelForSimdDirective(
@@ -11124,7 +11148,7 @@ StmtResult Sema::ActOnOpenMPTeamsDistributeParallelForDirective(
return OMPTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetTeamsDirective(ArrayRef<OMPClause *> Clauses,
@@ -11253,7 +11277,7 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForDirective(
setFunctionHasBranchProtectedScope();
return OMPTargetTeamsDistributeParallelForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
}
StmtResult Sema::ActOnOpenMPTargetTeamsDistributeParallelForSimdDirective(
@@ -15103,9 +15127,17 @@ static bool actOnOMPReductionKindClause(
}
// All reduction items are still marked as reduction (to do not increase
// code base size).
- Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref,
- RD.RedModifier);
- if (CurrDir == OMPD_taskgroup) {
+ unsigned Modifier = RD.RedModifier;
+ // Consider task_reductions as reductions with task modifier. Required for
+ // correct analysis of in_reduction clauses.
+ if (CurrDir == OMPD_taskgroup && ClauseKind == OMPC_task_reduction)
+ Modifier = OMPC_REDUCTION_task;
+ Stack->addDSA(D, RefExpr->IgnoreParens(), OMPC_reduction, Ref, Modifier);
+ if (Modifier == OMPC_REDUCTION_task &&
+ (CurrDir == OMPD_taskgroup ||
+ ((isOpenMPParallelDirective(CurrDir) ||
+ isOpenMPWorksharingDirective(CurrDir)) &&
+ !isOpenMPSimdDirective(CurrDir)))) {
if (DeclareReductionRef.isUsable())
Stack->addTaskgroupReductionData(D, ReductionIdRange,
DeclareReductionRef.get());
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 2c91af31ee14..ea21d5e33c2e 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2308,6 +2308,7 @@ void ASTStmtReader::VisitOMPParallelDirective(OMPParallelDirective *D) {
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2317,6 +2318,7 @@ void ASTStmtReader::VisitOMPSimdDirective(OMPSimdDirective *D) {
void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2329,6 +2331,7 @@ void ASTStmtReader::VisitOMPSectionsDirective(OMPSectionsDirective *D) {
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2360,6 +2363,7 @@ void ASTStmtReader::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
void ASTStmtReader::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2374,6 +2378,7 @@ void ASTStmtReader::VisitOMPParallelMasterDirective(
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
}
void ASTStmtReader::VisitOMPParallelSectionsDirective(
@@ -2382,6 +2387,7 @@ void ASTStmtReader::VisitOMPParallelSectionsDirective(
// The NumClauses field was read in ReadStmtFromStream.
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2489,12 +2495,14 @@ void ASTStmtReader::VisitOMPTargetParallelDirective(
VisitStmt(D);
Record.skipInts(1);
VisitOMPExecutableDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readBool());
}
void ASTStmtReader::VisitOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2564,6 +2572,7 @@ void ASTStmtReader::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
void ASTStmtReader::VisitOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2604,6 +2613,7 @@ void ASTStmtReader::VisitOMPTeamsDistributeParallelForSimdDirective(
void ASTStmtReader::VisitOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
@@ -2622,6 +2632,7 @@ void ASTStmtReader::VisitOMPTargetTeamsDistributeDirective(
void ASTStmtReader::VisitOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ D->setTaskReductionRefExpr(Record.readSubExpr());
D->setHasCancel(Record.readInt());
}
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index fc96cffcb2ec..7c450bccc709 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2195,6 +2195,7 @@ void ASTStmtWriter::VisitOMPParallelDirective(OMPParallelDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_DIRECTIVE;
}
@@ -2206,6 +2207,7 @@ void ASTStmtWriter::VisitOMPSimdDirective(OMPSimdDirective *D) {
void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_FOR_DIRECTIVE;
}
@@ -2219,6 +2221,7 @@ void ASTStmtWriter::VisitOMPSectionsDirective(OMPSectionsDirective *D) {
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_SECTIONS_DIRECTIVE;
}
@@ -2253,6 +2256,7 @@ void ASTStmtWriter::VisitOMPCriticalDirective(OMPCriticalDirective *D) {
void ASTStmtWriter::VisitOMPParallelForDirective(OMPParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_FOR_DIRECTIVE;
}
@@ -2268,6 +2272,7 @@ void ASTStmtWriter::VisitOMPParallelMasterDirective(
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Code = serialization::STMT_OMP_PARALLEL_MASTER_DIRECTIVE;
}
@@ -2276,6 +2281,7 @@ void ASTStmtWriter::VisitOMPParallelSectionsDirective(
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_PARALLEL_SECTIONS_DIRECTIVE;
}
@@ -2336,6 +2342,7 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective(
VisitStmt(D);
Record.push_back(D->getNumClauses());
VisitOMPExecutableDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.writeBool(D->hasCancel());
Code = serialization::STMT_OMP_TARGET_PARALLEL_DIRECTIVE;
}
@@ -2343,6 +2350,7 @@ void ASTStmtWriter::VisitOMPTargetParallelDirective(
void ASTStmtWriter::VisitOMPTargetParallelForDirective(
OMPTargetParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TARGET_PARALLEL_FOR_DIRECTIVE;
}
@@ -2476,6 +2484,7 @@ void ASTStmtWriter::VisitOMPTargetUpdateDirective(OMPTargetUpdateDirective *D) {
void ASTStmtWriter::VisitOMPDistributeParallelForDirective(
OMPDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
@@ -2524,6 +2533,7 @@ void ASTStmtWriter::VisitOMPTeamsDistributeParallelForSimdDirective(
void ASTStmtWriter::VisitOMPTeamsDistributeParallelForDirective(
OMPTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
@@ -2544,6 +2554,7 @@ void ASTStmtWriter::VisitOMPTargetTeamsDistributeDirective(
void ASTStmtWriter::VisitOMPTargetTeamsDistributeParallelForDirective(
OMPTargetTeamsDistributeParallelForDirective *D) {
VisitOMPLoopDirective(D);
+ Record.AddStmt(D->getTaskReductionRefExpr());
Record.push_back(D->hasCancel() ? 1 : 0);
Code = serialization::STMT_OMP_TARGET_TEAMS_DISTRIBUTE_PARALLEL_FOR_DIRECTIVE;
}
diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..971e9be8534b
--- /dev/null
+++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target teams
+#pragma omp distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..ea8fc55d9cb2
--- /dev/null
+++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel
+#pragma omp for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..b4f4f83ec955
--- /dev/null
+++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..0f8366fa95e3
--- /dev/null
+++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel master reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..5e04aa8c1ec2
--- /dev/null
+++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..867eb45a1332
--- /dev/null
+++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel sections reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..be67a2a17400
--- /dev/null
+++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp parallel
+#pragma omp sections reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..5c5ea6b90d52
--- /dev/null
+++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..2fc49d44c1e9
--- /dev/null
+++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp
@@ -0,0 +1,128 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target parallel reduction(task, +: argc, argv[0:10][0:argc])
+ {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x %struct.kmp_taskred_input_t],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x %struct.kmp_taskred_input_t], [2 x %struct.kmp_taskred_input_t]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds %struct.kmp_taskred_input_t, %struct.kmp_taskred_input_t* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x %struct.kmp_taskred_input_t]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 0)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..06c0f8744e8c
--- /dev/null
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,129 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
new file mode 100644
index 000000000000..194999f8cbb0
--- /dev/null
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-linux -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -debug-info-kind=limited -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+// CHECK: @main
+int main(int argc, char **argv) {
+#pragma omp target
+#pragma omp teams distribute parallel for reduction(task, +: argc, argv[0:10][0:argc])
+ for (long long i = 0; i < 10; ++i) {
+#pragma omp task in_reduction(+: argc, argv[0:10][0:argc])
+ ;
+ }
+}
+
+// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @{{.+}}, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i8***)* [[OUTLINED:@.+]] to void (i32*, i32*, ...)*), i64 %{{.+}}, i64 %{{.+}}, i32* %{{.+}}, i8*** %{{.+}})
+
+// CHECK: define internal void [[OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i64 %{{.+}}, i64 %{{.+}}, i32* {{.+}}, i8*** {{.+}})
+// CHECK: alloca i32,
+// CHECK: [[ARGC_FP_ADDR:%.+]] = alloca i32,
+// CHECK: [[TR:%.+]] = alloca [2 x [[TASKRED_TY:%struct.kmp_taskred_input_t.*]]],
+// CHECK: [[TG:%.+]] = alloca i8*,
+
+// Init firstprivate copy of argc
+// CHECK: store i32 0, i32* [[ARGC_FP_ADDR]],
+// CHECK: [[ARGV_FP_ADDR:%.+]] = alloca i8, i64 [[SIZE:%.+]],
+// CHECK: store i64 [[SIZE]], i64* [[SIZE_ADDR:%.+]],
+
+// Init firstprivate copy of argv[0:10][0:argc]
+// CHECK: [[END:%.+]] = getelementptr i8, i8* [[ARGV_FP_ADDR]], i64 [[SIZE]]
+// CHECK: [[EMPTY:%.+]] = icmp eq i8* [[ARGV_FP_ADDR]], [[END]]
+// CHECK: br i1 [[EMPTY]], label %[[DONE:.+]], label %[[INIT:.+]]
+// CHECK: [[INIT]]:
+// CHECK: [[EL:%.+]] = phi i8* [ [[ARGV_FP_ADDR]], %{{.+}} ], [ [[NEXT_EL:%.+]], %[[INIT]] ]
+// CHECK: store i8 0, i8* [[EL]],
+// CHECK: [[NEXT_EL:%.+]] = getelementptr i8, i8* [[EL]], i32 1
+// CHECK: [[FINISHED:%.+]] = icmp eq i8* [[NEXT_EL]], [[END]]
+// CHECK: br i1 [[FINISHED]], label %[[DONE]], label %[[INIT]]
+// CHECK: [[DONE]]:
+
+// Register task reduction.
+// CHECK: [[TR0_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 0
+// CHECK: [[TR0_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 0
+// CHECK: [[BC:%.+]] = bitcast i32* [[ARGC_FP_ADDR]] to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_SHARED_ADDR]],
+// CHECK: [[TR0_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 1
+// CHECK: [[BC:%.+]] = bitcast i32* %{{.+}} to i8*
+// CHECK: store i8* [[BC]], i8** [[TR0_ORIG_ADDR]],
+// CHECK: [[TR0_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 2
+// CHECK: store i64 4, i64* [[TR0_SIZE_ADDR]],
+// CHECK: [[TR0_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_INIT:@.+]] to i8*), i8** [[TR0_INIT_ADDR]],
+// CHECK: [[TR0_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR0_FINI_ADDR]],
+// CHECK: [[TR0_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGC_COMB:@.+]] to i8*), i8** [[TR0_COMB_ADDR]],
+// CHECK: [[TR0_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR0_ADDR]], i32 0, i32 6
+// CHECK: [[BC:%.+]] = bitcast i32* [[TR0_FLAGS_ADDR]] to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}[[BC]], i8 0, i64 4, i1 false)
+// CHECK: [[TR1_ADDR:%.+]] = getelementptr inbounds [2 x [[TASKRED_TY]]], [2 x [[TASKRED_TY]]]* [[TR]], i64 0, i64 1
+// CHECK: [[TR1_SHARED_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 0
+// CHECK: store i8* [[ARGV_FP_ADDR]], i8** [[TR1_SHARED_ADDR]],
+// CHECK: [[TR1_ORIG_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 1
+// CHECK: store i8* %{{.+}}, i8** [[TR1_ORIG_ADDR]],
+// CHECK: [[TR1_SIZE_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 2
+// CHECK: store i64 %{{.+}}, i64* [[TR1_SIZE_ADDR]],
+// CHECK: [[TR1_INIT_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 3
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_INIT:@.+]] to i8*), i8** [[TR1_INIT_ADDR]],
+// CHECK: [[TR1_FINI_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 4
+// CHECK: store i8* null, i8** [[TR1_FINI_ADDR]],
+// CHECK: [[TR1_COMB_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 5
+// CHECK: store i8* bitcast (void (i8*, i8*)* [[ARGV_COMB:@.+]] to i8*), i8** [[TR1_COMB_ADDR]],
+// CHECK: [[TR1_FLAGS_ADDR:%.+]] = getelementptr inbounds [[TASKRED_TY]], [[TASKRED_TY]]* [[TR1_ADDR]], i32 0, i32 6
+// CHECK: store i32 1, i32* [[TR1_FLAGS_ADDR]],
+// CHECK: [[BC:%.+]] = bitcast [2 x [[TASKRED_TY]]]* [[TR]] to i8*
+// CHECK: [[TG_VAL:%.+]] = call i8* @__kmpc_taskred_modifier_init(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i32 2, i8* [[BC]])
+// CHECK: store i8* [[TG_VAL]], i8** [[TG]],
+
+// CHECK: [[PTR:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1, i64 48, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TASK_TY:%.+]]*)* [[TASK:@.+]] to i32 (i32, i8*)*))
+// CHECK: [[TASK_DATA_ADDR:%.+]] = bitcast i8* [[PTR]] to [[TASK_TY]]*
+// CHECK: [[PRIVATES_ADDR:%.+]] = getelementptr inbounds [[TASK_TY]], [[TASK_TY]]* [[TASK_DATA_ADDR]], i32 0, i32 1
+// CHECK: [[TG_PRIV_ADDR:%.+]] = getelementptr inbounds [[TASK_PRIVATES_TY:%.+]], %{{.+}}* [[PRIVATES_ADDR]], i32 0, i32 0
+// CHECK: [[TG_VAL:%.+]] = load i8*, i8** [[TG]],
+// CHECK: store i8* [[TG_VAL]], i8** [[TG_PRIV_ADDR]],
+
+// CHECK: call i32 @__kmpc_omp_task(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[PTR]])
+
+// CHECK: call void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 1)
+// CHECK: call i32 @__kmpc_reduce_nowait(
+
+// CHECK: define internal void [[ARGC_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: store i32 0, i32* %{{.+}},
+
+// CHECK: define internal void [[ARGC_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: store i32 [[ADD]], i32* %{{.+}},
+
+// CHECK: define internal void [[ARGV_INIT]](i8* noalias %{{.+}}, i8* noalias %{{.+}})
+// CHECK: phi i8*
+// CHECK: store i8 0, i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal void [[ARGV_COMB]](i8* %{{.+}}, i8* %{{.+}})
+// CHECK: phi i8*
+// CHECK: [[ADD:%.+]] = add nsw i32 %{{.+}}, %{{.+}}
+// CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+// CHECK: store i8 [[CONV]], i8* [[EL:%.+]],
+// CHECK: getelementptr i8, i8* [[EL]], i32 1
+
+// CHECK: define internal {{.*}}i32 [[TASK]](i32 {{.+}}, [[TASK_TY]]* {{.+}})
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGC_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR:%.+]],
+// CHECK-DAG: [[ARGC_REF]] = bitcast i32* [[ARGC_ADDR:%.+]] to i8*
+// CHECK-DAG: [[ARGC_ADDR]] = load i32*, i32** [[ARGC_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGC_ADDR_REF]] = getelementptr inbounds [[CAPS_TY:%.+]], %{{.+}}* [[CAP:%.+]], i32 0, i32 1
+// CHECK-DAG: call i8* @__kmpc_task_reduction_get_th_data(i32 %{{.+}}, i8* [[TG:%.+]], i8* [[ARGV_REF:%.+]])
+// CHECK_DAG: [[TG]] = load i8*, i8** [[TG_ADDR]],
+// CHECK-DAG: [[ARGV_REF]] = load i8*, i8** [[ARGV_ADDR:%.+]],
+// CHECK-DAG: [[ARGV_ADDR]] = load i8**, i8*** [[ARGV_ADDR_REF:%.+]],
+// CHECK-DAG: [[ARGV_ADDR_REF:%.+]] = load i8***, i8**** [[ARGV:%.+]],
+// CHECK-DAG: [[ARGV]] = getelementptr inbounds [[CAPS_TY]], [[CAPS_TY]]* [[CAP]], i32 0, i32 2
+
+#endif
More information about the cfe-commits
mailing list