[clang] 4097a24 - [Clang][OpenMP] Support for Code Generation of loop bind clause
Sandeep Kosuri via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 7 06:00:12 PDT 2023
Author: Sunil Kuravinakop
Date: 2023-08-07T07:58:59-05:00
New Revision: 4097a24584121dba562d471fab97d3dfec1b5bff
URL: https://github.com/llvm/llvm-project/commit/4097a24584121dba562d471fab97d3dfec1b5bff
DIFF: https://github.com/llvm/llvm-project/commit/4097a24584121dba562d471fab97d3dfec1b5bff.diff
LOG: [Clang][OpenMP] Support for Code Generation of loop bind clause
Added:
clang/test/OpenMP/loop_bind_codegen.cpp
clang/test/OpenMP/loop_bind_enclosed.cpp
clang/test/OpenMP/loop_bind_messages.cpp
Modified:
clang/include/clang/AST/StmtOpenMP.h
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Sema/Sema.h
clang/lib/AST/StmtOpenMP.cpp
clang/lib/Sema/SemaOpenMP.cpp
clang/lib/Sema/TreeTransform.h
clang/lib/Serialization/ASTReaderStmt.cpp
clang/lib/Serialization/ASTWriterStmt.cpp
clang/test/OpenMP/generic_loop_ast_print.cpp
clang/test/OpenMP/generic_loop_codegen.cpp
clang/test/OpenMP/nested_loop_codegen.cpp
clang/test/PCH/pragma-loop.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 2d37fdbf4ca8fb..20cd198f5f0cc5 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -281,6 +281,15 @@ class OMPExecutableDirective : public Stmt {
return Data->getClauses();
}
+ /// Was this directive mapped from another directive?
+ /// e.g. 1) omp loop bind(parallel) is mapped to OMPD_for
+ /// 2) omp loop bind(teams) is mapped to OMPD_distribute
+ /// 3) omp loop bind(thread) is mapped to OMPD_simd
+ /// It was necessary to note it down in the Directive because of
+ /// clang::TreeTransform::TransformOMPExecutableDirective() pass in
+ /// the frontend.
+ OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown;
+
protected:
/// Data, associated with the directive.
OMPChildren *Data = nullptr;
@@ -345,6 +354,10 @@ class OMPExecutableDirective : public Stmt {
return Inst;
}
+ void setMappedDirective(OpenMPDirectiveKind MappedDirective) {
+ PrevMappedDirective = MappedDirective;
+ }
+
public:
/// Iterates over expressions/statements used in the construct.
class used_clauses_child_iterator
@@ -598,6 +611,8 @@ class OMPExecutableDirective : public Stmt {
"Expected directive with the associated statement.");
return Data->getRawStmt();
}
+
+ OpenMPDirectiveKind getMappedDirective() const { return PrevMappedDirective; }
};
/// This represents '#pragma omp parallel' directive.
@@ -1604,7 +1619,8 @@ class OMPSimdDirective : public OMPLoopDirective {
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt,
- const HelperExprs &Exprs);
+ const HelperExprs &Exprs,
+ OpenMPDirectiveKind ParamPrevMappedDirective);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -1682,7 +1698,8 @@ class OMPForDirective : public OMPLoopDirective {
SourceLocation EndLoc, unsigned CollapsedNum,
ArrayRef<OMPClause *> Clauses,
Stmt *AssociatedStmt, const HelperExprs &Exprs,
- Expr *TaskRedRef, bool HasCancel);
+ Expr *TaskRedRef, bool HasCancel,
+ OpenMPDirectiveKind ParamPrevMappedDirective);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
@@ -4406,7 +4423,8 @@ class OMPDistributeDirective : public OMPLoopDirective {
static OMPDistributeDirective *
Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
- Stmt *AssociatedStmt, const HelperExprs &Exprs);
+ Stmt *AssociatedStmt, const HelperExprs &Exprs,
+ OpenMPDirectiveKind ParamPrevMappedDirective);
/// Creates an empty directive with the place
/// for \a NumClauses clauses.
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 4979f9f86d236d..5e0aca3c12d3d6 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9869,6 +9869,11 @@ def err_break_not_in_loop_or_switch : Error<
def warn_loop_ctrl_binds_to_inner : Warning<
"'%0' is bound to current loop, GCC binds it to the enclosing loop">,
InGroup<GccCompat>;
+def err_omp_bind_required_on_loop : Error<
+ "expected 'bind' clause for 'loop' construct without an enclosing OpenMP "
+ "construct">;
+def err_omp_loop_reduction_clause : Error<
+ "'reduction' clause not allowed with '#pragma omp loop bind(teams)'">;
def warn_break_binds_to_switch : Warning<
"'break' is bound to loop, GCC binds it to switch">,
InGroup<GccCompat>;
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 44bd3c4cf3a665..6404cfce00900d 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11175,6 +11175,23 @@ class Sema final {
/// All `omp assumes` we encountered so far.
SmallVector<AssumptionAttr *, 4> OMPAssumeGlobal;
+ /// OMPD_loop is mapped to OMPD_for, OMPD_distribute or OMPD_simd depending
+ /// on the parameter of the bind clause. In the methods for the
+ /// mapped directives, check the parameters of the lastprivate clause.
+ bool checkLastPrivateForMappedDirectives(ArrayRef<OMPClause *> Clauses);
+ /// Depending on the bind clause of OMPD_loop map the directive to new
+ /// directives.
+ /// 1) loop bind(parallel) --> OMPD_for
+ /// 2) loop bind(teams) --> OMPD_distribute
+ /// 3) loop bind(thread) --> OMPD_simd
+ /// This is being handled in Sema instead of Codegen because of the need for
+ /// rigorous semantic checking in the new mapped directives.
+ bool mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
+ ArrayRef<OMPClause *> Clauses,
+ OpenMPBindClauseKind BindKind,
+ OpenMPDirectiveKind &Kind,
+ OpenMPDirectiveKind &PrevMappedDirective);
+
public:
/// The declarator \p D defines a function in the scope \p S which is nested
/// in an `omp begin/end declare variant` scope. In this method we create a
@@ -11470,7 +11487,8 @@ class Sema final {
StmtResult ActOnOpenMPExecutableDirective(
OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
- Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc);
+ Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc,
+ OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown);
/// Called on well-formed '\#pragma omp parallel' after parsing
/// of the associated statement.
StmtResult ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index a544732bb4c219..5a1001d95b552c 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -297,11 +297,10 @@ OMPParallelDirective *OMPParallelDirective::CreateEmpty(const ASTContext &C,
/*NumChildren=*/1);
}
-OMPSimdDirective *
-OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
- SourceLocation EndLoc, unsigned CollapsedNum,
- ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs) {
+OMPSimdDirective *OMPSimdDirective::Create(
+ const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+ unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+ const HelperExprs &Exprs, OpenMPDirectiveKind ParamPrevMappedDirective) {
auto *Dir = createDirective<OMPSimdDirective>(
C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_simd),
StartLoc, EndLoc, CollapsedNum);
@@ -321,6 +320,7 @@ OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setMappedDirective(ParamPrevMappedDirective);
return Dir;
}
@@ -336,7 +336,8 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C,
OMPForDirective *OMPForDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
+ const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel,
+ OpenMPDirectiveKind ParamPrevMappedDirective) {
auto *Dir = createDirective<OMPForDirective>(
C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_for) + 1,
StartLoc, EndLoc, CollapsedNum);
@@ -366,6 +367,7 @@ OMPForDirective *OMPForDirective::Create(
Dir->setPreInits(Exprs.PreInits);
Dir->setTaskReductionRefExpr(TaskRedRef);
Dir->setHasCancel(HasCancel);
+ Dir->setMappedDirective(ParamPrevMappedDirective);
return Dir;
}
@@ -1515,7 +1517,7 @@ OMPParallelMaskedTaskLoopSimdDirective::CreateEmpty(const ASTContext &C,
OMPDistributeDirective *OMPDistributeDirective::Create(
const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
- const HelperExprs &Exprs) {
+ const HelperExprs &Exprs, OpenMPDirectiveKind ParamPrevMappedDirective) {
auto *Dir = createDirective<OMPDistributeDirective>(
C, Clauses, AssociatedStmt,
numLoopChildren(CollapsedNum, OMPD_distribute), StartLoc, EndLoc,
@@ -1544,6 +1546,7 @@ OMPDistributeDirective *OMPDistributeDirective::Create(
Dir->setDependentInits(Exprs.DependentInits);
Dir->setFinalsConditions(Exprs.FinalsConditions);
Dir->setPreInits(Exprs.PreInits);
+ Dir->setMappedDirective(ParamPrevMappedDirective);
return Dir;
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 305ea24383ca0e..3482cafbc74aa1 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -163,6 +163,10 @@ class DSAStackTy {
SourceLocation DefaultAttrLoc;
DefaultmapInfo DefaultmapMap[OMPC_DEFAULTMAP_unknown];
OpenMPDirectiveKind Directive = OMPD_unknown;
+ /// GenericLoopDirective with bind clause is mapped to other directives,
+ /// like for, distribute and simd. Presently, set MappedDirective to
+ /// OMPLoop. This may also be used in a similar way for other constructs.
+ OpenMPDirectiveKind MappedDirective = OMPD_unknown;
DeclarationNameInfo DirectiveName;
Scope *CurScope = nullptr;
DeclContext *Context = nullptr;
@@ -636,6 +640,24 @@ class DSAStackTy {
const SharingMapTy *Top = getTopOfStackOrNull();
return Top ? Top->Directive : OMPD_unknown;
}
+ OpenMPDirectiveKind getMappedDirective() const {
+ const SharingMapTy *Top = getTopOfStackOrNull();
+ return Top ? Top->MappedDirective : OMPD_unknown;
+ }
+ void setCurrentDirective(OpenMPDirectiveKind NewDK) {
+ SharingMapTy *Top = getTopOfStackOrNull();
+ assert(Top &&
+ "Before calling setCurrentDirective Top of Stack not to be NULL.");
+ // Assign the argument NewDK to Directive.
+ Top->Directive = NewDK;
+ }
+ void setMappedDirective(OpenMPDirectiveKind NewDK) {
+ SharingMapTy *Top = getTopOfStackOrNull();
+ assert(Top &&
+ "Before calling setMappedDirective Top of Stack not to be NULL.");
+ // Store the old into MappedDirective & assign argument NewDK to Directive.
+ Top->MappedDirective = NewDK;
+ }
/// Returns directive kind at specified level.
OpenMPDirectiveKind getDirective(unsigned Level) const {
assert(!isStackEmpty() && "No directive at specified level.");
@@ -5679,7 +5701,8 @@ static CapturedStmt *buildDistanceFunc(Sema &Actions, QualType LogicalTy,
// the step size, rounding-up the effective upper bound ensures that the
// last iteration is included.
// Note that the rounding-up may cause an overflow in a temporary that
- // could be avoided, but would have occurred in a C-style for-loop as well.
+ // could be avoided, but would have occurred in a C-style for-loop as
+ // well.
Expr *Divisor = BuildVarRef(NewStep);
if (Rel == BO_GE || Rel == BO_GT)
Divisor =
@@ -6086,10 +6109,95 @@ processImplicitMapsWithDefaultMappers(Sema &S, DSAStackTy *Stack,
}
}
+bool Sema::mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
+ ArrayRef<OMPClause *> Clauses,
+ OpenMPBindClauseKind BindKind,
+ OpenMPDirectiveKind &Kind,
+ OpenMPDirectiveKind &PrevMappedDirective) {
+
+ bool UseClausesWithoutBind = false;
+
+ // Restricting to "#pragma omp loop bind"
+ if (getLangOpts().OpenMP >= 50 && Kind == OMPD_loop) {
+ if (BindKind == OMPC_BIND_unknown) {
+ // Setting the enclosing teams or parallel construct for the loop
+ // directive without bind clause.
+ BindKind = OMPC_BIND_thread; // Default bind(thread) if binding is unknown
+
+ const OpenMPDirectiveKind ParentDirective =
+ DSAStack->getParentDirective();
+ if (ParentDirective == OMPD_unknown) {
+ Diag(DSAStack->getDefaultDSALocation(),
+ diag::err_omp_bind_required_on_loop);
+ } else if (ParentDirective == OMPD_parallel ||
+ ParentDirective == OMPD_target_parallel) {
+ BindKind = OMPC_BIND_parallel;
+ } else if (ParentDirective == OMPD_teams ||
+ ParentDirective == OMPD_target_teams) {
+ BindKind = OMPC_BIND_teams;
+ }
+ } else {
+ // bind clause is present, so we should set flag indicating to only
+ // use the clauses that aren't the bind clause for the new directive that
+ // loop is lowered to.
+ UseClausesWithoutBind = true;
+ }
+
+ for (OMPClause *C : Clauses) {
+ // Spec restriction : bind(teams) and reduction not permitted.
+ if (BindKind == OMPC_BIND_teams &&
+ C->getClauseKind() == llvm::omp::Clause::OMPC_reduction)
+ Diag(DSAStack->getDefaultDSALocation(),
+ diag::err_omp_loop_reduction_clause);
+
+ // A new Vector ClausesWithoutBind, which does not contain the bind
+ // clause, for passing to new directive.
+ if (C->getClauseKind() != llvm::omp::Clause::OMPC_bind)
+ ClausesWithoutBind.push_back(C);
+ }
+
+ switch (BindKind) {
+ case OMPC_BIND_parallel:
+ Kind = OMPD_for;
+ DSAStack->setCurrentDirective(OMPD_for);
+ DSAStack->setMappedDirective(OMPD_loop);
+ PrevMappedDirective = OMPD_loop;
+ break;
+ case OMPC_BIND_teams:
+ Kind = OMPD_distribute;
+ DSAStack->setCurrentDirective(OMPD_distribute);
+ DSAStack->setMappedDirective(OMPD_loop);
+ PrevMappedDirective = OMPD_loop;
+ break;
+ case OMPC_BIND_thread:
+ Kind = OMPD_simd;
+ DSAStack->setCurrentDirective(OMPD_simd);
+ DSAStack->setMappedDirective(OMPD_loop);
+ PrevMappedDirective = OMPD_loop;
+ break;
+ case OMPC_BIND_unknown:
+ break;
+ }
+ } else if (PrevMappedDirective == OMPD_loop) {
+ /// An initial pass after recognizing all the statements is done in the
+ /// Parser when the directive OMPD_loop is mapped to OMPD_for,
+ /// OMPD_distribute or OMPD_simd. A second transform pass with call from
+ /// clang::TreeTransform::TransformOMPExecutableDirective() is done
+ /// with the Directive as one of the above mapped directive without
+ /// the bind clause. Then "PrevMappedDirective" stored in the
+ /// OMPExecutableDirective is accessed and hence this else statement.
+
+ DSAStack->setMappedDirective(OMPD_loop);
+ }
+
+ return UseClausesWithoutBind;
+}
+
StmtResult Sema::ActOnOpenMPExecutableDirective(
OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
- Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) {
+ Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc,
+ OpenMPDirectiveKind PrevMappedDirective) {
StmtResult Res = StmtError();
OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
if (const OMPBindClause *BC =
@@ -6106,10 +6214,21 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
isOpenMPTargetDataManagementDirective(Kind)))
Diag(StartLoc, diag::warn_hip_omp_target_directives);
+ llvm::SmallVector<OMPClause *> ClausesWithoutBind;
+ bool UseClausesWithoutBind = false;
+
+ UseClausesWithoutBind = mapLoopConstruct(ClausesWithoutBind, Clauses,
+ BindKind, Kind, PrevMappedDirective);
+
llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit;
VarsWithInheritedDSAType VarsWithInheritedDSA;
bool ErrorFound = false;
- ClausesWithImplicit.append(Clauses.begin(), Clauses.end());
+ if (getLangOpts().OpenMP >= 50 && UseClausesWithoutBind) {
+ ClausesWithImplicit.append(ClausesWithoutBind.begin(),
+ ClausesWithoutBind.end());
+ } else {
+ ClausesWithImplicit.append(Clauses.begin(), Clauses.end());
+ }
if (AStmt && !CurContext->isDependentContext() && Kind != OMPD_atomic &&
Kind != OMPD_critical && Kind != OMPD_section && Kind != OMPD_master &&
Kind != OMPD_masked && !isOpenMPLoopTransformationDirective(Kind)) {
@@ -9203,9 +9322,13 @@ static bool checkOpenMPIterationSpace(
auto *CXXFor = dyn_cast_or_null<CXXForRangeStmt>(S);
// Ranged for is supported only in OpenMP 5.0.
if (!For && (SemaRef.LangOpts.OpenMP <= 45 || !CXXFor)) {
+ OpenMPDirectiveKind DK = (SemaRef.getLangOpts().OpenMP < 50 ||
+ DSA.getMappedDirective() == OMPD_unknown)
+ ? DKind
+ : DSA.getMappedDirective();
SemaRef.Diag(S->getBeginLoc(), diag::err_omp_not_for)
<< (CollapseLoopCountExpr != nullptr || OrderedLoopCountExpr != nullptr)
- << getOpenMPDirectiveName(DKind) << TotalNestedLoopCount
+ << getOpenMPDirectiveName(DK) << TotalNestedLoopCount
<< (CurrentNestedLoopCount > 0) << CurrentNestedLoopCount;
if (TotalNestedLoopCount > 1) {
if (CollapseLoopCountExpr && OrderedLoopCountExpr)
@@ -10320,6 +10443,24 @@ static bool checkSimdlenSafelenSpecified(Sema &S,
return false;
}
+static bool checkGenericLoopLastprivate(Sema &S, ArrayRef<OMPClause *> Clauses,
+ OpenMPDirectiveKind K,
+ DSAStackTy *Stack);
+
+bool Sema::checkLastPrivateForMappedDirectives(ArrayRef<OMPClause *> Clauses) {
+
+ // Check for syntax of lastprivate
+ // Param of the lastprivate have different meanings in the mapped directives
+ // e.g. "omp loop" Only loop iteration vars are allowed in lastprivate clause
+ // "omp for" lastprivate vars must be shared
+ if (getLangOpts().OpenMP >= 50 &&
+ DSAStack->getMappedDirective() == OMPD_loop &&
+ checkGenericLoopLastprivate(*this, Clauses, OMPD_loop, DSAStack)) {
+ return false;
+ }
+ return true;
+}
+
StmtResult
Sema::ActOnOpenMPSimdDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
SourceLocation StartLoc, SourceLocation EndLoc,
@@ -10327,6 +10468,9 @@ Sema::ActOnOpenMPSimdDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
if (!AStmt)
return StmtError();
+ if (!checkLastPrivateForMappedDirectives(Clauses))
+ return StmtError();
+
assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
OMPLoopBasedDirective::HelperExprs B;
// In presence of clause 'collapse' or 'ordered' with number of loops, it will
@@ -10355,8 +10499,10 @@ Sema::ActOnOpenMPSimdDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
return StmtError();
setFunctionHasBranchProtectedScope();
- return OMPSimdDirective::Create(Context, StartLoc, EndLoc, NestedLoopCount,
- Clauses, AStmt, B);
+ auto *SimdDirective = OMPSimdDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getMappedDirective());
+ return SimdDirective;
}
StmtResult
@@ -10366,6 +10512,9 @@ Sema::ActOnOpenMPForDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
if (!AStmt)
return StmtError();
+ if (!checkLastPrivateForMappedDirectives(Clauses))
+ return StmtError();
+
assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
OMPLoopBasedDirective::HelperExprs B;
// In presence of clause 'collapse' or 'ordered' with number of loops, it will
@@ -10390,10 +10539,11 @@ Sema::ActOnOpenMPForDirective(ArrayRef<OMPClause *> Clauses, Stmt *AStmt,
}
}
- setFunctionHasBranchProtectedScope();
- return OMPForDirective::Create(
+ auto *ForDirective = OMPForDirective::Create(
Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
- DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
+ DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion(),
+ DSAStack->getMappedDirective());
+ return ForDirective;
}
StmtResult Sema::ActOnOpenMPForSimdDirective(
@@ -13940,6 +14090,9 @@ StmtResult Sema::ActOnOpenMPDistributeDirective(
if (!AStmt)
return StmtError();
+ if (!checkLastPrivateForMappedDirectives(Clauses))
+ return StmtError();
+
assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
OMPLoopBasedDirective::HelperExprs B;
// In presence of clause 'collapse' with number of loops, it will
@@ -13955,8 +14108,10 @@ StmtResult Sema::ActOnOpenMPDistributeDirective(
"omp for loop exprs were not built");
setFunctionHasBranchProtectedScope();
- return OMPDistributeDirective::Create(Context, StartLoc, EndLoc,
- NestedLoopCount, Clauses, AStmt, B);
+ auto *DistributeDirective = OMPDistributeDirective::Create(
+ Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
+ DSAStack->getMappedDirective());
+ return DistributeDirective;
}
StmtResult Sema::ActOnOpenMPDistributeParallelForDirective(
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index a73b54b668a415..d99c1d62223720 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1645,14 +1645,15 @@ class TreeTransform {
///
/// By default, performs semantic analysis to build the new statement.
/// Subclasses may override this routine to provide different behavior.
- StmtResult RebuildOMPExecutableDirective(OpenMPDirectiveKind Kind,
- DeclarationNameInfo DirName,
- OpenMPDirectiveKind CancelRegion,
- ArrayRef<OMPClause *> Clauses,
- Stmt *AStmt, SourceLocation StartLoc,
- SourceLocation EndLoc) {
+ StmtResult RebuildOMPExecutableDirective(
+ OpenMPDirectiveKind Kind, DeclarationNameInfo DirName,
+ OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
+ Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc,
+ OpenMPDirectiveKind PrevMappedDirective = OMPD_unknown) {
+
return getSema().ActOnOpenMPExecutableDirective(
- Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc);
+ Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc,
+ PrevMappedDirective);
}
/// Build a new OpenMP 'if' clause.
@@ -8819,7 +8820,8 @@ StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
return getDerived().RebuildOMPExecutableDirective(
D->getDirectiveKind(), DirName, CancelRegion, TClauses,
- AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc());
+ AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc(),
+ D->getMappedDirective());
}
template <typename Derived>
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index 96307c35ad32c0..e9ad324bed04ff 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2325,6 +2325,7 @@ void ASTStmtReader::VisitOMPExecutableDirective(OMPExecutableDirective *E) {
Record.readOMPChildren(E->Data);
E->setLocStart(readSourceLocation());
E->setLocEnd(readSourceLocation());
+ E->setMappedDirective(Record.readEnum<OpenMPDirectiveKind>());
}
void ASTStmtReader::VisitOMPLoopBasedDirective(OMPLoopBasedDirective *D) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 896e24c8a13de6..0c267d35bcf319 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2235,6 +2235,7 @@ void ASTStmtWriter::VisitOMPExecutableDirective(OMPExecutableDirective *E) {
Record.writeOMPChildren(E->Data);
Record.AddSourceLocation(E->getBeginLoc());
Record.AddSourceLocation(E->getEndLoc());
+ Record.writeEnum(E->getMappedDirective());
}
void ASTStmtWriter::VisitOMPLoopBasedDirective(OMPLoopBasedDirective *D) {
diff --git a/clang/test/OpenMP/generic_loop_ast_print.cpp b/clang/test/OpenMP/generic_loop_ast_print.cpp
index e3a1eb673a70a8..29c48ffde80923 100644
--- a/clang/test/OpenMP/generic_loop_ast_print.cpp
+++ b/clang/test/OpenMP/generic_loop_ast_print.cpp
@@ -23,7 +23,7 @@
//PRINT: template <typename T, int C> void templ_foo(T t) {
//PRINT: T j, z;
-//PRINT: #pragma omp loop collapse(C) reduction(+: z) lastprivate(j) bind(thread)
+//PRINT: #pragma omp simd collapse(C) reduction(+: z) lastprivate(j)
//PRINT: for (T i = 0; i < t; ++i)
//PRINT: for (j = 0; j < t; ++j)
//PRINT: z += i + j;
@@ -31,20 +31,19 @@
//DUMP: FunctionTemplateDecl{{.*}}templ_foo
//DUMP: TemplateTypeParmDecl{{.*}}T
//DUMP: NonTypeTemplateParmDecl{{.*}}C
-//DUMP: OMPGenericLoopDirective
+//DUMP: OMPSimdDirective
//DUMP: OMPCollapseClause
//DUMP: DeclRefExpr{{.*}}'C' 'int'
//DUMP: OMPReductionClause
//DUMP: DeclRefExpr{{.*}}'z' 'T'
//DUMP: OMPLastprivateClause
//DUMP: DeclRefExpr{{.*}}'j' 'T'
-//DUMP: OMPBindClause
//DUMP: ForStmt
//DUMP: ForStmt
//PRINT: template<> void templ_foo<int, 2>(int t) {
//PRINT: int j, z;
-//PRINT: #pragma omp loop collapse(2) reduction(+: z) lastprivate(j) bind(thread)
+//PRINT: #pragma omp simd collapse(2) reduction(+: z) lastprivate(j)
//PRINT: for (int i = 0; i < t; ++i)
//PRINT: for (j = 0; j < t; ++j)
//PRINT: z += i + j;
@@ -53,7 +52,7 @@
//DUMP: TemplateArgument type 'int'
//DUMP: TemplateArgument integral 2
//DUMP: ParmVarDecl{{.*}}'int':'int'
-//DUMP: OMPGenericLoopDirective
+//DUMP: OMPSimdDirective
//DUMP: OMPCollapseClause
//DUMP: ConstantExpr{{.*}}'int'
//DUMP: value: Int 2
@@ -61,7 +60,6 @@
//DUMP: DeclRefExpr{{.*}}'z' 'int':'int'
//DUMP: OMPLastprivateClause
//DUMP: DeclRefExpr{{.*}}'j' 'int':'int'
-//DUMP: OMPBindClause
//DUMP: ForStmt
template <typename T, int C>
void templ_foo(T t) {
@@ -82,12 +80,12 @@ void test() {
int aaa[1000];
//PRINT: #pragma omp target teams distribute parallel for map(tofrom: MTX)
- //PRINT: #pragma omp loop
+ //PRINT: #pragma omp simd
//DUMP: OMPTargetTeamsDistributeParallelForDirective
//DUMP: CapturedStmt
//DUMP: ForStmt
//DUMP: CompoundStmt
- //DUMP: OMPGenericLoopDirective
+ //DUMP: OMPSimdDirective
#pragma omp target teams distribute parallel for map(MTX)
for (auto i = 0; i < N; ++i) {
#pragma omp loop
@@ -97,11 +95,11 @@ void test() {
}
//PRINT: #pragma omp target teams
- //PRINT: #pragma omp loop
+ //PRINT: #pragma omp distribute
//DUMP: OMPTargetTeamsDirective
//DUMP: CapturedStmt
//DUMP: ForStmt
- //DUMP: OMPGenericLoopDirective
+ //DUMP: OMPDistributeDirective
#pragma omp target teams
for (int i=0; i<1000; ++i) {
#pragma omp loop
@@ -111,8 +109,8 @@ void test() {
}
int j, z, z1;
- //PRINT: #pragma omp loop collapse(2) private(z) lastprivate(j) order(concurrent) reduction(+: z1) bind(parallel)
- //DUMP: OMPGenericLoopDirective
+ //PRINT: #pragma omp for collapse(2) private(z) lastprivate(j) order(concurrent) reduction(+: z1)
+ //DUMP: OMPForDirective
//DUMP: OMPCollapseClause
//DUMP: IntegerLiteral{{.*}}2
//DUMP: OMPPrivateClause
@@ -122,7 +120,6 @@ void test() {
//DUMP: OMPOrderClause
//DUMP: OMPReductionClause
//DUMP-NEXT: DeclRefExpr{{.*}}'z1'
- //DUMP: OMPBindClause
//DUMP: ForStmt
//DUMP: ForStmt
#pragma omp loop collapse(2) private(z) lastprivate(j) order(concurrent) \
@@ -136,10 +133,9 @@ void test() {
}
//PRINT: #pragma omp target teams
- //PRINT: #pragma omp loop bind(teams)
+ //PRINT: #pragma omp distribute
//DUMP: OMPTargetTeamsDirective
- //DUMP: OMPGenericLoopDirective
- //DUMP: OMPBindClause
+ //DUMP: OMPDistributeDirective
//DUMP: ForStmt
#pragma omp target teams
#pragma omp loop bind(teams)
@@ -147,11 +143,10 @@ void test() {
//PRINT: #pragma omp target
//PRINT: #pragma omp teams
- //PRINT: #pragma omp loop bind(teams)
+ //PRINT: #pragma omp distribute
//DUMP: OMPTargetDirective
//DUMP: OMPTeamsDirective
- //DUMP: OMPGenericLoopDirective
- //DUMP: OMPBindClause
+ //DUMP: OMPDistributeDirective
//DUMP: ForStmt
#pragma omp target
#pragma omp teams
@@ -159,17 +154,6 @@ void test() {
for (auto i = 0; i < N; ++i) { }
}
-//PRINT: void nobindingfunc() {
-//DUMP: FunctionDecl {{.*}}nobindingfunc 'void ()'
-void nobindingfunc()
-{
- //PRINT: #pragma omp loop
- //DUMP: OMPGenericLoopDirective
- //DUMP: ForStmt
- #pragma omp loop
- for (int i=0; i<10; ++i) { }
-}
-
void bar()
{
templ_foo<int,2>(8);
diff --git a/clang/test/OpenMP/generic_loop_codegen.cpp b/clang/test/OpenMP/generic_loop_codegen.cpp
index e73f457ca583cd..f58a71c7fc4e9c 100644
--- a/clang/test/OpenMP/generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/generic_loop_codegen.cpp
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]"
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name tmp2 --version 2
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp %s
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
@@ -20,98 +20,242 @@ void foo(int t) {
z += i+j;
}
#endif
-// IR-LABEL: define {{[^@]+}}@_Z3fooi
+// IR-LABEL: define dso_local void {{[_A-Za-z0-9?@]*}}foo{{[@A-Za-z]*}}
// IR-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
// IR-NEXT: entry:
// IR-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
// IR-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-NEXT: [[Z:%.*]] = alloca i32, align 4
-// IR-NEXT: [[I1:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT: [[TMP2TMP1:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// IR-NEXT: [[I8:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J9:%.*]] = alloca i32, align 4
+// IR-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-NEXT: [[Z13:%.*]] = alloca i32, align 4
// IR-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
-// IR-NEXT: store i32 0, ptr [[I1]], align 4
-// IR-NEXT: br label [[FOR_COND:%.*]]
-// IR: for.cond:
-// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
+// IR-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_ADDR]], align 4
+// IR-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
// IR-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
-// IR-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
-// IR-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
-// IR: for.body:
-// IR-NEXT: store i32 0, ptr [[J]], align 4
-// IR-NEXT: br label [[FOR_COND2:%.*]]
-// IR: for.cond2:
-// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
-// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
-// IR-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
-// IR-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
-// IR: for.body4:
-// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
-// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// IR-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-// IR-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
-// IR-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
-// IR-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
-// IR-NEXT: br label [[FOR_INC:%.*]]
-// IR: for.inc:
-// IR-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
-// IR-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
-// IR-NEXT: store i32 [[INC]], ptr [[J]], align 4
-// IR-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
-// IR: for.end:
-// IR-NEXT: br label [[FOR_INC6:%.*]]
-// IR: for.inc6:
-// IR-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
-// IR-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
-// IR-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
-// IR-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
-// IR: for.end8:
+// IR-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
+// IR-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP3]], 0
+// IR-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
+// IR-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
+// IR-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
+// IR-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// IR-NEXT: store i32 0, ptr [[I8]], align 4
+// IR-NEXT: store i32 0, ptr [[J9]], align 4
+// IR-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// IR-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[SIMD_IF_END:%.*]]
+// IR: land.lhs.true:
+// IR-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP5]]
+// IR-NEXT: br i1 [[CMP10]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END]]
+// IR: simd.if.then:
+// IR-NEXT: store i64 0, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT: store i32 0, ptr [[Z13]], align 4
+// IR-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR: omp.inner.for.cond:
+// IR-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// IR-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP7]], 1
+// IR-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP6]], [[ADD]]
+// IR-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR: omp.inner.for.body:
+// IR-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP9]], 0
+// IR-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP8]], [[CONV18]]
+// IR-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-NEXT: [[ADD21:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-NEXT: [[CONV22:%.*]] = trunc i64 [[ADD21]] to i32
+// IR-NEXT: store i32 [[CONV22]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP12]], 0
+// IR-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
+// IR-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]]
+// IR-NEXT: [[CONV26:%.*]] = sext i32 [[MUL25]] to i64
+// IR-NEXT: [[DIV27:%.*]] = sdiv i64 [[TMP11]], [[CONV26]]
+// IR-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[SUB28:%.*]] = sub nsw i32 [[TMP13]], 0
+// IR-NEXT: [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
+// IR-NEXT: [[MUL30:%.*]] = mul nsw i32 1, [[DIV29]]
+// IR-NEXT: [[CONV31:%.*]] = sext i32 [[MUL30]] to i64
+// IR-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV27]], [[CONV31]]
+// IR-NEXT: [[SUB33:%.*]] = sub nsw i64 [[TMP10]], [[MUL32]]
+// IR-NEXT: [[MUL34:%.*]] = mul nsw i64 [[SUB33]], 1
+// IR-NEXT: [[ADD35:%.*]] = add nsw i64 0, [[MUL34]]
+// IR-NEXT: [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32
+// IR-NEXT: store i32 [[CONV36]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[TMP14:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[TMP15:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// IR-NEXT: [[TMP16:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP16]], [[ADD37]]
+// IR-NEXT: store i32 [[ADD38]], ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR: omp.body.continue:
+// IR-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR: omp.inner.for.inc:
+// IR-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP17]], 1
+// IR-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
+// IR: omp.inner.for.end:
+// IR-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP18]], 0
+// IR-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
+// IR-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1
+// IR-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]]
+// IR-NEXT: store i32 [[ADD43]], ptr [[I11]], align 4
+// IR-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
+// IR-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1
+// IR-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]]
+// IR-NEXT: store i32 [[ADD47]], ptr [[J]], align 4
+// IR-NEXT: [[TMP20:%.*]] = load i32, ptr [[Z]], align 4
+// IR-NEXT: [[TMP21:%.*]] = load i32, ptr [[Z13]], align 4
+// IR-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// IR-NEXT: store i32 [[ADD48]], ptr [[Z]], align 4
+// IR-NEXT: br label [[SIMD_IF_END]]
+// IR: simd.if.end:
// IR-NEXT: ret void
//
//
-// IR-PCH-LABEL: define {{[^@]+}}@_Z3fooi
+// IR-PCH-LABEL: define dso_local void {{[_A-Za-z0-9?@]*}}foo{{[@A-Za-z]*}}
// IR-PCH-SAME: (i32 noundef [[T:%.*]]) #[[ATTR0:[0-9]+]] {
// IR-PCH-NEXT: entry:
// IR-PCH-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[I:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[J:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: [[Z:%.*]] = alloca i32, align 4
-// IR-PCH-NEXT: [[I1:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[TMP2TMP1:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[I8:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J9:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT: [[I11:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[J12:%.*]] = alloca i32, align 4
+// IR-PCH-NEXT: [[Z13:%.*]] = alloca i32, align 4
// IR-PCH-NEXT: store i32 [[T]], ptr [[T_ADDR]], align 4
-// IR-PCH-NEXT: store i32 0, ptr [[I1]], align 4
-// IR-PCH-NEXT: br label [[FOR_COND:%.*]]
-// IR-PCH: for.cond:
-// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[I1]], align 4
+// IR-PCH-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_ADDR]], align 4
+// IR-PCH-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR_]], align 4
// IR-PCH-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_ADDR]], align 4
-// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]]
-// IR-PCH-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END8:%.*]]
-// IR-PCH: for.body:
-// IR-PCH-NEXT: store i32 0, ptr [[J]], align 4
-// IR-PCH-NEXT: br label [[FOR_COND2:%.*]]
-// IR-PCH: for.cond2:
-// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[J]], align 4
-// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_ADDR]], align 4
-// IR-PCH-NEXT: [[CMP3:%.*]] = icmp slt i32 [[TMP2]], [[TMP3]]
-// IR-PCH-NEXT: br i1 [[CMP3]], label [[FOR_BODY4:%.*]], label [[FOR_END:%.*]]
-// IR-PCH: for.body4:
-// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[I1]], align 4
-// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[J]], align 4
-// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP5]]
-// IR-PCH-NEXT: [[TMP6:%.*]] = load i32, ptr [[Z]], align 4
-// IR-PCH-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP6]], [[ADD]]
-// IR-PCH-NEXT: store i32 [[ADD5]], ptr [[Z]], align 4
-// IR-PCH-NEXT: br label [[FOR_INC:%.*]]
-// IR-PCH: for.inc:
-// IR-PCH-NEXT: [[TMP7:%.*]] = load i32, ptr [[J]], align 4
-// IR-PCH-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
-// IR-PCH-NEXT: store i32 [[INC]], ptr [[J]], align 4
-// IR-PCH-NEXT: br label [[FOR_COND2]], !llvm.loop [[LOOP3:![0-9]+]]
-// IR-PCH: for.end:
-// IR-PCH-NEXT: br label [[FOR_INC6:%.*]]
-// IR-PCH: for.inc6:
-// IR-PCH-NEXT: [[TMP8:%.*]] = load i32, ptr [[I1]], align 4
-// IR-PCH-NEXT: [[INC7:%.*]] = add nsw i32 [[TMP8]], 1
-// IR-PCH-NEXT: store i32 [[INC7]], ptr [[I1]], align 4
-// IR-PCH-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
-// IR-PCH: for.end8:
+// IR-PCH-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-PCH-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0
+// IR-PCH-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1
+// IR-PCH-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64
+// IR-PCH-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-PCH-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP3]], 0
+// IR-PCH-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1
+// IR-PCH-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64
+// IR-PCH-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
+// IR-PCH-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
+// IR-PCH-NEXT: store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[I8]], align 4
+// IR-PCH-NEXT: store i32 0, ptr [[J9]], align 4
+// IR-PCH-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// IR-PCH-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[SIMD_IF_END:%.*]]
+// IR-PCH: land.lhs.true:
+// IR-PCH-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-PCH-NEXT: [[CMP10:%.*]] = icmp slt i32 0, [[TMP5]]
+// IR-PCH-NEXT: br i1 [[CMP10]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END]]
+// IR-PCH: simd.if.then:
+// IR-PCH-NEXT: store i64 0, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT: store i32 0, ptr [[Z13]], align 4
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
+// IR-PCH: omp.inner.for.cond:
+// IR-PCH-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// IR-PCH-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP7]], 1
+// IR-PCH-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP6]], [[ADD]]
+// IR-PCH-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR-PCH: omp.inner.for.body:
+// IR-PCH-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP9]], 0
+// IR-PCH-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
+// IR-PCH-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
+// IR-PCH-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
+// IR-PCH-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP8]], [[CONV18]]
+// IR-PCH-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
+// IR-PCH-NEXT: [[ADD21:%.*]] = add nsw i64 0, [[MUL20]]
+// IR-PCH-NEXT: [[CONV22:%.*]] = trunc i64 [[ADD21]] to i32
+// IR-PCH-NEXT: store i32 [[CONV22]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP12]], 0
+// IR-PCH-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
+// IR-PCH-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]]
+// IR-PCH-NEXT: [[CONV26:%.*]] = sext i32 [[MUL25]] to i64
+// IR-PCH-NEXT: [[DIV27:%.*]] = sdiv i64 [[TMP11]], [[CONV26]]
+// IR-PCH-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[SUB28:%.*]] = sub nsw i32 [[TMP13]], 0
+// IR-PCH-NEXT: [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
+// IR-PCH-NEXT: [[MUL30:%.*]] = mul nsw i32 1, [[DIV29]]
+// IR-PCH-NEXT: [[CONV31:%.*]] = sext i32 [[MUL30]] to i64
+// IR-PCH-NEXT: [[MUL32:%.*]] = mul nsw i64 [[DIV27]], [[CONV31]]
+// IR-PCH-NEXT: [[SUB33:%.*]] = sub nsw i64 [[TMP10]], [[MUL32]]
+// IR-PCH-NEXT: [[MUL34:%.*]] = mul nsw i64 [[SUB33]], 1
+// IR-PCH-NEXT: [[ADD35:%.*]] = add nsw i64 0, [[MUL34]]
+// IR-PCH-NEXT: [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32
+// IR-PCH-NEXT: store i32 [[CONV36]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[TMP14:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[TMP15:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[ADD37:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
+// IR-PCH-NEXT: [[TMP16:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[ADD38:%.*]] = add nsw i32 [[TMP16]], [[ADD37]]
+// IR-PCH-NEXT: store i32 [[ADD38]], ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// IR-PCH: omp.body.continue:
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// IR-PCH: omp.inner.for.inc:
+// IR-PCH-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: [[ADD39:%.*]] = add nsw i64 [[TMP17]], 1
+// IR-PCH-NEXT: store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
+// IR-PCH: omp.inner.for.end:
+// IR-PCH-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT: [[SUB40:%.*]] = sub nsw i32 [[TMP18]], 0
+// IR-PCH-NEXT: [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
+// IR-PCH-NEXT: [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1
+// IR-PCH-NEXT: [[ADD43:%.*]] = add nsw i32 0, [[MUL42]]
+// IR-PCH-NEXT: store i32 [[ADD43]], ptr [[I11]], align 4
+// IR-PCH-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-PCH-NEXT: [[SUB44:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-PCH-NEXT: [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
+// IR-PCH-NEXT: [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1
+// IR-PCH-NEXT: [[ADD47:%.*]] = add nsw i32 0, [[MUL46]]
+// IR-PCH-NEXT: store i32 [[ADD47]], ptr [[J]], align 4
+// IR-PCH-NEXT: [[TMP20:%.*]] = load i32, ptr [[Z]], align 4
+// IR-PCH-NEXT: [[TMP21:%.*]] = load i32, ptr [[Z13]], align 4
+// IR-PCH-NEXT: [[ADD48:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// IR-PCH-NEXT: store i32 [[ADD48]], ptr [[Z]], align 4
+// IR-PCH-NEXT: br label [[SIMD_IF_END]]
+// IR-PCH: simd.if.end:
// IR-PCH-NEXT: ret void
//
diff --git a/clang/test/OpenMP/loop_bind_codegen.cpp b/clang/test/OpenMP/loop_bind_codegen.cpp
new file mode 100644
index 00000000000000..69c12a94ffdb99
--- /dev/null
+++ b/clang/test/OpenMP/loop_bind_codegen.cpp
@@ -0,0 +1,133 @@
+// expected-no-diagnostics
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+
+#define NNN 50
+int aaa[NNN];
+
+void parallel_loop() {
+ #pragma omp parallel
+ {
+ #pragma omp loop bind(parallel)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+}
+
+void parallel_loop_orphan() {
+ #pragma omp loop bind(parallel)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+}
+
+
+void teams_loop() {
+ #pragma omp teams
+ {
+ #pragma omp loop bind(teams)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+}
+
+void thread_loop() {
+ #pragma omp parallel
+ {
+ #pragma omp loop bind(thread)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+}
+
+void thread_loop_orphan() {
+ #pragma omp loop bind(thread)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+}
+
+int main() {
+ parallel_loop();
+ parallel_loop_orphan();
+ teams_loop();
+ thread_loop();
+ thread_loop_orphan();
+
+ return 0;
+}
+// CHECK-LABEL: define dso_local void @{{.+}}parallel_loop
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 0, ptr {{.+}}parallel_loop{{.+}}.omp_outlined{{.*}})
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}parallel_loop{{.+}}.omp_outlined
+// CHECK-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.body:
+// CHECK: omp.loop.exit:
+// CHECK-NEXT: call void @__kmpc_for_static_fini
+// CHECK-NEXT: call void @__kmpc_barrier
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}parallel_loop_orphan{{.+}}
+// CHECK-NEXT: entry:
+// CHECK: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.body:
+// CHECK: omp.inner.for.end:
+// CHECK: omp.loop.exit:
+// CHECK-NEXT: call void @__kmpc_for_static_fini
+// CHECK-NEXT: call void @__kmpc_barrier
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}teams_loop{{.+}}
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr {{.+}}teams_loop{{.+}}.omp_outlined{{.*}})
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}teams_loop{{.+}}.omp_outlined{{.+}}
+// CHECK-NEXT: entry:
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.body:
+// CHECK: omp.loop.exit:
+// CHECK-NEXT: call void @__kmpc_for_static_fini
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}thread_loop{{.+}}
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 0, ptr {{.+}}thread_loop{{.+}}.omp_outlined{{.*}})
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}thread_loop{{.+}}.omp_outlined{{.+}}
+// CHECK-NEXT: entry:
+// CHECK: omp.inner.for.body:
+// CHECK: omp.inner.for.end:
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}thread_loop_orphan{{.+}}
+// CHECK-NEXT: entry:
+// CHECK: omp.inner.for.cond:
+// CHECK: omp.inner.for.body:
+// CHECK: omp.inner.for.end:
+//
+//
+// CHECK-LABEL: define {{.+}}main{{.+}}
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK: call void {{.+}}parallel_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}parallel_loop_orphan{{.+}}()
+// CHECK-NEXT: call void {{.+}}teams_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}thread_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}thread_loop_orphan{{.+}}()
+// CHECK-NEXT: ret i32 0
+//
diff --git a/clang/test/OpenMP/loop_bind_enclosed.cpp b/clang/test/OpenMP/loop_bind_enclosed.cpp
new file mode 100644
index 00000000000000..dc62793ec3c303
--- /dev/null
+++ b/clang/test/OpenMP/loop_bind_enclosed.cpp
@@ -0,0 +1,190 @@
+// expected-no-diagnostics
+// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+
+#define NNN 50
+int aaa[NNN];
+
+void parallel_taskgroup_loop() {
+ #pragma omp parallel
+ {
+ #pragma omp taskgroup
+ for (int i = 0 ; i < 2 ; i++) {
+ #pragma omp loop
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+ }
+}
+
+void parallel_taskwait_loop() {
+ #pragma omp parallel
+ {
+ #pragma omp taskwait
+ for (int i = 0 ; i < 2 ; i++) {
+ #pragma omp loop
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+ }
+}
+
+void parallel_single_loop() {
+ #pragma omp parallel
+ {
+ for (int i = 0 ; i < 2 ; i++) {
+ #pragma omp single
+ #pragma omp loop
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+ }
+}
+
+void parallel_order_loop() {
+ #pragma omp parallel
+ {
+ #pragma omp for order(concurrent)
+ {
+ for (int i = 0 ; i < 2 ; i++) {
+ #pragma omp loop
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+ }
+ }
+}
+
+
+void parallel_cancel_loop(bool flag) {
+ #pragma omp ordered
+ for (int i = 0 ; i < 2 ; i++) {
+ #pragma omp parallel
+ {
+ #pragma omp cancel parallel if(flag)
+ aaa[0] = 0;
+ #pragma omp loop bind(parallel)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+ }
+}
+
+int
+main(int argc, char *argv[]) {
+ parallel_taskgroup_loop();
+ parallel_taskwait_loop();
+ parallel_single_loop();
+ parallel_order_loop();
+ parallel_cancel_loop(true);
+ parallel_cancel_loop(false);
+
+ return 0;
+}
+// CHECK-LABEL: define dso_local void {{.+}}parallel_taskgroup_loop{{.+}} {
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr {{.+}}parallel_taskgroup_loop{{.+}}.omp_outlined{{.*}}
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}parallel_taskgroup_loop{{.+}}.omp_outlined{{.+}} {
+// CHECK: call void @__kmpc_taskgroup
+// CHECK: for.body:
+// CHECK: omp.inner.for.cond:
+// CHECK: omp.inner.for.body:
+// CHECK: omp.inner.for.inc:
+// CHECK: omp.inner.for.end:
+// CHECK: for.end:
+// CHECK: call void @__kmpc_end_taskgroup
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}parallel_taskwait_loop{{.+}} {
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr {{.+}}parallel_taskwait_loop{{.+}}.omp_outlined{{.*}})
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}parallel_taskwait_loop{{.+}}.omp_outlined{{.+}} {
+// CHECK: [[TMP2:%.*]] = call i32 @__kmpc_omp_taskwait
+// CHECK: for.cond:
+// CHECK: for.body:
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.cond:
+// CHECK: omp.inner.for.body:
+// CHECK: omp.body.continue:
+// CHECK: omp.inner.for.inc:
+// CHECK: omp.inner.for.end:
+// CHECK: omp.loop.exit:
+// CHECK: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_barrier
+// CHECK: for.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}parallel_single_loop{{.+}} {
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr {{.+}}parallel_single_loop{{.+}}.omp_outlined{{.*}})
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}parallel_single_loop{{.+}}.omp_outlined{{.+}} {
+// CHECK: for.body:
+// CHECK: [[TMP3:%.*]] = call i32 @__kmpc_single
+// CHECK: omp.inner.for.end:
+// CHECK: call void @__kmpc_end_single
+// CHECK: omp_if.end:
+// CHECK: call void @__kmpc_barrier
+// CHECK: for.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}parallel_order_loop{{.+}} {
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr {{.+}}parallel_order_loop{{.+}}.omp_outlined{{.*}})
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}parallel_order_loop{{.+}}.omp_outlined{{.+}} {
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.body:
+// CHECK: omp.loop.exit:
+// CHECK: call void @__kmpc_for_static_fini
+// CHECK: call void @__kmpc_barrier
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local void {{.+}}parallel_cancel_loop{{.+}} {
+// CHECK: [[FLAG_ADDR:%.*]] = alloca i8,
+// CHECK: call void @__kmpc_ordered
+// CHECK: for.body:
+// CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr {{.+}}parallel_cancel_loop{{.+}}.omp_outlined{{.*}}, ptr [[FLAG_ADDR]])
+// CHECK: for.end:
+// CHECK: call void @__kmpc_end_ordered
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define internal void {{.+}}parallel_cancel_loop{{.+}}.omp_outlined{{.+}} {
+// CHECK: omp_if.then:
+// CHECK: [[TMP4:%.*]] = call i32 @__kmpc_cancel
+// CHECK: .cancel.exit:
+// CHECK: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier
+// CHECK: omp_if.end:
+// CHECK: call void @__kmpc_for_static_init_4
+// CHECK: omp.inner.for.body:
+// CHECK: omp.loop.exit:
+// CHECK: call void @__kmpc_for_static_fini
+// CHECK: [[TMP24:%.*]] = call i32 @__kmpc_cancel_barrier
+// CHECK: .cancel.continue5:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define dso_local noundef i32 @main{{.+}} {
+// CHECK: call void {{.+}}parallel_taskgroup_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}parallel_taskwait_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}parallel_single_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}parallel_order_loop{{.+}}()
+// CHECK-NEXT: call void {{.+}}parallel_cancel_loop{{.+}}(i1 noundef zeroext true)
+// CHECK-NEXT: call void {{.+}}parallel_cancel_loop{{.+}}(i1 noundef zeroext false)
+//
diff --git a/clang/test/OpenMP/loop_bind_messages.cpp b/clang/test/OpenMP/loop_bind_messages.cpp
new file mode 100644
index 00000000000000..f7fdf289714328
--- /dev/null
+++ b/clang/test/OpenMP/loop_bind_messages.cpp
@@ -0,0 +1,76 @@
+#ifndef HEADER
+#define HEADER
+// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -verify %s
+
+#define NNN 50
+int aaa[NNN];
+
+void parallel_loop() {
+ #pragma omp parallel
+ {
+ #pragma omp loop
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+ }
+}
+
+void teams_loop() {
+ int var1, var2;
+
+ #pragma omp teams
+ {
+ #pragma omp loop bind(teams)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+
+ #pragma omp loop bind(teams) collapse(2) private(var1)
+ for (int i = 0 ; i < 3 ; i++) {
+ for (int j = 0 ; j < NNN ; j++) {
+ var1 += aaa[j];
+ }
+ }
+ }
+}
+
+void orphan_loop_with_bind() {
+ #pragma omp loop bind(parallel)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+}
+
+void orphan_loop_no_bind() {
+ #pragma omp loop // expected-error{{expected 'bind' clause for 'loop' construct without an enclosing OpenMP construct}}
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+}
+
+void teams_loop_reduction() {
+ int total = 0;
+
+ #pragma omp teams
+ {
+ #pragma omp loop bind(teams)
+ for (int j = 0 ; j < NNN ; j++) {
+ aaa[j] = j*NNN;
+ }
+
+ #pragma omp loop bind(teams) reduction(+:total) // expected-error{{'reduction' clause not allowed with '#pragma omp loop bind(teams)'}}
+ for (int j = 0 ; j < NNN ; j++) {
+ total+=aaa[j];
+ }
+ }
+}
+
+int main(int argc, char *argv[]) {
+ parallel_loop();
+ teams_loop();
+ orphan_loop_with_bind();
+ orphan_loop_no_bind();
+ teams_loop_reduction();
+}
+
+#endif
diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp
index ba8d0347b8c7e2..02f474c8e1a45b 100644
--- a/clang/test/OpenMP/nested_loop_codegen.cpp
+++ b/clang/test/OpenMP/nested_loop_codegen.cpp
@@ -58,6 +58,12 @@ int inline_decl() {
// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
@@ -66,35 +72,27 @@ int inline_decl() {
// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4
// CHECK1-NEXT: br label [[FOR_COND:%.*]]
// CHECK1: for.cond:
-// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
-// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
// CHECK1: for.body:
-// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
-// CHECK1-NEXT: br label [[FOR_COND1:%.*]]
-// CHECK1: for.cond1:
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 5
-// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK1: for.body3:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1
-// CHECK1-NEXT: store i32 [[INC]], ptr [[K]], align 4
-// CHECK1-NEXT: br label [[FOR_INC:%.*]]
-// CHECK1: for.inc:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK1-NEXT: store i32 [[INC4]], ptr [[K]], align 4
-// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP3:![0-9]+]]
-// CHECK1: for.end:
-// CHECK1-NEXT: br label [[FOR_INC5:%.*]]
-// CHECK1: for.inc5:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
-// CHECK1: for.end7:
-// CHECK1-NEXT: ret void
+// CHECK1-NEXT [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
+// CHECK1-NEXT call void @__kmpc_for_static_init_4(ptr @1, i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+//CHECK1 cond.end:
+//CHECK1 omp.inner.for.cond:
+//CHECK1 omp.inner.for.body:
+//CHECK1 omp.body.continue:
+//CHECK1 omp.inner.for.inc:
+//CHECK1 omp.inner.for.end:
+//CHECK1 omp.loop.exit:
+// CHECK1-NEXT [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK1-NEXT call void @__kmpc_for_static_fini(ptr @1, i32 [[TMP14]])
+// CHECK1-NEXT [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
+// CHECK1-NEXT call void @__kmpc_barrier(ptr @2, i32 [[TMP16]])
+//CHECK1 for.inc:
+//CHECK1 for.end:
+// CHECK1-NEXT ret void
+//
//
//
// CHECK1-LABEL: define {{[^@]+}}@_Z11inline_declv
@@ -114,45 +112,36 @@ int inline_decl() {
// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
// CHECK1-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8
-// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
-// CHECK1-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8
-// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
-// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8
-// CHECK1-NEXT: store i32 0, ptr [[TMP0]], align 4
-// CHECK1-NEXT: br label [[FOR_COND:%.*]]
-// CHECK1: for.cond:
-// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
-// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]]
-// CHECK1: for.body:
-// CHECK1-NEXT: store i32 0, ptr [[K]], align 4
-// CHECK1-NEXT: br label [[FOR_COND1:%.*]]
-// CHECK1: for.cond1:
-// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP3]], 5
-// CHECK1-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]]
-// CHECK1: for.body3:
-// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4
-// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
-// CHECK1-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4
-// CHECK1-NEXT: br label [[FOR_INC:%.*]]
-// CHECK1: for.inc:
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4
-// CHECK1-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP5]], 1
-// CHECK1-NEXT: store i32 [[INC4]], ptr [[K]], align 4
-// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP8:![0-9]+]]
-// CHECK1: for.end:
-// CHECK1-NEXT: br label [[FOR_INC5:%.*]]
-// CHECK1: for.inc5:
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4
-// CHECK1-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK1-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
-// CHECK1: for.end7:
-// CHECK1-NEXT: ret void
+// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK1: for.cond:
+// CHECK1: for.body:
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4
+// CHECK1-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4
+// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @1, i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// CHECK1: omp.inner.for.cond:
+// CHECK1: omp.inner.for.body:
+// CHECK1: omp.body.continue:
+// CHECK1: omp.inner.for.inc:
+// CHECK1: omp.inner.for.end:
+// CHECK1: omp.loop.exit:
+// CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 [[TMP14]])
+// CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4
+// CHECK1-NEXT: call void @__kmpc_barrier(ptr @2, i32 [[TMP16]])
+// CHECK1: for.inc:
+// CHECK1: for.end:
+// CHECK1-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z12outline_declv
@@ -173,6 +162,12 @@ int inline_decl() {
// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]]
@@ -180,39 +175,24 @@ int inline_decl() {
// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META29:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]]
// CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]]
-// CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG32:![0-9]+]]
-// CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG33:![0-9]+]]
-// CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG35:![0-9]+]]
-// CHECK2: for.cond:
-// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG36:![0-9]+]]
-// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10, !dbg [[DBG38:![0-9]+]]
-// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG39:![0-9]+]]
// CHECK2: for.body:
-// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]]
-// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG44:![0-9]+]]
-// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG46:![0-9]+]]
-// CHECK2: for.cond1:
-// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG47:![0-9]+]]
-// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP2]], 5, !dbg [[DBG49:![0-9]+]]
-// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG50:![0-9]+]]
-// CHECK2: for.body3:
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG51:![0-9]+]]
-// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1, !dbg [[DBG51]]
-// CHECK2-NEXT: store i32 [[INC]], ptr [[K]], align 4, !dbg [[DBG51]]
-// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG53:![0-9]+]]
+// CHECK2: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !50
+// CHECK2: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg !50
+// CHECK2: call void @__kmpc_for_static_init_4(ptr @1, i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg !51
+// CHECK2: omp.inner.for.cond:
+// CHECK2: omp.inner.for.body:
+// CHECK2: omp.body.continue:
+// CHECK2: omp.inner.for.inc:
+// CHECK2: omp.inner.for.end:
+// CHECK2: omp.loop.exit:
+// CHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !51
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg !51
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @3, i32 [[TMP13]]), !dbg !58
+// CHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !58
+// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4, !dbg !58
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @4, i32 [[TMP15]]), !dbg !58
// CHECK2: for.inc:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG54:![0-9]+]]
-// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG54]]
-// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG54]]
-// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG55:![0-9]+]], !llvm.loop [[LOOP56:![0-9]+]]
// CHECK2: for.end:
-// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG59:![0-9]+]]
-// CHECK2: for.inc5:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG60:![0-9]+]]
-// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG60]]
-// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4, !dbg [[DBG60]]
-// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG61:![0-9]+]], !llvm.loop [[LOOP62:![0-9]+]]
-// CHECK2: for.end7:
// CHECK2-NEXT: ret void, !dbg [[DBG64:![0-9]+]]
//
//
@@ -255,6 +235,12 @@ int inline_decl() {
// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
// CHECK2-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8
+// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85:![0-9]+]]
@@ -273,32 +259,31 @@ int inline_decl() {
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG97:![0-9]+]]
// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END7:%.*]], !dbg [[DBG98:![0-9]+]]
// CHECK2: for.body:
-// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]]
-// CHECK2-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG103]]
-// CHECK2-NEXT: br label [[FOR_COND1:%.*]], !dbg [[DBG104:![0-9]+]]
-// CHECK2: for.cond1:
-// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG105:![0-9]+]]
-// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP3]], 5, !dbg [[DBG107:![0-9]+]]
-// CHECK2-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END:%.*]], !dbg [[DBG108:![0-9]+]]
-// CHECK2: for.body3:
-// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG109:![0-9]+]]
-// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1, !dbg [[DBG109]]
-// CHECK2-NEXT: store i32 [[INC]], ptr [[TMP1]], align 4, !dbg [[DBG109]]
-// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG111:![0-9]+]]
+// CHECK2: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG103:![0-9]+]]
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG103:![0-9]+]]
+// CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @8, i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG103:![0-9]+]]
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG103:![0-9]+]]
+// CHECK2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP5]], 4, !dbg [[DBG103:![0-9]+]]
+// CHECK2: omp.inner.for.cond:
+// CHECK2: omp.inner.for.body:
+// CHECK2: omp.body.continue:
+// CHECK2: omp.inner.for.inc:
+// CHECK2: omp.inner.for.end:
+// CHECK2: omp.loop.exit:
+// CHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !111
+// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4, !dbg !111
+// CHECK2-NEXT: call void @__kmpc_for_static_fini(ptr @10, i32 [[TMP14]]), !dbg !118
+// CHECK2-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg !118
+// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4, !dbg !118
+// CHECK2-NEXT: call void @__kmpc_barrier(ptr @11, i32 [[TMP16]]), !dbg !118
+// CHECK2-NEXT: br label [[FOR_INC:%.*]], !dbg !119
// CHECK2: for.inc:
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG112:![0-9]+]]
-// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG112]]
-// CHECK2-NEXT: store i32 [[INC4]], ptr [[K]], align 4, !dbg [[DBG112]]
-// CHECK2-NEXT: br label [[FOR_COND1]], !dbg [[DBG113:![0-9]+]], !llvm.loop [[LOOP114:![0-9]+]]
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG112:![0-9]+]]
+// CHECK2-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP17]], 1, !dbg [[DBG112]]
+// CHECK2-NEXT: store i32 [[INC4]], ptr [[TMP0]], align 4, !dbg [[DBG112]]
+// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG113:![0-9]+]], !llvm.loop [[LOOP114:![0-9]+]]
// CHECK2: for.end:
-// CHECK2-NEXT: br label [[FOR_INC5:%.*]], !dbg [[DBG116:![0-9]+]]
-// CHECK2: for.inc5:
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG117:![0-9]+]]
-// CHECK2-NEXT: [[INC6:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG117]]
-// CHECK2-NEXT: store i32 [[INC6]], ptr [[TMP0]], align 4, !dbg [[DBG117]]
-// CHECK2-NEXT: br label [[FOR_COND]], !dbg [[DBG118:![0-9]+]], !llvm.loop [[LOOP119:![0-9]+]]
-// CHECK2: for.end7:
-// CHECK2-NEXT: ret void, !dbg [[DBG121:![0-9]+]]
+// CHECK2-NEXT: ret void, !dbg [[DBG114:![0-9]+]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv.omp_outlined
@@ -362,10 +347,14 @@ int inline_decl() {
// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK3: omp.par.region:
// CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: br label [[FOR_COND:%.*]]
+// CHECK3-NEXT: br label [[FOR_COND:%.*]]
// CHECK3: for.cond:
// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4
// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
@@ -387,26 +376,35 @@ int inline_decl() {
// CHECK3-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4
// CHECK3-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]]
// CHECK3: omp_loop.preheader:
+// CHECK3-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1
+// CHECK3-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4
+// CHECK3-NEXT: store i32 1, ptr [[P_STRIDE]], align 4
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK3-NEXT: call void @__kmpc_for_static_init_4u(ptr @1, i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0)
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]]
+// CHECK3-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1
// CHECK3-NEXT: br label [[OMP_LOOP_HEADER:%.*]]
// CHECK3: omp_loop.header:
// CHECK3-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
// CHECK3-NEXT: br label [[OMP_LOOP_COND:%.*]]
// CHECK3: omp_loop.cond:
-// CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]]
+// CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]]
// CHECK3-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp_loop.exit:
+// CHECK3-NEXT: call void @__kmpc_for_static_fini(ptr @1, i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @1)
+// CHECK3-NEXT: call void @__kmpc_barrier(ptr @2, i32 [[OMP_GLOBAL_THREAD_NUM3]])
// CHECK3-NEXT: br label [[OMP_LOOP_AFTER:%.*]]
// CHECK3: omp_loop.after:
-// CHECK3-NEXT: br label [[FOR_INC:%.*]]
// CHECK3: for.inc:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK3-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
// CHECK3: omp_loop.body:
-// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]])
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_K]], align 4
-// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
+// CHECK3-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]]
+// CHECK3-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]])
+// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_K]], align 4
+// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK3-NEXT: store i32 [[INC]], ptr [[LOADGEP_K]], align 4
// CHECK3-NEXT: br label [[OMP_LOOP_INC]]
// CHECK3: omp_loop.inc:
@@ -513,20 +511,16 @@ int inline_decl() {
// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
// CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
// CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK3: omp.par.region:
-// CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: br label [[FOR_COND:%.*]]
// CHECK3: for.cond:
-// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10
-// CHECK3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
// CHECK3: for.end:
-// CHECK3-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]]
// CHECK3: omp.par.region.parallel.after:
-// CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK3: omp.par.pre_finalize:
-// CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
// CHECK3: for.body:
// CHECK3-NEXT: store i32 0, ptr [[K]], align 4
// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0
@@ -538,31 +532,15 @@ int inline_decl() {
// CHECK3-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4
// CHECK3-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]]
// CHECK3: omp_loop.preheader:
-// CHECK3-NEXT: br label [[OMP_LOOP_HEADER:%.*]]
// CHECK3: omp_loop.header:
// CHECK3-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ]
// CHECK3-NEXT: br label [[OMP_LOOP_COND:%.*]]
// CHECK3: omp_loop.cond:
-// CHECK3-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]]
-// CHECK3-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]]
// CHECK3: omp_loop.exit:
-// CHECK3-NEXT: br label [[OMP_LOOP_AFTER:%.*]]
// CHECK3: omp_loop.after:
-// CHECK3-NEXT: br label [[FOR_INC:%.*]]
// CHECK3: for.inc:
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1
-// CHECK3-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4
-// CHECK3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK3: omp_loop.body:
-// CHECK3-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]])
-// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4
-// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1
-// CHECK3-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4
-// CHECK3-NEXT: br label [[OMP_LOOP_INC]]
// CHECK3: omp_loop.inc:
-// CHECK3-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1
-// CHECK3-NEXT: br label [[OMP_LOOP_HEADER]]
// CHECK3: omp.par.outlined.exit.exitStub:
// CHECK3-NEXT: ret void
//
@@ -665,22 +643,18 @@ int inline_decl() {
// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK4: omp.par.region:
-// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]]
-// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG23]]
// CHECK4: for.cond:
-// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG25:![0-9]+]]
-// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG25]]
-// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG23]]
// CHECK4: for.end:
-// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG27:![0-9]+]]
// CHECK4: omp.par.region.parallel.after:
-// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK4: omp.par.pre_finalize:
-// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG27]]
// CHECK4: for.body:
-// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG28:![0-9]+]]
+// CHECK4: store i32 0, ptr [[LOADGEP_K]], align 4, !dbg [[DBG28:![0-9]+]]
// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG28]]
// CHECK4-NEXT: store ptr [[LOADGEP_K]], ptr [[TMP3]], align 8, !dbg [[DBG28]]
// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG28]]
@@ -690,31 +664,28 @@ int inline_decl() {
// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG28]]
// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG28]]
// CHECK4: omp_loop.preheader:
+// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG28]]
+// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG28]]
+// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG28]]
+// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG28]]
+// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @3), !dbg [[DBG28]]
+// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @3, i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG28]]
+// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG28]]
+// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG28]]
+// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[DBG28]]
+// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[DBG28]]
// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG28]]
// CHECK4: omp_loop.header:
-// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG28]]
-// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG28]]
// CHECK4: omp_loop.cond:
-// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]], !dbg [[DBG28]]
-// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG28]]
// CHECK4: omp_loop.exit:
+// CHECK4: call void @__kmpc_for_static_fini(ptr @3, i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG28]]
+// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @3), !dbg [[DBG33:![0-9]+]]
+// CHECK4-NEXT: call void @__kmpc_barrier(ptr @4, i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG33]]
// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG28]]
// CHECK4: omp_loop.after:
-// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG33:![0-9]+]]
// CHECK4: for.inc:
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG25]]
-// CHECK4-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG25]]
-// CHECK4-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG25]]
-// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP34:![0-9]+]]
// CHECK4: omp_loop.body:
-// CHECK4-NEXT: call void @__captured_stmt.1(ptr [[LOADGEP_K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG28]]
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_K]], align 4, !dbg [[DBG36:![0-9]+]]
-// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG36]]
-// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_K]], align 4, !dbg [[DBG36]]
-// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG28]]
// CHECK4: omp_loop.inc:
-// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG28]]
-// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG28]]
// CHECK4: omp.par.outlined.exit.exitStub:
// CHECK4-NEXT: ret void
//
@@ -826,57 +797,29 @@ int inline_decl() {
// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
// CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
// CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
+// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK4: omp.par.region:
-// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]]
-// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG86]]
// CHECK4: for.cond:
-// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88:![0-9]+]]
-// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG88]]
-// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG86]]
// CHECK4: for.end:
-// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG90:![0-9]+]]
// CHECK4: omp.par.region.parallel.after:
-// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]]
// CHECK4: omp.par.pre_finalize:
-// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG90]]
// CHECK4: for.body:
-// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95:![0-9]+]]
-// CHECK4-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG95]]
-// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG95]]
-// CHECK4-NEXT: store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[DBG95]]
-// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG95]]
-// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG96:![0-9]+]]
-// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG95]]
+// CHECK4: store i32 [[TMP5:%.*]], ptr [[TMP4:%.*]], align 4, !dbg [[DBG95:![0-9]+]]
// CHECK4-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG95]]
-// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG95]]
-// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG95]]
// CHECK4: omp_loop.preheader:
-// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG95]]
// CHECK4: omp_loop.header:
-// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG95]]
-// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG95]]
// CHECK4: omp_loop.cond:
-// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[DOTCOUNT]], !dbg [[DBG95]]
-// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG95]]
// CHECK4: omp_loop.exit:
-// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG95]]
// CHECK4: omp_loop.after:
-// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG97:![0-9]+]]
// CHECK4: for.inc:
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]]
-// CHECK4-NEXT: [[INC2:%.*]] = add nsw i32 [[TMP6]], 1, !dbg [[DBG88]]
-// CHECK4-NEXT: store i32 [[INC2]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]]
-// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP98:![0-9]+]]
// CHECK4: omp_loop.body:
-// CHECK4-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[OMP_LOOP_IV]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG95]]
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG99:![0-9]+]]
-// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP7]], 1, !dbg [[DBG99]]
-// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG99]]
-// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG95]]
+// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV:%.*]], [[TMP7:%.*]], !dbg [[DBG98:![0-9]+]]
+// CHECK4: call void @__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG96:![0-9]+]]
// CHECK4: omp_loop.inc:
-// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG95]]
-// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG95]]
// CHECK4: omp.par.outlined.exit.exitStub:
// CHECK4-NEXT: ret void
//
diff --git a/clang/test/PCH/pragma-loop.cpp b/clang/test/PCH/pragma-loop.cpp
index b8079ff608e418..f5de630ffc9120 100644
--- a/clang/test/PCH/pragma-loop.cpp
+++ b/clang/test/PCH/pragma-loop.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -emit-pch -o %t.a %s
-// RUN: %clang_cc1 -include-pch %t.a %s -ast-print -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -emit-pch -o %t.a %s
+// RUN: %clang_cc1 -fopenmp -include-pch %t.a %s -ast-print -o - | FileCheck %s
// CHECK: #pragma clang loop vectorize_width(4)
// CHECK: #pragma clang loop interleave_count(8)
@@ -18,6 +18,9 @@
// CHECK: #pragma nounroll{{$}}
// CHECK: #pragma clang loop vectorize_width(V)
// CHECK: #pragma clang loop interleave_count(I)
+// CHECK: #pragma omp simd
+// CHECK: #pragma omp for
+// CHECK: #pragma omp distribute
#ifndef HEADER
#define HEADER
@@ -94,9 +97,33 @@ class pragma_test {
List[i] = i;
}
}
+
+ inline void run8(int *List, int Length) {
+ int i = 0;
+#pragma omp loop bind(thread)
+ for (int i = 0; i < Length; i++) {
+ List[i] = i;
+ }
+ }
+
+ inline void run9(int *List, int Length) {
+ int i = 0;
+#pragma omp loop bind(parallel)
+ for (int i = 0; i < Length; i++) {
+ List[i] = i;
+ }
+ }
+
+ inline void run10(int *List, int Length) {
+ int i = 0;
+#pragma omp loop bind(teams)
+ for (int i = 0; i < Length; i++) {
+ List[i] = i;
+ }
+ }
+
};
#else
-
void test() {
int List[100];
@@ -109,6 +136,9 @@ void test() {
pt.run5(List, 100);
pt.run6(List, 100);
pt.run7<2, 4>(List, 100);
+ pt.run8(List, 100);
+ pt.run9(List, 100);
+ pt.run10(List, 100);
}
#endif
More information about the cfe-commits
mailing list