[clang] [clang][OpenMP] Move "loop" directive mapping from sema to codegen (PR #99905)

Krzysztof Parzyszek via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 22 11:17:02 PDT 2024


https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/99905

>From 4657c66f04cf5b603afcc356468e61f8805cb264 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Thu, 18 Jul 2024 15:21:10 -0500
Subject: [PATCH 1/4] [clang][OpenMP] Move "loop" directive mapping from sema
 to codegen

Given "loop" construct, clang will try to treat it as "for", "distribute"
or "simd", depending on either the implied binding, or the bind clause if
present. This patch moves the code that performs this construct remapping
from sema to codegen.

For a "loop" construct without a bind clause, this patch will create an
implicit bind clause based on implied binding to simplify further
analysis.

During codegen the function `EmitOMPGenericLoopDirective` (i.e. "loop")
will invoke the "emit" functions for "for", "distribute" or "simd",
depending on the bind clause.
---
 clang/include/clang/AST/StmtOpenMP.h         |  24 +-
 clang/include/clang/Sema/SemaOpenMP.h        |  23 +-
 clang/lib/AST/StmtOpenMP.cpp                 |  10 +-
 clang/lib/CodeGen/CGStmtOpenMP.cpp           | 348 ++++++++++++-------
 clang/lib/Sema/SemaOpenMP.cpp                | 222 +++---------
 clang/lib/Sema/TreeTransform.h               |   9 +-
 clang/lib/Serialization/ASTReaderStmt.cpp    |   1 -
 clang/lib/Serialization/ASTWriterStmt.cpp    |   1 -
 clang/test/OpenMP/generic_loop_ast_print.cpp |  33 +-
 clang/test/OpenMP/generic_loop_codegen.cpp   | 150 ++++----
 clang/test/PCH/pragma-loop.cpp               |   6 +-
 11 files changed, 373 insertions(+), 454 deletions(-)

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 194eb2d10dcb3..f313c480f9a08 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -281,15 +281,6 @@ class OMPExecutableDirective : public Stmt {
     return Data->getClauses();
   }
 
-  /// Was this directive mapped from an another directive?
-  /// e.g. 1) omp loop bind(parallel) is mapped to OMPD_for
-  ///      2) omp loop bind(teams) is mapped to OMPD_distribute
-  ///      3) omp loop bind(thread) is mapped to OMPD_simd
-  /// It was necessary to note it down in the Directive because of
-  /// clang::TreeTransform::TransformOMPExecutableDirective() pass in
-  /// the frontend.
-  OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown;
-
 protected:
   /// Data, associated with the directive.
   OMPChildren *Data = nullptr;
@@ -354,10 +345,6 @@ class OMPExecutableDirective : public Stmt {
     return Inst;
   }
 
-  void setMappedDirective(OpenMPDirectiveKind MappedDirective) {
-    PrevMappedDirective = MappedDirective;
-  }
-
 public:
   /// Iterates over expressions/statements used in the construct.
   class used_clauses_child_iterator
@@ -611,8 +598,6 @@ class OMPExecutableDirective : public Stmt {
            "Expected directive with the associated statement.");
     return Data->getRawStmt();
   }
-
-  OpenMPDirectiveKind getMappedDirective() const { return PrevMappedDirective; }
 };
 
 /// This represents '#pragma omp parallel' directive.
@@ -1620,8 +1605,7 @@ class OMPSimdDirective : public OMPLoopDirective {
                                   SourceLocation EndLoc, unsigned CollapsedNum,
                                   ArrayRef<OMPClause *> Clauses,
                                   Stmt *AssociatedStmt,
-                                  const HelperExprs &Exprs,
-                                  OpenMPDirectiveKind ParamPrevMappedDirective);
+                                  const HelperExprs &Exprs);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -1699,8 +1683,7 @@ class OMPForDirective : public OMPLoopDirective {
                                  SourceLocation EndLoc, unsigned CollapsedNum,
                                  ArrayRef<OMPClause *> Clauses,
                                  Stmt *AssociatedStmt, const HelperExprs &Exprs,
-                                 Expr *TaskRedRef, bool HasCancel,
-                                 OpenMPDirectiveKind ParamPrevMappedDirective);
+                                 Expr *TaskRedRef, bool HasCancel);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
@@ -4478,8 +4461,7 @@ class OMPDistributeDirective : public OMPLoopDirective {
   static OMPDistributeDirective *
   Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
          unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses,
-         Stmt *AssociatedStmt, const HelperExprs &Exprs,
-         OpenMPDirectiveKind ParamPrevMappedDirective);
+         Stmt *AssociatedStmt, const HelperExprs &Exprs);
 
   /// Creates an empty directive with the place
   /// for \a NumClauses clauses.
diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h
index 54d81f91ffebc..aa61dae9415e2 100644
--- a/clang/include/clang/Sema/SemaOpenMP.h
+++ b/clang/include/clang/Sema/SemaOpenMP.h
@@ -398,8 +398,7 @@ class SemaOpenMP : public SemaBase {
   StmtResult ActOnOpenMPExecutableDirective(
       OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
       OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
-      Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc,
-      OpenMPDirectiveKind PrevMappedDirective = llvm::omp::OMPD_unknown);
+      Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc);
   /// Called on well-formed '\#pragma omp parallel' after parsing
   /// of the  associated statement.
   StmtResult ActOnOpenMPParallelDirective(ArrayRef<OMPClause *> Clauses,
@@ -1430,26 +1429,6 @@ class SemaOpenMP : public SemaBase {
 
   /// All `omp assumes` we encountered so far.
   SmallVector<OMPAssumeAttr *, 4> OMPAssumeGlobal;
-
-  /// OMPD_loop is mapped to OMPD_for, OMPD_distribute or OMPD_simd depending
-  /// on the parameter of the bind clause. In the methods for the
-  /// mapped directives, check the parameters of the lastprivate clause.
-  bool checkLastPrivateForMappedDirectives(ArrayRef<OMPClause *> Clauses);
-  /// Depending on the bind clause of OMPD_loop map the directive to new
-  /// directives.
-  ///    1) loop bind(parallel) --> OMPD_for
-  ///    2) loop bind(teams) --> OMPD_distribute
-  ///    3) loop bind(thread) --> OMPD_simd
-  /// This is being handled in Sema instead of Codegen because of the need for
-  /// rigorous semantic checking in the new mapped directives.
-  bool mapLoopConstruct(llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
-                        ArrayRef<OMPClause *> Clauses,
-                        OpenMPBindClauseKind &BindKind,
-                        OpenMPDirectiveKind &Kind,
-                        OpenMPDirectiveKind &PrevMappedDirective,
-                        SourceLocation StartLoc, SourceLocation EndLoc,
-                        const DeclarationNameInfo &DirName,
-                        OpenMPDirectiveKind CancelRegion);
 };
 
 } // namespace clang
diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index a2325b177d41e..525d079da2670 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -300,7 +300,7 @@ OMPParallelDirective *OMPParallelDirective::CreateEmpty(const ASTContext &C,
 OMPSimdDirective *OMPSimdDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, OpenMPDirectiveKind ParamPrevMappedDirective) {
+    const HelperExprs &Exprs) {
   auto *Dir = createDirective<OMPSimdDirective>(
       C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_simd),
       StartLoc, EndLoc, CollapsedNum);
@@ -320,7 +320,6 @@ OMPSimdDirective *OMPSimdDirective::Create(
   Dir->setDependentInits(Exprs.DependentInits);
   Dir->setFinalsConditions(Exprs.FinalsConditions);
   Dir->setPreInits(Exprs.PreInits);
-  Dir->setMappedDirective(ParamPrevMappedDirective);
   return Dir;
 }
 
@@ -336,8 +335,7 @@ OMPSimdDirective *OMPSimdDirective::CreateEmpty(const ASTContext &C,
 OMPForDirective *OMPForDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel,
-    OpenMPDirectiveKind ParamPrevMappedDirective) {
+    const HelperExprs &Exprs, Expr *TaskRedRef, bool HasCancel) {
   auto *Dir = createDirective<OMPForDirective>(
       C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_for) + 1,
       StartLoc, EndLoc, CollapsedNum);
@@ -367,7 +365,6 @@ OMPForDirective *OMPForDirective::Create(
   Dir->setPreInits(Exprs.PreInits);
   Dir->setTaskReductionRefExpr(TaskRedRef);
   Dir->setHasCancel(HasCancel);
-  Dir->setMappedDirective(ParamPrevMappedDirective);
   return Dir;
 }
 
@@ -1572,7 +1569,7 @@ OMPParallelMaskedTaskLoopSimdDirective::CreateEmpty(const ASTContext &C,
 OMPDistributeDirective *OMPDistributeDirective::Create(
     const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
     unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs, OpenMPDirectiveKind ParamPrevMappedDirective) {
+    const HelperExprs &Exprs) {
   auto *Dir = createDirective<OMPDistributeDirective>(
       C, Clauses, AssociatedStmt,
       numLoopChildren(CollapsedNum, OMPD_distribute), StartLoc, EndLoc,
@@ -1601,7 +1598,6 @@ OMPDistributeDirective *OMPDistributeDirective::Create(
   Dir->setDependentInits(Exprs.DependentInits);
   Dir->setFinalsConditions(Exprs.FinalsConditions);
   Dir->setPreInits(Exprs.PreInits);
-  Dir->setMappedDirective(ParamPrevMappedDirective);
   return Dir;
 }
 
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index adf74ea16c895..2e83213fa03e1 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -44,6 +44,8 @@ using namespace llvm::omp;
 #define TTL_CODEGEN_TYPE "target-teams-loop-codegen"
 
 static const VarDecl *getBaseDecl(const Expr *Ref);
+static OpenMPDirectiveKind
+getEffectiveDirectiveKind(const OMPExecutableDirective &S);
 
 namespace {
 /// Lexical scope for OpenMP executable constructs, that handles correct codegen
@@ -111,10 +113,10 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
 /// for captured expressions.
 class OMPParallelScope final : public OMPLexicalScope {
   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
-    OpenMPDirectiveKind Kind = S.getDirectiveKind();
-    return !(isOpenMPTargetExecutionDirective(Kind) ||
-             isOpenMPLoopBoundSharingDirective(Kind)) &&
-           isOpenMPParallelDirective(Kind);
+    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+    return !(isOpenMPTargetExecutionDirective(EKind) ||
+             isOpenMPLoopBoundSharingDirective(EKind)) &&
+           isOpenMPParallelDirective(EKind);
   }
 
 public:
@@ -127,9 +129,9 @@ class OMPParallelScope final : public OMPLexicalScope {
 /// for captured expressions.
 class OMPTeamsScope final : public OMPLexicalScope {
   bool EmitPreInitStmt(const OMPExecutableDirective &S) {
-    OpenMPDirectiveKind Kind = S.getDirectiveKind();
-    return !isOpenMPTargetExecutionDirective(Kind) &&
-           isOpenMPTeamsDirective(Kind);
+    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+    return !isOpenMPTargetExecutionDirective(EKind) &&
+           isOpenMPTeamsDirective(EKind);
   }
 
 public:
@@ -268,7 +270,7 @@ class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
         }
       }
     }
-    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
+    if (!isOpenMPSimdDirective(getEffectiveDirectiveKind(S)))
       CGF.EmitOMPPrivateClause(S, InlinedShareds);
     if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
       if (const Expr *E = TG->getReductionRef())
@@ -309,6 +311,30 @@ class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
 
 } // namespace
 
+// The loop directive with a bind clause will be mapped to a different
+// directive with corresponding semantics.
+static OpenMPDirectiveKind
+getEffectiveDirectiveKind(const OMPExecutableDirective &S) {
+  OpenMPDirectiveKind Kind = S.getDirectiveKind();
+  if (Kind != OMPD_loop)
+    return Kind;
+
+  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
+  if (const auto *C = S.getSingleClause<OMPBindClause>())
+    BindKind = C->getBindKind();
+
+  switch (BindKind) {
+  case OMPC_BIND_parallel:
+    return OMPD_for;
+  case OMPC_BIND_teams:
+    return OMPD_distribute;
+  case OMPC_BIND_thread:
+    return OMPD_simd;
+  default:
+    return OMPD_loop;
+  }
+}
+
 static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                          const OMPExecutableDirective &S,
                                          const RegionCodeGenTy &CodeGen);
@@ -825,9 +851,9 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                 OMPPrivateScope &PrivateScope) {
   if (!HaveInsertPoint())
     return false;
-  bool DeviceConstTarget =
-      getLangOpts().OpenMPIsTargetDevice &&
-      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
+  bool DeviceConstTarget = getLangOpts().OpenMPIsTargetDevice &&
+                           isOpenMPTargetExecutionDirective(EKind);
   bool FirstprivateIsLastprivate = false;
   llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
@@ -838,7 +864,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
   }
   llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
   llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
-  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
+  getOpenMPCaptureRegions(CaptureRegions, EKind);
   // Force emission of the firstprivate copy if the directive does not emit
   // outlined function, like omp for, omp simd, omp distribute etc.
   bool MustEmitFirstprivateCopy =
@@ -1067,8 +1093,9 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
   if (!HaveInsertPoint())
     return false;
   bool HasAtLeastOneLastprivate = false;
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
   llvm::DenseSet<const VarDecl *> SIMDLCVs;
-  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+  if (isOpenMPSimdDirective(EKind)) {
     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
     for (const Expr *C : LoopDirective->counters()) {
       SIMDLCVs.insert(
@@ -1078,7 +1105,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
     HasAtLeastOneLastprivate = true;
-    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
+    if (isOpenMPTaskLoopDirective(EKind) &&
         !getLangOpts().OpenMPSimd)
       break;
     const auto *IRef = C->varlist_begin();
@@ -1312,13 +1339,13 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
     ++Count;
   }
   if (!Data.ReductionVars.empty()) {
+    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
     Data.IsReductionWithTaskMod = true;
-    Data.IsWorksharingReduction =
-        isOpenMPWorksharingDirective(D.getDirectiveKind());
+    Data.IsWorksharingReduction = isOpenMPWorksharingDirective(EKind);
     llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
         *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
     const Expr *TaskRedRef = nullptr;
-    switch (D.getDirectiveKind()) {
+    switch (EKind) {
     case OMPD_parallel:
       TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
       break;
@@ -1449,16 +1476,16 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
         IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
   }
   if (HasAtLeastOneReduction) {
+    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
     if (IsReductionWithTaskMod) {
       CGM.getOpenMPRuntime().emitTaskReductionFini(
-          *this, D.getBeginLoc(),
-          isOpenMPWorksharingDirective(D.getDirectiveKind()));
+          *this, D.getBeginLoc(), isOpenMPWorksharingDirective(EKind));
     }
     bool TeamsLoopCanBeParallel = false;
     if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(&D))
       TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
     bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
-                      isOpenMPParallelDirective(D.getDirectiveKind()) ||
+                      isOpenMPParallelDirective(EKind) ||
                       TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
     bool SimpleReduction = ReductionKind == OMPD_simd;
     // Emit nowait reduction if nowait clause is present or directive is a
@@ -1915,7 +1942,8 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
   // Update the linear variables.
   // In distribute directives only loop counters may be marked as linear, no
   // need to generate the code for them.
-  if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
+  if (!isOpenMPDistributeDirective(EKind)) {
     for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
       for (const Expr *UE : C->updates())
         EmitIgnoredExpr(UE);
@@ -1949,7 +1977,7 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
     OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
     // No need to allocate inscan exit block, in simd mode it is selected in the
     // codegen for the scan directive.
-    if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
+    if (EKind != OMPD_simd && !getLangOpts().OpenMPSimd)
       OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
     OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
     EmitBranch(OMPScanDispatch);
@@ -2362,7 +2390,8 @@ void CodeGenFunction::EmitOMPLinearClause(
   if (!HaveInsertPoint())
     return;
   llvm::DenseSet<const VarDecl *> SIMDLCVs;
-  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
+  if (isOpenMPSimdDirective(EKind)) {
     const auto *LoopDirective = cast<OMPLoopDirective>(&D);
     for (const Expr *C : LoopDirective->counters()) {
       SIMDLCVs.insert(
@@ -2424,9 +2453,9 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
   if (const auto *C = D.getSingleClause<OMPOrderClause>())
     if (C->getKind() == OMPC_ORDER_concurrent)
       LoopStack.setParallel(/*Enable=*/true);
-  if ((D.getDirectiveKind() == OMPD_simd ||
-       (getLangOpts().OpenMPSimd &&
-        isOpenMPSimdDirective(D.getDirectiveKind()))) &&
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(D);
+  if ((EKind == OMPD_simd ||
+       (getLangOpts().OpenMPSimd && isOpenMPSimdDirective(EKind))) &&
       llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
                    [](const OMPReductionClause *C) {
                      return C->getModifier() == OMPC_REDUCTION_inscan;
@@ -2513,7 +2542,8 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
     BodyCodeGen(CGF);
   };
   const Expr *IfCond = nullptr;
-  if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+  if (isOpenMPSimdDirective(EKind)) {
     for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
       if (CGF.getLangOpts().OpenMP >= 50 &&
           (C->getNameModifier() == OMPD_unknown ||
@@ -2534,21 +2564,24 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
 static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
                               PrePostActionTy &Action) {
   Action.Enter(CGF);
-  assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
-         "Expected simd directive");
   OMPLoopScope PreInitScope(CGF, S);
   // if (PreCond) {
   //   for (IV in 0..LastIteration) BODY;
   //   <Final counter/linear vars updates>;
   // }
-  //
-  if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
-      isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
-      isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
-    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
-    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+  // The presence of lower/upper bound variable depends on the actual directive
+  // kind in the AST node. The variables must be emitted because some of the
+  // expressions associated with the loop will use them.
+  OpenMPDirectiveKind DKind = S.getDirectiveKind();
+  if (isOpenMPDistributeDirective(DKind) ||
+      isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
+      isOpenMPGenericLoopDirective(DKind)) {
+    EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+    EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
   }
 
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
   // Emit: if (PreCond) - begin.
   // If the condition constant folds and can be elided, avoid emitting the
   // whole loop.
@@ -2593,7 +2626,7 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
         CGF, S, CGF.EmitLValue(S.getIterationVariable()));
     bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
     (void)LoopScope.Privatize();
-    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+    if (isOpenMPTargetExecutionDirective(EKind))
       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
 
     emitCommonSimdLoop(
@@ -2627,7 +2660,9 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
   }
 }
 
-static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
+// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
+// available for "loop bind(thread)", which maps to "simd".
+static bool isSimdSupportedByOpenMPIRBuilder(const OMPLoopDirective &S) {
   // Check for unsupported clauses
   for (OMPClause *C : S.clauses()) {
     // Currently only order, simdlen and safelen clauses are supported
@@ -2658,8 +2693,9 @@ static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
   }
   return true;
 }
+
 static llvm::MapVector<llvm::Value *, llvm::Value *>
-GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
+GetAlignedMapping(const OMPLoopDirective &S, CodeGenFunction &CGF) {
   llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
   for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
     llvm::APInt ClauseAlignment(64, 0);
@@ -2689,11 +2725,14 @@ GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
   return AlignedVars;
 }
 
-void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+// Pass OMPLoopDirective (instead of OMPSimdDirective) to make this function
+// available for "loop bind(thread)", which maps to "simd".
+void emitOMPSimdDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
+                          CodeGenModule &CGM) {
   bool UseOMPIRBuilder =
-      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
+      CGM.getLangOpts().OpenMPIRBuilder && isSimdSupportedByOpenMPIRBuilder(S);
   if (UseOMPIRBuilder) {
-    auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
+    auto &&CodeGenIRBuilder = [&S, &CGM, UseOMPIRBuilder](CodeGenFunction &CGF,
                                                           PrePostActionTy &) {
       // Use the OpenMPIRBuilder if enabled.
       if (UseOMPIRBuilder) {
@@ -2702,30 +2741,28 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
         // Emit the associated statement and get its loop representation.
         const Stmt *Inner = S.getRawStmt();
         llvm::CanonicalLoopInfo *CLI =
-            EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
+            CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
 
         llvm::OpenMPIRBuilder &OMPBuilder =
             CGM.getOpenMPRuntime().getOMPBuilder();
         // Add SIMD specific metadata
         llvm::ConstantInt *Simdlen = nullptr;
         if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
-          RValue Len =
-              this->EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
-                                /*ignoreResult=*/true);
+          RValue Len = CGF.EmitAnyExpr(C->getSimdlen(), AggValueSlot::ignored(),
+                                       /*ignoreResult=*/true);
           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
           Simdlen = Val;
         }
         llvm::ConstantInt *Safelen = nullptr;
         if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
-          RValue Len =
-              this->EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
-                                /*ignoreResult=*/true);
+          RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
+                                       /*ignoreResult=*/true);
           auto *Val = cast<llvm::ConstantInt>(Len.getScalarVal());
           Safelen = Val;
         }
         llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
         if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
-          if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) {
+          if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
             Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
           }
         }
@@ -2738,27 +2775,31 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
     };
     {
       auto LPCRegion =
-          CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
-      OMPLexicalScope Scope(*this, S, OMPD_unknown);
-      CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
+          CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
+      OMPLexicalScope Scope(CGF, S, OMPD_unknown);
+      CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
                                                   CodeGenIRBuilder);
     }
     return;
   }
 
-  ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
-  OMPFirstScanLoop = true;
+  CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
+  CGF.OMPFirstScanLoop = true;
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     emitOMPSimdRegion(CGF, S, Action);
   };
   {
     auto LPCRegion =
-        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
-    OMPLexicalScope Scope(*this, S, OMPD_unknown);
-    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
+    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
+    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, CodeGen);
   }
   // Check for outer lastprivate conditional update.
-  checkForLastprivateConditionalUpdate(*this, S);
+  checkForLastprivateConditionalUpdate(CGF, S);
+}
+
+void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+  emitOMPSimdDirective(S, *this, CGM);
 }
 
 void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
@@ -2903,12 +2944,13 @@ void CodeGenFunction::EmitOMPOuterLoop(
   JumpDest Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
 
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
   emitCommonSimdLoop(
       *this, S,
-      [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
+      [&S, IsMonotonic, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
         // Generate !llvm.loop.parallel metadata for loads and stores for loops
         // with dynamic/guided scheduling and without ordered clause.
-        if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
+        if (!isOpenMPSimdDirective(EKind)) {
           CGF.LoopStack.setParallel(!IsMonotonic);
           if (const auto *C = S.getSingleClause<OMPOrderClause>())
             if (C->getKind() == OMPC_ORDER_concurrent)
@@ -2955,7 +2997,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                      LoopArgs.DKind);
   };
-  OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
+  OMPCancelStack.emitExit(*this, EKind, CodeGen);
 }
 
 void CodeGenFunction::EmitOMPForOuterLoop(
@@ -3041,8 +3083,9 @@ void CodeGenFunction::EmitOMPForOuterLoop(
     CGOpenMPRuntime::StaticRTInput StaticInit(
         IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
         LoopArgs.ST, LoopArgs.Chunk);
-    RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
-                         ScheduleKind, StaticInit);
+    OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+    RT.emitForStaticInit(*this, S.getBeginLoc(), EKind, ScheduleKind,
+                         StaticInit);
   }
 
   auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
@@ -3087,6 +3130,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
   const Expr *IVExpr = S.getIterationVariable();
   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
 
   CGOpenMPRuntime::StaticRTInput StaticInit(
       IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
@@ -3096,7 +3140,7 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
   // for combined 'distribute' and 'for' the increment expression of distribute
   // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
-  if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
+  if (isOpenMPLoopBoundSharingDirective(EKind))
     IncExpr = S.getDistInc();
   else
     IncExpr = S.getInc();
@@ -3110,20 +3154,20 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
   OuterLoopArgs.ST = LoopArgs.ST;
   OuterLoopArgs.IL = LoopArgs.IL;
   OuterLoopArgs.Chunk = LoopArgs.Chunk;
-  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+  OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(EKind)
                           ? S.getCombinedEnsureUpperBound()
                           : S.getEnsureUpperBound();
   OuterLoopArgs.IncExpr = IncExpr;
-  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+  OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(EKind)
                            ? S.getCombinedInit()
                            : S.getInit();
-  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+  OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(EKind)
                            ? S.getCombinedCond()
                            : S.getCond();
-  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+  OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(EKind)
                              ? S.getCombinedNextLowerBound()
                              : S.getNextLowerBound();
-  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+  OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(EKind)
                              ? S.getCombinedNextUpperBound()
                              : S.getNextUpperBound();
   OuterLoopArgs.DKind = OMPD_distribute;
@@ -3215,11 +3259,12 @@ static void
 emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                  const OMPLoopDirective &S,
                                  CodeGenFunction::JumpDest LoopExit) {
-  auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
-                                         PrePostActionTy &Action) {
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+  auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
+                                               PrePostActionTy &Action) {
     Action.Enter(CGF);
     bool HasCancel = false;
-    if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
+    if (!isOpenMPSimdDirective(EKind)) {
       if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(&S))
         HasCancel = D->hasCancel();
       else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(&S))
@@ -3228,16 +3273,14 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                    dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&S))
         HasCancel = D->hasCancel();
     }
-    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
-                                                     HasCancel);
+    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
     CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
                                emitDistributeParallelForInnerBounds,
                                emitDistributeParallelForDispatchBounds);
   };
 
   emitCommonOMPParallelDirective(
-      CGF, S,
-      isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
+      CGF, S, isOpenMPSimdDirective(EKind) ? OMPD_for_simd : OMPD_for,
       CGInlinedWorksharingLoop,
       emitDistributeParallelForDistributeInnerBoundParams);
 }
@@ -3370,6 +3413,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
 
     // Emit 'then' code.
     {
+      OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
       OMPPrivateScope LoopScope(*this);
       if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
         // Emit implicit barrier to synchronize threads and avoid data races on
@@ -3387,7 +3431,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
       EmitOMPPrivateLoopCounters(S, LoopScope);
       EmitOMPLinearClause(S, LoopScope);
       (void)LoopScope.Privatize();
-      if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+      if (isOpenMPTargetExecutionDirective(EKind))
         CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
 
       // Detect the loop schedule kind and chunk.
@@ -3425,8 +3469,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
       bool StaticChunkedOne =
           RT.isStaticChunked(ScheduleKind.Schedule,
                              /* Chunked */ Chunk != nullptr) &&
-          HasChunkSizeOne &&
-          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+          HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(EKind);
       bool IsMonotonic =
           Ordered ||
           (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
@@ -3442,8 +3485,8 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
         emitCommonSimdLoop(
             *this, S,
-            [&S](CodeGenFunction &CGF, PrePostActionTy &) {
-              if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+            [&S, EKind](CodeGenFunction &CGF, PrePostActionTy &) {
+              if (isOpenMPSimdDirective(EKind)) {
                 CGF.EmitOMPSimdInit(S);
               } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
                 if (C->getKind() == OMPC_ORDER_concurrent)
@@ -3451,7 +3494,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
               }
             },
             [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
-             &S, ScheduleKind, LoopExit,
+             &S, ScheduleKind, LoopExit, EKind,
              &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
               // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
               // When no chunk_size is specified, the iteration space is divided
@@ -3463,8 +3506,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
                   UB.getAddress(), ST.getAddress(),
                   StaticChunkedOne ? Chunk : nullptr);
               CGF.CGM.getOpenMPRuntime().emitForStaticInit(
-                  CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
-                  StaticInit);
+                  CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit);
               // UB = min(UB, GlobalUB);
               if (!StaticChunkedOne)
                 CGF.EmitIgnoredExpr(S.getEnsureUpperBound());
@@ -3499,7 +3541,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
           CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                          OMPD_for);
         };
-        OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
+        OMPCancelStack.emitExit(*this, EKind, CodeGen);
       } else {
         // Emit the outer loop, which requests its work chunk [LB..UB] from
         // runtime and runs the inner loop to process it.
@@ -3510,14 +3552,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
         EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
                             LoopArguments, CGDispatchBounds);
       }
-      if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+      if (isOpenMPSimdDirective(EKind)) {
         EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
           return CGF.Builder.CreateIsNotNull(
               CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
         });
       }
       EmitOMPReductionClauseFinal(
-          S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
+          S, /*ReductionKind=*/isOpenMPSimdDirective(EKind)
                  ? /*Parallel and Simd*/ OMPD_parallel_for_simd
                  : /*Parallel only*/ OMPD_parallel);
       // Emit post-update of the reduction variables if IsLastIter != 0.
@@ -3529,7 +3571,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
       // Emit final copy of the lastprivate variables if IsLastIter != 0.
       if (HasLastprivateClause)
         EmitOMPLastprivateClauseFinal(
-            S, isOpenMPSimdDirective(S.getDirectiveKind()),
+            S, isOpenMPSimdDirective(EKind),
             Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
       LoopScope.restoreMap();
       EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
@@ -3825,7 +3867,8 @@ static void emitScanBasedDirective(
     auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
     CGF.EmitBlock(ExitBB);
   };
-  if (isOpenMPParallelDirective(S.getDirectiveKind())) {
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+  if (isOpenMPParallelDirective(EKind)) {
     CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
     CGF.CGM.getOpenMPRuntime().emitBarrierCall(
         CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
@@ -3843,6 +3886,7 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
                                      const OMPLoopDirective &S,
                                      bool HasCancel) {
   bool HasLastprivates;
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
   if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                    [](const OMPReductionClause *C) {
                      return C->getModifier() == OMPC_REDUCTION_inscan;
@@ -3852,9 +3896,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
       OMPLoopScope LoopScope(CGF, S);
       return CGF.EmitScalarExpr(S.getNumIterations());
     };
-    const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
-      CodeGenFunction::OMPCancelStackRAII CancelRegion(
-          CGF, S.getDirectiveKind(), HasCancel);
+    const auto &&FirstGen = [&S, HasCancel, EKind](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
       (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                        emitForLoopBounds,
                                        emitDispatchForLoopBounds);
@@ -3862,22 +3905,20 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
       CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
                                                  OMPD_for);
     };
-    const auto &&SecondGen = [&S, HasCancel,
+    const auto &&SecondGen = [&S, HasCancel, EKind,
                               &HasLastprivates](CodeGenFunction &CGF) {
-      CodeGenFunction::OMPCancelStackRAII CancelRegion(
-          CGF, S.getDirectiveKind(), HasCancel);
+      CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
       HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                    emitForLoopBounds,
                                                    emitDispatchForLoopBounds);
     };
-    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+    if (!isOpenMPParallelDirective(EKind))
       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
-    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+    if (!isOpenMPParallelDirective(EKind))
       emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
   } else {
-    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
-                                                     HasCancel);
+    CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, EKind, HasCancel);
     HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
                                                  emitForLoopBounds,
                                                  emitDispatchForLoopBounds);
@@ -3885,11 +3926,14 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
   return HasLastprivates;
 }
 
-static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
-  if (S.hasCancel())
+// Pass OMPLoopDirective (instead of OMPForDirective) to make this check
+// available for "loop bind(parallel)", which maps to "for".
+static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
+                                            bool HasCancel) {
+  if (HasCancel)
     return false;
   for (OMPClause *C : S.clauses()) {
-    if (isa<OMPNowaitClause>(C))
+    if (isa<OMPNowaitClause>(C) || isa<OMPBindClause>(C))
       continue;
 
     if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
@@ -3934,11 +3978,14 @@ convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
   llvm_unreachable("Unhandled schedule kind");
 }
 
-void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
+// Pass OMPLoopDirective (instead of OMPForDirective) to make this function
+// available for "loop bind(parallel)", which maps to "for".
+void emitOMPForDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
+                         CodeGenModule &CGM, bool HasCancel) {
   bool HasLastprivates = false;
-  bool UseOMPIRBuilder =
-      CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
-  auto &&CodeGen = [this, &S, &HasLastprivates,
+  bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder &&
+                         isForSupportedByOpenMPIRBuilder(S, HasCancel);
+  auto &&CodeGen = [&S, &CGM, HasCancel, &HasLastprivates,
                     UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
     // Use the OpenMPIRBuilder if enabled.
     if (UseOMPIRBuilder) {
@@ -3950,43 +3997,47 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
         SchedKind =
             convertClauseKindToSchedKind(SchedClause->getScheduleKind());
         if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
-          ChunkSize = EmitScalarExpr(ChunkSizeExpr);
+          ChunkSize = CGF.EmitScalarExpr(ChunkSizeExpr);
       }
 
       // Emit the associated statement and get its loop representation.
       const Stmt *Inner = S.getRawStmt();
       llvm::CanonicalLoopInfo *CLI =
-          EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
+          CGF.EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
 
       llvm::OpenMPIRBuilder &OMPBuilder =
           CGM.getOpenMPRuntime().getOMPBuilder();
       llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
-          AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+          CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
       OMPBuilder.applyWorkshareLoop(
-          Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
+          CGF.Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
           SchedKind, ChunkSize, /*HasSimdModifier=*/false,
           /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
           /*HasOrderedClause=*/false);
       return;
     }
 
-    HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
+    HasLastprivates = emitWorksharingDirective(CGF, S, HasCancel);
   };
   {
     auto LPCRegion =
-        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
-    OMPLexicalScope Scope(*this, S, OMPD_unknown);
-    CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
-                                                S.hasCancel());
+        CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
+    OMPLexicalScope Scope(CGF, S, OMPD_unknown);
+    CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_for, CodeGen,
+                                                HasCancel);
   }
 
   if (!UseOMPIRBuilder) {
     // Emit an implicit barrier at the end.
     if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
-      CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+      CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), OMPD_for);
   }
   // Check for outer lastprivate conditional update.
-  checkForLastprivateConditionalUpdate(*this, S);
+  checkForLastprivateConditionalUpdate(CGF, S);
+}
+
+void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
+  return emitOMPForDirective(S, *this, CGM, S.hasCancel());
 }
 
 void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
@@ -4022,7 +4073,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
   const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
   const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
   bool HasLastprivates = false;
-  auto &&CodeGen = [&S, CapturedStmt, CS,
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+  auto &&CodeGen = [&S, CapturedStmt, CS, EKind,
                     &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
     const ASTContext &C = CGF.getContext();
     QualType KmpInt32Ty =
@@ -4103,7 +4155,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
     HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
     CGF.EmitOMPReductionClauseInit(S, LoopScope);
     (void)LoopScope.Privatize();
-    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
+    if (isOpenMPTargetExecutionDirective(EKind))
       CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
 
     // Emit static non-chunked loop.
@@ -4113,7 +4165,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
         LB.getAddress(), UB.getAddress(), ST.getAddress());
     CGF.CGM.getOpenMPRuntime().emitForStaticInit(
-        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
+        CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit);
     // UB = min(UB, GlobalUB);
     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
@@ -4129,7 +4181,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
       CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                      OMPD_sections);
     };
-    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
+    CGF.OMPCancelStack.emitExit(CGF, EKind, CodeGen);
     CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
     // Emit post-update of the reduction variables if IsLastIter != 0.
     emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
@@ -4150,7 +4202,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
     HasCancel = OSD->hasCancel();
   else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
     HasCancel = OPSD->hasCancel();
-  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
+  OMPCancelStackRAII CancelRegion(*this, EKind, HasCancel);
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                               HasCancel);
   // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
@@ -5028,12 +5080,12 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
     Action.Enter(CGF);
     BodyGen(CGF);
   };
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
-      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
-      Data.NumberOfParts);
+      S, *I, *PartId, *TaskT, EKind, CodeGen, Data.Tied, Data.NumberOfParts);
   OMPLexicalScope Scope(*this, S, std::nullopt,
-                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
-                            !isOpenMPSimdDirective(S.getDirectiveKind()));
+                        !isOpenMPParallelDirective(EKind) &&
+                            !isOpenMPSimdDirective(EKind));
   TaskGen(*this, OutlinedFn, Data);
 }
 
@@ -5139,7 +5191,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
   }
   (void)TargetScope.Privatize();
   buildDependences(S, Data);
-  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
+  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, EKind,
                     &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
     // Set proper addresses for generated private copies.
     OMPPrivateScope Scope(CGF);
@@ -5194,7 +5247,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
     auto *TL = S.getSingleClause<OMPThreadLimitClause>();
     if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
-        needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) {
+        needsTaskBasedThreadLimit(EKind) && TL) {
       // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
       // enclosing this target region. This will indirectly set the thread_limit
       // for every applicable construct within target region.
@@ -5204,8 +5257,7 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     BodyGen(CGF);
   };
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
-      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
-      Data.NumberOfParts);
+      S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true, Data.NumberOfParts);
   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
   IntegerLiteral IfCond(getContext(), TrueOrFalse,
                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
@@ -5219,8 +5271,9 @@ void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                          CodeGenFunction &CGF,
                                          const CapturedStmt *CS,
                                          OMPPrivateScope &Scope) {
+  OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
   if (Data.Reductions) {
-    OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
+    OpenMPDirectiveKind CapturedRegion = EKind;
     OMPLexicalScope LexScope(CGF, S, CapturedRegion);
     ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                            Data.ReductionCopies, Data.ReductionOps);
@@ -5879,13 +5932,20 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
   }
 }
 
-void CodeGenFunction::EmitOMPDistributeDirective(
-    const OMPDistributeDirective &S) {
+// Pass OMPLoopDirective (instead of OMPDistributeDirective) to make this
+// function available for "loop bind(teams)", which maps to "distribute".
+void emitOMPDistributeDirective(const OMPLoopDirective &S, CodeGenFunction &CGF,
+                                CodeGenModule &CGM) {
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
     CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
   };
-  OMPLexicalScope Scope(*this, S, OMPD_unknown);
-  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
+  OMPLexicalScope Scope(CGF, S, OMPD_unknown);
+  CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPDistributeDirective(
+    const OMPDistributeDirective &S) {
+  emitOMPDistributeDirective(S, *this, CGM);
 }
 
 static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
@@ -7921,6 +7981,24 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
 
 void CodeGenFunction::EmitOMPGenericLoopDirective(
     const OMPGenericLoopDirective &S) {
+  // Always expect a bind clause on the loop directive. It it wasn't
+  // in the source, it should have been added in sema.
+
+  OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
+  if (const auto *C = S.getSingleClause<OMPBindClause>())
+    BindKind = C->getBindKind();
+
+  switch (BindKind) {
+  case OMPC_BIND_parallel:  // for
+    return emitOMPForDirective(S, *this, CGM, /*HasCancel=*/false);
+  case OMPC_BIND_teams:     // distribute
+    return emitOMPDistributeDirective(S, *this, CGM);
+  case OMPC_BIND_thread:    // simd
+    return emitOMPSimdDirective(S, *this, CGM);
+  case OMPC_BIND_unknown:
+    break;
+  }
+
   // Unimplemented, just inline the underlying statement for now.
   auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
     // Emit the loop iteration variable.
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 67e3c1d9067f3..7dadb5cd31a69 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -168,10 +168,6 @@ class DSAStackTy {
     SourceLocation DefaultAttrLoc;
     DefaultmapInfo DefaultmapMap[OMPC_DEFAULTMAP_unknown + 1];
     OpenMPDirectiveKind Directive = OMPD_unknown;
-    /// GenericLoopDirective with bind clause is mapped to other directives,
-    /// like for, distribute and simd. Presently, set MappedDirective to
-    /// OMPLoop. This may also be used in a similar way for other constructs.
-    OpenMPDirectiveKind MappedDirective = OMPD_unknown;
     DeclarationNameInfo DirectiveName;
     Scope *CurScope = nullptr;
     DeclContext *Context = nullptr;
@@ -645,24 +641,6 @@ class DSAStackTy {
     const SharingMapTy *Top = getTopOfStackOrNull();
     return Top ? Top->Directive : OMPD_unknown;
   }
-  OpenMPDirectiveKind getMappedDirective() const {
-    const SharingMapTy *Top = getTopOfStackOrNull();
-    return Top ? Top->MappedDirective : OMPD_unknown;
-  }
-  void setCurrentDirective(OpenMPDirectiveKind NewDK) {
-    SharingMapTy *Top = getTopOfStackOrNull();
-    assert(Top &&
-           "Before calling setCurrentDirective Top of Stack not to be NULL.");
-    // Store the old into MappedDirective & assign argument NewDK to Directive.
-    Top->Directive = NewDK;
-  }
-  void setMappedDirective(OpenMPDirectiveKind NewDK) {
-    SharingMapTy *Top = getTopOfStackOrNull();
-    assert(Top &&
-           "Before calling setMappedDirective Top of Stack not to be NULL.");
-    // Store the old into MappedDirective & assign argument NewDK to Directive.
-    Top->MappedDirective = NewDK;
-  }
   /// Returns directive kind at specified level.
   OpenMPDirectiveKind getDirective(unsigned Level) const {
     assert(!isStackEmpty() && "No directive at specified level.");
@@ -5981,127 +5959,63 @@ static bool teamsLoopCanBeParallelFor(Stmt *AStmt, Sema &SemaRef) {
   return Checker.teamsLoopCanBeParallelFor();
 }
 
-bool SemaOpenMP::mapLoopConstruct(
-    llvm::SmallVector<OMPClause *> &ClausesWithoutBind,
-    ArrayRef<OMPClause *> Clauses, OpenMPBindClauseKind &BindKind,
-    OpenMPDirectiveKind &Kind, OpenMPDirectiveKind &PrevMappedDirective,
-    SourceLocation StartLoc, SourceLocation EndLoc,
-    const DeclarationNameInfo &DirName, OpenMPDirectiveKind CancelRegion) {
-
-  bool UseClausesWithoutBind = false;
-
-  // Restricting to "#pragma omp loop bind"
-  if (getLangOpts().OpenMP >= 50 && Kind == OMPD_loop) {
-
-    const OpenMPDirectiveKind ParentDirective = DSAStack->getParentDirective();
-
-    if (BindKind == OMPC_BIND_unknown) {
-      // Setting the enclosing teams or parallel construct for the loop
-      // directive without bind clause.
-      // [5.0:129:25-28] If the bind clause is not present on the construct and
-      // the loop construct is closely nested inside a teams or parallel
-      // construct, the binding region is the corresponding teams or parallel
-      // region. If none of those conditions hold, the binding region is not
-      // defined.
-      BindKind = OMPC_BIND_thread; // Default bind(thread) if binding is unknown
-      ArrayRef<OpenMPDirectiveKind> ParentLeafs =
-          getLeafConstructsOrSelf(ParentDirective);
-
-      if (ParentDirective == OMPD_unknown) {
-        Diag(DSAStack->getDefaultDSALocation(),
-             diag::err_omp_bind_required_on_loop);
-      } else if (ParentLeafs.back() == OMPD_parallel) {
-        BindKind = OMPC_BIND_parallel;
-      } else if (ParentLeafs.back() == OMPD_teams) {
-        BindKind = OMPC_BIND_teams;
-      }
-    } else {
-      // bind clause is present in loop directive. When the loop directive is
-      // changed to a new directive the bind clause is not used. So, we should
-      // set flag indicating to only use the clauses that aren't the
-      // bind clause.
-      UseClausesWithoutBind = true;
-    }
-
-    for (OMPClause *C : Clauses) {
-      // Spec restriction : bind(teams) and reduction not permitted.
-      if (BindKind == OMPC_BIND_teams &&
-          C->getClauseKind() == llvm::omp::Clause::OMPC_reduction)
-        Diag(DSAStack->getDefaultDSALocation(),
-             diag::err_omp_loop_reduction_clause);
-
-      // A new Vector ClausesWithoutBind, which does not contain the bind
-      // clause, for passing to new directive.
-      if (C->getClauseKind() != llvm::omp::Clause::OMPC_bind)
-        ClausesWithoutBind.push_back(C);
-    }
-
-    switch (BindKind) {
-    case OMPC_BIND_parallel:
-      Kind = OMPD_for;
-      DSAStack->setCurrentDirective(OMPD_for);
-      DSAStack->setMappedDirective(OMPD_loop);
-      PrevMappedDirective = OMPD_loop;
-      break;
-    case OMPC_BIND_teams:
-      Kind = OMPD_distribute;
-      DSAStack->setCurrentDirective(OMPD_distribute);
-      DSAStack->setMappedDirective(OMPD_loop);
-      PrevMappedDirective = OMPD_loop;
-      break;
-    case OMPC_BIND_thread:
-      Kind = OMPD_simd;
-      DSAStack->setCurrentDirective(OMPD_simd);
-      DSAStack->setMappedDirective(OMPD_loop);
-      PrevMappedDirective = OMPD_loop;
-      break;
-    case OMPC_BIND_unknown:
-      break;
-    }
-  } else if (PrevMappedDirective == OMPD_loop) {
-    /// An initial pass after recognizing all the statements is done in the
-    /// Parser when the directive OMPD_loop is mapped to OMPD_for,
-    /// OMPD_distribute or OMPD_simd. A second transform pass with call from
-    /// clang::TreeTransform::TransformOMPExecutableDirective() is done
-    /// with the Directive as one of the above mapped directive without
-    /// the bind clause. Then "PrevMappedDirective" stored in the
-    /// OMPExecutableDirective is accessed and hence this else statement.
-
-    DSAStack->setMappedDirective(OMPD_loop);
-  }
-
-  return UseClausesWithoutBind;
-}
-
 StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
     OpenMPDirectiveKind Kind, const DeclarationNameInfo &DirName,
     OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
-    Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc,
-    OpenMPDirectiveKind PrevMappedDirective) {
+    Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) {
   assert(isOpenMPExecutableDirective(Kind) && "Unexpected directive category");
 
   StmtResult Res = StmtError();
   OpenMPBindClauseKind BindKind = OMPC_BIND_unknown;
-  llvm::SmallVector<OMPClause *> ClausesWithoutBind;
-  bool UseClausesWithoutBind = false;
+  llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit;
 
   if (const OMPBindClause *BC =
           OMPExecutableDirective::getSingleClause<OMPBindClause>(Clauses))
     BindKind = BC->getBindKind();
 
-  // Variable used to note down the DirectiveKind because mapLoopConstruct may
-  // change "Kind" variable, due to mapping of "omp loop" to other directives.
-  OpenMPDirectiveKind DK = Kind;
-  if (Kind == OMPD_loop || PrevMappedDirective == OMPD_loop) {
-    UseClausesWithoutBind = mapLoopConstruct(
-        ClausesWithoutBind, Clauses, BindKind, Kind, PrevMappedDirective,
-        StartLoc, EndLoc, DirName, CancelRegion);
-    DK = OMPD_loop;
+  if (Kind == OMPD_loop && BindKind == OMPC_BIND_unknown) {
+    const OpenMPDirectiveKind ParentDirective = DSAStack->getParentDirective();
+
+    // Setting the enclosing teams or parallel construct for the loop
+    // directive without bind clause.
+    // [5.0:129:25-28] If the bind clause is not present on the construct and
+    // the loop construct is closely nested inside a teams or parallel
+    // construct, the binding region is the corresponding teams or parallel
+    // region. If none of those conditions hold, the binding region is not
+    // defined.
+    BindKind = OMPC_BIND_thread; // Default bind(thread) if binding is unknown
+    ArrayRef<OpenMPDirectiveKind> ParentLeafs =
+        getLeafConstructsOrSelf(ParentDirective);
+
+    if (ParentDirective == OMPD_unknown) {
+      Diag(DSAStack->getDefaultDSALocation(),
+           diag::err_omp_bind_required_on_loop);
+    } else if (ParentLeafs.back() == OMPD_parallel) {
+      BindKind = OMPC_BIND_parallel;
+    } else if (ParentLeafs.back() == OMPD_teams) {
+      BindKind = OMPC_BIND_teams;
+    }
+
+    assert(BindKind != OMPC_BIND_unknown && "Expecting BindKind");
+
+    OMPClause *C =
+        ActOnOpenMPBindClause(BindKind, SourceLocation(), SourceLocation(),
+                              SourceLocation(), SourceLocation());
+    ClausesWithImplicit.push_back(C);
+  }
+
+  // Diagnose "loop bind(teams)" with "reduction".
+  if (Kind == OMPD_loop && BindKind == OMPC_BIND_teams) {
+    for (OMPClause *C : Clauses) {
+      if (C->getClauseKind() == OMPC_reduction)
+        Diag(DSAStack->getDefaultDSALocation(),
+             diag::err_omp_loop_reduction_clause);
+    }
   }
 
   // First check CancelRegion which is then used in checkNestingOfRegions.
   if (checkCancelRegion(SemaRef, Kind, CancelRegion, StartLoc) ||
-      checkNestingOfRegions(SemaRef, DSAStack, DK, DirName, CancelRegion,
+      checkNestingOfRegions(SemaRef, DSAStack, Kind, DirName, CancelRegion,
                             BindKind, StartLoc)) {
     return StmtError();
   }
@@ -6111,15 +6025,10 @@ StmtResult SemaOpenMP::ActOnOpenMPExecutableDirective(
                             isOpenMPTargetDataManagementDirective(Kind)))
     Diag(StartLoc, diag::warn_hip_omp_target_directives);
 
-  llvm::SmallVector<OMPClause *, 8> ClausesWithImplicit;
   VarsWithInheritedDSAType VarsWithInheritedDSA;
   bool ErrorFound = false;
-  if (getLangOpts().OpenMP >= 50 && UseClausesWithoutBind) {
-    ClausesWithImplicit.append(ClausesWithoutBind.begin(),
-                               ClausesWithoutBind.end());
-  } else {
-    ClausesWithImplicit.append(Clauses.begin(), Clauses.end());
-  }
+  ClausesWithImplicit.append(Clauses.begin(), Clauses.end());
+
   if (AStmt && !SemaRef.CurContext->isDependentContext() &&
       isOpenMPCapturingDirective(Kind)) {
     assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
@@ -9170,13 +9079,9 @@ static bool checkOpenMPIterationSpace(
   auto *CXXFor = dyn_cast_or_null<CXXForRangeStmt>(S);
   // Ranged for is supported only in OpenMP 5.0.
   if (!For && (SemaRef.LangOpts.OpenMP <= 45 || !CXXFor)) {
-    OpenMPDirectiveKind DK = (SemaRef.getLangOpts().OpenMP < 50 ||
-                              DSA.getMappedDirective() == OMPD_unknown)
-                                 ? DKind
-                                 : DSA.getMappedDirective();
     SemaRef.Diag(S->getBeginLoc(), diag::err_omp_not_for)
         << (CollapseLoopCountExpr != nullptr || OrderedLoopCountExpr != nullptr)
-        << getOpenMPDirectiveName(DK) << TotalNestedLoopCount
+        << getOpenMPDirectiveName(DKind) << TotalNestedLoopCount
         << (CurrentNestedLoopCount > 0) << CurrentNestedLoopCount;
     if (TotalNestedLoopCount > 1) {
       if (CollapseLoopCountExpr && OrderedLoopCountExpr)
@@ -9514,7 +9419,7 @@ static Stmt *buildPreInits(ASTContext &Context,
 /// contained DeclStmts need to be visible after the execution of the list. Used
 /// for OpenMP pre-init declarations/statements.
 static void appendFlattenedStmtList(SmallVectorImpl<Stmt *> &TargetList,
-                                     Stmt *Item) {
+                                    Stmt *Item) {
   // nullptr represents an empty list.
   if (!Item)
     return;
@@ -10331,34 +10236,12 @@ static bool checkSimdlenSafelenSpecified(Sema &S,
   return false;
 }
 
-static bool checkGenericLoopLastprivate(Sema &S, ArrayRef<OMPClause *> Clauses,
-                                        OpenMPDirectiveKind K,
-                                        DSAStackTy *Stack);
-
-bool SemaOpenMP::checkLastPrivateForMappedDirectives(
-    ArrayRef<OMPClause *> Clauses) {
-
-  // Check for syntax of lastprivate
-  // Param of the lastprivate have different meanings in the mapped directives
-  // e.g. "omp loop" Only loop iteration vars are allowed in lastprivate clause
-  //      "omp for"  lastprivate vars must be shared
-  if (getLangOpts().OpenMP >= 50 &&
-      DSAStack->getMappedDirective() == OMPD_loop &&
-      checkGenericLoopLastprivate(SemaRef, Clauses, OMPD_loop, DSAStack)) {
-    return false;
-  }
-  return true;
-}
-
 StmtResult SemaOpenMP::ActOnOpenMPSimdDirective(
     ArrayRef<OMPClause *> Clauses, Stmt *AStmt, SourceLocation StartLoc,
     SourceLocation EndLoc, VarsWithInheritedDSAType &VarsWithImplicitDSA) {
   if (!AStmt)
     return StmtError();
 
-  if (!checkLastPrivateForMappedDirectives(Clauses))
-    return StmtError();
-
   assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
   OMPLoopBasedDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
@@ -10377,8 +10260,7 @@ StmtResult SemaOpenMP::ActOnOpenMPSimdDirective(
 
   SemaRef.setFunctionHasBranchProtectedScope();
   auto *SimdDirective = OMPSimdDirective::Create(
-      getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
-      DSAStack->getMappedDirective());
+      getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
   return SimdDirective;
 }
 
@@ -10388,9 +10270,6 @@ StmtResult SemaOpenMP::ActOnOpenMPForDirective(
   if (!AStmt)
     return StmtError();
 
-  if (!checkLastPrivateForMappedDirectives(Clauses))
-    return StmtError();
-
   assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
   OMPLoopBasedDirective::HelperExprs B;
   // In presence of clause 'collapse' or 'ordered' with number of loops, it will
@@ -10406,8 +10285,7 @@ StmtResult SemaOpenMP::ActOnOpenMPForDirective(
 
   auto *ForDirective = OMPForDirective::Create(
       getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
-      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion(),
-      DSAStack->getMappedDirective());
+      DSAStack->getTaskgroupReductionRef(), DSAStack->isCancelRegion());
   return ForDirective;
 }
 
@@ -13594,9 +13472,6 @@ StmtResult SemaOpenMP::ActOnOpenMPDistributeDirective(
   if (!AStmt)
     return StmtError();
 
-  if (!checkLastPrivateForMappedDirectives(Clauses))
-    return StmtError();
-
   assert(isa<CapturedStmt>(AStmt) && "Captured statement expected");
   OMPLoopBasedDirective::HelperExprs B;
   // In presence of clause 'collapse' with number of loops, it will
@@ -13613,8 +13488,7 @@ StmtResult SemaOpenMP::ActOnOpenMPDistributeDirective(
 
   SemaRef.setFunctionHasBranchProtectedScope();
   auto *DistributeDirective = OMPDistributeDirective::Create(
-      getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B,
-      DSAStack->getMappedDirective());
+      getASTContext(), StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
   return DistributeDirective;
 }
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 84e846356e437..cb3c9d19e9eba 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1670,12 +1670,10 @@ class TreeTransform {
   StmtResult RebuildOMPExecutableDirective(
       OpenMPDirectiveKind Kind, DeclarationNameInfo DirName,
       OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
-      Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc,
-      OpenMPDirectiveKind PrevMappedDirective = OMPD_unknown) {
+      Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) {
 
     return getSema().OpenMP().ActOnOpenMPExecutableDirective(
-        Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc,
-        PrevMappedDirective);
+        Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc);
   }
 
   /// Build a new OpenMP 'if' clause.
@@ -9182,8 +9180,7 @@ StmtResult TreeTransform<Derived>::TransformOMPExecutableDirective(
 
   return getDerived().RebuildOMPExecutableDirective(
       D->getDirectiveKind(), DirName, CancelRegion, TClauses,
-      AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc(),
-      D->getMappedDirective());
+      AssociatedStmt.get(), D->getBeginLoc(), D->getEndLoc());
 }
 
 template <typename Derived>
diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index a7fe20bd0a466..20427eb495c38 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2400,7 +2400,6 @@ void ASTStmtReader::VisitOMPExecutableDirective(OMPExecutableDirective *E) {
   Record.readOMPChildren(E->Data);
   E->setLocStart(readSourceLocation());
   E->setLocEnd(readSourceLocation());
-  E->setMappedDirective(Record.readEnum<OpenMPDirectiveKind>());
 }
 
 void ASTStmtReader::VisitOMPLoopBasedDirective(OMPLoopBasedDirective *D) {
diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index caa222277f062..ec667b58337ff 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2389,7 +2389,6 @@ void ASTStmtWriter::VisitOMPExecutableDirective(OMPExecutableDirective *E) {
   Record.writeOMPChildren(E->Data);
   Record.AddSourceLocation(E->getBeginLoc());
   Record.AddSourceLocation(E->getEndLoc());
-  Record.writeEnum(E->getMappedDirective());
 }
 
 void ASTStmtWriter::VisitOMPLoopBasedDirective(OMPLoopBasedDirective *D) {
diff --git a/clang/test/OpenMP/generic_loop_ast_print.cpp b/clang/test/OpenMP/generic_loop_ast_print.cpp
index b61ee79615d04..b361724c12a0d 100644
--- a/clang/test/OpenMP/generic_loop_ast_print.cpp
+++ b/clang/test/OpenMP/generic_loop_ast_print.cpp
@@ -23,7 +23,7 @@
 
 //PRINT: template <typename T, int C> void templ_foo(T t) {
 //PRINT:   T j, z;
-//PRINT:   #pragma omp simd collapse(C) reduction(+: z) lastprivate(j)
+//PRINT:   #pragma omp loop collapse(C) reduction(+: z) lastprivate(j) bind(thread)
 //PRINT:   for (T i = 0; i < t; ++i)
 //PRINT:       for (j = 0; j < t; ++j)
 //PRINT:           z += i + j;
@@ -31,19 +31,20 @@
 //DUMP: FunctionTemplateDecl{{.*}}templ_foo
 //DUMP: TemplateTypeParmDecl{{.*}}T
 //DUMP: NonTypeTemplateParmDecl{{.*}}C
-//DUMP: OMPSimdDirective
+//DUMP: OMPGenericLoopDirective
 //DUMP: OMPCollapseClause
 //DUMP: DeclRefExpr{{.*}}'C' 'int'
 //DUMP: OMPReductionClause
 //DUMP: DeclRefExpr{{.*}}'z' 'T'
 //DUMP: OMPLastprivateClause
 //DUMP: DeclRefExpr{{.*}}'j' 'T'
+//DUMP: OMPBindClause
 //DUMP: ForStmt
 //DUMP: ForStmt
 
 //PRINT: template<> void templ_foo<int, 2>(int t) {
 //PRINT:     int j, z;
-//PRINT:     #pragma omp simd collapse(2) reduction(+: z) lastprivate(j)
+//PRINT:     #pragma omp loop collapse(2) reduction(+: z) lastprivate(j) bind(thread)
 //PRINT:         for (int i = 0; i < t; ++i)
 //PRINT:             for (j = 0; j < t; ++j)
 //PRINT:                 z += i + j;
@@ -52,7 +53,7 @@
 //DUMP: TemplateArgument type 'int'
 //DUMP: TemplateArgument integral '2'
 //DUMP: ParmVarDecl{{.*}}'int'
-//DUMP: OMPSimdDirective
+//DUMP: OMPGenericLoopDirective
 //DUMP: OMPCollapseClause
 //DUMP: ConstantExpr{{.*}}'int'
 //DUMP: value: Int 2
@@ -60,6 +61,7 @@
 //DUMP: DeclRefExpr{{.*}}'z' 'int'
 //DUMP: OMPLastprivateClause
 //DUMP: DeclRefExpr{{.*}}'j' 'int'
+//DUMP: OMPBindClause
 //DUMP: ForStmt
 template <typename T, int C>
 void templ_foo(T t) {
@@ -80,12 +82,12 @@ void test() {
   int aaa[1000];
 
   //PRINT: #pragma omp target teams distribute parallel for map(tofrom: MTX)
-  //PRINT: #pragma omp simd
+  //PRINT: #pragma omp loop
   //DUMP: OMPTargetTeamsDistributeParallelForDirective
   //DUMP: CapturedStmt
   //DUMP: ForStmt
   //DUMP: CompoundStmt
-  //DUMP: OMPSimdDirective
+  //DUMP: OMPGenericLoopDirective
   #pragma omp target teams distribute parallel for map(MTX)
   for (auto i = 0; i < N; ++i) {
     #pragma omp loop
@@ -95,11 +97,11 @@ void test() {
   }
 
   //PRINT: #pragma omp target teams
-  //PRINT: #pragma omp distribute
+  //PRINT: #pragma omp loop
   //DUMP: OMPTargetTeamsDirective
   //DUMP: CapturedStmt
   //DUMP: ForStmt
-  //DUMP: OMPDistributeDirective
+  //DUMP: OMPGenericLoopDirective
   #pragma omp target teams
   for (int i=0; i<1000; ++i) {
     #pragma omp loop
@@ -109,8 +111,8 @@ void test() {
   }
 
   int j, z, z1;
-  //PRINT: #pragma omp for collapse(2) private(z) lastprivate(j) order(concurrent) reduction(+: z1)
-  //DUMP: OMPForDirective
+  //PRINT: #pragma omp loop collapse(2) private(z) lastprivate(j) order(concurrent) reduction(+: z1) bind(parallel)
+  //DUMP: OMPGenericLoopDirective
   //DUMP: OMPCollapseClause
   //DUMP: IntegerLiteral{{.*}}2
   //DUMP: OMPPrivateClause
@@ -120,6 +122,7 @@ void test() {
   //DUMP: OMPOrderClause
   //DUMP: OMPReductionClause
   //DUMP-NEXT: DeclRefExpr{{.*}}'z1'
+  //DUMP: OMPBindClause
   //DUMP: ForStmt
   //DUMP: ForStmt
   #pragma omp loop collapse(2) private(z) lastprivate(j) order(concurrent) \
@@ -133,9 +136,10 @@ void test() {
   }
 
   //PRINT: #pragma omp target teams
-  //PRINT: #pragma omp distribute
+  //PRINT: #pragma omp loop bind(teams)
   //DUMP: OMPTargetTeamsDirective
-  //DUMP: OMPDistributeDirective
+  //DUMP: OMPGenericLoopDirective
+  //DUMP: OMPBindClause
   //DUMP: ForStmt
   #pragma omp target teams
   #pragma omp loop bind(teams)
@@ -143,10 +147,11 @@ void test() {
 
   //PRINT: #pragma omp target
   //PRINT: #pragma omp teams
-  //PRINT: #pragma omp distribute
+  //PRINT: #pragma omp loop bind(teams)
   //DUMP: OMPTargetDirective
   //DUMP: OMPTeamsDirective
-  //DUMP: OMPDistributeDirective
+  //DUMP: OMPGenericLoopDirective
+  //DUMP: OMPBindClause
   //DUMP: ForStmt
   #pragma omp target
   #pragma omp teams
diff --git a/clang/test/OpenMP/generic_loop_codegen.cpp b/clang/test/OpenMP/generic_loop_codegen.cpp
index c3ad43bebccaf..d062695fee281 100644
--- a/clang/test/OpenMP/generic_loop_codegen.cpp
+++ b/clang/test/OpenMP/generic_loop_codegen.cpp
@@ -32,6 +32,8 @@ void foo(int t) {
 // IR-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i64, align 8
 // IR-NEXT:    [[I8:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[J9:%.*]] = alloca i32, align 4
 // IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
@@ -54,86 +56,89 @@ void foo(int t) {
 // IR-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // IR-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // IR-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// IR-NEXT:    store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-NEXT:    [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// IR-NEXT:    store i64 [[TMP4]], ptr [[DOTOMP_UB]], align 8
 // IR-NEXT:    store i32 0, ptr [[I8]], align 4
 // IR-NEXT:    store i32 0, ptr [[J9]], align 4
-// IR-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// IR-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
 // IR-NEXT:    br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[SIMD_IF_END:%.*]]
 // IR:       land.lhs.true:
-// IR-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    [[CMP10:%.*]] = icmp slt i32 0, [[TMP5]]
+// IR-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[CMP10:%.*]] = icmp slt i32 0, [[TMP6]]
 // IR-NEXT:    br i1 [[CMP10]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END]]
 // IR:       simd.if.then:
-// IR-NEXT:    store i64 0, ptr [[DOTOMP_IV]], align 8
+// IR-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-NEXT:    store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8
 // IR-NEXT:    store i32 0, ptr [[Z13]], align 4
 // IR-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // IR:       omp.inner.for.cond:
-// IR-NEXT:    [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
-// IR-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP7]], 1
-// IR-NEXT:    [[CMP14:%.*]] = icmp slt i64 [[TMP6]], [[ADD]]
+// IR-NEXT:    [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// IR-NEXT:    [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[CMP14:%.*]] = icmp sle i64 [[TMP8]], [[TMP9]]
 // IR-NEXT:    br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // IR:       omp.inner.for.body:
-// IR-NEXT:    [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[SUB15:%.*]] = sub nsw i32 [[TMP9]], 0
+// IR-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[SUB15:%.*]] = sub nsw i32 [[TMP11]], 0
 // IR-NEXT:    [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
 // IR-NEXT:    [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
 // IR-NEXT:    [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
-// IR-NEXT:    [[DIV19:%.*]] = sdiv i64 [[TMP8]], [[CONV18]]
+// IR-NEXT:    [[DIV19:%.*]] = sdiv i64 [[TMP10]], [[CONV18]]
 // IR-NEXT:    [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
 // IR-NEXT:    [[ADD21:%.*]] = add nsw i64 0, [[MUL20]]
 // IR-NEXT:    [[CONV22:%.*]] = trunc i64 [[ADD21]] to i32
 // IR-NEXT:    store i32 [[CONV22]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP12]], 0
+// IR-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP14]], 0
 // IR-NEXT:    [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
 // IR-NEXT:    [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]]
 // IR-NEXT:    [[CONV26:%.*]] = sext i32 [[MUL25]] to i64
-// IR-NEXT:    [[DIV27:%.*]] = sdiv i64 [[TMP11]], [[CONV26]]
-// IR-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP13]], 0
+// IR-NEXT:    [[DIV27:%.*]] = sdiv i64 [[TMP13]], [[CONV26]]
+// IR-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP15]], 0
 // IR-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // IR-NEXT:    [[MUL30:%.*]] = mul nsw i32 1, [[DIV29]]
 // IR-NEXT:    [[CONV31:%.*]] = sext i32 [[MUL30]] to i64
 // IR-NEXT:    [[MUL32:%.*]] = mul nsw i64 [[DIV27]], [[CONV31]]
-// IR-NEXT:    [[SUB33:%.*]] = sub nsw i64 [[TMP10]], [[MUL32]]
+// IR-NEXT:    [[SUB33:%.*]] = sub nsw i64 [[TMP12]], [[MUL32]]
 // IR-NEXT:    [[MUL34:%.*]] = mul nsw i64 [[SUB33]], 1
 // IR-NEXT:    [[ADD35:%.*]] = add nsw i64 0, [[MUL34]]
 // IR-NEXT:    [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32
 // IR-NEXT:    store i32 [[CONV36]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[TMP15:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// IR-NEXT:    [[TMP16:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP16]], [[ADD37]]
+// IR-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[TMP17:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP18]], [[ADD37]]
 // IR-NEXT:    store i32 [[ADD38]], ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
 // IR-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // IR:       omp.body.continue:
 // IR-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // IR:       omp.inner.for.inc:
-// IR-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP17]], 1
+// IR-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP19]], 1
 // IR-NEXT:    store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
 // IR-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // IR:       omp.inner.for.end:
-// IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP18]], 0
+// IR-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP20]], 0
 // IR-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // IR-NEXT:    [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1
 // IR-NEXT:    [[ADD43:%.*]] = add nsw i32 0, [[MUL42]]
 // IR-NEXT:    store i32 [[ADD43]], ptr [[I11]], align 4
-// IR-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP21]], 0
 // IR-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // IR-NEXT:    [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1
 // IR-NEXT:    [[ADD47:%.*]] = add nsw i32 0, [[MUL46]]
 // IR-NEXT:    store i32 [[ADD47]], ptr [[J]], align 4
-// IR-NEXT:    [[TMP20:%.*]] = load i32, ptr [[Z]], align 4
-// IR-NEXT:    [[TMP21:%.*]] = load i32, ptr [[Z13]], align 4
-// IR-NEXT:    [[ADD48:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// IR-NEXT:    [[TMP22:%.*]] = load i32, ptr [[Z]], align 4
+// IR-NEXT:    [[TMP23:%.*]] = load i32, ptr [[Z13]], align 4
+// IR-NEXT:    [[ADD48:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
 // IR-NEXT:    store i32 [[ADD48]], ptr [[Z]], align 4
 // IR-NEXT:    br label [[SIMD_IF_END]]
 // IR:       simd.if.end:
@@ -152,6 +157,8 @@ void foo(int t) {
 // IR-PCH-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
 // IR-PCH-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
 // IR-PCH-NEXT:    [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT:    [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// IR-PCH-NEXT:    [[DOTOMP_UB:%.*]] = alloca i64, align 8
 // IR-PCH-NEXT:    [[I8:%.*]] = alloca i32, align 4
 // IR-PCH-NEXT:    [[J9:%.*]] = alloca i32, align 4
 // IR-PCH-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
@@ -174,86 +181,89 @@ void foo(int t) {
 // IR-PCH-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]]
 // IR-PCH-NEXT:    [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1
 // IR-PCH-NEXT:    store i64 [[SUB7]], ptr [[DOTCAPTURE_EXPR_3]], align 8
+// IR-PCH-NEXT:    store i64 0, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT:    [[TMP4:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8
+// IR-PCH-NEXT:    store i64 [[TMP4]], ptr [[DOTOMP_UB]], align 8
 // IR-PCH-NEXT:    store i32 0, ptr [[I8]], align 4
 // IR-PCH-NEXT:    store i32 0, ptr [[J9]], align 4
-// IR-PCH-NEXT:    [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// IR-PCH-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP4]]
+// IR-PCH-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT:    [[CMP:%.*]] = icmp slt i32 0, [[TMP5]]
 // IR-PCH-NEXT:    br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[SIMD_IF_END:%.*]]
 // IR-PCH:       land.lhs.true:
-// IR-PCH-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// IR-PCH-NEXT:    [[CMP10:%.*]] = icmp slt i32 0, [[TMP5]]
+// IR-PCH-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-PCH-NEXT:    [[CMP10:%.*]] = icmp slt i32 0, [[TMP6]]
 // IR-PCH-NEXT:    br i1 [[CMP10]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END]]
 // IR-PCH:       simd.if.then:
-// IR-PCH-NEXT:    store i64 0, ptr [[DOTOMP_IV]], align 8
+// IR-PCH-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTOMP_LB]], align 8
+// IR-PCH-NEXT:    store i64 [[TMP7]], ptr [[DOTOMP_IV]], align 8
 // IR-PCH-NEXT:    store i32 0, ptr [[Z13]], align 4
 // IR-PCH-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
 // IR-PCH:       omp.inner.for.cond:
-// IR-PCH-NEXT:    [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
-// IR-PCH-NEXT:    [[TMP7:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[ADD:%.*]] = add nsw i64 [[TMP7]], 1
-// IR-PCH-NEXT:    [[CMP14:%.*]] = icmp slt i64 [[TMP6]], [[ADD]]
+// IR-PCH-NEXT:    [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3:![0-9]+]]
+// IR-PCH-NEXT:    [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[CMP14:%.*]] = icmp sle i64 [[TMP8]], [[TMP9]]
 // IR-PCH-NEXT:    br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
 // IR-PCH:       omp.inner.for.body:
-// IR-PCH-NEXT:    [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[SUB15:%.*]] = sub nsw i32 [[TMP9]], 0
+// IR-PCH-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[SUB15:%.*]] = sub nsw i32 [[TMP11]], 0
 // IR-PCH-NEXT:    [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1
 // IR-PCH-NEXT:    [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]]
 // IR-PCH-NEXT:    [[CONV18:%.*]] = sext i32 [[MUL17]] to i64
-// IR-PCH-NEXT:    [[DIV19:%.*]] = sdiv i64 [[TMP8]], [[CONV18]]
+// IR-PCH-NEXT:    [[DIV19:%.*]] = sdiv i64 [[TMP10]], [[CONV18]]
 // IR-PCH-NEXT:    [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1
 // IR-PCH-NEXT:    [[ADD21:%.*]] = add nsw i64 0, [[MUL20]]
 // IR-PCH-NEXT:    [[CONV22:%.*]] = trunc i64 [[ADD21]] to i32
 // IR-PCH-NEXT:    store i32 [[CONV22]], ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP12]], 0
+// IR-PCH-NEXT:    [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[TMP13:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[SUB23:%.*]] = sub nsw i32 [[TMP14]], 0
 // IR-PCH-NEXT:    [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1
 // IR-PCH-NEXT:    [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]]
 // IR-PCH-NEXT:    [[CONV26:%.*]] = sext i32 [[MUL25]] to i64
-// IR-PCH-NEXT:    [[DIV27:%.*]] = sdiv i64 [[TMP11]], [[CONV26]]
-// IR-PCH-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP13]], 0
+// IR-PCH-NEXT:    [[DIV27:%.*]] = sdiv i64 [[TMP13]], [[CONV26]]
+// IR-PCH-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[SUB28:%.*]] = sub nsw i32 [[TMP15]], 0
 // IR-PCH-NEXT:    [[DIV29:%.*]] = sdiv i32 [[SUB28]], 1
 // IR-PCH-NEXT:    [[MUL30:%.*]] = mul nsw i32 1, [[DIV29]]
 // IR-PCH-NEXT:    [[CONV31:%.*]] = sext i32 [[MUL30]] to i64
 // IR-PCH-NEXT:    [[MUL32:%.*]] = mul nsw i64 [[DIV27]], [[CONV31]]
-// IR-PCH-NEXT:    [[SUB33:%.*]] = sub nsw i64 [[TMP10]], [[MUL32]]
+// IR-PCH-NEXT:    [[SUB33:%.*]] = sub nsw i64 [[TMP12]], [[MUL32]]
 // IR-PCH-NEXT:    [[MUL34:%.*]] = mul nsw i64 [[SUB33]], 1
 // IR-PCH-NEXT:    [[ADD35:%.*]] = add nsw i64 0, [[MUL34]]
 // IR-PCH-NEXT:    [[CONV36:%.*]] = trunc i64 [[ADD35]] to i32
 // IR-PCH-NEXT:    store i32 [[CONV36]], ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[TMP14:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[TMP15:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP14]], [[TMP15]]
-// IR-PCH-NEXT:    [[TMP16:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP16]], [[ADD37]]
+// IR-PCH-NEXT:    [[TMP16:%.*]] = load i32, ptr [[I11]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[TMP17:%.*]] = load i32, ptr [[J12]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[ADD37:%.*]] = add nsw i32 [[TMP16]], [[TMP17]]
+// IR-PCH-NEXT:    [[TMP18:%.*]] = load i32, ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[ADD38:%.*]] = add nsw i32 [[TMP18]], [[ADD37]]
 // IR-PCH-NEXT:    store i32 [[ADD38]], ptr [[Z13]], align 4, !llvm.access.group [[ACC_GRP3]]
 // IR-PCH-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
 // IR-PCH:       omp.body.continue:
 // IR-PCH-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
 // IR-PCH:       omp.inner.for.inc:
-// IR-PCH-NEXT:    [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
-// IR-PCH-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP17]], 1
+// IR-PCH-NEXT:    [[TMP19:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
+// IR-PCH-NEXT:    [[ADD39:%.*]] = add nsw i64 [[TMP19]], 1
 // IR-PCH-NEXT:    store i64 [[ADD39]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP3]]
 // IR-PCH-NEXT:    br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]]
 // IR-PCH:       omp.inner.for.end:
-// IR-PCH-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// IR-PCH-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP18]], 0
+// IR-PCH-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-PCH-NEXT:    [[SUB40:%.*]] = sub nsw i32 [[TMP20]], 0
 // IR-PCH-NEXT:    [[DIV41:%.*]] = sdiv i32 [[SUB40]], 1
 // IR-PCH-NEXT:    [[MUL42:%.*]] = mul nsw i32 [[DIV41]], 1
 // IR-PCH-NEXT:    [[ADD43:%.*]] = add nsw i32 0, [[MUL42]]
 // IR-PCH-NEXT:    store i32 [[ADD43]], ptr [[I11]], align 4
-// IR-PCH-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// IR-PCH-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP19]], 0
+// IR-PCH-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-PCH-NEXT:    [[SUB44:%.*]] = sub nsw i32 [[TMP21]], 0
 // IR-PCH-NEXT:    [[DIV45:%.*]] = sdiv i32 [[SUB44]], 1
 // IR-PCH-NEXT:    [[MUL46:%.*]] = mul nsw i32 [[DIV45]], 1
 // IR-PCH-NEXT:    [[ADD47:%.*]] = add nsw i32 0, [[MUL46]]
 // IR-PCH-NEXT:    store i32 [[ADD47]], ptr [[J]], align 4
-// IR-PCH-NEXT:    [[TMP20:%.*]] = load i32, ptr [[Z]], align 4
-// IR-PCH-NEXT:    [[TMP21:%.*]] = load i32, ptr [[Z13]], align 4
-// IR-PCH-NEXT:    [[ADD48:%.*]] = add nsw i32 [[TMP20]], [[TMP21]]
+// IR-PCH-NEXT:    [[TMP22:%.*]] = load i32, ptr [[Z]], align 4
+// IR-PCH-NEXT:    [[TMP23:%.*]] = load i32, ptr [[Z13]], align 4
+// IR-PCH-NEXT:    [[ADD48:%.*]] = add nsw i32 [[TMP22]], [[TMP23]]
 // IR-PCH-NEXT:    store i32 [[ADD48]], ptr [[Z]], align 4
 // IR-PCH-NEXT:    br label [[SIMD_IF_END]]
 // IR-PCH:       simd.if.end:
diff --git a/clang/test/PCH/pragma-loop.cpp b/clang/test/PCH/pragma-loop.cpp
index a3c6871041c0e..b02383c1485d5 100644
--- a/clang/test/PCH/pragma-loop.cpp
+++ b/clang/test/PCH/pragma-loop.cpp
@@ -18,9 +18,9 @@
 // CHECK: #pragma nounroll{{$}}
 // CHECK: #pragma clang loop vectorize_width(V)
 // CHECK: #pragma clang loop interleave_count(I)
-// CHECK: #pragma omp simd
-// CHECK: #pragma omp for
-// CHECK: #pragma omp distribute
+// CHECK: #pragma omp loop bind(thread)
+// CHECK: #pragma omp loop bind(parallel)
+// CHECK: #pragma omp loop bind(teams)
 
 #ifndef HEADER
 #define HEADER

>From b90e283c3ee43ffe8ad5b16275ee2a04865b08ba Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Mon, 22 Jul 2024 12:39:24 -0500
Subject: [PATCH 2/4] clang-format

---
 clang/lib/AST/StmtOpenMP.cpp       | 18 ++++++++++--------
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 18 +++++++++---------
 clang/lib/Sema/TreeTransform.h     | 10 ++++++----
 3 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index 525d079da2670..451a9fe9fe3d2 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -297,10 +297,11 @@ OMPParallelDirective *OMPParallelDirective::CreateEmpty(const ASTContext &C,
                                                     /*NumChildren=*/1);
 }
 
-OMPSimdDirective *OMPSimdDirective::Create(
-    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs) {
+OMPSimdDirective *
+OMPSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+                         SourceLocation EndLoc, unsigned CollapsedNum,
+                         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+                         const HelperExprs &Exprs) {
   auto *Dir = createDirective<OMPSimdDirective>(
       C, Clauses, AssociatedStmt, numLoopChildren(CollapsedNum, OMPD_simd),
       StartLoc, EndLoc, CollapsedNum);
@@ -1566,10 +1567,11 @@ OMPParallelMaskedTaskLoopSimdDirective::CreateEmpty(const ASTContext &C,
       CollapsedNum);
 }
 
-OMPDistributeDirective *OMPDistributeDirective::Create(
-    const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
-    unsigned CollapsedNum, ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
-    const HelperExprs &Exprs) {
+OMPDistributeDirective *
+OMPDistributeDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+                               SourceLocation EndLoc, unsigned CollapsedNum,
+                               ArrayRef<OMPClause *> Clauses,
+                               Stmt *AssociatedStmt, const HelperExprs &Exprs) {
   auto *Dir = createDirective<OMPDistributeDirective>(
       C, Clauses, AssociatedStmt,
       numLoopChildren(CollapsedNum, OMPD_distribute), StartLoc, EndLoc,
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 2e83213fa03e1..d9906ddd77fdc 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1105,8 +1105,7 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
   llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
   for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
     HasAtLeastOneLastprivate = true;
-    if (isOpenMPTaskLoopDirective(EKind) &&
-        !getLangOpts().OpenMPSimd)
+    if (isOpenMPTaskLoopDirective(EKind) && !getLangOpts().OpenMPSimd)
       break;
     const auto *IRef = C->varlist_begin();
     const auto *IDestRef = C->destination_exprs().begin();
@@ -3261,7 +3260,7 @@ emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
                                  CodeGenFunction::JumpDest LoopExit) {
   OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);
   auto &&CGInlinedWorksharingLoop = [&S, EKind](CodeGenFunction &CGF,
-                                               PrePostActionTy &Action) {
+                                                PrePostActionTy &Action) {
     Action.Enter(CGF);
     bool HasCancel = false;
     if (!isOpenMPSimdDirective(EKind)) {
@@ -4164,8 +4163,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
     CGOpenMPRuntime::StaticRTInput StaticInit(
         /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(),
         LB.getAddress(), UB.getAddress(), ST.getAddress());
-    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
-        CGF, S.getBeginLoc(), EKind, ScheduleKind, StaticInit);
+    CGF.CGM.getOpenMPRuntime().emitForStaticInit(CGF, S.getBeginLoc(), EKind,
+                                                 ScheduleKind, StaticInit);
     // UB = min(UB, GlobalUB);
     llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
     llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
@@ -5257,7 +5256,8 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
     BodyGen(CGF);
   };
   llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
-      S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true, Data.NumberOfParts);
+      S, *I, *PartId, *TaskT, EKind, CodeGen, /*Tied=*/true,
+      Data.NumberOfParts);
   llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
   IntegerLiteral IfCond(getContext(), TrueOrFalse,
                         getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
@@ -7989,11 +7989,11 @@ void CodeGenFunction::EmitOMPGenericLoopDirective(
     BindKind = C->getBindKind();
 
   switch (BindKind) {
-  case OMPC_BIND_parallel:  // for
+  case OMPC_BIND_parallel: // for
     return emitOMPForDirective(S, *this, CGM, /*HasCancel=*/false);
-  case OMPC_BIND_teams:     // distribute
+  case OMPC_BIND_teams: // distribute
     return emitOMPDistributeDirective(S, *this, CGM);
-  case OMPC_BIND_thread:    // simd
+  case OMPC_BIND_thread: // simd
     return emitOMPSimdDirective(S, *this, CGM);
   case OMPC_BIND_unknown:
     break;
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index cb3c9d19e9eba..4d68ebf0cc452 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1667,10 +1667,12 @@ class TreeTransform {
   ///
   /// By default, performs semantic analysis to build the new statement.
   /// Subclasses may override this routine to provide different behavior.
-  StmtResult RebuildOMPExecutableDirective(
-      OpenMPDirectiveKind Kind, DeclarationNameInfo DirName,
-      OpenMPDirectiveKind CancelRegion, ArrayRef<OMPClause *> Clauses,
-      Stmt *AStmt, SourceLocation StartLoc, SourceLocation EndLoc) {
+  StmtResult RebuildOMPExecutableDirective(OpenMPDirectiveKind Kind,
+                                           DeclarationNameInfo DirName,
+                                           OpenMPDirectiveKind CancelRegion,
+                                           ArrayRef<OMPClause *> Clauses,
+                                           Stmt *AStmt, SourceLocation StartLoc,
+                                           SourceLocation EndLoc) {
 
     return getSema().OpenMP().ActOnOpenMPExecutableDirective(
         Kind, DirName, CancelRegion, Clauses, AStmt, StartLoc, EndLoc);

>From cac9788769932da963d1a8162fc312e9ccdab8a9 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Mon, 22 Jul 2024 13:16:44 -0500
Subject: [PATCH 3/4] Update clang/lib/CodeGen/CGStmtOpenMP.cpp

Co-authored-by: Alexey Bataev <a.bataev at gmx.com>
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index d9906ddd77fdc..b105006e206ca 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2576,8 +2576,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
   if (isOpenMPDistributeDirective(DKind) ||
       isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) ||
       isOpenMPGenericLoopDirective(DKind)) {
-    EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
-    EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+    (void)EmitOMPHelperVar(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()));
   }
 
   OpenMPDirectiveKind EKind = getEffectiveDirectiveKind(S);

>From 6be171d8fe3764058b75076546e0b3242c507f8f Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek <Krzysztof.Parzyszek at amd.com>
Date: Mon, 22 Jul 2024 13:16:53 -0500
Subject: [PATCH 4/4] Update clang/lib/CodeGen/CGStmtOpenMP.cpp

Co-authored-by: Alexey Bataev <a.bataev at gmx.com>
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index b105006e206ca..0912f5f1ecbb2 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3932,7 +3932,7 @@ static bool isForSupportedByOpenMPIRBuilder(const OMPLoopDirective &S,
   if (HasCancel)
     return false;
   for (OMPClause *C : S.clauses()) {
-    if (isa<OMPNowaitClause>(C) || isa<OMPBindClause>(C))
+    if (isa<OMPNowaitClause, OMPBindClause>(C))
       continue;
 
     if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {



More information about the cfe-commits mailing list