r345509 - [OpenMP][NVPTX] Use single loops when generating code for distribute parallel for

Gheorghe-Teodor Bercea via cfe-commits cfe-commits at lists.llvm.org
Mon Oct 29 08:45:47 PDT 2018


Author: gbercea
Date: Mon Oct 29 08:45:47 2018
New Revision: 345509

URL: http://llvm.org/viewvc/llvm-project?rev=345509&view=rev
Log:
[OpenMP][NVPTX] Use single loops when generating code for distribute parallel for

Summary: This patch adds a new code generation path for bound sharing directives containing distribute parallel for. The new code generation scheme applies to chunked schedules on distribute and parallel for directives. The scheme simplifies the code that is being generated by eliminating the need for an outer for loop over chunks for both distribute and parallel for directives. In the case of distribute it applies to any sized chunk while in the parallel for case it only applies when chunk size is 1.

Reviewers: ABataev, caomhin

Reviewed By: ABataev

Subscribers: jholewinski, guansong, cfe-commits

Differential Revision: https://reviews.llvm.org/D53448

Modified:
    cfe/trunk/include/clang/AST/StmtOpenMP.h
    cfe/trunk/lib/AST/StmtOpenMP.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
    cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
    cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
    cfe/trunk/lib/Sema/SemaOpenMP.cpp
    cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
    cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
    cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp
    cfe/trunk/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
    cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp

Modified: cfe/trunk/include/clang/AST/StmtOpenMP.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/StmtOpenMP.h?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/include/clang/AST/StmtOpenMP.h (original)
+++ cfe/trunk/include/clang/AST/StmtOpenMP.h Mon Oct 29 08:45:47 2018
@@ -392,9 +392,11 @@ class OMPLoopDirective : public OMPExecu
     CombinedConditionOffset = 25,
     CombinedNextLowerBoundOffset = 26,
     CombinedNextUpperBoundOffset = 27,
+    CombinedDistConditionOffset = 28,
+    CombinedParForInDistConditionOffset = 29,
     // Offset to the end (and start of the following counters/updates/finals
     // arrays) for combined distribute loop directives.
-    CombinedDistributeEnd = 28,
+    CombinedDistributeEnd = 30,
   };
 
   /// Get the counters storage.
@@ -605,6 +607,17 @@ protected:
            "expected loop bound sharing directive");
     *std::next(child_begin(), CombinedNextUpperBoundOffset) = CombNUB;
   }
+  void setCombinedDistCond(Expr *CombDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(), CombinedDistConditionOffset) = CombDistCond;
+  }
+  void setCombinedParForInDistCond(Expr *CombParForInDistCond) {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    *std::next(child_begin(),
+               CombinedParForInDistConditionOffset) = CombParForInDistCond;
+  }
   void setCounters(ArrayRef<Expr *> A);
   void setPrivateCounters(ArrayRef<Expr *> A);
   void setInits(ArrayRef<Expr *> A);
@@ -637,6 +650,13 @@ public:
     /// Update of UpperBound for statically scheduled omp loops for
     /// outer loop in combined constructs (e.g. 'distribute parallel for')
     Expr *NUB;
+    /// Distribute Loop condition used when composing 'omp distribute'
+    ///  with 'omp for' in a same construct when schedule is chunked.
+    Expr *DistCond;
+    /// 'omp parallel for' loop condition used when composed with
+    /// 'omp distribute' in the same construct and when schedule is
+    /// chunked and the chunk size is 1.
+    Expr *ParForInDistCond;
   };
 
   /// The expressions built for the OpenMP loop CodeGen for the
@@ -754,6 +774,8 @@ public:
       DistCombinedFields.Cond = nullptr;
       DistCombinedFields.NLB = nullptr;
       DistCombinedFields.NUB = nullptr;
+      DistCombinedFields.DistCond = nullptr;
+      DistCombinedFields.ParForInDistCond = nullptr;
     }
   };
 
@@ -922,6 +944,18 @@ public:
     return const_cast<Expr *>(reinterpret_cast<const Expr *>(
         *std::next(child_begin(), CombinedNextUpperBoundOffset)));
   }
+  Expr *getCombinedDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedDistConditionOffset)));
+  }
+  Expr *getCombinedParForInDistCond() const {
+    assert(isOpenMPLoopBoundSharingDirective(getDirectiveKind()) &&
+           "expected loop bound distribute sharing directive");
+    return const_cast<Expr *>(reinterpret_cast<const Expr *>(
+        *std::next(child_begin(), CombinedParForInDistConditionOffset)));
+  }
   const Stmt *getBody() const {
     // This relies on the loop form is already checked by Sema.
     const Stmt *Body =

Modified: cfe/trunk/lib/AST/StmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/StmtOpenMP.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/AST/StmtOpenMP.cpp (original)
+++ cfe/trunk/lib/AST/StmtOpenMP.cpp Mon Oct 29 08:45:47 2018
@@ -1079,6 +1079,8 @@ OMPDistributeParallelForDirective *OMPDi
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1145,6 +1147,8 @@ OMPDistributeParallelForSimdDirective::C
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
@@ -1457,6 +1461,8 @@ OMPTeamsDistributeParallelForSimdDirecti
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 
@@ -1524,6 +1530,8 @@ OMPTeamsDistributeParallelForDirective::
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1670,6 +1678,8 @@ OMPTargetTeamsDistributeParallelForDirec
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   Dir->HasCancel = HasCancel;
   return Dir;
 }
@@ -1741,6 +1751,8 @@ OMPTargetTeamsDistributeParallelForSimdD
   Dir->setCombinedCond(Exprs.DistCombinedFields.Cond);
   Dir->setCombinedNextLowerBound(Exprs.DistCombinedFields.NLB);
   Dir->setCombinedNextUpperBound(Exprs.DistCombinedFields.NUB);
+  Dir->setCombinedDistCond(Exprs.DistCombinedFields.DistCond);
+  Dir->setCombinedParForInDistCond(Exprs.DistCombinedFields.ParForInDistCond);
   return Dir;
 }
 

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.cpp Mon Oct 29 08:45:47 2018
@@ -3292,6 +3292,18 @@ bool CGOpenMPRuntime::isStaticNonchunked
   return Schedule == OMP_dist_sch_static;
 }
 
+bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                                      bool Chunked) const {
+  OpenMPSchedType Schedule =
+      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
+  return Schedule == OMP_sch_static_chunked;
+}
+
+bool CGOpenMPRuntime::isStaticChunked(
+    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
+  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+  return Schedule == OMP_dist_sch_static_chunked;
+}
 
 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
   OpenMPSchedType Schedule =

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntime.h Mon Oct 29 08:45:47 2018
@@ -890,6 +890,20 @@ public:
   virtual bool isStaticNonchunked(OpenMPDistScheduleClauseKind ScheduleKind,
                                   bool Chunked) const;
 
+  /// Check if the specified \a ScheduleKind is static chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
+  /// Check if the specified \a ScheduleKind is static non-chunked.
+  /// \param ScheduleKind Schedule kind specified in the 'dist_schedule' clause.
+  /// \param Chunked True if chunk is specified in the clause.
+  ///
+  virtual bool isStaticChunked(OpenMPDistScheduleClauseKind ScheduleKind,
+                               bool Chunked) const;
+
   /// Check if the specified \a ScheduleKind is dynamic.
   /// This kind of worksharing directive is emitted without outer loop.
   /// \param ScheduleKind Schedule Kind specified in the 'schedule' clause.
@@ -1506,7 +1520,7 @@ public:
   /// schedule clause.
   virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const {}
+      const Expr *&ChunkExpr) const {}
 
   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp Mon Oct 29 08:45:47 2018
@@ -4247,8 +4247,11 @@ void CGOpenMPRuntimeNVPTX::getDefaultDis
 void CGOpenMPRuntimeNVPTX::getDefaultScheduleAndChunk(
     CodeGenFunction &CGF, const OMPLoopDirective &S,
     OpenMPScheduleClauseKind &ScheduleKind,
-    llvm::Value *&Chunk) const {
+    const Expr *&ChunkExpr) const {
   ScheduleKind = OMPC_SCHEDULE_static;
-  Chunk = CGF.Builder.getIntN(CGF.getContext().getTypeSize(
-      S.getIterationVariable()->getType()), 1);
+  // Chunk size is 1 in this case.
+  llvm::APInt ChunkSize(32, 1);
+  ChunkExpr = IntegerLiteral::Create(CGF.getContext(), ChunkSize,
+      CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
+      SourceLocation());
 }

Modified: cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h (original)
+++ cfe/trunk/lib/CodeGen/CGOpenMPRuntimeNVPTX.h Mon Oct 29 08:45:47 2018
@@ -348,7 +348,7 @@ public:
   /// Choose a default value for the schedule clause.
   void getDefaultScheduleAndChunk(CodeGenFunction &CGF,
       const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind,
-      llvm::Value *&Chunk) const override;
+      const Expr *&ChunkExpr) const override;
 
 private:
   /// Track the execution mode when codegening directives within a target

Modified: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp Mon Oct 29 08:45:47 2018
@@ -2006,7 +2006,7 @@ void CodeGenFunction::EmitOMPDistributeO
   RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind, StaticInit);
 
   // for combined 'distribute' and 'for' the increment expression of distribute
-  // is store in DistInc. For 'distribute' alone, it is in Inc.
+  // is stored in DistInc. For 'distribute' alone, it is in Inc.
   Expr *IncExpr;
   if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
     IncExpr = S.getDistInc();
@@ -2298,22 +2298,28 @@ bool CodeGenFunction::EmitOMPWorksharing
       (void)LoopScope.Privatize();
 
       // Detect the loop schedule kind and chunk.
-      llvm::Value *Chunk = nullptr;
+      const Expr *ChunkExpr = nullptr;
       OpenMPScheduleTy ScheduleKind;
       if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
         ScheduleKind.Schedule = C->getScheduleKind();
         ScheduleKind.M1 = C->getFirstScheduleModifier();
         ScheduleKind.M2 = C->getSecondScheduleModifier();
-        if (const Expr *Ch = C->getChunkSize()) {
-          Chunk = EmitScalarExpr(Ch);
-          Chunk = EmitScalarConversion(Chunk, Ch->getType(),
-                                       S.getIterationVariable()->getType(),
-                                       S.getBeginLoc());
-        }
+        ChunkExpr = C->getChunkSize();
       } else {
         // Default behaviour for schedule clause.
         CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
-            *this, S, ScheduleKind.Schedule, Chunk);
+            *this, S, ScheduleKind.Schedule, ChunkExpr);
+      }
+      bool HasChunkSizeOne = false;
+      llvm::Value *Chunk = nullptr;
+      if (ChunkExpr) {
+        Chunk = EmitScalarExpr(ChunkExpr);
+        Chunk = EmitScalarConversion(Chunk, ChunkExpr->getType(),
+                                     S.getIterationVariable()->getType(),
+                                     S.getBeginLoc());
+        llvm::APSInt EvaluatedChunk;
+        if (ChunkExpr->EvaluateAsInt(EvaluatedChunk, getContext()))
+          HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
       }
       const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
       const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
@@ -2321,8 +2327,12 @@ bool CodeGenFunction::EmitOMPWorksharing
       // If the static schedule kind is specified or if the ordered clause is
       // specified, and if no monotonic modifier is specified, the effect will
       // be as if the monotonic modifier was specified.
-      if (RT.isStaticNonchunked(ScheduleKind.Schedule,
-                                /* Chunked */ Chunk != nullptr) &&
+      bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
+          /* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
+          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+      if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
+                                 /* Chunked */ Chunk != nullptr) ||
+           StaticChunkedOne) &&
           !Ordered) {
         if (isOpenMPSimdDirective(S.getDirectiveKind()))
           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
@@ -2333,23 +2343,38 @@ bool CodeGenFunction::EmitOMPWorksharing
         // unspecified in this case.
         CGOpenMPRuntime::StaticRTInput StaticInit(
             IVSize, IVSigned, Ordered, IL.getAddress(), LB.getAddress(),
-            UB.getAddress(), ST.getAddress());
+            UB.getAddress(), ST.getAddress(),
+            StaticChunkedOne ? Chunk : nullptr);
         RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
                              ScheduleKind, StaticInit);
         JumpDest LoopExit =
             getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
         // UB = min(UB, GlobalUB);
-        EmitIgnoredExpr(S.getEnsureUpperBound());
+        if (!StaticChunkedOne)
+          EmitIgnoredExpr(S.getEnsureUpperBound());
         // IV = LB;
         EmitIgnoredExpr(S.getInit());
-        // while (idx <= UB) { BODY; ++idx; }
-        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
-                         S.getInc(),
-                         [&S, LoopExit](CodeGenFunction &CGF) {
-                           CGF.EmitOMPLoopBody(S, LoopExit);
-                           CGF.EmitStopPoint(&S);
-                         },
-                         [](CodeGenFunction &) {});
+        // For unchunked static schedule generate:
+        //
+        // while (idx <= UB) {
+        //   BODY;
+        //   ++idx;
+        // }
+        //
+        // For static schedule with chunk one:
+        //
+        // while (IV <= PrevUB) {
+        //   BODY;
+        //   IV += ST;
+        // }
+        EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+            StaticChunkedOne ? S.getCombinedParForInDistCond() : S.getCond(),
+            StaticChunkedOne ? S.getDistInc() : S.getInc(),
+            [&S, LoopExit](CodeGenFunction &CGF) {
+             CGF.EmitOMPLoopBody(S, LoopExit);
+             CGF.EmitStopPoint(&S);
+            },
+            [](CodeGenFunction &) {});
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         auto &&CodeGen = [&S](CodeGenFunction &CGF) {
@@ -3345,13 +3370,18 @@ void CodeGenFunction::EmitOMPDistributeL
       // iteration space is divided into chunks that are approximately equal
       // in size, and at most one chunk is distributed to each team of the
       // league. The size of the chunks is unspecified in this case.
+      bool StaticChunked = RT.isStaticChunked(
+          ScheduleKind, /* Chunked */ Chunk != nullptr) &&
+          isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
       if (RT.isStaticNonchunked(ScheduleKind,
-                                /* Chunked */ Chunk != nullptr)) {
+                                /* Chunked */ Chunk != nullptr) ||
+          StaticChunked) {
         if (isOpenMPSimdDirective(S.getDirectiveKind()))
           EmitOMPSimdInit(S, /*IsMonotonic=*/true);
         CGOpenMPRuntime::StaticRTInput StaticInit(
             IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(),
-            LB.getAddress(), UB.getAddress(), ST.getAddress());
+            LB.getAddress(), UB.getAddress(), ST.getAddress(),
+            StaticChunked ? Chunk : nullptr);
         RT.emitDistributeStaticInit(*this, S.getBeginLoc(), ScheduleKind,
                                     StaticInit);
         JumpDest LoopExit =
@@ -3370,15 +3400,45 @@ void CodeGenFunction::EmitOMPDistributeL
                 ? S.getCombinedCond()
                 : S.getCond();
 
-        // for distribute alone,  codegen
-        // while (idx <= UB) { BODY; ++idx; }
-        // when combined with 'for' (e.g. as in 'distribute parallel for')
-        // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+        if (StaticChunked)
+          Cond = S.getCombinedDistCond();
+
+        // For static unchunked schedules generate:
+        //
+        //  1. For distribute alone, codegen
+        //    while (idx <= UB) {
+        //      BODY;
+        //      ++idx;
+        //    }
+        //
+        //  2. When combined with 'for' (e.g. as in 'distribute parallel for')
+        //    while (idx <= UB) {
+        //      <CodeGen rest of pragma>(LB, UB);
+        //      idx += ST;
+        //    }
+        //
+        // For static chunk one schedule generate:
+        //
+        // while (IV <= GlobalUB) {
+        //   <CodeGen rest of pragma>(LB, UB);
+        //   LB += ST;
+        //   UB += ST;
+        //   UB = min(UB, GlobalUB);
+        //   IV = LB;
+        // }
+        //
         EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
                          [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
                            CodeGenLoop(CGF, S, LoopExit);
                          },
-                         [](CodeGenFunction &) {});
+                         [&S, StaticChunked](CodeGenFunction &CGF) {
+                           if (StaticChunked) {
+                             CGF.EmitIgnoredExpr(S.getCombinedNextLowerBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedNextUpperBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedEnsureUpperBound());
+                             CGF.EmitIgnoredExpr(S.getCombinedInit());
+                           }
+                         });
         EmitBlock(LoopExit.getBlock());
         // Tell the runtime we are done.
         RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());

Modified: cfe/trunk/lib/Sema/SemaOpenMP.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaOpenMP.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp (original)
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp Mon Oct 29 08:45:47 2018
@@ -410,7 +410,7 @@ public:
     }
     return IsDuplicate;
   }
-  
+
   /// Set default data sharing attribute to none.
   void setDefaultDSANone(SourceLocation Loc) {
     assert(!isStackEmpty());
@@ -5296,6 +5296,12 @@ checkOpenMPLoop(OpenMPDirectiveKind DKin
           ? SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get())
           : SemaRef.BuildBinOp(CurScope, CondLoc, BO_LT, IV.get(),
                                NumIterations.get());
+  ExprResult CombDistCond;
+  if (isOpenMPLoopBoundSharingDirective(DKind)) {
+    CombDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), NumIterations.get());
+  }
+
   ExprResult CombCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     CombCond =
@@ -5370,7 +5376,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKin
   // on PrevUB instead of NumIterations - used to implement 'for' when found
   // in combination with 'distribute', like in 'distribute parallel for'
   SourceLocation DistIncLoc = AStmt->getBeginLoc();
-  ExprResult DistCond, DistInc, PrevEUB;
+  ExprResult DistCond, DistInc, PrevEUB, ParForInDistCond;
   if (isOpenMPLoopBoundSharingDirective(DKind)) {
     DistCond = SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), UB.get());
     assert(DistCond.isUsable() && "distribute cond expr was not built");
@@ -5393,6 +5399,11 @@ checkOpenMPLoop(OpenMPDirectiveKind DKin
     PrevEUB = SemaRef.BuildBinOp(CurScope, DistIncLoc, BO_Assign, UB.get(),
                                  CondOp.get());
     PrevEUB = SemaRef.ActOnFinishFullExpr(PrevEUB.get());
+
+    // Build IV <= PrevUB to be used in parallel for is in combination with
+    // a distribute directive with schedule(static, 1)
+    ParForInDistCond =
+        SemaRef.BuildBinOp(CurScope, CondLoc, BO_LE, IV.get(), PrevUB.get());
   }
 
   // Build updates and final values of the loop counters.
@@ -5516,6 +5527,8 @@ checkOpenMPLoop(OpenMPDirectiveKind DKin
   Built.DistCombinedFields.Cond = CombCond.get();
   Built.DistCombinedFields.NLB = CombNextLB.get();
   Built.DistCombinedFields.NUB = CombNextUB.get();
+  Built.DistCombinedFields.DistCond = CombDistCond.get();
+  Built.DistCombinedFields.ParForInDistCond = ParForInDistCond.get();
 
   return NestedLoopCount;
 }

Modified: cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTReaderStmt.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTReaderStmt.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTReaderStmt.cpp Mon Oct 29 08:45:47 2018
@@ -1875,6 +1875,8 @@ void ASTStmtReader::VisitOMPLoopDirectiv
     D->setCombinedCond(Record.readSubExpr());
     D->setCombinedNextLowerBound(Record.readSubExpr());
     D->setCombinedNextUpperBound(Record.readSubExpr());
+    D->setCombinedDistCond(Record.readSubExpr());
+    D->setCombinedParForInDistCond(Record.readSubExpr());
   }
   SmallVector<Expr *, 4> Sub;
   unsigned CollapsedNum = D->getCollapsedNumber();

Modified: cfe/trunk/lib/Serialization/ASTWriterStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Serialization/ASTWriterStmt.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/lib/Serialization/ASTWriterStmt.cpp (original)
+++ cfe/trunk/lib/Serialization/ASTWriterStmt.cpp Mon Oct 29 08:45:47 2018
@@ -1876,6 +1876,8 @@ void ASTStmtWriter::VisitOMPLoopDirectiv
     Record.AddStmt(D->getCombinedCond());
     Record.AddStmt(D->getCombinedNextLowerBound());
     Record.AddStmt(D->getCombinedNextUpperBound());
+    Record.AddStmt(D->getCombinedDistCond());
+    Record.AddStmt(D->getCombinedParForInDistCond());
   }
   for (auto I : D->counters()) {
     Record.AddStmt(I);

Modified: cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_codegen.cpp Mon Oct 29 08:45:47 2018
@@ -406,19 +406,27 @@ int main() {
     for (int i = 0; i < n; ++i) {
       a[i] = b[i] + c[i];
       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
-      // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
-      // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-      // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-      // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[OMP_IV:%.+]] = alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[OMP_LB:%.+]] = alloca
+      // LAMBDA: [[OMP_UB:%.+]] = alloca
+      // LAMBDA: [[OMP_ST:%.+]] = alloca
 
-      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
       // check EUB for distribute
       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}
       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
       // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -437,18 +445,10 @@ int main() {
 
       // check exit condition
       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+      // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
       // check that PrevLB and PrevUB are passed to the 'for'
       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -467,13 +467,6 @@ int main() {
       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_END]]:
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-      // check NextLB and NextUB
       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -482,10 +475,31 @@ int main() {
       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-      // outer loop exit
-      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // Update UB
+      // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE_1]]:
+      // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}}
+      // LAMBDA: br label %[[EUB_END_1:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE_1]]:
+      // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END_1]]
+      // LAMBDA-DAG: [[EUB_END_1]]:
+      // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+      // Store LB in IV
+      // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+      // loop exit
+      // LAMBDA: [[LOOP_EXIT]]:
       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
       // LAMBDA: ret
 
@@ -1154,19 +1168,28 @@ int main() {
   for (int i = 0; i < n; ++i) {
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-    // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-    // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-    // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-    // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: [[OMP_IV:%.+]] = alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: [[OMP_LB:%.+]] = alloca
+    // CHECK: [[OMP_UB:%.+]] = alloca
+    // CHECK: [[OMP_ST:%.+]] = alloca
 
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1185,18 +1208,10 @@ int main() {
 
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+    // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1215,13 +1230,6 @@ int main() {
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1230,10 +1238,31 @@ int main() {
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
 
@@ -1867,19 +1896,28 @@ int main() {
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
 
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_IV:%.+]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_LB:%.+]] = alloca
+// CHECK: [[OMP_UB:%.+]] = alloca
+// CHECK: [[OMP_ST:%.+]] = alloca
 
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1898,18 +1936,10 @@ int main() {
 
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+// CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1928,13 +1958,6 @@ int main() {
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1943,10 +1966,31 @@ int main() {
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
 

Modified: cfe/trunk/test/OpenMP/distribute_parallel_for_simd_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/distribute_parallel_for_simd_codegen.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/distribute_parallel_for_simd_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/distribute_parallel_for_simd_codegen.cpp Mon Oct 29 08:45:47 2018
@@ -405,19 +405,27 @@ int main() {
     for (int i = 0; i < n; ++i) {
       a[i] = b[i] + c[i];
       // LAMBDA: define{{.+}} void [[OMP_OUTLINED_3]](
-      // LAMBDA-DAG: [[OMP_IV:%.omp.iv]] = alloca
-      // LAMBDA-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-      // LAMBDA-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-      // LAMBDA-DAG: [[OMP_ST:%.omp.stride]] = alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[OMP_IV:%.+]] = alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: alloca
+      // LAMBDA: [[OMP_LB:%.+]] = alloca
+      // LAMBDA: [[OMP_UB:%.+]] = alloca
+      // LAMBDA: [[OMP_ST:%.+]] = alloca
 
-      // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
       // LAMBDA: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-      // LAMBDA: [[DIST_OUTER_LOOP_HEADER]]:
       // check EUB for distribute
       // LAMBDA-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}},
+      // LAMBDA: [[NUM_IT_1:%.+]] = load{{.+}}
       // LAMBDA-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
       // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
       // LAMBDA-DAG: [[EUB_TRUE]]:
@@ -436,18 +444,10 @@ int main() {
 
       // check exit condition
       // LAMBDA-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_BODY]]:
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_HEADER]]:
-      // LAMBDA-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-      // LAMBDA-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-      // LAMBDA: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-      // LAMBDA: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+      // LAMBDA-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+      // LAMBDA-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+      // LAMBDA: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+      // LAMBDA: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
       // check that PrevLB and PrevUB are passed to the 'for'
       // LAMBDA: [[DIST_INNER_LOOP_BODY]]:
@@ -466,13 +466,6 @@ int main() {
       // LAMBDA-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
       // LAMBDA: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
       // LAMBDA: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-      // LAMBDA: br label %[[DIST_INNER_LOOP_HEADER]]
-
-      // LAMBDA: [[DIST_INNER_LOOP_END]]:
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-      // LAMBDA: [[DIST_OUTER_LOOP_INC]]:
-      // check NextLB and NextUB
       // LAMBDA-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
       // LAMBDA-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -481,10 +474,31 @@ int main() {
       // LAMBDA-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
       // LAMBDA-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
       // LAMBDA: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-      // LAMBDA: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-      // outer loop exit
-      // LAMBDA: [[DIST_OUTER_LOOP_END]]:
+      // Update UB
+      // LAMBDA-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+      // LAMBDA: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+      // LAMBDA-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+      // LAMBDA: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+      // LAMBDA-DAG: [[EUB_TRUE_1]]:
+      // LAMBDA: [[NUM_IT_3:%.+]] = load{{.+}}
+      // LAMBDA: br label %[[EUB_END_1:.+]]
+      // LAMBDA-DAG: [[EUB_FALSE_1]]:
+      // LAMBDA: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+      // LAMBDA: br label %[[EUB_END_1]]
+      // LAMBDA-DAG: [[EUB_END_1]]:
+      // LAMBDA-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+      // LAMBDA: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+      // Store LB in IV
+      // LAMBDA-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+      // LAMBDA: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+      // LAMBDA: [[DIST_INNER_LOOP_END]]:
+      // LAMBDA: br label %[[LOOP_EXIT:.+]]
+
+      // loop exit
+      // LAMBDA: [[LOOP_EXIT]]:
       // LAMBDA-DAG: call void @__kmpc_for_static_fini(
       // LAMBDA: ret
 
@@ -1153,19 +1167,28 @@ int main() {
   for (int i = 0; i < n; ++i) {
     a[i] = b[i] + c[i];
     // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-    // CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-    // CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-    // CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-    // CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: [[OMP_IV:%.+]] = alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: alloca
+    // CHECK: [[OMP_LB:%.+]] = alloca
+    // CHECK: [[OMP_UB:%.+]] = alloca
+    // CHECK: [[OMP_ST:%.+]] = alloca
 
     // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
     // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-    // CHECK: [[DIST_OUTER_LOOP_HEADER]]:
     // check EUB for distribute
     // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+    // CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
     // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
     // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
     // CHECK-DAG: [[EUB_TRUE]]:
@@ -1184,18 +1207,10 @@ int main() {
 
     // check exit condition
     // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_BODY]]:
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-    // CHECK: [[DIST_INNER_LOOP_HEADER]]:
-    // CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-    // CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-    // CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-    // CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+    // CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+    // CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+    // CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+    // CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
     // check that PrevLB and PrevUB are passed to the 'for'
     // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1214,13 +1229,6 @@ int main() {
     // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
     // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
     // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-    // CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-    // CHECK: [[DIST_INNER_LOOP_END]]:
-    // CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-    // CHECK: [[DIST_OUTER_LOOP_INC]]:
-    // check NextLB and NextUB
     // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
     // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1229,10 +1237,31 @@ int main() {
     // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
     // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
     // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-    // CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-    // outer loop exit
-    // CHECK: [[DIST_OUTER_LOOP_END]]:
+    // Update UB
+    // CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+    // CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+    // CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+    // CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+    // CHECK-DAG: [[EUB_TRUE_1]]:
+    // CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+    // CHECK: br label %[[EUB_END_1:.+]]
+    // CHECK-DAG: [[EUB_FALSE_1]]:
+    // CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+    // CHECK: br label %[[EUB_END_1]]
+    // CHECK-DAG: [[EUB_END_1]]:
+    // CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+    // CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+    // Store LB in IV
+    // CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+    // CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+    // CHECK: [[DIST_INNER_LOOP_END]]:
+    // CHECK: br label %[[LOOP_EXIT:.+]]
+
+    // loop exit
+    // CHECK: [[LOOP_EXIT]]:
     // CHECK-DAG: call void @__kmpc_for_static_fini(
     // CHECK: ret
 
@@ -1866,19 +1895,28 @@ int main() {
 // CHECK: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 5, {{.+}}* [[OMP_OUTLINED_3:@.+]] to {{.+}})
 
 // CHECK: define{{.+}} void [[OMP_OUTLINED_3]](
-// CHECK-DAG: [[OMP_IV:%.omp.iv]] = alloca
-// CHECK-DAG: [[OMP_LB:%.omp.comb.lb]] = alloca
-// CHECK-DAG: [[OMP_UB:%.omp.comb.ub]] = alloca
-// CHECK-DAG: [[OMP_ST:%.omp.stride]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_IV:%.+]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_LB:%.+]] = alloca
+// CHECK: [[OMP_UB:%.+]] = alloca
+// CHECK: [[OMP_ST:%.+]] = alloca
 
 // unlike the previous tests, in this one we have a outer and inner loop for 'distribute'
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, i32 91,
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER:.+]]
 
-// CHECK: [[DIST_OUTER_LOOP_HEADER]]:
 // check EUB for distribute
 // CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
-// CHECK: [[NUM_IT_1:%.+]] = load{{.+}},
+// CHECK: [[NUM_IT_1:%.+]] = load{{.+}}
 // CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], [[NUM_IT_1]]
 // CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
 // CHECK-DAG: [[EUB_TRUE]]:
@@ -1897,18 +1935,10 @@ int main() {
 
 // check exit condition
 // CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3]]
-// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_OUTER_LOOP_BODY:.+]], label %[[DIST_OUTER_LOOP_END:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_BODY]]:
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER:.+]]
-
-// CHECK: [[DIST_INNER_LOOP_HEADER]]:
-// CHECK-DAG: [[OMP_IV_VAL_2:%.+]] = load {{.+}} [[OMP_IV]],
-// CHECK-DAG: [[OMP_UB_VAL_4:%.+]] = load {{.+}} [[OMP_UB]],
-// CHECK: [[CMP_IV_UB_2:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_2]], [[OMP_UB_VAL_4]]
-// CHECK: br{{.+}} [[CMP_IV_UB_2]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+// CHECK-DAG: [[OMP_UB_VAL_3:%.+]] = load {{.+}}
+// CHECK-DAG: [[OMP_UB_VAL_3_PLUS_ONE:%.+]] = add {{.+}} [[OMP_UB_VAL_3]], 1
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], [[OMP_UB_VAL_3_PLUS_ONE]]
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
 
 // check that PrevLB and PrevUB are passed to the 'for'
 // CHECK: [[DIST_INNER_LOOP_BODY]]:
@@ -1927,13 +1957,6 @@ int main() {
 // CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
 // CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
 // CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
-// CHECK: br label %[[DIST_INNER_LOOP_HEADER]]
-
-// CHECK: [[DIST_INNER_LOOP_END]]:
-// CHECK: br label %[[DIST_OUTER_LOOP_INC:.+]]
-
-// CHECK: [[DIST_OUTER_LOOP_INC]]:
-// check NextLB and NextUB
 // CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
 // CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
@@ -1942,10 +1965,31 @@ int main() {
 // CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
 // CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
 // CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
-// CHECK: br label %[[DIST_OUTER_LOOP_HEADER]]
 
-// outer loop exit
-// CHECK: [[DIST_OUTER_LOOP_END]]:
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK: [[OMP_EXPR_VAL:%.+]] = load{{.+}}, {{.+}}
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], [[OMP_EXPR_VAL]]
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: [[NUM_IT_3:%.+]] = load{{.+}}
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ [[NUM_IT_3]], %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
+// CHECK: br label %[[LOOP_EXIT:.+]]
+
+// loop exit
+// CHECK: [[LOOP_EXIT]]:
 // CHECK-DAG: call void @__kmpc_for_static_fini(
 // CHECK: ret
 

Modified: cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp?rev=345509&r1=345508&r2=345509&view=diff
==============================================================================
--- cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp (original)
+++ cfe/trunk/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp Mon Oct 29 08:45:47 2018
@@ -112,13 +112,89 @@ int bar(int n){
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: ret void
 
+// Distribute with collapse(2)
 // CHECK: define {{.*}}void {{@__omp_offloading_.+}}({{.+}}, i{{32|64}} [[F_IN:%.+]])
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_IV:%.+]] = alloca
+// CHECK: alloca
+// CHECK: alloca
+// CHECK: [[OMP_LB:%.+]] = alloca
+// CHECK: [[OMP_UB:%.+]] = alloca
+// CHECK: [[OMP_ST:%.+]] = alloca
 // CHECK: store {{.+}} [[F_IN]], {{.+}}* {{.+}},
 // CHECK-DAG: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
 // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], i16 0, i16 0)
 // CHECK: store {{.+}} 99, {{.+}}* [[COMB_UB:%.+]], align
 // CHECK: call void @__kmpc_for_static_init_4({{.+}}, {{.+}}, {{.+}} 91, {{.+}}, {{.+}}, {{.+}}* [[COMB_UB]],
-// CHECK: {{call|invoke}} void [[OUTL4:@.+]](
+
+// check EUB for distribute
+// CHECK-DAG: [[OMP_UB_VAL_1:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK-DAG: [[CMP_UB_NUM_IT:%.+]] = icmp sgt {{.+}}  [[OMP_UB_VAL_1]], 99
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT]], label %[[EUB_TRUE:.+]], label %[[EUB_FALSE:.+]]
+// CHECK-DAG: [[EUB_TRUE]]:
+// CHECK: br label %[[EUB_END:.+]]
+// CHECK-DAG: [[EUB_FALSE]]:
+// CHECK: [[OMP_UB_VAL2:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END]]
+// CHECK-DAG: [[EUB_END]]:
+// CHECK-DAG: [[EUB_RES:%.+]] = phi{{.+}} [ 99, %[[EUB_TRUE]] ], [ [[OMP_UB_VAL2]], %[[EUB_FALSE]] ]
+// CHECK: store{{.+}} [[EUB_RES]], {{.+}}* [[OMP_UB]],
+
+// initialize omp.iv
+// CHECK: [[OMP_LB_VAL_1:%.+]] = load{{.+}}, {{.+}}* [[OMP_LB]],
+// CHECK: store {{.+}} [[OMP_LB_VAL_1]], {{.+}}* [[OMP_IV]],
+
+// check exit condition
+// CHECK-DAG: [[OMP_IV_VAL_1:%.+]] = load {{.+}} [[OMP_IV]],
+// CHECK: [[CMP_IV_UB:%.+]] = icmp sle {{.+}} [[OMP_IV_VAL_1]], 100
+// CHECK: br {{.+}} [[CMP_IV_UB]], label %[[DIST_INNER_LOOP_BODY:.+]], label %[[DIST_INNER_LOOP_END:.+]]
+
+// check that PrevLB and PrevUB are passed to the 'for'
+// CHECK: [[DIST_INNER_LOOP_BODY]]:
+// CHECK-DAG: [[OMP_PREV_LB:%.+]] = load {{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-64-DAG: [[OMP_PREV_LB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_LB]] to {{.+}}
+// CHECK-DAG: [[OMP_PREV_UB:%.+]] = load {{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-64-DAG: [[OMP_PREV_UB_EXT:%.+]] = zext {{.+}} [[OMP_PREV_UB]] to {{.+}}
+
+// check that distlb and distub are properly passed to the outlined function
+// CHECK-32: {{call|invoke}} void [[OUTL4:@.+]]({{.*}} i32 [[OMP_PREV_LB]], i32 [[OMP_PREV_UB]]
+// CHECK-64: {{call|invoke}} void [[OUTL4:@.+]]({{.*}} i64 [[OMP_PREV_LB_EXT]], i64 [[OMP_PREV_UB_EXT]]
+
+// check DistInc
+// CHECK-DAG: [[OMP_IV_VAL_3:%.+]] = load {{.+}}, {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_ST_VAL_1:%.+]] = load {{.+}}, {{.+}}* [[OMP_ST]],
+// CHECK: [[OMP_IV_INC:%.+]] = add{{.+}} [[OMP_IV_VAL_3]], [[OMP_ST_VAL_1]]
+// CHECK: store{{.+}} [[OMP_IV_INC]], {{.+}}* [[OMP_IV]],
+// CHECK-DAG: [[OMP_LB_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK-DAG: [[OMP_ST_VAL_2:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+// CHECK-DAG: [[OMP_LB_NEXT:%.+]] = add{{.+}} [[OMP_LB_VAL_2]], [[OMP_ST_VAL_2]]
+// CHECK: store{{.+}} [[OMP_LB_NEXT]], {{.+}}* [[OMP_LB]],
+// CHECK-DAG: [[OMP_UB_VAL_5:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[OMP_ST_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_ST]],
+// CHECK-DAG: [[OMP_UB_NEXT:%.+]] = add{{.+}} [[OMP_UB_VAL_5]], [[OMP_ST_VAL_3]]
+// CHECK: store{{.+}} [[OMP_UB_NEXT]], {{.+}}* [[OMP_UB]],
+
+// Update UB
+// CHECK-DAG: [[OMP_UB_VAL_6:%.+]] = load{{.+}}, {{.+}} [[OMP_UB]],
+// CHECK-DAG: [[CMP_UB_NUM_IT_1:%.+]] = icmp sgt {{.+}}[[OMP_UB_VAL_6]], 99
+// CHECK: br {{.+}} [[CMP_UB_NUM_IT_1]], label %[[EUB_TRUE_1:.+]], label %[[EUB_FALSE_1:.+]]
+// CHECK-DAG: [[EUB_TRUE_1]]:
+// CHECK: br label %[[EUB_END_1:.+]]
+// CHECK-DAG: [[EUB_FALSE_1]]:
+// CHECK: [[OMP_UB_VAL3:%.+]] = load{{.+}} [[OMP_UB]],
+// CHECK: br label %[[EUB_END_1]]
+// CHECK-DAG: [[EUB_END_1]]:
+// CHECK-DAG: [[EUB_RES_1:%.+]] = phi{{.+}} [ 99, %[[EUB_TRUE_1]] ], [ [[OMP_UB_VAL3]], %[[EUB_FALSE_1]] ]
+// CHECK: store{{.+}} [[EUB_RES_1]], {{.+}}* [[OMP_UB]],
+
+// Store LB in IV
+// CHECK-DAG: [[OMP_LB_VAL_3:%.+]] = load{{.+}}, {{.+}} [[OMP_LB]],
+// CHECK: store{{.+}} [[OMP_LB_VAL_3]], {{.+}}* [[OMP_IV]],
+
+// CHECK: [[DIST_INNER_LOOP_END]]:
 // CHECK: call void @__kmpc_for_static_fini(
 // CHECK: call void @__kmpc_spmd_kernel_deinit()
 // CHECK: ret void




More information about the cfe-commits mailing list