[llvm] a222361 - [OpenMP] Implement '#pragma omp unroll'.

Michael Kruse via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 10 12:34:05 PDT 2021


Author: Michael Kruse
Date: 2021-06-10T14:30:17-05:00
New Revision: a22236120f17074532bec70f5916a235beddd475

URL: https://github.com/llvm/llvm-project/commit/a22236120f17074532bec70f5916a235beddd475
DIFF: https://github.com/llvm/llvm-project/commit/a22236120f17074532bec70f5916a235beddd475.diff

LOG: [OpenMP] Implement '#pragma omp unroll'.

Implementation of the unroll directive introduced in OpenMP 5.1. It follows the approach of D76342 for the tile directive (i.e. AST-based, not using the OpenMPIRBuilder). Where possible it emits `llvm.loop.unroll.*` metadata, but it has to fall back to an AST representation of the outer loop when the partially unrolled loop is associated with another directive, because that directive needs to compute the number of iterations.
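
For illustration (not part of the commit), a minimal sketch of how the new directive and its clauses are meant to be used; the function and variable names are made up, and it assumes compiling with -fopenmp (plus -fopenmp-version=51 if needed). The partial form produces an outer loop that an enclosing loop-associated directive can still consume, while the full form requires a constant trip count and is lowered purely through loop metadata.

  void unroll_examples(int n, double *a) {
    // Partial unrolling by 4: the generated outer loop remains a canonical
    // loop, so the enclosing worksharing directive can still consume it.
    #pragma omp for
    #pragma omp unroll partial(4)
    for (int i = 0; i < n; ++i)
      a[i] = i;

    // Full unrolling: diagnosed unless the trip count is a compile-time
    // constant (see err_omp_unroll_full_variable_trip_count below).
    #pragma omp unroll full
    for (int i = 0; i < 8; ++i)
      a[i] += 1.0;
  }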

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D99459

Added: 
    clang/test/OpenMP/unroll_ast_print.cpp
    clang/test/OpenMP/unroll_codegen_factor.cpp
    clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
    clang/test/OpenMP/unroll_codegen_for_partial.cpp
    clang/test/OpenMP/unroll_codegen_full.cpp
    clang/test/OpenMP/unroll_codegen_heuristic.cpp
    clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
    clang/test/OpenMP/unroll_codegen_partial.cpp
    clang/test/OpenMP/unroll_codegen_tile_for.cpp
    clang/test/OpenMP/unroll_codegen_unroll_for.cpp
    clang/test/OpenMP/unroll_messages.cpp

Modified: 
    clang/include/clang-c/Index.h
    clang/include/clang/AST/OpenMPClause.h
    clang/include/clang/AST/RecursiveASTVisitor.h
    clang/include/clang/AST/StmtOpenMP.h
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/include/clang/Basic/StmtNodes.td
    clang/include/clang/Sema/Sema.h
    clang/include/clang/Serialization/ASTBitCodes.h
    clang/lib/AST/OpenMPClause.cpp
    clang/lib/AST/StmtOpenMP.cpp
    clang/lib/AST/StmtPrinter.cpp
    clang/lib/AST/StmtProfile.cpp
    clang/lib/Basic/OpenMPKinds.cpp
    clang/lib/CodeGen/CGOpenMPRuntime.cpp
    clang/lib/CodeGen/CGStmt.cpp
    clang/lib/CodeGen/CGStmtOpenMP.cpp
    clang/lib/CodeGen/CodeGenFunction.h
    clang/lib/Parse/ParseOpenMP.cpp
    clang/lib/Sema/SemaExceptionSpec.cpp
    clang/lib/Sema/SemaOpenMP.cpp
    clang/lib/Sema/TreeTransform.h
    clang/lib/Serialization/ASTReader.cpp
    clang/lib/Serialization/ASTReaderStmt.cpp
    clang/lib/Serialization/ASTWriter.cpp
    clang/lib/Serialization/ASTWriterStmt.cpp
    clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
    clang/tools/libclang/CIndex.cpp
    clang/tools/libclang/CXCursor.cpp
    llvm/include/llvm/Frontend/OpenMP/OMP.td

Removed: 
    


################################################################################
diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h
index 87592156dd6b8..c7d3b4e10622c 100644
--- a/clang/include/clang-c/Index.h
+++ b/clang/include/clang-c/Index.h
@@ -2588,7 +2588,11 @@ enum CXCursorKind {
    */
   CXCursor_OMPMaskedDirective = 292,
 
-  CXCursor_LastStmt = CXCursor_OMPMaskedDirective,
+  /** OpenMP unroll directive.
+   */
+  CXCursor_OMPUnrollDirective = 293,
+
+  CXCursor_LastStmt = CXCursor_OMPUnrollDirective,
 
   /**
    * Cursor that represents the translation unit itself.

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index e484a89bc84a8..aaddcfa307daf 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -888,6 +888,114 @@ class OMPSizesClause final
   }
 };
 
+/// Representation of the 'full' clause of the '#pragma omp unroll' directive.
+///
+/// \code
+/// #pragma omp unroll full
+/// for (int i = 0; i < 64; ++i)
+/// \endcode
+class OMPFullClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  /// Build an empty clause.
+  explicit OMPFullClause() : OMPClause(llvm::omp::OMPC_full, {}, {}) {}
+
+public:
+  /// Build an AST node for a 'full' clause.
+  ///
+  /// \param C        Context of the AST.
+  /// \param StartLoc Starting location of the clause.
+  /// \param EndLoc   Ending location of the clause.
+  static OMPFullClause *Create(const ASTContext &C, SourceLocation StartLoc,
+                               SourceLocation EndLoc);
+
+  /// Build an empty 'full' AST node for deserialization.
+  ///
+  /// \param C Context of the AST.
+  static OMPFullClause *CreateEmpty(const ASTContext &C);
+
+  child_range children() { return {child_iterator(), child_iterator()}; }
+  const_child_range children() const {
+    return {const_child_iterator(), const_child_iterator()};
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_full;
+  }
+};
+
+/// Representation of the 'partial' clause of the '#pragma omp unroll'
+/// directive.
+///
+/// \code
+/// #pragma omp unroll partial(4)
+/// for (int i = start; i < end; ++i)
+/// \endcode
+class OMPPartialClause final : public OMPClause {
+  friend class OMPClauseReader;
+
+  /// Location of '('.
+  SourceLocation LParenLoc;
+
+  /// Optional argument to the clause (unroll factor).
+  Stmt *Factor;
+
+  /// Build an empty clause.
+  explicit OMPPartialClause() : OMPClause(llvm::omp::OMPC_partial, {}, {}) {}
+
+  /// Set the unroll factor.
+  void setFactor(Expr *E) { Factor = E; }
+
+  /// Sets the location of '('.
+  void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; }
+
+public:
+  /// Build an AST node for a 'partial' clause.
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Location of the 'partial' identifier.
+  /// \param LParenLoc Location of '('.
+  /// \param EndLoc    Location of ')'.
+  /// \param Factor    Clause argument.
+  static OMPPartialClause *Create(const ASTContext &C, SourceLocation StartLoc,
+                                  SourceLocation LParenLoc,
+                                  SourceLocation EndLoc, Expr *Factor);
+
+  /// Build an empty 'partial' AST node for deserialization.
+  ///
+  /// \param C     Context of the AST.
+  static OMPPartialClause *CreateEmpty(const ASTContext &C);
+
+  /// Returns the location of '('.
+  SourceLocation getLParenLoc() const { return LParenLoc; }
+
+  /// Returns the argument of the clause or nullptr if not set.
+  Expr *getFactor() const { return cast_or_null<Expr>(Factor); }
+
+  child_range children() { return child_range(&Factor, &Factor + 1); }
+  const_child_range children() const {
+    return const_child_range(&Factor, &Factor + 1);
+  }
+
+  child_range used_children() {
+    return child_range(child_iterator(), child_iterator());
+  }
+  const_child_range used_children() const {
+    return const_child_range(const_child_iterator(), const_child_iterator());
+  }
+
+  static bool classof(const OMPClause *T) {
+    return T->getClauseKind() == llvm::omp::OMPC_partial;
+  }
+};
+
 /// This represents 'collapse' clause in the '#pragma omp ...'
 /// directive.
 ///

diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 73d757f0255ad..a29559e5184ab 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -2841,6 +2841,9 @@ DEF_TRAVERSE_STMT(OMPSimdDirective,
 DEF_TRAVERSE_STMT(OMPTileDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
+DEF_TRAVERSE_STMT(OMPUnrollDirective,
+                  { TRY_TO(TraverseOMPExecutableDirective(S)); })
+
 DEF_TRAVERSE_STMT(OMPForDirective,
                   { TRY_TO(TraverseOMPExecutableDirective(S)); })
 
@@ -3097,6 +3100,17 @@ bool RecursiveASTVisitor<Derived>::VisitOMPSizesClause(OMPSizesClause *C) {
   return true;
 }
 
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPFullClause(OMPFullClause *C) {
+  return true;
+}
+
+template <typename Derived>
+bool RecursiveASTVisitor<Derived>::VisitOMPPartialClause(OMPPartialClause *C) {
+  TRY_TO(TraverseStmt(C->getFactor()));
+  return true;
+}
+
 template <typename Derived>
 bool
 RecursiveASTVisitor<Derived>::VisitOMPCollapseClause(OMPCollapseClause *C) {

diff --git a/clang/include/clang/AST/StmtOpenMP.h b/clang/include/clang/AST/StmtOpenMP.h
index 67ba77a5d1b06..9c85df741f48a 100644
--- a/clang/include/clang/AST/StmtOpenMP.h
+++ b/clang/include/clang/AST/StmtOpenMP.h
@@ -5067,6 +5067,78 @@ class OMPTileDirective final : public OMPLoopBasedDirective {
   }
 };
 
+/// This represents the '#pragma omp unroll' loop transformation directive.
+///
+/// \code
+/// #pragma omp unroll
+/// for (int i = 0; i < 64; ++i)
+/// \endcode
+class OMPUnrollDirective final : public OMPLoopBasedDirective {
+  friend class ASTStmtReader;
+  friend class OMPExecutableDirective;
+
+  /// Default list of offsets.
+  enum {
+    PreInitsOffset = 0,
+    TransformedStmtOffset,
+  };
+
+  explicit OMPUnrollDirective(SourceLocation StartLoc, SourceLocation EndLoc)
+      : OMPLoopBasedDirective(OMPUnrollDirectiveClass, llvm::omp::OMPD_unroll,
+                              StartLoc, EndLoc, 1) {}
+
+  /// Set the pre-init statements.
+  void setPreInits(Stmt *PreInits) {
+    Data->getChildren()[PreInitsOffset] = PreInits;
+  }
+
+  /// Set the de-sugared statement.
+  void setTransformedStmt(Stmt *S) {
+    Data->getChildren()[TransformedStmtOffset] = S;
+  }
+
+public:
+  /// Create a new AST node representation for '#pragma omp unroll'.
+  ///
+  /// \param C         Context of the AST.
+  /// \param StartLoc  Location of the introducer (e.g. the 'omp' token).
+  /// \param EndLoc    Location of the directive's end (e.g. the tok::eod).
+  /// \param Clauses   The directive's clauses.
+  /// \param AssociatedStmt The outermost associated loop.
+  /// \param TransformedStmt The loop nest after unrolling, or nullptr in
+  ///                        dependent contexts.
+  /// \param PreInits   Helper preinits statements for the loop nest.
+  static OMPUnrollDirective *
+  Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation EndLoc,
+         ArrayRef<OMPClause *> Clauses, Stmt *AssociatedStmt,
+         Stmt *TransformedStmt, Stmt *PreInits);
+
+  /// Build an empty '#pragma omp unroll' AST node for deserialization.
+  ///
+  /// \param C          Context of the AST.
+  /// \param NumClauses Number of clauses to allocate.
+  static OMPUnrollDirective *CreateEmpty(const ASTContext &C,
+                                         unsigned NumClauses);
+
+  /// Get the de-sugared associated loops after unrolling.
+  ///
+  /// This is only used if the unrolled loop becomes an associated loop of
+  /// another directive; otherwise the loop is emitted directly using loop
+  /// transformation metadata. When the unrolled loop cannot be used by another
+  /// directive (e.g. because of the full clause), the transformed stmt can also
+  /// be nullptr.
+  Stmt *getTransformedStmt() const {
+    return Data->getChildren()[TransformedStmtOffset];
+  }
+
+  /// Return the pre-init statements.
+  Stmt *getPreInits() const { return Data->getChildren()[PreInitsOffset]; }
+
+  static bool classof(const Stmt *T) {
+    return T->getStmtClass() == OMPUnrollDirectiveClass;
+  }
+};
+
 /// This represents '#pragma omp scan' directive.
 ///
 /// \code

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index ba55095a82667..af242cffbe7de 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10748,6 +10748,9 @@ def err_omp_interop_var_multiple_actions : Error<
 def err_omp_dispatch_statement_call
     : Error<"statement after '#pragma omp dispatch' must be a direct call"
             " to a target function or an assignment to one">;
+def err_omp_unroll_full_variable_trip_count : Error<
+  "loop to be fully unrolled must have a constant trip count">;
+def note_omp_directive_here : Note<"'%0' directive found here">;
 } // end of OpenMP category
 
 let CategoryName = "Related Result Type Issue" in {

diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td
index 6134e60f35414..508f1fddf1b35 100644
--- a/clang/include/clang/Basic/StmtNodes.td
+++ b/clang/include/clang/Basic/StmtNodes.td
@@ -224,6 +224,7 @@ def OMPLoopDirective : StmtNode<OMPLoopBasedDirective, 1>;
 def OMPParallelDirective : StmtNode<OMPExecutableDirective>;
 def OMPSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPTileDirective : StmtNode<OMPLoopBasedDirective>;
+def OMPUnrollDirective : StmtNode<OMPLoopBasedDirective>;
 def OMPForDirective : StmtNode<OMPLoopDirective>;
 def OMPForSimdDirective : StmtNode<OMPLoopDirective>;
 def OMPSectionsDirective : StmtNode<OMPExecutableDirective>;

diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index d1ad6a1cc4af4..6ade9d7691266 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -32,6 +32,7 @@
 #include "clang/AST/NSAPI.h"
 #include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/StmtCXX.h"
+#include "clang/AST/StmtOpenMP.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/AST/TypeOrdering.h"
 #include "clang/Basic/BitmaskEnum.h"
@@ -10220,7 +10221,8 @@ class Sema final {
   void DestroyDataSharingAttributesStack();
   ExprResult
   VerifyPositiveIntegerConstantInClause(Expr *Op, OpenMPClauseKind CKind,
-                                        bool StrictlyPositive = true);
+                                        bool StrictlyPositive = true,
+                                        bool SuppressExprDiags = false);
   /// Returns OpenMP nesting level for current directive.
   unsigned getOpenMPNestingLevel() const;
 
@@ -10238,6 +10240,25 @@ class Sema final {
   /// Pop OpenMP function region for non-capturing function.
   void popOpenMPFunctionRegion(const sema::FunctionScopeInfo *OldFSI);
 
+  /// Analyzes and checks a loop nest for use by a loop transformation.
+  ///
+  /// \param Kind          The loop transformation directive kind.
+  /// \param NumLoops      How many nested loops the directive is expecting.
+  /// \param AStmt         Associated statement of the transformation directive.
+  /// \param LoopHelpers   [out] The loop analysis result.
+  /// \param Body          [out] The body code nested in \p NumLoops loops.
+  /// \param OriginalInits [out] Collection of statements and declarations that
+  ///                      must have been executed/declared before entering the
+  ///                      loop.
+  ///
+  /// \return Whether there was any error.
+  bool checkTransformableLoopNest(
+      OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
+      SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+      Stmt *&Body,
+      SmallVectorImpl<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>>
+          &OriginalInits);
+
   /// Helper to keep information about the current `omp begin/end declare
   /// variant` nesting.
   struct OMPDeclareVariantScope {
@@ -10543,6 +10564,11 @@ class Sema final {
   StmtResult ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                       Stmt *AStmt, SourceLocation StartLoc,
                                       SourceLocation EndLoc);
+  /// Called on well-formed '#pragma omp unroll' after parsing of its clauses
+  /// and the associated statement.
+  StmtResult ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
+                                        Stmt *AStmt, SourceLocation StartLoc,
+                                        SourceLocation EndLoc);
   /// Called on well-formed '\#pragma omp for' after parsing
   /// of the associated statement.
   StmtResult
@@ -10898,6 +10924,13 @@ class Sema final {
                                     SourceLocation StartLoc,
                                     SourceLocation LParenLoc,
                                     SourceLocation EndLoc);
+  /// Called on well-formed 'full' clauses.
+  OMPClause *ActOnOpenMPFullClause(SourceLocation StartLoc,
+                                   SourceLocation EndLoc);
+  /// Called on well-formed 'partial' clauses.
+  OMPClause *ActOnOpenMPPartialClause(Expr *FactorExpr, SourceLocation StartLoc,
+                                      SourceLocation LParenLoc,
+                                      SourceLocation EndLoc);
   /// Called on well-formed 'collapse' clause.
   OMPClause *ActOnOpenMPCollapseClause(Expr *NumForLoops,
                                        SourceLocation StartLoc,

diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index edf8987d61a6c..ad2dcc2216ee4 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -1894,6 +1894,7 @@ enum StmtCode {
   STMT_OMP_PARALLEL_DIRECTIVE,
   STMT_OMP_SIMD_DIRECTIVE,
   STMT_OMP_TILE_DIRECTIVE,
+  STMT_OMP_UNROLL_DIRECTIVE,
   STMT_OMP_FOR_DIRECTIVE,
   STMT_OMP_FOR_SIMD_DIRECTIVE,
   STMT_OMP_SECTIONS_DIRECTIVE,

diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp
index 2f542a16a8e75..50f40395a1975 100644
--- a/clang/lib/AST/OpenMPClause.cpp
+++ b/clang/lib/AST/OpenMPClause.cpp
@@ -942,6 +942,36 @@ OMPSizesClause *OMPSizesClause::CreateEmpty(const ASTContext &C,
   return new (Mem) OMPSizesClause(NumSizes);
 }
 
+OMPFullClause *OMPFullClause::Create(const ASTContext &C,
+                                     SourceLocation StartLoc,
+                                     SourceLocation EndLoc) {
+  OMPFullClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLocEnd(EndLoc);
+  return Clause;
+}
+
+OMPFullClause *OMPFullClause::CreateEmpty(const ASTContext &C) {
+  return new (C) OMPFullClause();
+}
+
+OMPPartialClause *OMPPartialClause::Create(const ASTContext &C,
+                                           SourceLocation StartLoc,
+                                           SourceLocation LParenLoc,
+                                           SourceLocation EndLoc,
+                                           Expr *Factor) {
+  OMPPartialClause *Clause = CreateEmpty(C);
+  Clause->setLocStart(StartLoc);
+  Clause->setLParenLoc(LParenLoc);
+  Clause->setLocEnd(EndLoc);
+  Clause->setFactor(Factor);
+  return Clause;
+}
+
+OMPPartialClause *OMPPartialClause::CreateEmpty(const ASTContext &C) {
+  return new (C) OMPPartialClause();
+}
+
 OMPAllocateClause *
 OMPAllocateClause::Create(const ASTContext &C, SourceLocation StartLoc,
                           SourceLocation LParenLoc, Expr *Allocator,
@@ -1602,6 +1632,18 @@ void OMPClausePrinter::VisitOMPSizesClause(OMPSizesClause *Node) {
   OS << ")";
 }
 
+void OMPClausePrinter::VisitOMPFullClause(OMPFullClause *Node) { OS << "full"; }
+
+void OMPClausePrinter::VisitOMPPartialClause(OMPPartialClause *Node) {
+  OS << "partial";
+
+  if (Expr *Factor = Node->getFactor()) {
+    OS << '(';
+    Factor->printPretty(OS, nullptr, Policy, 0);
+    OS << ')';
+  }
+}
+
 void OMPClausePrinter::VisitOMPAllocatorClause(OMPAllocatorClause *Node) {
   OS << "allocator(";
   Node->getAllocator()->printPretty(OS, nullptr, Policy, 0);

diff --git a/clang/lib/AST/StmtOpenMP.cpp b/clang/lib/AST/StmtOpenMP.cpp
index dbb11e77ac51e..b0ef2f49ba040 100644
--- a/clang/lib/AST/StmtOpenMP.cpp
+++ b/clang/lib/AST/StmtOpenMP.cpp
@@ -129,9 +129,24 @@ bool OMPLoopBasedDirective::doForAllLoops(
         OnTransformationCallback) {
   CurStmt = CurStmt->IgnoreContainers();
   for (unsigned Cnt = 0; Cnt < NumLoops; ++Cnt) {
-    while (auto *Dir = dyn_cast<OMPTileDirective>(CurStmt)) {
-      OnTransformationCallback(Dir);
-      CurStmt = Dir->getTransformedStmt();
+    while (true) {
+      auto *OrigStmt = CurStmt;
+      if (auto *Dir = dyn_cast<OMPTileDirective>(OrigStmt)) {
+        OnTransformationCallback(Dir);
+        CurStmt = Dir->getTransformedStmt();
+      } else if (auto *Dir = dyn_cast<OMPUnrollDirective>(OrigStmt)) {
+        OnTransformationCallback(Dir);
+        CurStmt = Dir->getTransformedStmt();
+      } else {
+        break;
+      }
+
+      if (!CurStmt) {
+        // May happen if the loop transformation does not result in a generated
+        // loop (such as full unrolling).
+        CurStmt = OrigStmt;
+        break;
+      }
     }
     if (auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(CurStmt))
       CurStmt = CanonLoop->getLoopStmt();
@@ -359,6 +374,25 @@ OMPTileDirective *OMPTileDirective::CreateEmpty(const ASTContext &C,
       SourceLocation(), SourceLocation(), NumLoops);
 }
 
+OMPUnrollDirective *
+OMPUnrollDirective::Create(const ASTContext &C, SourceLocation StartLoc,
+                           SourceLocation EndLoc, ArrayRef<OMPClause *> Clauses,
+                           Stmt *AssociatedStmt, Stmt *TransformedStmt,
+                           Stmt *PreInits) {
+  auto *Dir = createDirective<OMPUnrollDirective>(
+      C, Clauses, AssociatedStmt, TransformedStmtOffset + 1, StartLoc, EndLoc);
+  Dir->setTransformedStmt(TransformedStmt);
+  Dir->setPreInits(PreInits);
+  return Dir;
+}
+
+OMPUnrollDirective *OMPUnrollDirective::CreateEmpty(const ASTContext &C,
+                                                    unsigned NumClauses) {
+  return createEmptyDirective<OMPUnrollDirective>(
+      C, NumClauses, /*HasAssociatedStmt=*/true, TransformedStmtOffset + 1,
+      SourceLocation(), SourceLocation());
+}
+
 OMPForSimdDirective *
 OMPForSimdDirective::Create(const ASTContext &C, SourceLocation StartLoc,
                             SourceLocation EndLoc, unsigned CollapsedNum,

diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index 3e275744931a5..ed7f5763acde7 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -668,6 +668,11 @@ void StmtPrinter::VisitOMPTileDirective(OMPTileDirective *Node) {
   PrintOMPExecutableDirective(Node);
 }
 
+void StmtPrinter::VisitOMPUnrollDirective(OMPUnrollDirective *Node) {
+  Indent() << "#pragma omp unroll";
+  PrintOMPExecutableDirective(Node);
+}
+
 void StmtPrinter::VisitOMPForDirective(OMPForDirective *Node) {
   Indent() << "#pragma omp for";
   PrintOMPExecutableDirective(Node);

diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp
index dc91043b44a67..ed000c2467fac 100644
--- a/clang/lib/AST/StmtProfile.cpp
+++ b/clang/lib/AST/StmtProfile.cpp
@@ -468,6 +468,13 @@ void OMPClauseProfiler::VisitOMPSizesClause(const OMPSizesClause *C) {
       Profiler->VisitExpr(E);
 }
 
+void OMPClauseProfiler::VisitOMPFullClause(const OMPFullClause *C) {}
+
+void OMPClauseProfiler::VisitOMPPartialClause(const OMPPartialClause *C) {
+  if (const Expr *Factor = C->getFactor())
+    Profiler->VisitExpr(Factor);
+}
+
 void OMPClauseProfiler::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   if (C->getAllocator())
     Profiler->VisitStmt(C->getAllocator());
@@ -908,6 +915,10 @@ void StmtProfiler::VisitOMPTileDirective(const OMPTileDirective *S) {
   VisitOMPLoopBasedDirective(S);
 }
 
+void StmtProfiler::VisitOMPUnrollDirective(const OMPUnrollDirective *S) {
+  VisitOMPLoopBasedDirective(S);
+}
+
 void StmtProfiler::VisitOMPForDirective(const OMPForDirective *S) {
   VisitOMPLoopDirective(S);
 }

diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index b1c78742fb55f..cfdba09eb1ecc 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -452,7 +452,8 @@ bool clang::isOpenMPLoopDirective(OpenMPDirectiveKind DKind) {
          DKind == OMPD_target_teams_distribute ||
          DKind == OMPD_target_teams_distribute_parallel_for ||
          DKind == OMPD_target_teams_distribute_parallel_for_simd ||
-         DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile;
+         DKind == OMPD_target_teams_distribute_simd || DKind == OMPD_tile ||
+         DKind == OMPD_unroll;
 }
 
 bool clang::isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind) {
@@ -580,7 +581,7 @@ bool clang::isOpenMPLoopBoundSharingDirective(OpenMPDirectiveKind Kind) {
 }
 
 bool clang::isOpenMPLoopTransformationDirective(OpenMPDirectiveKind DKind) {
-  return DKind == OMPD_tile;
+  return DKind == OMPD_tile || DKind == OMPD_unroll;
 }
 
 void clang::getOpenMPCaptureRegions(
@@ -668,6 +669,7 @@ void clang::getOpenMPCaptureRegions(
     CaptureRegions.push_back(OMPD_unknown);
     break;
   case OMPD_tile:
+  case OMPD_unroll:
     // loop transformations do not introduce captures.
     break;
   case OMPD_threadprivate:

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 8f65f38747d87..71b34a1578feb 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6678,6 +6678,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
   case OMPD_task:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_sections:
   case OMPD_section:
   case OMPD_single:
@@ -6996,6 +6997,7 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF,
   case OMPD_task:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_sections:
   case OMPD_section:
   case OMPD_single:
@@ -9603,6 +9605,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
     case OMPD_task:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
@@ -10432,6 +10435,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
     case OMPD_task:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
@@ -11144,6 +11148,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
     case OMPD_task:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:

diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 29eea3be87ce0..85390f2b14641 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -208,6 +208,9 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) {
   case Stmt::OMPTileDirectiveClass:
     EmitOMPTileDirective(cast<OMPTileDirective>(*S));
     break;
+  case Stmt::OMPUnrollDirectiveClass:
+    EmitOMPUnrollDirective(cast<OMPUnrollDirective>(*S));
+    break;
   case Stmt::OMPForDirectiveClass:
     EmitOMPForDirective(cast<OMPForDirective>(*S));
     break;

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 9f2e52d4e5233..9dd1edbfd1e56 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -176,6 +176,8 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
       PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
     } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
       PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
+    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
+      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
     } else {
       llvm_unreachable("Unknown loop-based directive kind.");
     }
@@ -1821,6 +1823,8 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
   if (SimplifiedS == NextLoop) {
     if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
       SimplifiedS = Dir->getTransformedStmt();
+    if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
+      SimplifiedS = Dir->getTransformedStmt();
     if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
       SimplifiedS = CanonLoop->getLoopStmt();
     if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
@@ -2579,6 +2583,28 @@ void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
   EmitStmt(S.getTransformedStmt());
 }
 
+void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
+  // This function is only called if the unrolled loop is not consumed by any
+  // other loop-associated construct. Such a loop-associated construct will have
+  // used the transformed AST.
+
+  // Set the unroll metadata for the next emitted loop.
+  LoopStack.setUnrollState(LoopAttributes::Enable);
+
+  if (S.hasClausesOfKind<OMPFullClause>()) {
+    LoopStack.setUnrollState(LoopAttributes::Full);
+  } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
+    if (Expr *FactorExpr = PartialClause->getFactor()) {
+      uint64_t Factor =
+          FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
+      assert(Factor >= 1 && "Only positive factors are valid");
+      LoopStack.setUnrollCount(Factor);
+    }
+  }
+
+  EmitStmt(S.getAssociatedStmt());
+}
+
 void CodeGenFunction::EmitOMPOuterLoop(
     bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
     CodeGenFunction::OMPPrivateScope &LoopScope,
@@ -5762,6 +5788,8 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
   case OMPC_safelen:
   case OMPC_simdlen:
   case OMPC_sizes:
+  case OMPC_full:
+  case OMPC_partial:
   case OMPC_allocator:
   case OMPC_allocate:
   case OMPC_collapse:

diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 564063d6b7c84..56120c478589b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -3441,6 +3441,7 @@ class CodeGenFunction : public CodeGenTypeCache {
   void EmitOMPParallelDirective(const OMPParallelDirective &S);
   void EmitOMPSimdDirective(const OMPSimdDirective &S);
   void EmitOMPTileDirective(const OMPTileDirective &S);
+  void EmitOMPUnrollDirective(const OMPUnrollDirective &S);
   void EmitOMPForDirective(const OMPForDirective &S);
   void EmitOMPForSimdDirective(const OMPForSimdDirective &S);
   void EmitOMPSectionsDirective(const OMPSectionsDirective &S);

diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index c8d62f7517a37..d3a456fca49c0 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2166,6 +2166,7 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
   case OMPD_parallel:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_task:
   case OMPD_taskyield:
   case OMPD_barrier:
@@ -2403,6 +2404,7 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
   case OMPD_parallel:
   case OMPD_simd:
   case OMPD_tile:
+  case OMPD_unroll:
   case OMPD_for:
   case OMPD_for_simd:
   case OMPD_sections:
@@ -2793,6 +2795,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
   case OMPC_novariants:
   case OMPC_nocontext:
   case OMPC_filter:
+  case OMPC_partial:
     // OpenMP [2.5, Restrictions]
     //  At most one num_threads clause can appear on the directive.
     // OpenMP [2.8.1, simd construct, Restrictions]
@@ -2824,7 +2827,8 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
       ErrorFound = true;
     }
 
-    if (CKind == OMPC_ordered && PP.LookAhead(/*N=*/0).isNot(tok::l_paren))
+    if ((CKind == OMPC_ordered || CKind == OMPC_partial) &&
+        PP.LookAhead(/*N=*/0).isNot(tok::l_paren))
       Clause = ParseOpenMPClause(CKind, WrongDirective);
     else
       Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective);
@@ -2887,6 +2891,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind,
   case OMPC_unified_shared_memory:
   case OMPC_reverse_offload:
   case OMPC_dynamic_allocators:
+  case OMPC_full:
     // OpenMP [2.7.1, Restrictions, p. 9]
     //  Only one ordered clause can appear on a loop directive.
     // OpenMP [2.7.1, Restrictions, C/C++, p. 4]

diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp
index fd6a4940c174f..8816c9c1fea02 100644
--- a/clang/lib/Sema/SemaExceptionSpec.cpp
+++ b/clang/lib/Sema/SemaExceptionSpec.cpp
@@ -1462,6 +1462,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) {
   case Stmt::OMPSectionsDirectiveClass:
   case Stmt::OMPSimdDirectiveClass:
   case Stmt::OMPTileDirectiveClass:
+  case Stmt::OMPUnrollDirectiveClass:
   case Stmt::OMPSingleDirectiveClass:
   case Stmt::OMPTargetDataDirectiveClass:
   case Stmt::OMPTargetDirectiveClass:

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 27a2ff204adf9..2ec487130d30e 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -3810,6 +3810,11 @@ class DSAAttrChecker final : public StmtVisitor<DSAAttrChecker, void> {
     VisitStmt(S);
   }
 
+  void VisitOMPUnrollDirective(OMPUnrollDirective *S) {
+    // #pragma omp unroll does not introduce data sharing.
+    VisitStmt(S);
+  }
+
   void VisitStmt(Stmt *S) {
     for (Stmt *C : S->children()) {
       if (C) {
@@ -3976,6 +3981,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
   case OMPD_master:
   case OMPD_masked:
   case OMPD_tile:
+  case OMPD_unroll:
     break;
   case OMPD_simd:
   case OMPD_for:
@@ -5847,6 +5853,10 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
     Res =
         ActOnOpenMPTileDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc);
     break;
+  case OMPD_unroll:
+    Res = ActOnOpenMPUnrollDirective(ClausesWithImplicit, AStmt, StartLoc,
+                                     EndLoc);
+    break;
   case OMPD_for:
     Res = ActOnOpenMPForDirective(ClausesWithImplicit, AStmt, StartLoc, EndLoc,
                                   VarsWithInheritedDSA);
@@ -8967,8 +8977,14 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr,
             return false;
           },
           [&SemaRef, &Captures](OMPLoopBasedDirective *Transform) {
-            Stmt *DependentPreInits =
-                cast<OMPTileDirective>(Transform)->getPreInits();
+            Stmt *DependentPreInits;
+            if (auto *Dir = dyn_cast<OMPTileDirective>(Transform)) {
+              DependentPreInits = Dir->getPreInits();
+            } else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform)) {
+              DependentPreInits = Dir->getPreInits();
+            } else {
+              llvm_unreachable("Unexpected loop transformation");
+            }
             if (!DependentPreInits)
               return;
             for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup()) {
@@ -12543,6 +12559,55 @@ StmtResult Sema::ActOnOpenMPTargetTeamsDistributeSimdDirective(
       Context, StartLoc, EndLoc, NestedLoopCount, Clauses, AStmt, B);
 }
 
+bool Sema::checkTransformableLoopNest(
+    OpenMPDirectiveKind Kind, Stmt *AStmt, int NumLoops,
+    SmallVectorImpl<OMPLoopBasedDirective::HelperExprs> &LoopHelpers,
+    Stmt *&Body,
+    SmallVectorImpl<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>>
+        &OriginalInits) {
+  OriginalInits.emplace_back();
+  bool Result = OMPLoopBasedDirective::doForAllLoops(
+      AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false, NumLoops,
+      [this, &LoopHelpers, &Body, &OriginalInits, Kind](unsigned Cnt,
+                                                        Stmt *CurStmt) {
+        VarsWithInheritedDSAType TmpDSA;
+        unsigned SingleNumLoops =
+            checkOpenMPLoop(Kind, nullptr, nullptr, CurStmt, *this, *DSAStack,
+                            TmpDSA, LoopHelpers[Cnt]);
+        if (SingleNumLoops == 0)
+          return true;
+        assert(SingleNumLoops == 1 && "Expect single loop iteration space");
+        if (auto *For = dyn_cast<ForStmt>(CurStmt)) {
+          OriginalInits.back().push_back(For->getInit());
+          Body = For->getBody();
+        } else {
+          assert(isa<CXXForRangeStmt>(CurStmt) &&
+                 "Expected canonical for or range-based for loops.");
+          auto *CXXFor = cast<CXXForRangeStmt>(CurStmt);
+          OriginalInits.back().push_back(CXXFor->getBeginStmt());
+          Body = CXXFor->getBody();
+        }
+        OriginalInits.emplace_back();
+        return false;
+      },
+      [&OriginalInits](OMPLoopBasedDirective *Transform) {
+        Stmt *DependentPreInits;
+        if (auto *Dir = dyn_cast<OMPTileDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else if (auto *Dir = dyn_cast<OMPUnrollDirective>(Transform))
+          DependentPreInits = Dir->getPreInits();
+        else
+          llvm_unreachable("Unhandled loop transformation");
+        if (!DependentPreInits)
+          return;
+        for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup())
+          OriginalInits.back().push_back(C);
+      });
+  assert(OriginalInits.back().empty() && "No preinit after innermost loop");
+  OriginalInits.pop_back();
+  return Result;
+}
+
 StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                           Stmt *AStmt, SourceLocation StartLoc,
                                           SourceLocation EndLoc) {
@@ -12563,38 +12628,9 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
   SmallVector<OMPLoopBasedDirective::HelperExprs, 4> LoopHelpers(NumLoops);
   Stmt *Body = nullptr;
   SmallVector<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>, 4>
-      OriginalInits(1);
-  if (!OMPLoopBasedDirective::doForAllLoops(
-          AStmt->IgnoreContainers(), /*TryImperfectlyNestedLoops=*/false,
-          NumLoops,
-          [this, &LoopHelpers, &Body, &OriginalInits](unsigned Cnt,
-                                                      Stmt *CurStmt) {
-            VarsWithInheritedDSAType TmpDSA;
-            unsigned SingleNumLoops =
-                checkOpenMPLoop(OMPD_tile, nullptr, nullptr, CurStmt, *this,
-                                *DSAStack, TmpDSA, LoopHelpers[Cnt]);
-            if (SingleNumLoops == 0)
-              return true;
-            assert(SingleNumLoops == 1 && "Expect single loop iteration space");
-            if (auto *For = dyn_cast<ForStmt>(CurStmt)) {
-              OriginalInits.back().push_back(For->getInit());
-              Body = For->getBody();
-            } else {
-              assert(isa<CXXForRangeStmt>(CurStmt) &&
-                     "Expected canonical for or range-based for loops.");
-              auto *CXXFor = cast<CXXForRangeStmt>(CurStmt);
-              OriginalInits.back().push_back(CXXFor->getBeginStmt());
-              Body = CXXFor->getBody();
-            }
-            OriginalInits.emplace_back();
-            return false;
-          },
-          [&OriginalInits](OMPLoopBasedDirective *Transform) {
-            Stmt *DependentPreInits =
-                cast<OMPTileDirective>(Transform)->getPreInits();
-            for (Decl *C : cast<DeclStmt>(DependentPreInits)->getDeclGroup())
-              OriginalInits.back().push_back(C);
-          }))
+      OriginalInits;
+  if (!checkTransformableLoopNest(OMPD_tile, AStmt, NumLoops, LoopHelpers, Body,
+                                  OriginalInits))
     return StmtError();
 
   // Delay tiling to when template is completely instantiated.
@@ -12783,6 +12819,282 @@ StmtResult Sema::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                   buildPreInits(Context, PreInits));
 }
 
+StmtResult Sema::ActOnOpenMPUnrollDirective(ArrayRef<OMPClause *> Clauses,
+                                            Stmt *AStmt,
+                                            SourceLocation StartLoc,
+                                            SourceLocation EndLoc) {
+  // Empty statement should only be possible if there already was an error.
+  if (!AStmt)
+    return StmtError();
+
+  if (checkMutuallyExclusiveClauses(*this, Clauses, {OMPC_partial, OMPC_full}))
+    return StmtError();
+
+  const OMPFullClause *FullClause =
+      OMPExecutableDirective::getSingleClause<OMPFullClause>(Clauses);
+  const OMPPartialClause *PartialClause =
+      OMPExecutableDirective::getSingleClause<OMPPartialClause>(Clauses);
+  assert(!(FullClause && PartialClause) &&
+         "mutual exclusivity must have been checked before");
+
+  constexpr unsigned NumLoops = 1;
+  Stmt *Body = nullptr;
+  SmallVector<OMPLoopBasedDirective::HelperExprs, NumLoops> LoopHelpers(
+      NumLoops);
+  SmallVector<SmallVector<llvm::PointerUnion<Stmt *, Decl *>, 0>, NumLoops + 1>
+      OriginalInits;
+  if (!checkTransformableLoopNest(OMPD_unroll, AStmt, NumLoops, LoopHelpers,
+                                  Body, OriginalInits))
+    return StmtError();
+
+  // Delay unrolling to when template is completely instantiated.
+  if (CurContext->isDependentContext())
+    return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                      nullptr, nullptr);
+
+  OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers.front();
+
+  if (FullClause) {
+    if (!VerifyPositiveIntegerConstantInClause(
+             LoopHelper.NumIterations, OMPC_full, /*StrictlyPositive=*/false,
+             /*SuppressExprDiags=*/true)
+             .isUsable()) {
+      Diag(AStmt->getBeginLoc(), diag::err_omp_unroll_full_variable_trip_count);
+      Diag(FullClause->getBeginLoc(), diag::note_omp_directive_here)
+          << "#pragma omp unroll full";
+      return StmtError();
+    }
+  }
+
+  // The generated loop may only be passed to another loop-associated directive
+  // when a partial clause is specified. Without that requirement, it is
+  // sufficient to generate loop unroll metadata at code generation.
+  if (!PartialClause)
+    return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                      nullptr, nullptr);
+
+  // Otherwise, we need to provide a de-sugared/transformed AST that can be
+  // associated with another loop directive.
+  //
+  // The canonical loop analysis returned by checkTransformableLoopNest assumes
+  // the following structure to be the same loop without transformations or
+  // directives applied: \code OriginalInits; LoopHelper.PreInits;
+  // LoopHelper.Counters;
+  // for (; IV < LoopHelper.NumIterations; ++IV) {
+  //   LoopHelper.Updates;
+  //   Body;
+  // }
+  // \endcode
+  // where IV is a variable declared and initialized to 0 in LoopHelper.PreInits
+  // and referenced by LoopHelper.IterationVarRef.
+  //
+  // The unrolling directive transforms this into the following loop:
+  // \code
+  // OriginalInits;         \
+  // LoopHelper.PreInits;    > NewPreInits
+  // LoopHelper.Counters;   /
+  // for (auto UIV = 0; UIV < LoopHelper.NumIterations; UIV += Factor) {
+  //   #pragma clang loop unroll_count(Factor)
+  //   for (IV = UIV; IV < UIV + Factor && IV < LoopHelper.NumIterations; ++IV)
+  //   {
+  //     LoopHelper.Updates;
+  //     Body;
+  //   }
+  // }
+  // \endcode
+  // where UIV is a new logical iteration counter. IV must be the same VarDecl
+  // as the original LoopHelper.IterationVarRef because LoopHelper.Updates
+  // references it. If the partially unrolled loop is associated with another
+  // loop directive (like an OMPForDirective), it will use checkOpenMPLoop to
+  // analyze this loop, i.e. the outer loop must fulfill the constraints of an
+  // OpenMP canonical loop. The inner loop is not an associable canonical loop
+  // and only exists to defer its unrolling to LLVM's LoopUnroll instead of
+  // doing it in the frontend (by adding loop metadata). NewPreInits becomes a
+  // property of the OMPLoopBasedDirective instead of statements in
+  // CompoundStatement. This is to allow the loop to become a non-outermost loop
+  // of a canonical loop nest where these PreInits are emitted before the
+  // outermost directive.
+
+  // Determine the PreInit declarations.
+  SmallVector<Decl *, 4> PreInits;
+  assert(OriginalInits.size() == 1 &&
+         "Expecting a single-dimensional loop iteration space");
+  for (auto &P : OriginalInits[0]) {
+    if (auto *D = P.dyn_cast<Decl *>())
+      PreInits.push_back(D);
+    else if (auto *PI = dyn_cast_or_null<DeclStmt>(P.dyn_cast<Stmt *>()))
+      PreInits.append(PI->decl_begin(), PI->decl_end());
+  }
+  if (auto *PI = cast_or_null<DeclStmt>(LoopHelper.PreInits))
+    PreInits.append(PI->decl_begin(), PI->decl_end());
+  // Gather declarations for the data members used as counters.
+  for (Expr *CounterRef : LoopHelper.Counters) {
+    auto *CounterDecl = cast<DeclRefExpr>(CounterRef)->getDecl();
+    if (isa<OMPCapturedExprDecl>(CounterDecl))
+      PreInits.push_back(CounterDecl);
+  }
+
+  auto *IterationVarRef = cast<DeclRefExpr>(LoopHelper.IterationVarRef);
+  QualType IVTy = IterationVarRef->getType();
+  assert(LoopHelper.Counters.size() == 1 &&
+         "Expecting a single-dimensional loop iteration space");
+  auto *OrigVar = cast<DeclRefExpr>(LoopHelper.Counters.front());
+
+  // Determine the unroll factor.
+  uint64_t Factor;
+  SourceLocation FactorLoc;
+  if (Expr *FactorVal = PartialClause->getFactor()) {
+    Factor =
+        FactorVal->getIntegerConstantExpr(Context).getValue().getZExtValue();
+    FactorLoc = FactorVal->getExprLoc();
+  } else {
+    // TODO: Use a better profitability model.
+    Factor = 2;
+  }
+  assert(Factor > 0 && "Expected positive unroll factor");
+  auto MakeFactorExpr = [this, Factor, IVTy, FactorLoc]() {
+    return IntegerLiteral::Create(
+        Context, llvm::APInt(Context.getIntWidth(IVTy), Factor), IVTy,
+        FactorLoc);
+  };
+
+  // Iteration variable SourceLocations.
+  SourceLocation OrigVarLoc = OrigVar->getExprLoc();
+  SourceLocation OrigVarLocBegin = OrigVar->getBeginLoc();
+  SourceLocation OrigVarLocEnd = OrigVar->getEndLoc();
+
+  // Internal variable names.
+  std::string OrigVarName = OrigVar->getNameInfo().getAsString();
+  std::string OuterIVName = (Twine(".unrolled.iv.") + OrigVarName).str();
+  std::string InnerIVName = (Twine(".unroll_inner.iv.") + OrigVarName).str();
+  std::string InnerTripCountName =
+      (Twine(".unroll_inner.tripcount.") + OrigVarName).str();
+
+  // Create the iteration variable for the unrolled loop.
+  VarDecl *OuterIVDecl =
+      buildVarDecl(*this, {}, IVTy, OuterIVName, nullptr, OrigVar);
+  auto MakeOuterRef = [this, OuterIVDecl, IVTy, OrigVarLoc]() {
+    return buildDeclRefExpr(*this, OuterIVDecl, IVTy, OrigVarLoc);
+  };
+
+  // Iteration variable for the inner loop: Reuse the iteration variable created
+  // by checkOpenMPLoop.
+  auto *InnerIVDecl = cast<VarDecl>(IterationVarRef->getDecl());
+  InnerIVDecl->setDeclName(&PP.getIdentifierTable().get(InnerIVName));
+  auto MakeInnerRef = [this, InnerIVDecl, IVTy, OrigVarLoc]() {
+    return buildDeclRefExpr(*this, InnerIVDecl, IVTy, OrigVarLoc);
+  };
+
+  // Make a copy of the NumIterations expression for each use: By the AST
+  // constraints, every expression object in a DeclContext must be unique.
+  CaptureVars CopyTransformer(*this);
+  auto MakeNumIterations = [&CopyTransformer, &LoopHelper]() -> Expr * {
+    return AssertSuccess(
+        CopyTransformer.TransformExpr(LoopHelper.NumIterations));
+  };
+
+  // Inner For init-statement: auto .unroll_inner.iv = .unrolled.iv
+  ExprResult LValueConv = DefaultLvalueConversion(MakeOuterRef());
+  AddInitializerToDecl(InnerIVDecl, LValueConv.get(), /*DirectInit=*/false);
+  StmtResult InnerInit = new (Context)
+      DeclStmt(DeclGroupRef(InnerIVDecl), OrigVarLocBegin, OrigVarLocEnd);
+  if (!InnerInit.isUsable())
+    return StmtError();
+
+  // Inner For cond-expression:
+  // \code
+  //   .unroll_inner.iv < .unrolled.iv + Factor &&
+  //   .unroll_inner.iv < NumIterations
+  // \endcode
+  // This conjunction of two conditions allows ScalarEvolution to derive the
+  // maximum trip count of the inner loop.
+  ExprResult EndOfTile = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(),
+                                    BO_Add, MakeOuterRef(), MakeFactorExpr());
+  if (!EndOfTile.isUsable())
+    return StmtError();
+  ExprResult InnerCond1 = BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(),
+                                     BO_LT, MakeInnerRef(), EndOfTile.get());
+  if (!InnerCond1.isUsable())
+    return StmtError();
+  ExprResult InnerCond2 =
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeInnerRef(),
+                 MakeNumIterations());
+  if (!InnerCond2.isUsable())
+    return StmtError();
+  ExprResult InnerCond =
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LAnd,
+                 InnerCond1.get(), InnerCond2.get());
+  if (!InnerCond.isUsable())
+    return StmtError();
+
+  // Inner For incr-statement: ++.unroll_inner.iv
+  ExprResult InnerIncr = BuildUnaryOp(CurScope, LoopHelper.Inc->getExprLoc(),
+                                      UO_PreInc, MakeInnerRef());
+  if (!InnerIncr.isUsable())
+    return StmtError();
+
+  // Inner For statement.
+  SmallVector<Stmt *> InnerBodyStmts;
+  InnerBodyStmts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end());
+  InnerBodyStmts.push_back(Body);
+  CompoundStmt *InnerBody = CompoundStmt::Create(
+      Context, InnerBodyStmts, Body->getBeginLoc(), Body->getEndLoc());
+  ForStmt *InnerFor = new (Context)
+      ForStmt(Context, InnerInit.get(), InnerCond.get(), nullptr,
+              InnerIncr.get(), InnerBody, LoopHelper.Init->getBeginLoc(),
+              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+
+  // Unroll metadata for the inner loop.
+  // The metadata needs to account for the remainder portion of the unrolled
+  // loop, so `unroll(full)` does not apply here, even though the LoopUnroll
+  // pass supports multiple loop exits. Instead, unroll using a factor
+  // equivalent to the maximum trip count, which also generates a remainder
+  // loop. Plain `unroll(enable)` (which would have been useful when the user
+  // did not specify a concrete factor, since it avoids unnecessary code bloat
+  // even though the outer loop can no longer be influenced) makes LoopUnroll
+  // refuse the loop with "Won't unroll; remainder loop could not be generated
+  // when assuming runtime trip count". Even if it did work, it must not
+  // choose an unroll factor larger than the maximum trip count, or only the
+  // remainder loop would ever execute.
+  LoopHintAttr *UnrollHintAttr =
+      LoopHintAttr::CreateImplicit(Context, LoopHintAttr::UnrollCount,
+                                   LoopHintAttr::Numeric, MakeFactorExpr());
+  AttributedStmt *InnerUnrolled =
+      AttributedStmt::Create(Context, StartLoc, {UnrollHintAttr}, InnerFor);
+
+  // Outer For init-statement: auto .unrolled.iv = 0
+  AddInitializerToDecl(
+      OuterIVDecl, ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(),
+      /*DirectInit=*/false);
+  StmtResult OuterInit = new (Context)
+      DeclStmt(DeclGroupRef(OuterIVDecl), OrigVarLocBegin, OrigVarLocEnd);
+  if (!OuterInit.isUsable())
+    return StmtError();
+
+  // Outer For cond-expression: .unrolled.iv < NumIterations
+  ExprResult OuterCond =
+      BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, MakeOuterRef(),
+                 MakeNumIterations());
+  if (!OuterCond.isUsable())
+    return StmtError();
+
+  // Outer For incr-statement: .unrolled.iv += Factor
+  ExprResult OuterIncr =
+      BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign,
+                 MakeOuterRef(), MakeFactorExpr());
+  if (!OuterIncr.isUsable())
+    return StmtError();
+
+  // Outer For statement.
+  ForStmt *OuterFor = new (Context)
+      ForStmt(Context, OuterInit.get(), OuterCond.get(), nullptr,
+              OuterIncr.get(), InnerUnrolled, LoopHelper.Init->getBeginLoc(),
+              LoopHelper.Init->getBeginLoc(), LoopHelper.Inc->getEndLoc());
+
+  return OMPUnrollDirective::Create(Context, StartLoc, EndLoc, Clauses, AStmt,
+                                    OuterFor, buildPreInits(Context, PreInits));
+}
+
 OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
                                              SourceLocation StartLoc,
                                              SourceLocation LParenLoc,
@@ -12843,6 +13155,9 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr,
   case OMPC_filter:
     Res = ActOnOpenMPFilterClause(Expr, StartLoc, LParenLoc, EndLoc);
     break;
+  case OMPC_partial:
+    Res = ActOnOpenMPPartialClause(Expr, StartLoc, LParenLoc, EndLoc);
+    break;
   case OMPC_device:
   case OMPC_if:
   case OMPC_default:
@@ -13045,6 +13360,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_teams:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_sections:
     case OMPD_section:
@@ -13123,6 +13439,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_teams:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13204,6 +13521,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13283,6 +13601,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13363,6 +13682,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_sections:
     case OMPD_section:
     case OMPD_single:
@@ -13442,6 +13762,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13522,6 +13843,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13603,6 +13925,7 @@ static OpenMPDirectiveKind getOpenMPCaptureRegionForClause(
     case OMPD_end_declare_target:
     case OMPD_simd:
     case OMPD_tile:
+    case OMPD_unroll:
     case OMPD_for:
     case OMPD_for_simd:
     case OMPD_sections:
@@ -13886,17 +14209,33 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads,
 
 ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E,
                                                        OpenMPClauseKind CKind,
-                                                       bool StrictlyPositive) {
+                                                       bool StrictlyPositive,
+                                                       bool SuppressExprDiags) {
   if (!E)
     return ExprError();
   if (E->isValueDependent() || E->isTypeDependent() ||
       E->isInstantiationDependent() || E->containsUnexpandedParameterPack())
     return E;
+
   llvm::APSInt Result;
-  ExprResult ICE =
-      VerifyIntegerConstantExpression(E, &Result, /*FIXME*/ AllowFold);
+  ExprResult ICE;
+  if (SuppressExprDiags) {
+    // Use a custom diagnoser that suppresses 'note' diagnostics about the
+    // expression.
+    struct SuppressedDiagnoser : public Sema::VerifyICEDiagnoser {
+      SuppressedDiagnoser() : VerifyICEDiagnoser(/*Suppress=*/true) {}
+      Sema::SemaDiagnosticBuilder diagnoseNotICE(Sema &S,
+                                                 SourceLocation Loc) override {
+        llvm_unreachable("Diagnostic suppressed");
+      }
+    } Diagnoser;
+    ICE = VerifyIntegerConstantExpression(E, &Result, Diagnoser, AllowFold);
+  } else {
+    ICE = VerifyIntegerConstantExpression(E, &Result, /*FIXME*/ AllowFold);
+  }
   if (ICE.isInvalid())
     return ExprError();
+
   if ((StrictlyPositive && !Result.isStrictlyPositive()) ||
       (!StrictlyPositive && !Result.isNonNegative())) {
     Diag(E->getExprLoc(), diag::err_omp_negative_expression_in_clause)
@@ -14301,6 +14640,29 @@ OMPClause *Sema::ActOnOpenMPSizesClause(ArrayRef<Expr *> SizeExprs,
                                 SizeExprs);
 }
 
+OMPClause *Sema::ActOnOpenMPFullClause(SourceLocation StartLoc,
+                                       SourceLocation EndLoc) {
+  return OMPFullClause::Create(Context, StartLoc, EndLoc);
+}
+
+OMPClause *Sema::ActOnOpenMPPartialClause(Expr *FactorExpr,
+                                          SourceLocation StartLoc,
+                                          SourceLocation LParenLoc,
+                                          SourceLocation EndLoc) {
+  if (FactorExpr) {
+    // If an argument is specified, it must be a constant (or an unevaluated
+    // template expression).
+    ExprResult FactorResult = VerifyPositiveIntegerConstantInClause(
+        FactorExpr, OMPC_partial, /*StrictlyPositive=*/true);
+    if (FactorResult.isInvalid())
+      return nullptr;
+    FactorExpr = FactorResult.get();
+  }
+
+  return OMPPartialClause::Create(Context, StartLoc, LParenLoc, EndLoc,
+                                  FactorExpr);
+}
+
 OMPClause *Sema::ActOnOpenMPSingleExprWithArgClause(
     OpenMPClauseKind Kind, ArrayRef<unsigned> Argument, Expr *Expr,
     SourceLocation StartLoc, SourceLocation LParenLoc,
@@ -14603,6 +14965,12 @@ OMPClause *Sema::ActOnOpenMPClause(OpenMPClauseKind Kind,
                                    /*LParenLoc=*/SourceLocation(),
                                    /*VarLoc=*/SourceLocation(), EndLoc);
     break;
+  case OMPC_full:
+    Res = ActOnOpenMPFullClause(StartLoc, EndLoc);
+    break;
+  case OMPC_partial:
+    Res = ActOnOpenMPPartialClause(nullptr, StartLoc, /*LParenLoc=*/{}, EndLoc);
+    break;
   case OMPC_if:
   case OMPC_final:
   case OMPC_num_threads:

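For reference, the Sema entry points added above accept the clauses exactly as they appear in the new tests further down: ActOnOpenMPFullClause takes no argument, while ActOnOpenMPPartialClause takes an optional factor that VerifyPositiveIntegerConstantInClause checks for being a strictly positive integer constant (dependent expressions are left untouched until instantiation). A minimal usage sketch follows; the function and variable names are illustrative only and not part of the patch:

    void demo(int n, double *a) {
      // Fully unroll a loop with compile-time-constant bounds
      // (see unroll_codegen_full.cpp below).
      #pragma omp unroll full
      for (int i = 0; i < 8; ++i)
        a[i] = 0.0;

      // Explicit factor: the '4' must be a strictly positive integer
      // constant expression, enforced by ActOnOpenMPPartialClause.
      #pragma omp unroll partial(4)
      for (int i = 0; i < n; ++i)
        a[i] += 1.0;

      // 'partial' with no argument: the factor expression stays null and
      // the unroll factor is chosen by the implementation.
      #pragma omp unroll partial
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0;
    }
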
diff  --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 506e0fc0ba057..21dc8c5d893bd 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1633,6 +1633,20 @@ class TreeTransform {
     return getSema().ActOnOpenMPSizesClause(Sizes, StartLoc, LParenLoc, EndLoc);
   }
 
+  /// Build a new OpenMP 'full' clause.
+  OMPClause *RebuildOMPFullClause(SourceLocation StartLoc,
+                                  SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPFullClause(StartLoc, EndLoc);
+  }
+
+  /// Build a new OpenMP 'partial' clause.
+  OMPClause *RebuildOMPPartialClause(Expr *Factor, SourceLocation StartLoc,
+                                     SourceLocation LParenLoc,
+                                     SourceLocation EndLoc) {
+    return getSema().ActOnOpenMPPartialClause(Factor, StartLoc, LParenLoc,
+                                              EndLoc);
+  }
+
   /// Build a new OpenMP 'allocator' clause.
   ///
   /// By default, performs semantic analysis to build the new OpenMP clause.
@@ -8521,6 +8535,17 @@ TreeTransform<Derived>::TransformOMPTileDirective(OMPTileDirective *D) {
   return Res;
 }
 
+template <typename Derived>
+StmtResult
+TreeTransform<Derived>::TransformOMPUnrollDirective(OMPUnrollDirective *D) {
+  DeclarationNameInfo DirName;
+  getDerived().getSema().StartOpenMPDSABlock(D->getDirectiveKind(), DirName,
+                                             nullptr, D->getBeginLoc());
+  StmtResult Res = getDerived().TransformOMPExecutableDirective(D);
+  getDerived().getSema().EndOpenMPDSABlock(Res.get());
+  return Res;
+}
+
 template <typename Derived>
 StmtResult
 TreeTransform<Derived>::TransformOMPForDirective(OMPForDirective *D) {
@@ -9220,6 +9245,28 @@ OMPClause *TreeTransform<Derived>::TransformOMPSizesClause(OMPSizesClause *C) {
                                C->getLParenLoc(), C->getEndLoc());
 }
 
+template <typename Derived>
+OMPClause *TreeTransform<Derived>::TransformOMPFullClause(OMPFullClause *C) {
+  if (!getDerived().AlwaysRebuild())
+    return C;
+  return RebuildOMPFullClause(C->getBeginLoc(), C->getEndLoc());
+}
+
+template <typename Derived>
+OMPClause *
+TreeTransform<Derived>::TransformOMPPartialClause(OMPPartialClause *C) {
+  ExprResult T = getDerived().TransformExpr(C->getFactor());
+  if (T.isInvalid())
+    return nullptr;
+  Expr *Factor = T.get();
+  bool Changed = Factor != C->getFactor();
+
+  if (!Changed && !getDerived().AlwaysRebuild())
+    return C;
+  return RebuildOMPPartialClause(Factor, C->getBeginLoc(), C->getLParenLoc(),
+                                 C->getEndLoc());
+}
+
 template <typename Derived>
 OMPClause *
 TreeTransform<Derived>::TransformOMPCollapseClause(OMPCollapseClause *C) {

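The TransformOMPPartialClause/RebuildOMPPartialClause pair above handles the dependent case, where the unroll factor is a template parameter and the clause has to be rebuilt (and re-verified) at instantiation time, as exercised by unroll_templated in unroll_ast_print.cpp below. A minimal sketch; names here are illustrative only:

    template <int Factor>
    void unroll_by(int n, double *a) {
      // 'Factor' is value-dependent here; TreeTransform rebuilds the clause
      // when the template is instantiated.
      #pragma omp unroll partial(Factor)
      for (int i = 0; i < n; ++i)
        a[i] += 1.0;
    }

    void instantiate(int n, double *a) {
      unroll_by<4>(n, a); // the clause becomes 'partial(4)' after instantiation
    }
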
diff  --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index c8290ac5c31bf..55e0d084ea4be 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -11720,6 +11720,12 @@ OMPClause *OMPClauseReader::readClause() {
     C = OMPSizesClause::CreateEmpty(Context, NumSizes);
     break;
   }
+  case llvm::omp::OMPC_full:
+    C = OMPFullClause::CreateEmpty(Context);
+    break;
+  case llvm::omp::OMPC_partial:
+    C = OMPPartialClause::CreateEmpty(Context);
+    break;
   case llvm::omp::OMPC_allocator:
     C = new (Context) OMPAllocatorClause();
     break;
@@ -12032,6 +12038,13 @@ void OMPClauseReader::VisitOMPSizesClause(OMPSizesClause *C) {
   C->setLParenLoc(Record.readSourceLocation());
 }
 
+void OMPClauseReader::VisitOMPFullClause(OMPFullClause *C) {}
+
+void OMPClauseReader::VisitOMPPartialClause(OMPPartialClause *C) {
+  C->setFactor(Record.readSubExpr());
+  C->setLParenLoc(Record.readSourceLocation());
+}
+
 void OMPClauseReader::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   C->setAllocator(Record.readExpr());
   C->setLParenLoc(Record.readSourceLocation());

diff  --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp
index c1f66709c8515..62d7a99b52c70 100644
--- a/clang/lib/Serialization/ASTReaderStmt.cpp
+++ b/clang/lib/Serialization/ASTReaderStmt.cpp
@@ -2321,6 +2321,10 @@ void ASTStmtReader::VisitOMPTileDirective(OMPTileDirective *D) {
   VisitOMPLoopBasedDirective(D);
 }
 
+void ASTStmtReader::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
+  VisitOMPLoopBasedDirective(D);
+}
+
 void ASTStmtReader::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   D->setHasCancel(Record.readBool());
@@ -3201,6 +3205,14 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) {
       break;
     }
 
+    case STMT_OMP_UNROLL_DIRECTIVE: {
+      unsigned NumLoops = Record[ASTStmtReader::NumStmtFields];
+      assert(NumLoops == 1 && "Unroll directive accepts only a single loop");
+      unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];
+      S = OMPUnrollDirective::CreateEmpty(Context, NumClauses);
+      break;
+    }
+
     case STMT_OMP_FOR_DIRECTIVE: {
       unsigned CollapsedNum = Record[ASTStmtReader::NumStmtFields];
       unsigned NumClauses = Record[ASTStmtReader::NumStmtFields + 1];

diff  --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 5f038b318e268..4cdcf53775de1 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -6057,6 +6057,13 @@ void OMPClauseWriter::VisitOMPSizesClause(OMPSizesClause *C) {
   Record.AddSourceLocation(C->getLParenLoc());
 }
 
+void OMPClauseWriter::VisitOMPFullClause(OMPFullClause *C) {}
+
+void OMPClauseWriter::VisitOMPPartialClause(OMPPartialClause *C) {
+  Record.AddStmt(C->getFactor());
+  Record.AddSourceLocation(C->getLParenLoc());
+}
+
 void OMPClauseWriter::VisitOMPAllocatorClause(OMPAllocatorClause *C) {
   Record.AddStmt(C->getAllocator());
   Record.AddSourceLocation(C->getLParenLoc());

diff  --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp
index 10be9b894c277..2bb5e4f3563df 100644
--- a/clang/lib/Serialization/ASTWriterStmt.cpp
+++ b/clang/lib/Serialization/ASTWriterStmt.cpp
@@ -2222,6 +2222,11 @@ void ASTStmtWriter::VisitOMPTileDirective(OMPTileDirective *D) {
   Code = serialization::STMT_OMP_TILE_DIRECTIVE;
 }
 
+void ASTStmtWriter::VisitOMPUnrollDirective(OMPUnrollDirective *D) {
+  VisitOMPLoopBasedDirective(D);
+  Code = serialization::STMT_OMP_UNROLL_DIRECTIVE;
+}
+
 void ASTStmtWriter::VisitOMPForDirective(OMPForDirective *D) {
   VisitOMPLoopDirective(D);
   Record.writeBool(D->hasCancel());

diff  --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index d40161f289f88..66332d3b848cd 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1297,7 +1297,8 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred,
     case Stmt::OMPInteropDirectiveClass:
     case Stmt::OMPDispatchDirectiveClass:
     case Stmt::OMPMaskedDirectiveClass:
-    case Stmt::CapturedStmtClass: {
+    case Stmt::CapturedStmtClass:
+    case Stmt::OMPUnrollDirectiveClass: {
       const ExplodedNode *node = Bldr.generateSink(S, Pred, Pred->getState());
       Engine.addAbortedBlock(node, currBldrCtx->getBlock());
       break;

diff  --git a/clang/test/OpenMP/unroll_ast_print.cpp b/clang/test/OpenMP/unroll_ast_print.cpp
new file mode 100644
index 0000000000000..63e7b1dbe6eda
--- /dev/null
+++ b/clang/test/OpenMP/unroll_ast_print.cpp
@@ -0,0 +1,127 @@
+// Check no warnings/errors
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -fsyntax-only -verify %s
+// expected-no-diagnostics
+
+// Check AST and unparsing
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-dump  %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -ast-print %s | FileCheck %s --check-prefix=PRINT --match-full-lines
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-dump-all %s | FileCheck %s --check-prefix=DUMP
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -ast-print    %s | FileCheck %s --check-prefix=PRINT --match-full-lines
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+void body(...);
+
+
+// PRINT-LABEL: void func_unroll() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll
+void func_unroll() {
+  // PRINT:  #pragma omp unroll
+  // DUMP:   OMPUnrollDirective
+  #pragma omp unroll
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_full() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_full 
+void func_unroll_full() {
+  // PRINT:     #pragma omp unroll full
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT:   OMPFullClause
+  #pragma omp unroll full
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_partial() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_partial 
+void func_unroll_partial() {
+  // PRINT:     #pragma omp unroll partial
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT:   OMPPartialClause
+  // DUMP-NEXT:     <<<NULL>>>
+  #pragma omp unroll partial
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_partial_factor() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_partial_factor 
+void func_unroll_partial_factor() {
+  // PRINT:     #pragma omp unroll partial(4)
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT:   OMPPartialClause
+  // DUMP-NEXT:     ConstantExpr
+  // DUMP-NEXT:       value: Int 4
+  // DUMP-NEXT:       IntegerLiteral {{.*}} 4
+  #pragma omp unroll partial(4)
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP-NEXT: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: void func_unroll_partial_factor_for() {
+// DUMP-LABEL:  FunctionDecl {{.*}} func_unroll_partial_factor_for 
+void func_unroll_partial_factor_for() {
+  // PRINT:     #pragma omp for
+  // DUMP:      OMPForDirective
+  #pragma omp for
+  // PRINT:       #pragma omp unroll partial(2)
+  // DUMP:        OMPUnrollDirective
+  // DUMP-NEXT:     OMPPartialClause
+  #pragma omp unroll partial(2)
+  // PRINT-NEXT: for (int i = 7; i < 17; i += 3)
+  // DUMP: ForStmt
+  for (int i = 7; i < 17; i += 3)
+    // PRINT-NEXT: body(i);
+    // DUMP: CallExpr
+    body(i);
+}
+
+
+// PRINT-LABEL: template <typename T, T Start, T End, T Step, int Factor> void unroll_templated() {
+// DUMP-LABEL:  FunctionTemplateDecl {{.*}} unroll_templated
+template<typename T, T Start, T End, T Step, int Factor>
+void unroll_templated() {
+  // PRINT: #pragma omp unroll partial(Factor)
+  // DUMP:      OMPUnrollDirective
+  // DUMP-NEXT: OMPPartialClause
+  // DUMP-NEXT:   DeclRefExpr {{.*}} 'Factor' 'int'
+  #pragma omp unroll partial(Factor)
+    // PRINT-NEXT: for (T i = Start; i < End; i += Step)
+    // DUMP-NEXT:  ForStmt
+    for (T i = Start; i < End; i += Step)
+      // PRINT-NEXT: body(i);
+      // DUMP:  CallExpr
+      body(i);
+}
+void unroll_template() {
+  unroll_templated<int,0,1024,1,4>();
+}
+
+#endif

diff  --git a/clang/test/OpenMP/unroll_codegen_factor.cpp b/clang/test/OpenMP/unroll_codegen_factor.cpp
new file mode 100644
index 0000000000000..e5280e3439c6c
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_factor.cpp
@@ -0,0 +1,65 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP0]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp unroll partial(4)
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 4}
+// IR: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.enable"}

diff  --git a/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
new file mode 100644
index 0000000000000..879db7b911dbb
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_for_collapse_outer.cpp
@@ -0,0 +1,251 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[J:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_4:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_5:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_7:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_10:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I22:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_J23:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1_1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1_1]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[J]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP5]], i32* %[[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP6]], i32* %[[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP8]], %[[TMP9]]
+// IR-NEXT:    %[[SUB8:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB8]], %[[TMP10]]
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP11]]
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB9]], i32* %[[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    %[[ADD11:.+]] = add i32 %[[TMP12]], 1
+// IR-NEXT:    store i32 %[[ADD11]], i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB13:.+]] = sub i32 %[[TMP13]], %[[TMP14]]
+// IR-NEXT:    %[[SUB14:.+]] = sub i32 %[[SUB13]], 1
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD15:.+]] = add i32 %[[SUB14]], %[[TMP15]]
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[ADD15]], %[[TMP16]]
+// IR-NEXT:    %[[CONV:.+]] = zext i32 %[[DIV16]] to i64
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[TMP17]], -1
+// IR-NEXT:    %[[DIV18:.+]] = udiv i32 %[[SUB17]], 2
+// IR-NEXT:    %[[CONV19:.+]] = zext i32 %[[DIV18]] to i64
+// IR-NEXT:    %[[MUL:.+]] = mul nsw i64 %[[CONV]], %[[CONV19]]
+// IR-NEXT:    %[[SUB20:.+]] = sub nsw i64 %[[MUL]], 1
+// IR-NEXT:    store i64 %[[SUB20]], i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    store i32 %[[TMP18]], i32* %[[I]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP19]], %[[TMP20]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[LAND_LHS_TRUE:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_LHS_TRUE]]:
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 0, %[[TMP21]]
+// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i64 0, i64* %[[DOTOMP_LB]], align 8
+// IR-NEXT:    %[[TMP22:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    store i64 %[[TMP22]], i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    store i64 1, i64* %[[DOTOMP_STRIDE]], align 8
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1)
+// IR-NEXT:    %[[TMP23:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    %[[TMP24:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    %[[CMP24:.+]] = icmp sgt i64 %[[TMP23]], %[[TMP24]]
+// IR-NEXT:    br i1 %[[CMP24]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP25:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_12]], align 8
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i64 [ %[[TMP25]], %[[COND_TRUE]] ], [ %[[TMP26]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    %[[TMP27:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8
+// IR-NEXT:    store i64 %[[TMP27]], i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP28:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP29:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
+// IR-NEXT:    %[[CMP25:.+]] = icmp sle i64 %[[TMP28]], %[[TMP29]]
+// IR-NEXT:    br i1 %[[CMP25]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[CONV26:.+]] = sext i32 %[[TMP30]] to i64
+// IR-NEXT:    %[[TMP31:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB27:.+]] = sub i32 %[[TMP32]], -1
+// IR-NEXT:    %[[DIV28:.+]] = udiv i32 %[[SUB27]], 2
+// IR-NEXT:    %[[MUL29:.+]] = mul i32 1, %[[DIV28]]
+// IR-NEXT:    %[[CONV30:.+]] = zext i32 %[[MUL29]] to i64
+// IR-NEXT:    %[[DIV31:.+]] = sdiv i64 %[[TMP31]], %[[CONV30]]
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[CONV32:.+]] = sext i32 %[[TMP33]] to i64
+// IR-NEXT:    %[[MUL33:.+]] = mul nsw i64 %[[DIV31]], %[[CONV32]]
+// IR-NEXT:    %[[ADD34:.+]] = add nsw i64 %[[CONV26]], %[[MUL33]]
+// IR-NEXT:    %[[CONV35:.+]] = trunc i64 %[[ADD34]] to i32
+// IR-NEXT:    store i32 %[[CONV35]], i32* %[[I22]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP35:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB36:.+]] = sub i32 %[[TMP36]], -1
+// IR-NEXT:    %[[DIV37:.+]] = udiv i32 %[[SUB36]], 2
+// IR-NEXT:    %[[MUL38:.+]] = mul i32 1, %[[DIV37]]
+// IR-NEXT:    %[[CONV39:.+]] = zext i32 %[[MUL38]] to i64
+// IR-NEXT:    %[[DIV40:.+]] = sdiv i64 %[[TMP35]], %[[CONV39]]
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_10]], align 4
+// IR-NEXT:    %[[SUB41:.+]] = sub i32 %[[TMP37]], -1
+// IR-NEXT:    %[[DIV42:.+]] = udiv i32 %[[SUB41]], 2
+// IR-NEXT:    %[[MUL43:.+]] = mul i32 1, %[[DIV42]]
+// IR-NEXT:    %[[CONV44:.+]] = zext i32 %[[MUL43]] to i64
+// IR-NEXT:    %[[MUL45:.+]] = mul nsw i64 %[[DIV40]], %[[CONV44]]
+// IR-NEXT:    %[[SUB46:.+]] = sub nsw i64 %[[TMP34]], %[[MUL45]]
+// IR-NEXT:    %[[MUL47:.+]] = mul nsw i64 %[[SUB46]], 2
+// IR-NEXT:    %[[ADD48:.+]] = add nsw i64 0, %[[MUL47]]
+// IR-NEXT:    %[[CONV49:.+]] = trunc i64 %[[ADD48]] to i32
+// IR-NEXT:    store i32 %[[CONV49]], i32* %[[DOTUNROLLED_IV_J23]], align 4
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4
+// IR-NEXT:    store i32 %[[TMP38]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J23]], align 4
+// IR-NEXT:    %[[ADD50:.+]] = add i32 %[[TMP40]], 2
+// IR-NEXT:    %[[CMP51:.+]] = icmp ule i32 %[[TMP39]], %[[ADD50]]
+// IR-NEXT:    br i1 %[[CMP51]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    %[[ADD52:.+]] = add i32 %[[TMP42]], 1
+// IR-NEXT:    %[[CMP53:.+]] = icmp ule i32 %[[TMP41]], %[[ADD52]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP43:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP53]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP43]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP44:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_4]], align 4
+// IR-NEXT:    %[[TMP45:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[TMP46:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[MUL54:.+]] = mul i32 %[[TMP45]], %[[TMP46]]
+// IR-NEXT:    %[[ADD55:.+]] = add i32 %[[TMP44]], %[[MUL54]]
+// IR-NEXT:    store i32 %[[ADD55]], i32* %[[J]], align 4
+// IR-NEXT:    %[[TMP47:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP48:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP49:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP50:.+]] = load i32, i32* %[[I22]], align 4
+// IR-NEXT:    %[[TMP51:.+]] = load i32, i32* %[[J]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP47]], i32 %[[TMP48]], i32 %[[TMP49]], i32 %[[TMP50]], i32 %[[TMP51]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP52:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP52]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP53:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    %[[ADD56:.+]] = add nsw i64 %[[TMP53]], 1
+// IR-NEXT:    store i64 %[[ADD56]], i64* %[[DOTOMP_IV]], align 8
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp for collapse(2)
+  for (int i = start; i < end; i+=step) {
+    #pragma omp unroll partial
+    for (int j = start; j < end; j+=step)
+        body(start, end, step, i, j);
+  }
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}

diff  --git a/clang/test/OpenMP/unroll_codegen_for_partial.cpp b/clang/test/OpenMP/unroll_codegen_for_partial.cpp
new file mode 100644
index 0000000000000..9a6e480e18f48
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_for_partial.cpp
@@ -0,0 +1,187 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP4]], %[[TMP5]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP6]]
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP7]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP8]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP9]], -1
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 2
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP10]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 %[[TMP11]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[CMP13:.+]] = icmp ugt i32 %[[TMP12]], %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP14]], %[[COND_TRUE]] ], [ %[[TMP15]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP16]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD14:.+]] = add i32 %[[TMP18]], 1
+// IR-NEXT:    %[[CMP15:.+]] = icmp ult i32 %[[TMP17]], %[[ADD14]]
+// IR-NEXT:    br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP19]], 2
+// IR-NEXT:    %[[ADD16:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD16]], i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    store i32 %[[TMP20]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP22]], 2
+// IR-NEXT:    %[[CMP18:.+]] = icmp ule i32 %[[TMP21]], %[[ADD17]]
+// IR-NEXT:    br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD19:.+]] = add i32 %[[TMP24]], 1
+// IR-NEXT:    %[[CMP20:.+]] = icmp ule i32 %[[TMP23]], %[[ADD19]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP25:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP20]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP25]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL21:.+]] = mul i32 %[[TMP27]], %[[TMP28]]
+// IR-NEXT:    %[[ADD22:.+]] = add i32 %[[TMP26]], %[[MUL21]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP29]], i32 %[[TMP30]], i32 %[[TMP31]], i32 %[[TMP32]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP33]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP34]], 1
+// IR-NEXT:    store i32 %[[ADD23]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  int i;
+  #pragma omp for
+  #pragma omp unroll partial
+  for (i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}

diff  --git a/clang/test/OpenMP/unroll_codegen_full.cpp b/clang/test/OpenMP/unroll_codegen_full.cpp
new file mode 100644
index 0000000000000..cf5cac94a32b6
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_full.cpp
@@ -0,0 +1,52 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 7, i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP0]], 17
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP1]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP2]], 3
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func() {
+  #pragma omp unroll full
+  for (int i = 7; i < 17; i += 3)
+    body(i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.full"}

diff  --git a/clang/test/OpenMP/unroll_codegen_heuristic.cpp b/clang/test/OpenMP/unroll_codegen_heuristic.cpp
new file mode 100644
index 0000000000000..d66b53a118b6d
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_heuristic.cpp
@@ -0,0 +1,64 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP0]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp unroll
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}

diff  --git a/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
new file mode 100644
index 0000000000000..cee8fc212c03b
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_parallel_for_factor.cpp
@@ -0,0 +1,210 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @2, i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* %[[END_ADDR]], i32* %[[STEP_ADDR]], i32* %[[START_ADDR]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp parallel for
+  #pragma omp unroll partial(7)
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+
+// IR-LABEL: @.omp_outlined.(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[DOTGLOBAL_TID__ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[DOTBOUND_TID__ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32*, align 8
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32* %[[DOTGLOBAL_TID_:.+]], i32** %[[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT:    store i32* %[[DOTBOUND_TID_:.+]], i32** %[[DOTBOUND_TID__ADDR]], align 8
+// IR-NEXT:    store i32* %[[END:.+]], i32** %[[END_ADDR]], align 8
+// IR-NEXT:    store i32* %[[STEP:.+]], i32** %[[STEP_ADDR]], align 8
+// IR-NEXT:    store i32* %[[START:.+]], i32** %[[START_ADDR]], align 8
+// IR-NEXT:    %[[TMP0:.+]] = load i32*, i32** %[[END_ADDR]], align 8
+// IR-NEXT:    %[[TMP1:.+]] = load i32*, i32** %[[STEP_ADDR]], align 8
+// IR-NEXT:    %[[TMP2:.+]] = load i32*, i32** %[[START_ADDR]], align 8
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[TMP2]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[TMP2]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[TMP0]], align 4
+// IR-NEXT:    store i32 %[[TMP5]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[TMP1]], align 4
+// IR-NEXT:    store i32 %[[TMP6]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP7]], %[[TMP8]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP9]]
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP10]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP11]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP12]], -6
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 7
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    %[[TMP15:.+]] = load i32*, i32** %[[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[TMP15]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP16]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[CMP13:.+]] = icmp ugt i32 %[[TMP17]], %[[TMP18]]
+// IR-NEXT:    br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP19]], %[[COND_TRUE]] ], [ %[[TMP20]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP21]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD14:.+]] = add i32 %[[TMP23]], 1
+// IR-NEXT:    %[[CMP15:.+]] = icmp ult i32 %[[TMP22]], %[[ADD14]]
+// IR-NEXT:    br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP24]], 7
+// IR-NEXT:    %[[ADD16:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD16]], i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    store i32 %[[TMP25]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I12]], align 4
+// IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP27]], 7
+// IR-NEXT:    %[[CMP18:.+]] = icmp ule i32 %[[TMP26]], %[[ADD17]]
+// IR-NEXT:    br i1 %[[CMP18]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD19:.+]] = add i32 %[[TMP29]], 1
+// IR-NEXT:    %[[CMP20:.+]] = icmp ule i32 %[[TMP28]], %[[ADD19]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP30:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP20]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP30]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL21:.+]] = mul i32 %[[TMP32]], %[[TMP33]]
+// IR-NEXT:    %[[ADD22:.+]] = add i32 %[[TMP31]], %[[MUL21]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[TMP2]], align 4
+// IR-NEXT:    %[[TMP35:.+]] = load i32, i32* %[[TMP0]], align 4
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[TMP1]], align 4
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP34]], i32 %[[TMP35]], i32 %[[TMP36]], i32 %[[TMP37]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP38]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP39]], 1
+// IR-NEXT:    store i32 %[[ADD23]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    %[[TMP40:.+]] = load i32*, i32** %[[DOTGLOBAL_TID__ADDR]], align 8
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[TMP40]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP41]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 7}

diff  --git a/clang/test/OpenMP/unroll_codegen_partial.cpp b/clang/test/OpenMP/unroll_codegen_partial.cpp
new file mode 100644
index 0000000000000..9f18d73312695
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_partial.cpp
@@ -0,0 +1,64 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP0:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP0]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp slt i32 %[[TMP1]], %[[TMP2]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP3]], i32 %[[TMP4]], i32 %[[TMP5]], i32 %[[TMP6]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add nsw i32 %[[TMP8]], %[[TMP7]]
+// IR-NEXT:    store i32 %[[ADD]], i32* %[[I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp unroll partial
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.enable"}

diff  --git a/clang/test/OpenMP/unroll_codegen_tile_for.cpp b/clang/test/OpenMP/unroll_codegen_tile_for.cpp
new file mode 100644
index 0000000000000..4f7bc347c3598
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_tile_for.cpp
@@ -0,0 +1,245 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTFLOOR_0_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTFLOOR_0_IV__UNROLLED_IV_I18:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTTILE_0_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -1
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 2
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD13:.+]] = add i32 %[[TMP11]], 1
+// IR-NEXT:    store i32 %[[ADD13]], i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[SUB15:.+]] = sub i32 %[[TMP12]], -3
+// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[SUB15]], 4
+// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[DIV16]], 1
+// IR-NEXT:    store i32 %[[SUB17]], i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]]
+// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP17]], %[[COND_TRUE]] ], [ %[[TMP18]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP19]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD20:.+]] = add i32 %[[TMP21]], 1
+// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]]
+// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP22]], 4
+// IR-NEXT:    %[[ADD22:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    store i32 %[[TMP23]], i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 1
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[ADD24:.+]] = add i32 %[[TMP26]], 4
+// IR-NEXT:    %[[CMP25:.+]] = icmp ult i32 %[[ADD23]], %[[ADD24]]
+// IR-NEXT:    br i1 %[[CMP25]], label %[[COND_TRUE26:.+]], label %[[COND_FALSE28:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE26]]:
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD27:.+]] = add i32 %[[TMP27]], 1
+// IR-NEXT:    br label %[[COND_END30:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE28]]:
+// IR-NEXT:    %[[TMP28:.+]] = load i32, i32* %[[DOTFLOOR_0_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[ADD29:.+]] = add i32 %[[TMP28]], 4
+// IR-NEXT:    br label %[[COND_END30]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END30]]:
+// IR-NEXT:    %[[COND31:.+]] = phi i32 [ %[[ADD27]], %[[COND_TRUE26]] ], [ %[[ADD29]], %[[COND_FALSE28]] ]
+// IR-NEXT:    %[[CMP32:.+]] = icmp ult i32 %[[TMP24]], %[[COND31]]
+// IR-NEXT:    br i1 %[[CMP32]], label %[[FOR_BODY:.+]], label %[[FOR_END45:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[MUL33:.+]] = mul i32 %[[TMP29]], 2
+// IR-NEXT:    %[[ADD34:.+]] = add i32 0, %[[MUL33]]
+// IR-NEXT:    store i32 %[[ADD34]], i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    store i32 %[[TMP30]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND35:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND35]]:
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[ADD36:.+]] = add i32 %[[TMP32]], 2
+// IR-NEXT:    %[[CMP37:.+]] = icmp ule i32 %[[TMP31]], %[[ADD36]]
+// IR-NEXT:    br i1 %[[CMP37]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD38:.+]] = add i32 %[[TMP34]], 1
+// IR-NEXT:    %[[CMP39:.+]] = icmp ule i32 %[[TMP33]], %[[ADD38]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP35:.+]] = phi i1 [ false, %[[FOR_COND35]] ], [ %[[CMP39]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP35]], label %[[FOR_BODY40:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY40]]:
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL41:.+]] = mul i32 %[[TMP37]], %[[TMP38]]
+// IR-NEXT:    %[[ADD42:.+]] = add i32 %[[TMP36]], %[[MUL41]]
+// IR-NEXT:    store i32 %[[ADD42]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP39]], i32 %[[TMP40]], i32 %[[TMP41]], i32 %[[TMP42]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP43:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP43]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND35]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[FOR_INC43:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC43]]:
+// IR-NEXT:    %[[TMP44:.+]] = load i32, i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[INC44:.+]] = add i32 %[[TMP44]], 1
+// IR-NEXT:    store i32 %[[INC44]], i32* %[[DOTTILE_0_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP5:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END45]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP45:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD46:.+]] = add i32 %[[TMP45]], 1
+// IR-NEXT:    store i32 %[[ADD46]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp for
+  #pragma omp tile sizes(4)
+  #pragma omp unroll partial
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}
+// IR: ![[LOOP5]] = distinct !{![[LOOP5]], ![[LOOPPROP3]]}

diff  --git a/clang/test/OpenMP/unroll_codegen_unroll_for.cpp b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
new file mode 100644
index 0000000000000..1d82b6eb77d96
--- /dev/null
+++ b/clang/test/OpenMP/unroll_codegen_unroll_for.cpp
@@ -0,0 +1,239 @@
+// Check code generation
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+
+// Check same results after serialization round-trip
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -emit-pch -o %t %s
+// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -fopenmp-version=51 -include-pch %t -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+// placeholder for loop body code.
+extern "C" void body(...) {}
+
+
+// IR-LABEL: @func(
+// IR-NEXT:  [[ENTRY:.*]]:
+// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLLED_IV__UNROLLED_IV_I18:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[DOTUNROLL_INNER_IV_I:.+]] = alloca i32, align 4
+// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2)
+// IR-NEXT:    store i32 %[[START:.+]], i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[END:.+]], i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[STEP:.+]], i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP1:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP1]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP2:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP2]], i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP3:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP3]], i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP4:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 %[[TMP4]], i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[TMP5:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    %[[TMP6:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
+// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
+// IR-NEXT:    %[[TMP7:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
+// IR-NEXT:    %[[TMP8:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
+// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
+// IR-NEXT:    store i32 %[[SUB5]], i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP9:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
+// IR-NEXT:    store i32 %[[ADD7]], i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[TMP10:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_6]], align 4
+// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -1
+// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 2
+// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
+// IR-NEXT:    store i32 %[[SUB11]], i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[TMP11:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD13:.+]] = add i32 %[[TMP11]], 1
+// IR-NEXT:    store i32 %[[ADD13]], i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[TMP12:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[SUB15:.+]] = sub i32 %[[TMP12]], -1
+// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[SUB15]], 2
+// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[DIV16]], 1
+// IR-NEXT:    store i32 %[[SUB17]], i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP13:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_12]], align 4
+// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
+// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_THEN]]:
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    %[[TMP14:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    store i32 %[[TMP14]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, i32* %[[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @1, i32 %[[TMP0]], i32 34, i32* %[[DOTOMP_IS_LAST]], i32* %[[DOTOMP_LB]], i32* %[[DOTOMP_UB]], i32* %[[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    %[[TMP15:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP16:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]]
+// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_TRUE]]:
+// IR-NEXT:    %[[TMP17:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_14]], align 4
+// IR-NEXT:    br label %[[COND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_FALSE]]:
+// IR-NEXT:    %[[TMP18:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    br label %[[COND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[COND_END]]:
+// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP17]], %[[COND_TRUE]] ], [ %[[TMP18]], %[[COND_FALSE]] ]
+// IR-NEXT:    store i32 %[[COND]], i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[TMP19:.+]] = load i32, i32* %[[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 %[[TMP19]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
+// IR-NEXT:    %[[TMP20:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[TMP21:.+]] = load i32, i32* %[[DOTOMP_UB]], align 4
+// IR-NEXT:    %[[ADD20:.+]] = add i32 %[[TMP21]], 1
+// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]]
+// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
+// IR-NEXT:    %[[TMP22:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP22]], 2
+// IR-NEXT:    %[[ADD22:.+]] = add i32 0, %[[MUL]]
+// IR-NEXT:    store i32 %[[ADD22]], i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[TMP23:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    store i32 %[[TMP23]], i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND]]:
+// IR-NEXT:    %[[TMP24:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP25:.+]] = load i32, i32* %[[DOTUNROLLED_IV__UNROLLED_IV_I18]], align 4
+// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 2
+// IR-NEXT:    %[[CMP24:.+]] = icmp ule i32 %[[TMP24]], %[[ADD23]]
+// IR-NEXT:    br i1 %[[CMP24]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS]]:
+// IR-NEXT:    %[[TMP26:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP27:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_8]], align 4
+// IR-NEXT:    %[[ADD25:.+]] = add i32 %[[TMP27]], 1
+// IR-NEXT:    %[[CMP26:.+]] = icmp ule i32 %[[TMP26]], %[[ADD25]]
+// IR-NEXT:    br label %[[LAND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END]]:
+// IR-NEXT:    %[[TMP28:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP26]], %[[LAND_RHS]] ]
+// IR-NEXT:    br i1 %[[TMP28]], label %[[FOR_BODY:.+]], label %[[FOR_END41:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY]]:
+// IR-NEXT:    %[[TMP29:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[MUL27:.+]] = mul i32 %[[TMP29]], 2
+// IR-NEXT:    %[[ADD28:.+]] = add i32 0, %[[MUL27]]
+// IR-NEXT:    store i32 %[[ADD28]], i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[TMP30:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    store i32 %[[TMP30]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND29:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_COND29]]:
+// IR-NEXT:    %[[TMP31:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[ADD30:.+]] = add i32 %[[TMP32]], 2
+// IR-NEXT:    %[[CMP31:.+]] = icmp ule i32 %[[TMP31]], %[[ADD30]]
+// IR-NEXT:    br i1 %[[CMP31]], label %[[LAND_RHS32:.+]], label %[[LAND_END35:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_RHS32]]:
+// IR-NEXT:    %[[TMP33:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP34:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_3]], align 4
+// IR-NEXT:    %[[ADD33:.+]] = add i32 %[[TMP34]], 1
+// IR-NEXT:    %[[CMP34:.+]] = icmp ule i32 %[[TMP33]], %[[ADD33]]
+// IR-NEXT:    br label %[[LAND_END35]]
+// IR-EMPTY:
+// IR-NEXT:  [[LAND_END35]]:
+// IR-NEXT:    %[[TMP35:.+]] = phi i1 [ false, %[[FOR_COND29]] ], [ %[[CMP34]], %[[LAND_RHS32]] ]
+// IR-NEXT:    br i1 %[[TMP35]], label %[[FOR_BODY36:.+]], label %[[FOR_END:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_BODY36]]:
+// IR-NEXT:    %[[TMP36:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    %[[TMP37:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[TMP38:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    %[[MUL37:.+]] = mul i32 %[[TMP37]], %[[TMP38]]
+// IR-NEXT:    %[[ADD38:.+]] = add i32 %[[TMP36]], %[[MUL37]]
+// IR-NEXT:    store i32 %[[ADD38]], i32* %[[I]], align 4
+// IR-NEXT:    %[[TMP39:.+]] = load i32, i32* %[[START_ADDR]], align 4
+// IR-NEXT:    %[[TMP40:.+]] = load i32, i32* %[[END_ADDR]], align 4
+// IR-NEXT:    %[[TMP41:.+]] = load i32, i32* %[[STEP_ADDR]], align 4
+// IR-NEXT:    %[[TMP42:.+]] = load i32, i32* %[[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 %[[TMP39]], i32 %[[TMP40]], i32 %[[TMP41]], i32 %[[TMP42]])
+// IR-NEXT:    br label %[[FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC]]:
+// IR-NEXT:    %[[TMP43:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    %[[INC:.+]] = add i32 %[[TMP43]], 1
+// IR-NEXT:    store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND29]], !llvm.loop ![[LOOP2:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END]]:
+// IR-NEXT:    br label %[[FOR_INC39:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_INC39]]:
+// IR-NEXT:    %[[TMP44:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    %[[INC40:.+]] = add i32 %[[TMP44]], 1
+// IR-NEXT:    store i32 %[[INC40]], i32* %[[DOTUNROLL_INNER_IV__UNROLLED_IV_I]], align 4
+// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP5:[0-9]+]]
+// IR-EMPTY:
+// IR-NEXT:  [[FOR_END41]]:
+// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
+// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
+// IR-NEXT:    %[[TMP45:.+]] = load i32, i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    %[[ADD42:.+]] = add i32 %[[TMP45]], 1
+// IR-NEXT:    store i32 %[[ADD42]], i32* %[[DOTOMP_IV]], align 4
+// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_INNER_FOR_END]]:
+// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_LOOP_EXIT]]:
+// IR-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[TMP0]])
+// IR-NEXT:    br label %[[OMP_PRECOND_END]]
+// IR-EMPTY:
+// IR-NEXT:  [[OMP_PRECOND_END]]:
+// IR-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @3, i32 %[[TMP0]])
+// IR-NEXT:    ret void
+// IR-NEXT:  }
+extern "C" void func(int start, int end, int step) {
+  #pragma omp for
+  #pragma omp unroll partial
+  #pragma omp unroll partial
+  for (int i = start; i < end; i+=step)
+    body(start, end, step, i);
+}
+
+#endif /* HEADER */
+
+
+// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]], ![[LOOPPROP4:[0-9]+]]}
+// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
+// IR: ![[LOOPPROP4]] = !{!"llvm.loop.unroll.count", i32 2}
+// IR: ![[LOOP5]] = distinct !{![[LOOP5]], ![[LOOPPROP3]], ![[LOOPPROP4]]}

diff  --git a/clang/test/OpenMP/unroll_messages.cpp b/clang/test/OpenMP/unroll_messages.cpp
new file mode 100644
index 0000000000000..d167ce3194fd9
--- /dev/null
+++ b/clang/test/OpenMP/unroll_messages.cpp
@@ -0,0 +1,130 @@
+// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -std=c++17 -fopenmp -fopenmp-version=51 -fsyntax-only -Wuninitialized -verify %s
+
+void func(int n) {
+  // expected-error@+2 {{statement after '#pragma omp unroll' must be a for loop}}
+  #pragma omp unroll
+  func(n);
+
+  // expected-error@+2 {{statement after '#pragma omp unroll' must be a for loop}}
+  #pragma omp unroll
+    ;
+
+  // expected-error@+2 {{the loop condition expression depends on the current loop control variable}}
+  #pragma omp unroll
+  for (int i = 0; i < 2*(i-4); ++i) {}
+
+  // expected-error@+2 {{condition of OpenMP for loop must be a relational comparison ('<', '<=', '>', '>=', or '!=') of loop variable 'i'}}
+  #pragma omp unroll
+  for (int i = 0; i/3 < 7; ++i) {}
+
+  // expected-warning@+1 {{extra tokens at the end of '#pragma omp unroll' are ignored}}
+  #pragma omp unroll foo
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(
+  for (int i = 0; i < n; ++i) {}
+  
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(4
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(4+
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}} expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(for)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'void (int)'}}
+  #pragma omp unroll partial(func)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected expression}}
+  #pragma omp unroll partial()
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{expected ')'}} expected-note@+1 {{to match this '('}}
+  #pragma omp unroll partial(4,4)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+3 {{expression is not an integral constant expression}} expected-note@+3 {{read of non-const variable 'a' is not allowed in a constant expression}}
+  // expected-note@+1 {{declared here}}
+  int a;
+  #pragma omp unroll partial(a)
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{argument to 'partial' clause must be a strictly positive integer value}}
+  #pragma omp unroll partial(0)
+  for (int i = 0; i < n; ++i) {}
+    
+  // expected-error@+1 {{directive '#pragma omp unroll' cannot contain more than one 'partial' clause}}
+  #pragma omp unroll partial partial
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{directive '#pragma omp unroll' cannot contain more than one 'partial' clause}}
+  #pragma omp unroll partial(4) partial
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{directive '#pragma omp unroll' cannot contain more than one 'full' clause}}
+  #pragma omp unroll full full
+  for (int i = 0; i < 128; ++i) {}
+
+  // expected-error@+1 {{'full' and 'partial' clause are mutually exclusive and may not appear on the same directive}} expected-note@+1 {{'partial' clause is specified here}}
+  #pragma omp unroll partial full
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+1 {{'partial' and 'full' clause are mutually exclusive and may not appear on the same directive}} expected-note@+1 {{'full' clause is specified here}}
+  #pragma omp unroll full partial
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{loop to be fully unrolled must have a constant trip count}} expected-note@+1 {{'#pragma omp unroll full' directive found here}}
+  #pragma omp unroll full
+  for (int i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{statement after '#pragma omp for' must be a for loop}}
+  #pragma omp for
+  #pragma omp unroll
+  for (int i = 0; i < n; ++i) {}
+
+    // expected-error@+2 {{statement after '#pragma omp for' must be a for loop}}
+  #pragma omp for
+  #pragma omp unroll full
+  for (int i = 0; i < 128; ++i) {}
+
+  // expected-error@+2 {{statement after '#pragma omp unroll' must be a for loop}}
+  #pragma omp unroll
+  #pragma omp unroll
+  for (int i = 0; i < n; ++i) {}
+  
+  // expected-error@+2 {{statement after '#pragma omp tile' must be a for loop}}
+  #pragma omp tile sizes(4)
+  #pragma omp unroll
+  for (int i = 0; i < n; ++i) {}
+  
+  // expected-error@+4 {{expected 2 for loops after '#pragma omp for', but found only 1}}
+  // expected-note@+1 {{as specified in 'collapse' clause}}
+  #pragma omp for collapse(2)
+  for (int i = 0; i < n; ++i) {
+    #pragma omp unroll full
+    for (int j = 0; j < 128; ++j) {}
+  }
+}
+
+
+template<typename T, int Factor>
+void templated_func(int n) {
+  // expected-error@+1 {{argument to 'partial' clause must be a strictly positive integer value}}
+  #pragma omp unroll partial(Factor)
+  for (T i = 0; i < n; ++i) {}
+
+  // expected-error@+2 {{loop to be fully unrolled must have a constant trip count}} expected-note@+1 {{'#pragma omp unroll full' directive found here}}
+  #pragma omp unroll full
+  for (int i = 0; i < n; i-=Factor) {}
+}
+
+void template_inst(int n) {
+  // expected-note@+1 {{in instantiation of function template specialization 'templated_func<int, -1>' requested here}}
+  templated_func<int, -1>(n);
+}

diff  --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index 89f7063b73643..e3d34e1cf59bb 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -2045,6 +2045,7 @@ class EnqueueVisitor : public ConstStmtVisitor<EnqueueVisitor, void> {
   void VisitOMPParallelDirective(const OMPParallelDirective *D);
   void VisitOMPSimdDirective(const OMPSimdDirective *D);
   void VisitOMPTileDirective(const OMPTileDirective *D);
+  void VisitOMPUnrollDirective(const OMPUnrollDirective *D);
   void VisitOMPForDirective(const OMPForDirective *D);
   void VisitOMPForSimdDirective(const OMPForSimdDirective *D);
   void VisitOMPSectionsDirective(const OMPSectionsDirective *D);
@@ -2223,6 +2224,12 @@ void OMPClauseEnqueue::VisitOMPSizesClause(const OMPSizesClause *C) {
     Visitor->AddStmt(E);
 }
 
+void OMPClauseEnqueue::VisitOMPFullClause(const OMPFullClause *C) {}
+
+void OMPClauseEnqueue::VisitOMPPartialClause(const OMPPartialClause *C) {
+  Visitor->AddStmt(C->getFactor());
+}
+
 void OMPClauseEnqueue::VisitOMPAllocatorClause(const OMPAllocatorClause *C) {
   Visitor->AddStmt(C->getAllocator());
 }
@@ -2896,6 +2903,10 @@ void EnqueueVisitor::VisitOMPTileDirective(const OMPTileDirective *D) {
   VisitOMPLoopBasedDirective(D);
 }
 
+void EnqueueVisitor::VisitOMPUnrollDirective(const OMPUnrollDirective *D) {
+  VisitOMPLoopBasedDirective(D);
+}
+
 void EnqueueVisitor::VisitOMPForDirective(const OMPForDirective *D) {
   VisitOMPLoopDirective(D);
 }
@@ -5575,6 +5586,8 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) {
     return cxstring::createRef("OMPSimdDirective");
   case CXCursor_OMPTileDirective:
     return cxstring::createRef("OMPTileDirective");
+  case CXCursor_OMPUnrollDirective:
+    return cxstring::createRef("OMPUnrollDirective");
   case CXCursor_OMPForDirective:
     return cxstring::createRef("OMPForDirective");
   case CXCursor_OMPForSimdDirective:

diff  --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp
index 7c6b7bcd05937..6fb47300efb82 100644
--- a/clang/tools/libclang/CXCursor.cpp
+++ b/clang/tools/libclang/CXCursor.cpp
@@ -652,6 +652,9 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent,
   case Stmt::OMPTileDirectiveClass:
     K = CXCursor_OMPTileDirective;
     break;
+  case Stmt::OMPUnrollDirectiveClass:
+    K = CXCursor_OMPUnrollDirective;
+    break;
   case Stmt::OMPForDirectiveClass:
     K = CXCursor_OMPForDirective;
     break;

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 4db8bcbab27bc..3dc6194c78308 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -68,6 +68,8 @@ def OMPC_Private : Clause<"private"> {
   let flangClass = "OmpObjectList";
 }
 def OMPC_Sizes: Clause<"sizes"> { let clangClass = "OMPSizesClause"; }
+def OMPC_Full: Clause<"full"> { let clangClass = "OMPFullClause"; }
+def OMPC_Partial: Clause<"partial"> { let clangClass = "OMPPartialClause"; }
 def OMPC_FirstPrivate : Clause<"firstprivate"> {
   let clangClass = "OMPFirstprivateClause";
   let flangClass = "OmpObjectList";
@@ -400,6 +402,12 @@ def OMP_Tile : Directive<"tile"> {
     VersionedClause<OMPC_Sizes, 51>,
   ];
 }
+def OMP_Unroll : Directive<"unroll"> {
+  let allowedOnceClauses = [
+    VersionedClause<OMPC_Full, 51>,
+    VersionedClause<OMPC_Partial, 51>,
+  ];
+}
 def OMP_For : Directive<"for"> {
   let allowedClauses = [
     VersionedClause<OMPC_Private>,
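
The TableGen entry above registers 'unroll' with 'full' and 'partial' as allowedOnceClauses, matching the "cannot contain more than one ... clause" diagnostics in the messages test. A minimal, non-authoritative sketch of a use the definition permits (hypothetical kernel, not from this patch):

  // illustrative only; axpy-style kernel made up for this sketch
  extern "C" void axpy(int n, float a, const float *x, float *y) {
    #pragma omp unroll partial(4)   // factor: strictly positive integer constant
    for (int i = 0; i < n; ++i)
      y[i] += a * x[i];
  }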


        

