[clang] [Clang][OpenMP][Tile] Ensure AST node uniqueness. (PR #91325)

Michael Kruse via cfe-commits cfe-commits at lists.llvm.org
Tue May 7 06:19:04 PDT 2024


https://github.com/Meinersbur created https://github.com/llvm/llvm-project/pull/91325

One of the constraints of an AST is that every node object must appear at most once; hence we define lambdas that create a new AST node at every use.

>From 1c1910b6885cd5be18cb15e364569f2a2f662955 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project at meinersbur.de>
Date: Tue, 7 May 2024 14:47:45 +0200
Subject: [PATCH] Make unique instances

---
 clang/lib/Sema/SemaOpenMP.cpp                 |  77 ++--
 clang/test/OpenMP/tile_codegen.cpp            |  58 +--
 .../OpenMP/tile_codegen_for_dependent.cpp     | 326 +++++++------
 clang/test/OpenMP/tile_codegen_tile_for.cpp   | 435 +++++++++---------
 4 files changed, 447 insertions(+), 449 deletions(-)

diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index cf5447f223d45..ba86bd4e62786 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -15109,6 +15109,8 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                                 SourceLocation StartLoc,
                                                 SourceLocation EndLoc) {
   ASTContext &Context = getASTContext();
+  Scope *CurScope = SemaRef.getCurScope();
+
   auto SizesClauses =
       OMPExecutableDirective::getClausesOfKind<OMPSizesClause>(Clauses);
   if (SizesClauses.empty()) {
@@ -15137,6 +15139,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
                                     NumLoops, AStmt, nullptr, nullptr);
 
   SmallVector<Decl *, 4> PreInits;
+  CaptureVars CopyTransformer(SemaRef);
 
   // Create iteration variables for the generated loops.
   SmallVector<VarDecl *, 4> FloorIndVars;
@@ -15194,25 +15197,37 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
   // Once the original iteration values are set, append the innermost body.
   Stmt *Inner = Body;
 
+  auto MakeDimTileSize = [&SemaRef = this->SemaRef, &CopyTransformer, &Context,
+                          SizesClause, CurScope](int I) -> Expr * {
+    Expr *DimTileSizeExpr = SizesClause->getSizesRefs()[I];
+    return AssertSuccess(CopyTransformer.TransformExpr(DimTileSizeExpr));
+  };
+
   // Create tile loops from the inside to the outside.
   for (int I = NumLoops - 1; I >= 0; --I) {
     OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I];
     Expr *NumIterations = LoopHelper.NumIterations;
     auto *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters[0]);
-    QualType CntTy = OrigCntVar->getType();
-    Expr *DimTileSize = SizesClause->getSizesRefs()[I];
-    Scope *CurScope = SemaRef.getCurScope();
-
-    // Commonly used variables.
-    DeclRefExpr *TileIV = buildDeclRefExpr(SemaRef, TileIndVars[I], CntTy,
-                                           OrigCntVar->getExprLoc());
-    DeclRefExpr *FloorIV = buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy,
-                                            OrigCntVar->getExprLoc());
+    QualType IVTy = NumIterations->getType();
+
+    // Commonly used variables. One of the constraints of an AST is that every
+    // node object must appear at most once, hence we define lambdas that create
+    // a new AST node at every use.
+    auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, IVTy,
+                          OrigCntVar]() {
+      return buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy,
+                              OrigCntVar->getExprLoc());
+    };
+    auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy,
+                           OrigCntVar]() {
+      return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy,
+                              OrigCntVar->getExprLoc());
+    };
 
     // For init-statement: auto .tile.iv = .floor.iv
-    SemaRef.AddInitializerToDecl(TileIndVars[I],
-                                 SemaRef.DefaultLvalueConversion(FloorIV).get(),
-                                 /*DirectInit=*/false);
+    SemaRef.AddInitializerToDecl(
+        TileIndVars[I], SemaRef.DefaultLvalueConversion(MakeFloorIVRef()).get(),
+        /*DirectInit=*/false);
     Decl *CounterDecl = TileIndVars[I];
     StmtResult InitStmt = new (Context)
         DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1),
@@ -15220,10 +15235,11 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
     if (!InitStmt.isUsable())
       return StmtError();
 
-    // For cond-expression: .tile.iv < min(.floor.iv + DimTileSize,
-    // NumIterations)
-    ExprResult EndOfTile = SemaRef.BuildBinOp(
-        CurScope, LoopHelper.Cond->getExprLoc(), BO_Add, FloorIV, DimTileSize);
+    // For cond-expression:
+    //   .tile.iv < min(.floor.iv + DimTileSize, NumIterations)
+    ExprResult EndOfTile =
+        SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_Add,
+                           MakeFloorIVRef(), MakeDimTileSize(I));
     if (!EndOfTile.isUsable())
       return StmtError();
     ExprResult IsPartialTile =
@@ -15238,25 +15254,28 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
       return StmtError();
     ExprResult CondExpr =
         SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
-                           TileIV, MinTileAndIterSpace.get());
+                           MakeTileIVRef(), MinTileAndIterSpace.get());
     if (!CondExpr.isUsable())
       return StmtError();
 
     // For incr-statement: ++.tile.iv
     ExprResult IncrStmt = SemaRef.BuildUnaryOp(
-        CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, TileIV);
+        CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeTileIVRef());
     if (!IncrStmt.isUsable())
       return StmtError();
 
     // Statements to set the original iteration variable's value from the
     // logical iteration number.
     // Generated for loop is:
+    // \code
     // Original_for_init;
-    // for (auto .tile.iv = .floor.iv; .tile.iv < min(.floor.iv + DimTileSize,
-    // NumIterations); ++.tile.iv) {
+    // for (auto .tile.iv = .floor.iv;
+    //      .tile.iv < min(.floor.iv + DimTileSize, NumIterations);
+    //      ++.tile.iv) {
     //   Original_Body;
     //   Original_counter_update;
     // }
+    // \endcode
     // FIXME: If the innermost body is an loop itself, inserting these
     // statements stops it being recognized  as a perfectly nested loop (e.g.
     // for applying tiling again). If this is the case, sink the expressions
@@ -15277,13 +15296,14 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
     auto &LoopHelper = LoopHelpers[I];
     Expr *NumIterations = LoopHelper.NumIterations;
     DeclRefExpr *OrigCntVar = cast<DeclRefExpr>(LoopHelper.Counters[0]);
-    QualType CntTy = OrigCntVar->getType();
-    Expr *DimTileSize = SizesClause->getSizesRefs()[I];
-    Scope *CurScope = SemaRef.getCurScope();
+    QualType IVTy = NumIterations->getType();
 
     // Commonly used variables.
-    DeclRefExpr *FloorIV = buildDeclRefExpr(SemaRef, FloorIndVars[I], CntTy,
-                                            OrigCntVar->getExprLoc());
+    auto MakeFloorIVRef = [&SemaRef = this->SemaRef, &FloorIndVars, I, IVTy,
+                           OrigCntVar]() {
+      return buildDeclRefExpr(SemaRef, FloorIndVars[I], IVTy,
+                              OrigCntVar->getExprLoc());
+    };
 
     // For init-statement: auto .floor.iv = 0
     SemaRef.AddInitializerToDecl(
@@ -15298,15 +15318,16 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
       return StmtError();
 
     // For cond-expression: .floor.iv < NumIterations
-    ExprResult CondExpr = SemaRef.BuildBinOp(
-        CurScope, LoopHelper.Cond->getExprLoc(), BO_LT, FloorIV, NumIterations);
+    ExprResult CondExpr =
+        SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
+                           MakeFloorIVRef(), NumIterations);
     if (!CondExpr.isUsable())
       return StmtError();
 
     // For incr-statement: .floor.iv += DimTileSize
     ExprResult IncrStmt =
         SemaRef.BuildBinOp(CurScope, LoopHelper.Inc->getExprLoc(), BO_AddAssign,
-                           FloorIV, DimTileSize);
+                           MakeFloorIVRef(), MakeDimTileSize(I));
     if (!IncrStmt.isUsable())
       return StmtError();
 
diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp
index 76cf2d8f1992d..45f4cf0a44a6b 100644
--- a/clang/test/OpenMP/tile_codegen.cpp
+++ b/clang/test/OpenMP/tile_codegen.cpp
@@ -98,7 +98,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC1Ev
-// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 {
+// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
@@ -108,7 +108,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@_ZN1SC2Ev
-// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 {
+// CHECK1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR0]] comdat align 2 {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[I:%.*]] = alloca ptr, align 8
@@ -219,7 +219,7 @@ extern "C" void tfoo7() {
 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT:    [[ADD7:%.*]] = add i32 [[TMP11]], 1
 // CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5
+// CHECK1-NEXT:    [[ADD8:%.*]] = add i32 [[TMP12]], 5
 // CHECK1-NEXT:    [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]]
 // CHECK1-NEXT:    br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK1:       cond.true:
@@ -228,7 +228,7 @@ extern "C" void tfoo7() {
 // CHECK1-NEXT:    br label [[COND_END:%.*]]
 // CHECK1:       cond.false:
 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5
+// CHECK1-NEXT:    [[ADD11:%.*]] = add i32 [[TMP14]], 5
 // CHECK1-NEXT:    br label [[COND_END]]
 // CHECK1:       cond.end:
 // CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ]
@@ -246,14 +246,14 @@ extern "C" void tfoo7() {
 // CHECK1-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK1:       for.inc:
 // CHECK1-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP19]], 1
 // CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
 // CHECK1-NEXT:    br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]]
 // CHECK1:       for.end:
 // CHECK1-NEXT:    br label [[FOR_INC15:%.*]]
 // CHECK1:       for.inc15:
 // CHECK1-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5
+// CHECK1-NEXT:    [[ADD16:%.*]] = add i32 [[TMP20]], 5
 // CHECK1-NEXT:    store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4
 // CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
 // CHECK1:       for.end17:
@@ -885,7 +885,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK1-LABEL: define {{[^@]+}}@foo6.omp_outlined
-// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK1-NEXT:  entry:
 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -1031,7 +1031,7 @@ extern "C" void tfoo7() {
 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK1-NEXT:    [[ADD7:%.*]] = add i32 [[TMP9]], 1
 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP10]], 5
+// CHECK1-NEXT:    [[ADD8:%.*]] = add i32 [[TMP10]], 5
 // CHECK1-NEXT:    [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]]
 // CHECK1-NEXT:    br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK1:       cond.true:
@@ -1040,7 +1040,7 @@ extern "C" void tfoo7() {
 // CHECK1-NEXT:    br label [[COND_END:%.*]]
 // CHECK1:       cond.false:
 // CHECK1-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP12]], 5
+// CHECK1-NEXT:    [[ADD11:%.*]] = add i32 [[TMP12]], 5
 // CHECK1-NEXT:    br label [[COND_END]]
 // CHECK1:       cond.end:
 // CHECK1-NEXT:    [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ]
@@ -1057,14 +1057,14 @@ extern "C" void tfoo7() {
 // CHECK1-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK1:       for.inc:
 // CHECK1-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK1-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK1-NEXT:    [[INC:%.*]] = add i32 [[TMP16]], 1
 // CHECK1-NEXT:    store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
 // CHECK1-NEXT:    br label [[FOR_COND6]], !llvm.loop [[LOOP21:![0-9]+]]
 // CHECK1:       for.end:
 // CHECK1-NEXT:    br label [[FOR_INC15:%.*]]
 // CHECK1:       for.inc15:
 // CHECK1-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP17]], 5
+// CHECK1-NEXT:    [[ADD16:%.*]] = add i32 [[TMP17]], 5
 // CHECK1-NEXT:    store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4
 // CHECK1-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
 // CHECK1:       for.end17:
@@ -1159,13 +1159,13 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@body
-// CHECK2-SAME: (...) #[[ATTR2:[0-9]+]] {
+// CHECK2-SAME: (...) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo1
-// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR2]] {
+// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
@@ -1213,7 +1213,7 @@ extern "C" void tfoo7() {
 // CHECK2-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK2-NEXT:    [[ADD7:%.*]] = add i32 [[TMP11]], 1
 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5
+// CHECK2-NEXT:    [[ADD8:%.*]] = add i32 [[TMP12]], 5
 // CHECK2-NEXT:    [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]]
 // CHECK2-NEXT:    br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK2:       cond.true:
@@ -1222,7 +1222,7 @@ extern "C" void tfoo7() {
 // CHECK2-NEXT:    br label [[COND_END:%.*]]
 // CHECK2:       cond.false:
 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5
+// CHECK2-NEXT:    [[ADD11:%.*]] = add i32 [[TMP14]], 5
 // CHECK2-NEXT:    br label [[COND_END]]
 // CHECK2:       cond.end:
 // CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ]
@@ -1240,14 +1240,14 @@ extern "C" void tfoo7() {
 // CHECK2-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK2:       for.inc:
 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP19]], 1
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP19]], 1
 // CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
 // CHECK2-NEXT:    br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]]
 // CHECK2:       for.end:
 // CHECK2-NEXT:    br label [[FOR_INC15:%.*]]
 // CHECK2:       for.inc15:
 // CHECK2-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5
+// CHECK2-NEXT:    [[ADD16:%.*]] = add i32 [[TMP20]], 5
 // CHECK2-NEXT:    store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4
 // CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
 // CHECK2:       for.end17:
@@ -1255,7 +1255,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo2
-// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR2]] {
+// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
@@ -1368,7 +1368,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo3
-// CHECK2-SAME: () #[[ATTR2]] {
+// CHECK2-SAME: () #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
@@ -1510,7 +1510,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo4
-// CHECK2-SAME: () #[[ATTR2]] {
+// CHECK2-SAME: () #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
@@ -1663,7 +1663,7 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo5
-// CHECK2-SAME: () #[[ATTR2]] {
+// CHECK2-SAME: () #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTOMP_IV:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32, align 4
@@ -1872,14 +1872,14 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo6
-// CHECK2-SAME: () #[[ATTR2]] {
+// CHECK2-SAME: () #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 0, ptr @foo6.omp_outlined)
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@foo6.omp_outlined
-// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR5:[0-9]+]] {
+// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
@@ -1975,14 +1975,14 @@ extern "C" void tfoo7() {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@tfoo7
-// CHECK2-SAME: () #[[ATTR2]] {
+// CHECK2-SAME: () #[[ATTR1]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    call void @_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_(i32 noundef 0, i32 noundef 42)
 // CHECK2-NEXT:    ret void
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_Z4foo7IiTnT_Li3ETnS0_Li5EEvS0_S0_
-// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR2]] comdat {
+// CHECK2-SAME: (i32 noundef [[START:%.*]], i32 noundef [[END:%.*]]) #[[ATTR1]] comdat {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
@@ -2025,7 +2025,7 @@ extern "C" void tfoo7() {
 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
 // CHECK2-NEXT:    [[ADD7:%.*]] = add i32 [[TMP9]], 1
 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP10]], 5
+// CHECK2-NEXT:    [[ADD8:%.*]] = add i32 [[TMP10]], 5
 // CHECK2-NEXT:    [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]]
 // CHECK2-NEXT:    br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
 // CHECK2:       cond.true:
@@ -2034,7 +2034,7 @@ extern "C" void tfoo7() {
 // CHECK2-NEXT:    br label [[COND_END:%.*]]
 // CHECK2:       cond.false:
 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT:    [[ADD11:%.*]] = add nsw i32 [[TMP12]], 5
+// CHECK2-NEXT:    [[ADD11:%.*]] = add i32 [[TMP12]], 5
 // CHECK2-NEXT:    br label [[COND_END]]
 // CHECK2:       cond.end:
 // CHECK2-NEXT:    [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ]
@@ -2051,14 +2051,14 @@ extern "C" void tfoo7() {
 // CHECK2-NEXT:    br label [[FOR_INC:%.*]]
 // CHECK2:       for.inc:
 // CHECK2-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK2-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP16]], 1
+// CHECK2-NEXT:    [[INC:%.*]] = add i32 [[TMP16]], 1
 // CHECK2-NEXT:    store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
 // CHECK2-NEXT:    br label [[FOR_COND6]], !llvm.loop [[LOOP21:![0-9]+]]
 // CHECK2:       for.end:
 // CHECK2-NEXT:    br label [[FOR_INC15:%.*]]
 // CHECK2:       for.inc15:
 // CHECK2-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT:    [[ADD16:%.*]] = add nsw i32 [[TMP17]], 5
+// CHECK2-NEXT:    [[ADD16:%.*]] = add i32 [[TMP17]], 5
 // CHECK2-NEXT:    store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4
 // CHECK2-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
 // CHECK2:       for.end17:
diff --git a/clang/test/OpenMP/tile_codegen_for_dependent.cpp b/clang/test/OpenMP/tile_codegen_for_dependent.cpp
index 93c51c9165a47..a95bb919ac41b 100644
--- a/clang/test/OpenMP/tile_codegen_for_dependent.cpp
+++ b/clang/test/OpenMP/tile_codegen_for_dependent.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4
 // Check code generation
 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
 
@@ -17,172 +18,6 @@
 extern "C" void body(...) {}
 
 
-// IR-LABEL: @func(
-// IR-NEXT:  [[ENTRY:.*]]:
-// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTFLOOR_0_IV_I12:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @2)
-// IR-NEXT:    store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4
-// IR-NEXT:    store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4
-// IR-NEXT:    store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:    %[[TMP1:.+]] = load i32, ptr %[[START_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP1]], ptr %[[I]], align 4
-// IR-NEXT:    %[[TMP2:.+]] = load i32, ptr %[[START_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP2]], ptr %[[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    %[[TMP3:.+]] = load i32, ptr %[[END_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP3]], ptr %[[DOTCAPTURE_EXPR_1]], align 4
-// IR-NEXT:    %[[TMP4:.+]] = load i32, ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[TMP5:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4
-// IR-NEXT:    %[[TMP6:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
-// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
-// IR-NEXT:    %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
-// IR-NEXT:    %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
-// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
-// IR-NEXT:    store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
-// IR-NEXT:    store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4
-// IR-NEXT:    %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4
-// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -3
-// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4
-// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
-// IR-NEXT:    store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    store i32 0, ptr %[[DOTFLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4
-// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP11]]
-// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_PRECOND_THEN]]:
-// IR-NEXT:    store i32 0, ptr %[[DOTOMP_LB]], align 4
-// IR-NEXT:    %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    store i32 %[[TMP12]], ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    store i32 1, ptr %[[DOTOMP_STRIDE]], align 4
-// IR-NEXT:    store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4
-// IR-NEXT:    call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-NEXT:    %[[TMP13:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    %[[CMP13:.+]] = icmp ugt i32 %[[TMP13]], %[[TMP14]]
-// IR-NEXT:    br i1 %[[CMP13]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_TRUE]]:
-// IR-NEXT:    %[[TMP15:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    br label %[[COND_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_FALSE]]:
-// IR-NEXT:    %[[TMP16:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    br label %[[COND_END]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_END]]:
-// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP15]], %[[COND_TRUE]] ], [ %[[TMP16]], %[[COND_FALSE]] ]
-// IR-NEXT:    store i32 %[[COND]], ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    %[[TMP17:.+]] = load i32, ptr %[[DOTOMP_LB]], align 4
-// IR-NEXT:    store i32 %[[TMP17]], ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
-// IR-NEXT:    %[[TMP18:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    %[[TMP19:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    %[[ADD14:.+]] = add i32 %[[TMP19]], 1
-// IR-NEXT:    %[[CMP15:.+]] = icmp ult i32 %[[TMP18]], %[[ADD14]]
-// IR-NEXT:    br i1 %[[CMP15]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
-// IR-NEXT:    %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP20]], 4
-// IR-NEXT:    %[[ADD16:.+]] = add i32 0, %[[MUL]]
-// IR-NEXT:    store i32 %[[ADD16]], ptr %[[DOTFLOOR_0_IV_I12]], align 4
-// IR-NEXT:    %[[TMP21:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4
-// IR-NEXT:    store i32 %[[TMP21]], ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    br label %[[FOR_COND:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_COND]]:
-// IR-NEXT:    %[[TMP22:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP23:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[ADD17:.+]] = add i32 %[[TMP23]], 1
-// IR-NEXT:    %[[TMP24:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4
-// IR-NEXT:    %[[ADD18:.+]] = add nsw i32 %[[TMP24]], 4
-// IR-NEXT:    %[[CMP19:.+]] = icmp ult i32 %[[ADD17]], %[[ADD18]]
-// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE20:.+]], label %[[COND_FALSE22:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_TRUE20]]:
-// IR-NEXT:    %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[ADD21:.+]] = add i32 %[[TMP25]], 1
-// IR-NEXT:    br label %[[COND_END24:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_FALSE22]]:
-// IR-NEXT:    %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I12]], align 4
-// IR-NEXT:    %[[ADD23:.+]] = add nsw i32 %[[TMP26]], 4
-// IR-NEXT:    br label %[[COND_END24]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_END24]]:
-// IR-NEXT:    %[[COND25:.+]] = phi i32 [ %[[ADD21]], %[[COND_TRUE20]] ], [ %[[ADD23]], %[[COND_FALSE22]] ]
-// IR-NEXT:    %[[CMP26:.+]] = icmp ult i32 %[[TMP22]], %[[COND25]]
-// IR-NEXT:    br i1 %[[CMP26]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_BODY]]:
-// IR-NEXT:    %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    %[[TMP28:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP29:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[MUL27:.+]] = mul i32 %[[TMP28]], %[[TMP29]]
-// IR-NEXT:    %[[ADD28:.+]] = add i32 %[[TMP27]], %[[MUL27]]
-// IR-NEXT:    store i32 %[[ADD28]], ptr %[[I]], align 4
-// IR-NEXT:    %[[TMP30:.+]] = load i32, ptr %[[START_ADDR]], align 4
-// IR-NEXT:    %[[TMP31:.+]] = load i32, ptr %[[END_ADDR]], align 4
-// IR-NEXT:    %[[TMP32:.+]] = load i32, ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:    %[[TMP33:.+]] = load i32, ptr %[[I]], align 4
-// IR-NEXT:    call void (...) @body(i32 noundef %[[TMP30]], i32 noundef %[[TMP31]], i32 noundef %[[TMP32]], i32 noundef %[[TMP33]])
-// IR-NEXT:    br label %[[FOR_INC:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_INC]]:
-// IR-NEXT:    %[[TMP34:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    %[[INC:.+]] = add nsw i32 %[[TMP34]], 1
-// IR-NEXT:    store i32 %[[INC]], ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP2:[0-9]+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_END]]:
-// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
-// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
-// IR-NEXT:    %[[TMP35:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    %[[ADD29:.+]] = add i32 %[[TMP35]], 1
-// IR-NEXT:    store i32 %[[ADD29]], ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_END]]:
-// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_LOOP_EXIT]]:
-// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP0]])
-// IR-NEXT:    br label %[[OMP_PRECOND_END]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_PRECOND_END]]:
-// IR-NEXT:    call void @__kmpc_barrier(ptr @3, i32 %[[TMP0]])
-// IR-NEXT:    ret void
-// IR-NEXT:  }
 extern "C" void func(int start, int end, int step) {
 #pragma omp for
 #pragma omp tile sizes(4)
@@ -191,3 +26,162 @@ extern "C" void func(int start, int end, int step) {
 }
 
 #endif /* HEADER */
+// IR-LABEL: define dso_local void @body(
+// IR-SAME: ...) #[[ATTR0:[0-9]+]] {
+// IR-NEXT:  entry:
+// IR-NEXT:    ret void
+//
+//
+// IR-LABEL: define dso_local void @func(
+// IR-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] {
+// IR-NEXT:  entry:
+// IR-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTFLOOR_0_IV_I11:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// IR-NEXT:    store i32 [[START]], ptr [[START_ADDR]], align 4
+// IR-NEXT:    store i32 [[END]], ptr [[END_ADDR]], align 4
+// IR-NEXT:    store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// IR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP1]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP2:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[TMP3:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    [[TMP4:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP4]], ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+// IR-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// IR-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP7]]
+// IR-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP8]]
+// IR-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// IR-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[ADD6:%.*]] = add i32 [[TMP9]], 1
+// IR-NEXT:    store i32 [[ADD6]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    [[SUB8:%.*]] = sub i32 [[TMP10]], -3
+// IR-NEXT:    [[DIV9:%.*]] = udiv i32 [[SUB8]], 4
+// IR-NEXT:    [[SUB10:%.*]] = sub i32 [[DIV9]], 1
+// IR-NEXT:    store i32 [[SUB10]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP11]]
+// IR-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR:       omp.precond.then:
+// IR-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    store i32 [[TMP12]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    [[CMP12:%.*]] = icmp ugt i32 [[TMP13]], [[TMP14]]
+// IR-NEXT:    br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR:       cond.true:
+// IR-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    br label [[COND_END:%.*]]
+// IR:       cond.false:
+// IR-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    br label [[COND_END]]
+// IR:       cond.end:
+// IR-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP15]], [[COND_TRUE]] ], [ [[TMP16]], [[COND_FALSE]] ]
+// IR-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 [[TMP17]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// IR:       omp.inner.for.cond:
+// IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    [[ADD13:%.*]] = add i32 [[TMP19]], 1
+// IR-NEXT:    [[CMP14:%.*]] = icmp ult i32 [[TMP18]], [[ADD13]]
+// IR-NEXT:    br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR:       omp.inner.for.body:
+// IR-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[MUL:%.*]] = mul i32 [[TMP20]], 4
+// IR-NEXT:    [[ADD15:%.*]] = add i32 0, [[MUL]]
+// IR-NEXT:    store i32 [[ADD15]], ptr [[DOTFLOOR_0_IV_I11]], align 4
+// IR-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I11]], align 4
+// IR-NEXT:    store i32 [[TMP21]], ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    br label [[FOR_COND:%.*]]
+// IR:       for.cond:
+// IR-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[ADD16:%.*]] = add i32 [[TMP23]], 1
+// IR-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I11]], align 4
+// IR-NEXT:    [[ADD17:%.*]] = add i32 [[TMP24]], 4
+// IR-NEXT:    [[CMP18:%.*]] = icmp ult i32 [[ADD16]], [[ADD17]]
+// IR-NEXT:    br i1 [[CMP18]], label [[COND_TRUE19:%.*]], label [[COND_FALSE21:%.*]]
+// IR:       cond.true19:
+// IR-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[ADD20:%.*]] = add i32 [[TMP25]], 1
+// IR-NEXT:    br label [[COND_END23:%.*]]
+// IR:       cond.false21:
+// IR-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I11]], align 4
+// IR-NEXT:    [[ADD22:%.*]] = add i32 [[TMP26]], 4
+// IR-NEXT:    br label [[COND_END23]]
+// IR:       cond.end23:
+// IR-NEXT:    [[COND24:%.*]] = phi i32 [ [[ADD20]], [[COND_TRUE19]] ], [ [[ADD22]], [[COND_FALSE21]] ]
+// IR-NEXT:    [[CMP25:%.*]] = icmp ult i32 [[TMP22]], [[COND24]]
+// IR-NEXT:    br i1 [[CMP25]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// IR:       for.body:
+// IR-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[MUL26:%.*]] = mul i32 [[TMP28]], [[TMP29]]
+// IR-NEXT:    [[ADD27:%.*]] = add i32 [[TMP27]], [[MUL26]]
+// IR-NEXT:    store i32 [[ADD27]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP30:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// IR-NEXT:    [[TMP31:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// IR-NEXT:    [[TMP32:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// IR-NEXT:    [[TMP33:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 noundef [[TMP30]], i32 noundef [[TMP31]], i32 noundef [[TMP32]], i32 noundef [[TMP33]])
+// IR-NEXT:    br label [[FOR_INC:%.*]]
+// IR:       for.inc:
+// IR-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    [[INC:%.*]] = add i32 [[TMP34]], 1
+// IR-NEXT:    store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// IR:       for.end:
+// IR-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// IR:       omp.body.continue:
+// IR-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// IR:       omp.inner.for.inc:
+// IR-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[ADD28:%.*]] = add i32 [[TMP35]], 1
+// IR-NEXT:    store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// IR:       omp.inner.for.end:
+// IR-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// IR:       omp.loop.exit:
+// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]])
+// IR-NEXT:    br label [[OMP_PRECOND_END]]
+// IR:       omp.precond.end:
+// IR-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]])
+// IR-NEXT:    ret void
+//
+//.
+// IR: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// IR: [[META4]] = !{!"llvm.loop.mustprogress"}
+//.
diff --git a/clang/test/OpenMP/tile_codegen_tile_for.cpp b/clang/test/OpenMP/tile_codegen_tile_for.cpp
index d0fb89398c241..8b5756677e843 100644
--- a/clang/test/OpenMP/tile_codegen_tile_for.cpp
+++ b/clang/test/OpenMP/tile_codegen_tile_for.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 4
 // Check code generation
 // RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm %s -o - | FileCheck %s --check-prefix=IR
 
@@ -16,227 +17,6 @@
 extern "C" void body(...) {}
 
 
-// IR-LABEL: @func(
-// IR-NEXT:  [[ENTRY:.*]]:
-// IR-NEXT:    %[[START_ADDR:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[END_ADDR:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[STEP_ADDR:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_IV:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[TMP:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_1:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_2:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_3:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTFLOOR_0_IV_I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_6:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_8:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_12:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTCAPTURE_EXPR_14:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTFLOOR_0_IV__FLOOR_0_IV_I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_LB:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_UB:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_STRIDE:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTTILE_0_IV__FLOOR_0_IV_I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[DOTTILE_0_IV_I:.+]] = alloca i32, align 4
-// IR-NEXT:    %[[TMP0:.+]] = call i32 @__kmpc_global_thread_num(ptr @2)
-// IR-NEXT:    store i32 %[[START:.+]], ptr %[[START_ADDR]], align 4
-// IR-NEXT:    store i32 %[[END:.+]], ptr %[[END_ADDR]], align 4
-// IR-NEXT:    store i32 %[[STEP:.+]], ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:    %[[TMP1:.+]] = load i32, ptr %[[START_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP1]], ptr %[[I]], align 4
-// IR-NEXT:    %[[TMP2:.+]] = load i32, ptr %[[START_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP2]], ptr %[[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    %[[TMP3:.+]] = load i32, ptr %[[END_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP3]], ptr %[[DOTCAPTURE_EXPR_1]], align 4
-// IR-NEXT:    %[[TMP4:.+]] = load i32, ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:    store i32 %[[TMP4]], ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[TMP5:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_1]], align 4
-// IR-NEXT:    %[[TMP6:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    %[[SUB:.+]] = sub i32 %[[TMP5]], %[[TMP6]]
-// IR-NEXT:    %[[SUB4:.+]] = sub i32 %[[SUB]], 1
-// IR-NEXT:    %[[TMP7:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[ADD:.+]] = add i32 %[[SUB4]], %[[TMP7]]
-// IR-NEXT:    %[[TMP8:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[DIV:.+]] = udiv i32 %[[ADD]], %[[TMP8]]
-// IR-NEXT:    %[[SUB5:.+]] = sub i32 %[[DIV]], 1
-// IR-NEXT:    store i32 %[[SUB5]], ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    store i32 0, ptr %[[DOTFLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP9:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[ADD7:.+]] = add i32 %[[TMP9]], 1
-// IR-NEXT:    store i32 %[[ADD7]], ptr %[[DOTCAPTURE_EXPR_6]], align 4
-// IR-NEXT:    %[[TMP10:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_6]], align 4
-// IR-NEXT:    %[[SUB9:.+]] = sub i32 %[[TMP10]], -3
-// IR-NEXT:    %[[DIV10:.+]] = udiv i32 %[[SUB9]], 4
-// IR-NEXT:    %[[SUB11:.+]] = sub i32 %[[DIV10]], 1
-// IR-NEXT:    store i32 %[[SUB11]], ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    %[[TMP11:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    %[[ADD13:.+]] = add i32 %[[TMP11]], 1
-// IR-NEXT:    store i32 %[[ADD13]], ptr %[[DOTCAPTURE_EXPR_12]], align 4
-// IR-NEXT:    %[[TMP12:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_12]], align 4
-// IR-NEXT:    %[[SUB15:.+]] = sub i32 %[[TMP12]], -2
-// IR-NEXT:    %[[DIV16:.+]] = udiv i32 %[[SUB15]], 3
-// IR-NEXT:    %[[SUB17:.+]] = sub i32 %[[DIV16]], 1
-// IR-NEXT:    store i32 %[[SUB17]], ptr %[[DOTCAPTURE_EXPR_14]], align 4
-// IR-NEXT:    store i32 0, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP13:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_12]], align 4
-// IR-NEXT:    %[[CMP:.+]] = icmp ult i32 0, %[[TMP13]]
-// IR-NEXT:    br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_PRECOND_THEN]]:
-// IR-NEXT:    store i32 0, ptr %[[DOTOMP_LB]], align 4
-// IR-NEXT:    %[[TMP14:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4
-// IR-NEXT:    store i32 %[[TMP14]], ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    store i32 1, ptr %[[DOTOMP_STRIDE]], align 4
-// IR-NEXT:    store i32 0, ptr %[[DOTOMP_IS_LAST]], align 4
-// IR-NEXT:    call void @__kmpc_for_static_init_4u(ptr @1, i32 %[[TMP0]], i32 34, ptr %[[DOTOMP_IS_LAST]], ptr %[[DOTOMP_LB]], ptr %[[DOTOMP_UB]], ptr %[[DOTOMP_STRIDE]], i32 1, i32 1)
-// IR-NEXT:    %[[TMP15:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    %[[TMP16:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4
-// IR-NEXT:    %[[CMP19:.+]] = icmp ugt i32 %[[TMP15]], %[[TMP16]]
-// IR-NEXT:    br i1 %[[CMP19]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_TRUE]]:
-// IR-NEXT:    %[[TMP17:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_14]], align 4
-// IR-NEXT:    br label %[[COND_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_FALSE]]:
-// IR-NEXT:    %[[TMP18:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    br label %[[COND_END]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_END]]:
-// IR-NEXT:    %[[COND:.+]] = phi i32 [ %[[TMP17]], %[[COND_TRUE]] ], [ %[[TMP18]], %[[COND_FALSE]] ]
-// IR-NEXT:    store i32 %[[COND]], ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    %[[TMP19:.+]] = load i32, ptr %[[DOTOMP_LB]], align 4
-// IR-NEXT:    store i32 %[[TMP19]], ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    br label %[[OMP_INNER_FOR_COND:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_COND]]:
-// IR-NEXT:    %[[TMP20:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    %[[TMP21:.+]] = load i32, ptr %[[DOTOMP_UB]], align 4
-// IR-NEXT:    %[[ADD20:.+]] = add i32 %[[TMP21]], 1
-// IR-NEXT:    %[[CMP21:.+]] = icmp ult i32 %[[TMP20]], %[[ADD20]]
-// IR-NEXT:    br i1 %[[CMP21]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_BODY]]:
-// IR-NEXT:    %[[TMP22:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    %[[MUL:.+]] = mul i32 %[[TMP22]], 3
-// IR-NEXT:    %[[ADD22:.+]] = add i32 0, %[[MUL]]
-// IR-NEXT:    store i32 %[[ADD22]], ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4
-// IR-NEXT:    %[[TMP23:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4
-// IR-NEXT:    store i32 %[[TMP23]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
-// IR-NEXT:    br label %[[FOR_COND:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_COND]]:
-// IR-NEXT:    %[[TMP24:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP25:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    %[[ADD23:.+]] = add i32 %[[TMP25]], 1
-// IR-NEXT:    %[[TMP26:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4
-// IR-NEXT:    %[[ADD24:.+]] = add i32 %[[TMP26]], 3
-// IR-NEXT:    %[[CMP25:.+]] = icmp ult i32 %[[ADD23]], %[[ADD24]]
-// IR-NEXT:    br i1 %[[CMP25]], label %[[COND_TRUE26:.+]], label %[[COND_FALSE28:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_TRUE26]]:
-// IR-NEXT:    %[[TMP27:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_8]], align 4
-// IR-NEXT:    %[[ADD27:.+]] = add i32 %[[TMP27]], 1
-// IR-NEXT:    br label %[[COND_END30:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_FALSE28]]:
-// IR-NEXT:    %[[TMP28:.+]] = load i32, ptr %[[DOTFLOOR_0_IV__FLOOR_0_IV_I18]], align 4
-// IR-NEXT:    %[[ADD29:.+]] = add i32 %[[TMP28]], 3
-// IR-NEXT:    br label %[[COND_END30]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_END30]]:
-// IR-NEXT:    %[[COND31:.+]] = phi i32 [ %[[ADD27]], %[[COND_TRUE26]] ], [ %[[ADD29]], %[[COND_FALSE28]] ]
-// IR-NEXT:    %[[CMP32:.+]] = icmp ult i32 %[[TMP24]], %[[COND31]]
-// IR-NEXT:    br i1 %[[CMP32]], label %[[FOR_BODY:.+]], label %[[FOR_END51:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_BODY]]:
-// IR-NEXT:    %[[TMP29:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[MUL33:.+]] = mul i32 %[[TMP29]], 4
-// IR-NEXT:    %[[ADD34:.+]] = add i32 0, %[[MUL33]]
-// IR-NEXT:    store i32 %[[ADD34]], ptr %[[DOTFLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP30:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4
-// IR-NEXT:    store i32 %[[TMP30]], ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    br label %[[FOR_COND35:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_COND35]]:
-// IR-NEXT:    %[[TMP31:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP32:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[ADD36:.+]] = add i32 %[[TMP32]], 1
-// IR-NEXT:    %[[TMP33:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[ADD37:.+]] = add nsw i32 %[[TMP33]], 4
-// IR-NEXT:    %[[CMP38:.+]] = icmp ult i32 %[[ADD36]], %[[ADD37]]
-// IR-NEXT:    br i1 %[[CMP38]], label %[[COND_TRUE39:.+]], label %[[COND_FALSE41:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_TRUE39]]:
-// IR-NEXT:    %[[TMP34:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_3]], align 4
-// IR-NEXT:    %[[ADD40:.+]] = add i32 %[[TMP34]], 1
-// IR-NEXT:    br label %[[COND_END43:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_FALSE41]]:
-// IR-NEXT:    %[[TMP35:.+]] = load i32, ptr %[[DOTFLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[ADD42:.+]] = add nsw i32 %[[TMP35]], 4
-// IR-NEXT:    br label %[[COND_END43]]
-// IR-EMPTY:
-// IR-NEXT:  [[COND_END43]]:
-// IR-NEXT:    %[[COND44:.+]] = phi i32 [ %[[ADD40]], %[[COND_TRUE39]] ], [ %[[ADD42]], %[[COND_FALSE41]] ]
-// IR-NEXT:    %[[CMP45:.+]] = icmp ult i32 %[[TMP31]], %[[COND44]]
-// IR-NEXT:    br i1 %[[CMP45]], label %[[FOR_BODY46:.+]], label %[[FOR_END:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_BODY46]]:
-// IR-NEXT:    %[[TMP36:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_]], align 4
-// IR-NEXT:    %[[TMP37:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    %[[TMP38:.+]] = load i32, ptr %[[DOTCAPTURE_EXPR_2]], align 4
-// IR-NEXT:    %[[MUL47:.+]] = mul i32 %[[TMP37]], %[[TMP38]]
-// IR-NEXT:    %[[ADD48:.+]] = add i32 %[[TMP36]], %[[MUL47]]
-// IR-NEXT:    store i32 %[[ADD48]], ptr %[[I]], align 4
-// IR-NEXT:    %[[TMP39:.+]] = load i32, ptr %[[START_ADDR]], align 4
-// IR-NEXT:    %[[TMP40:.+]] = load i32, ptr %[[END_ADDR]], align 4
-// IR-NEXT:    %[[TMP41:.+]] = load i32, ptr %[[STEP_ADDR]], align 4
-// IR-NEXT:    %[[TMP42:.+]] = load i32, ptr %[[I]], align 4
-// IR-NEXT:    call void (...) @body(i32 noundef %[[TMP39]], i32 noundef %[[TMP40]], i32 noundef %[[TMP41]], i32 noundef %[[TMP42]])
-// IR-NEXT:    br label %[[FOR_INC:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_INC]]:
-// IR-NEXT:    %[[TMP43:.+]] = load i32, ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    %[[INC:.+]] = add nsw i32 %[[TMP43]], 1
-// IR-NEXT:    store i32 %[[INC]], ptr %[[DOTTILE_0_IV_I]], align 4
-// IR-NEXT:    br label %[[FOR_COND35]], !llvm.loop ![[LOOP2:[0-9]+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_END]]:
-// IR-NEXT:    br label %[[FOR_INC49:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_INC49]]:
-// IR-NEXT:    %[[TMP44:.+]] = load i32, ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
-// IR-NEXT:    %[[INC50:.+]] = add i32 %[[TMP44]], 1
-// IR-NEXT:    store i32 %[[INC50]], ptr %[[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
-// IR-NEXT:    br label %[[FOR_COND]], !llvm.loop ![[LOOP4:[0-9]+]]
-// IR-EMPTY:
-// IR-NEXT:  [[FOR_END51]]:
-// IR-NEXT:    br label %[[OMP_BODY_CONTINUE:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_BODY_CONTINUE]]:
-// IR-NEXT:    br label %[[OMP_INNER_FOR_INC:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_INC]]:
-// IR-NEXT:    %[[TMP45:.+]] = load i32, ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    %[[ADD52:.+]] = add i32 %[[TMP45]], 1
-// IR-NEXT:    store i32 %[[ADD52]], ptr %[[DOTOMP_IV]], align 4
-// IR-NEXT:    br label %[[OMP_INNER_FOR_COND]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_INNER_FOR_END]]:
-// IR-NEXT:    br label %[[OMP_LOOP_EXIT:.+]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_LOOP_EXIT]]:
-// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @1, i32 %[[TMP0]])
-// IR-NEXT:    br label %[[OMP_PRECOND_END]]
-// IR-EMPTY:
-// IR-NEXT:  [[OMP_PRECOND_END]]:
-// IR-NEXT:    call void @__kmpc_barrier(ptr @3, i32 %[[TMP0]])
-// IR-NEXT:    ret void
-// IR-NEXT:  }
 extern "C" void func(int start, int end, int step) {
 #pragma omp for
 #pragma omp tile sizes(3)
@@ -246,8 +26,211 @@ extern "C" void func(int start, int end, int step) {
 }
 
 #endif /* HEADER */
-// IR: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-// IR: ![[META1:[0-9]+]] = !{!"{{[^"]*}}"}
-// IR: ![[LOOP2]] = distinct !{![[LOOP2]], ![[LOOPPROP3:[0-9]+]]}
-// IR: ![[LOOPPROP3]] = !{!"llvm.loop.mustprogress"}
-// IR: ![[LOOP4]] = distinct !{![[LOOP4]], ![[LOOPPROP3]]}
+// IR-LABEL: define dso_local void @body(
+// IR-SAME: ...) #[[ATTR0:[0-9]+]] {
+// IR-NEXT:  entry:
+// IR-NEXT:    ret void
+//
+//
+// IR-LABEL: define dso_local void @func(
+// IR-SAME: i32 noundef [[START:%.*]], i32 noundef [[END:%.*]], i32 noundef [[STEP:%.*]]) #[[ATTR0]] {
+// IR-NEXT:  entry:
+// IR-NEXT:    [[START_ADDR:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[END_ADDR:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[STEP_ADDR:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_IV:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[TMP:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTNEW_STEP:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_5:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_7:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTCAPTURE_EXPR_13:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTFLOOR_0_IV__FLOOR_0_IV_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTFLOOR_0_IV__FLOOR_0_IV_I17:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTTILE_0_IV__FLOOR_0_IV_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
+// IR-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// IR-NEXT:    store i32 [[START]], ptr [[START_ADDR]], align 4
+// IR-NEXT:    store i32 [[END]], ptr [[END_ADDR]], align 4
+// IR-NEXT:    store i32 [[STEP]], ptr [[STEP_ADDR]], align 4
+// IR-NEXT:    [[TMP1:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP1]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP2:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[TMP3:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    [[TMP4:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// IR-NEXT:    store i32 [[TMP4]], ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
+// IR-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[SUB:%.*]] = sub i32 [[TMP5]], [[TMP6]]
+// IR-NEXT:    [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// IR-NEXT:    [[TMP7:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[ADD:%.*]] = add i32 [[SUB3]], [[TMP7]]
+// IR-NEXT:    [[TMP8:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP8]]
+// IR-NEXT:    [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// IR-NEXT:    store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[ADD6:%.*]] = add i32 [[TMP9]], 1
+// IR-NEXT:    store i32 [[ADD6]], ptr [[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_5]], align 4
+// IR-NEXT:    [[SUB8:%.*]] = sub i32 [[TMP10]], -3
+// IR-NEXT:    [[DIV9:%.*]] = udiv i32 [[SUB8]], 4
+// IR-NEXT:    [[SUB10:%.*]] = sub i32 [[DIV9]], 1
+// IR-NEXT:    store i32 [[SUB10]], ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    [[ADD12:%.*]] = add i32 [[TMP11]], 1
+// IR-NEXT:    store i32 [[ADD12]], ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-NEXT:    [[SUB14:%.*]] = sub i32 [[TMP12]], -2
+// IR-NEXT:    [[DIV15:%.*]] = udiv i32 [[SUB14]], 3
+// IR-NEXT:    [[SUB16:%.*]] = sub i32 [[DIV15]], 1
+// IR-NEXT:    store i32 [[SUB16]], ptr [[DOTCAPTURE_EXPR_13]], align 4
+// IR-NEXT:    store i32 0, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
+// IR-NEXT:    [[CMP:%.*]] = icmp ult i32 0, [[TMP13]]
+// IR-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// IR:       omp.precond.then:
+// IR-NEXT:    store i32 0, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT:    [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
+// IR-NEXT:    store i32 [[TMP14]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
+// IR-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// IR-NEXT:    call void @__kmpc_for_static_init_4u(ptr @[[GLOB1:[0-9]+]], i32 [[TMP0]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1)
+// IR-NEXT:    [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
+// IR-NEXT:    [[CMP18:%.*]] = icmp ugt i32 [[TMP15]], [[TMP16]]
+// IR-NEXT:    br i1 [[CMP18]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// IR:       cond.true:
+// IR-NEXT:    [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_13]], align 4
+// IR-NEXT:    br label [[COND_END:%.*]]
+// IR:       cond.false:
+// IR-NEXT:    [[TMP18:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    br label [[COND_END]]
+// IR:       cond.end:
+// IR-NEXT:    [[COND:%.*]] = phi i32 [ [[TMP17]], [[COND_TRUE]] ], [ [[TMP18]], [[COND_FALSE]] ]
+// IR-NEXT:    store i32 [[COND]], ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    [[TMP19:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4
+// IR-NEXT:    store i32 [[TMP19]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+// IR:       omp.inner.for.cond:
+// IR-NEXT:    [[TMP20:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[TMP21:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
+// IR-NEXT:    [[ADD19:%.*]] = add i32 [[TMP21]], 1
+// IR-NEXT:    [[CMP20:%.*]] = icmp ult i32 [[TMP20]], [[ADD19]]
+// IR-NEXT:    br i1 [[CMP20]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
+// IR:       omp.inner.for.body:
+// IR-NEXT:    [[TMP22:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[MUL:%.*]] = mul i32 [[TMP22]], 3
+// IR-NEXT:    [[ADD21:%.*]] = add i32 0, [[MUL]]
+// IR-NEXT:    store i32 [[ADD21]], ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4
+// IR-NEXT:    [[TMP23:%.*]] = load i32, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4
+// IR-NEXT:    store i32 [[TMP23]], ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
+// IR-NEXT:    br label [[FOR_COND:%.*]]
+// IR:       for.cond:
+// IR-NEXT:    [[TMP24:%.*]] = load i32, ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[TMP25:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    [[ADD22:%.*]] = add i32 [[TMP25]], 1
+// IR-NEXT:    [[TMP26:%.*]] = load i32, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4
+// IR-NEXT:    [[ADD23:%.*]] = add i32 [[TMP26]], 3
+// IR-NEXT:    [[CMP24:%.*]] = icmp ult i32 [[ADD22]], [[ADD23]]
+// IR-NEXT:    br i1 [[CMP24]], label [[COND_TRUE25:%.*]], label [[COND_FALSE27:%.*]]
+// IR:       cond.true25:
+// IR-NEXT:    [[TMP27:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_7]], align 4
+// IR-NEXT:    [[ADD26:%.*]] = add i32 [[TMP27]], 1
+// IR-NEXT:    br label [[COND_END29:%.*]]
+// IR:       cond.false27:
+// IR-NEXT:    [[TMP28:%.*]] = load i32, ptr [[DOTFLOOR_0_IV__FLOOR_0_IV_I17]], align 4
+// IR-NEXT:    [[ADD28:%.*]] = add i32 [[TMP28]], 3
+// IR-NEXT:    br label [[COND_END29]]
+// IR:       cond.end29:
+// IR-NEXT:    [[COND30:%.*]] = phi i32 [ [[ADD26]], [[COND_TRUE25]] ], [ [[ADD28]], [[COND_FALSE27]] ]
+// IR-NEXT:    [[CMP31:%.*]] = icmp ult i32 [[TMP24]], [[COND30]]
+// IR-NEXT:    br i1 [[CMP31]], label [[FOR_BODY:%.*]], label [[FOR_END50:%.*]]
+// IR:       for.body:
+// IR-NEXT:    [[TMP29:%.*]] = load i32, ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[MUL32:%.*]] = mul i32 [[TMP29]], 4
+// IR-NEXT:    [[ADD33:%.*]] = add i32 0, [[MUL32]]
+// IR-NEXT:    store i32 [[ADD33]], ptr [[DOTFLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[TMP30:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
+// IR-NEXT:    store i32 [[TMP30]], ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    br label [[FOR_COND34:%.*]]
+// IR:       for.cond34:
+// IR-NEXT:    [[TMP31:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    [[TMP32:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[ADD35:%.*]] = add i32 [[TMP32]], 1
+// IR-NEXT:    [[TMP33:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[ADD36:%.*]] = add i32 [[TMP33]], 4
+// IR-NEXT:    [[CMP37:%.*]] = icmp ult i32 [[ADD35]], [[ADD36]]
+// IR-NEXT:    br i1 [[CMP37]], label [[COND_TRUE38:%.*]], label [[COND_FALSE40:%.*]]
+// IR:       cond.true38:
+// IR-NEXT:    [[TMP34:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// IR-NEXT:    [[ADD39:%.*]] = add i32 [[TMP34]], 1
+// IR-NEXT:    br label [[COND_END42:%.*]]
+// IR:       cond.false40:
+// IR-NEXT:    [[TMP35:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[ADD41:%.*]] = add i32 [[TMP35]], 4
+// IR-NEXT:    br label [[COND_END42]]
+// IR:       cond.end42:
+// IR-NEXT:    [[COND43:%.*]] = phi i32 [ [[ADD39]], [[COND_TRUE38]] ], [ [[ADD41]], [[COND_FALSE40]] ]
+// IR-NEXT:    [[CMP44:%.*]] = icmp ult i32 [[TMP31]], [[COND43]]
+// IR-NEXT:    br i1 [[CMP44]], label [[FOR_BODY45:%.*]], label [[FOR_END:%.*]]
+// IR:       for.body45:
+// IR-NEXT:    [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
+// IR-NEXT:    [[TMP37:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    [[TMP38:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// IR-NEXT:    [[MUL46:%.*]] = mul i32 [[TMP37]], [[TMP38]]
+// IR-NEXT:    [[ADD47:%.*]] = add i32 [[TMP36]], [[MUL46]]
+// IR-NEXT:    store i32 [[ADD47]], ptr [[I]], align 4
+// IR-NEXT:    [[TMP39:%.*]] = load i32, ptr [[START_ADDR]], align 4
+// IR-NEXT:    [[TMP40:%.*]] = load i32, ptr [[END_ADDR]], align 4
+// IR-NEXT:    [[TMP41:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
+// IR-NEXT:    [[TMP42:%.*]] = load i32, ptr [[I]], align 4
+// IR-NEXT:    call void (...) @body(i32 noundef [[TMP39]], i32 noundef [[TMP40]], i32 noundef [[TMP41]], i32 noundef [[TMP42]])
+// IR-NEXT:    br label [[FOR_INC:%.*]]
+// IR:       for.inc:
+// IR-NEXT:    [[TMP43:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    [[INC:%.*]] = add i32 [[TMP43]], 1
+// IR-NEXT:    store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
+// IR-NEXT:    br label [[FOR_COND34]], !llvm.loop [[LOOP3:![0-9]+]]
+// IR:       for.end:
+// IR-NEXT:    br label [[FOR_INC48:%.*]]
+// IR:       for.inc48:
+// IR-NEXT:    [[TMP44:%.*]] = load i32, ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
+// IR-NEXT:    [[INC49:%.*]] = add i32 [[TMP44]], 1
+// IR-NEXT:    store i32 [[INC49]], ptr [[DOTTILE_0_IV__FLOOR_0_IV_I]], align 4
+// IR-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
+// IR:       for.end50:
+// IR-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+// IR:       omp.body.continue:
+// IR-NEXT:    br label [[OMP_INNER_FOR_INC:%.*]]
+// IR:       omp.inner.for.inc:
+// IR-NEXT:    [[TMP45:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    [[ADD51:%.*]] = add i32 [[TMP45]], 1
+// IR-NEXT:    store i32 [[ADD51]], ptr [[DOTOMP_IV]], align 4
+// IR-NEXT:    br label [[OMP_INNER_FOR_COND]]
+// IR:       omp.inner.for.end:
+// IR-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+// IR:       omp.loop.exit:
+// IR-NEXT:    call void @__kmpc_for_static_fini(ptr @[[GLOB1]], i32 [[TMP0]])
+// IR-NEXT:    br label [[OMP_PRECOND_END]]
+// IR:       omp.precond.end:
+// IR-NEXT:    call void @__kmpc_barrier(ptr @[[GLOB3:[0-9]+]], i32 [[TMP0]])
+// IR-NEXT:    ret void
+//
+//.
+// IR: [[LOOP3]] = distinct !{[[LOOP3]], [[META4:![0-9]+]]}
+// IR: [[META4]] = !{!"llvm.loop.mustprogress"}
+// IR: [[LOOP5]] = distinct !{[[LOOP5]], [[META4]]}
+//.



More information about the cfe-commits mailing list