[Mlir-commits] [clang] [llvm] [mlir] [openmp] [LoopTiling][Clang][MLIR] Canonical Intra-tile Loops (PR #191114)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Apr 15 05:27:43 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Amit Tiwari (amitamd7)
<details>
<summary>Changes</summary>
This PR canonicalizes the intra-tile loops generated by loop tiling.
---
Patch is 672.74 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/191114.diff
23 Files Affected:
- (modified) clang/lib/Sema/SemaOpenMP.cpp (+121-46)
- (modified) clang/test/OpenMP/interchange_codegen.cpp (+1734-2449)
- (modified) clang/test/OpenMP/irbuilder_unroll_partial_factor_for.c (+34-33)
- (modified) clang/test/OpenMP/irbuilder_unroll_partial_heuristic_constant_for.c (+44-43)
- (modified) clang/test/OpenMP/irbuilder_unroll_partial_heuristic_runtime_for.c (+45-44)
- (modified) clang/test/OpenMP/irbuilder_unroll_unroll_partial_factor.c (+33-32)
- (modified) clang/test/OpenMP/irbuilder_unroll_unroll_partial_heuristic.c (+34-33)
- (modified) clang/test/OpenMP/tile_codegen.cpp (+1099-1405)
- (modified) clang/test/OpenMP/tile_codegen_for_dependent.cpp (+146-162)
- (modified) clang/test/OpenMP/tile_codegen_tile_for.cpp (+193-224)
- (modified) clang/test/OpenMP/tile_messages.cpp (+1-1)
- (added) clang/test/OpenMP/tile_rect_codegen.cpp (+50)
- (added) clang/test/OpenMP/tile_rect_codegen_ir.cpp (+84)
- (modified) clang/test/OpenMP/unroll_codegen_tile_for.cpp (+190-214)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+39-25)
- (modified) llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp (+30-5)
- (modified) mlir/test/Target/LLVMIR/openmp-cli-tile01.mlir (+10-8)
- (modified) mlir/test/Target/LLVMIR/openmp-cli-tile02.mlir (+16-14)
- (modified) mlir/test/Target/LLVMIR/openmp-cli-tile03.mlir (+63-57)
- (modified) openmp/runtime/test/transform/tile/foreach.cpp (+36)
- (modified) openmp/runtime/test/transform/tile/intfor.c (+39-39)
- (modified) openmp/runtime/test/transform/tile/iterfor.cpp (+27)
- (modified) openmp/runtime/test/transform/tile/parallel-wsloop-collapse-foreach.cpp (+108)
``````````diff
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index fada37ba45755..0aece2f027fe3 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -14957,8 +14957,10 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
// Create iteration variables for the generated loops.
SmallVector<VarDecl *, 4> FloorIndVars;
SmallVector<VarDecl *, 4> TileIndVars;
+ SmallVector<VarDecl *, 4> TileCntVars;
FloorIndVars.resize(NumLoops);
TileIndVars.resize(NumLoops);
+ TileCntVars.resize(NumLoops);
for (unsigned I = 0; I < NumLoops; ++I) {
OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I];
@@ -14978,27 +14980,101 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
FloorIndVars[I] = FloorCntDecl;
}
- // Iteration variable for the tile (i.e. inner) loop.
+ // Logical iteration variable for the tile loop. Retains the meaning of
+ // the original logical iteration number (floor_iv + tile_cnt) so that
+ // LoopHelper.Updates can derive the original loop variable unchanged.
{
- std::string TileCntName =
+ std::string TileIVName =
(Twine(".tile_") + llvm::utostr(I) + ".iv." + OrigVarName).str();
- // Reuse the iteration variable created by checkOpenMPLoop. It is also
- // used by the expressions to derive the original iteration variable's
- // value from the logical iteration number.
- auto *TileCntDecl = cast<VarDecl>(IterVarRef->getDecl());
- TileCntDecl->setDeclName(
- &SemaRef.PP.getIdentifierTable().get(TileCntName));
- TileIndVars[I] = TileCntDecl;
+ auto *TileIVDecl = cast<VarDecl>(IterVarRef->getDecl());
+ TileIVDecl->setDeclName(&SemaRef.PP.getIdentifierTable().get(TileIVName));
+ TileIndVars[I] = TileIVDecl;
+ }
+
+ // Loop counter for the rectangular tile loop [0, TileSize).
+ {
+ std::string TileCntName =
+ (Twine(".tile.cnt.") + llvm::utostr(I) + ".iv." + OrigVarName).str();
+ VarDecl *TileCntDecl =
+ buildVarDecl(SemaRef, {}, CntTy, TileCntName, nullptr, OrigCntVar);
+ TileCntVars[I] = TileCntDecl;
}
addLoopPreInits(Context, LoopHelper, LoopStmts[I], OriginalInits[I],
PreInits);
+
+ // Declare the logical tile IV in PreInits so it is in scope for the
+ // entire loop nest (it will be assigned in each tile loop body).
+ Decl *TileIVDeclPtr = TileIndVars[I];
+ PreInits.push_back(new (Context) DeclStmt(
+ DeclGroupRef::Create(Context, &TileIVDeclPtr, 1), {}, {}));
}
// Once the original iteration values are set, append the innermost body.
Stmt *Inner = Body;
+ // Build a combined validity predicate that guards the innermost body.
+ // For each tiled dimension, check that the logical iteration number
+ // (.tile.iv) is within the original trip count. This is required because the
+ // tile loop now has rectangular (constant) bounds and may overshoot on the
+ // remainder tile. The predicate is: .tile.iv.0 < N0 && .tile.iv.1 < N1 ...
+ //
+ // Optimization: if every dimension's trip count is a compile-time constant
+ // that is evenly divisible by the corresponding tile size (also a constant),
+ // then the remainder tile is empty and the predicate is trivially true.
+ {
+ bool PredicateNeeded = false;
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ Expr *TSExpr = SizesClause->getSizesRefs()[I];
+ Expr *NExpr = LoopHelpers[I].NumIterations;
+ llvm::APSInt TileVal, TripVal;
+ bool TSConst =
+ !TSExpr->containsErrors() && TSExpr->isIntegerConstantExpr(Context);
+ bool NConst = NExpr->isIntegerConstantExpr(Context);
+ if (TSConst && NConst) {
+ Expr::EvalResult TSResult;
+ TSExpr->EvaluateAsInt(TSResult, Context);
+ TileVal = TSResult.Val.getInt();
+ Expr::EvalResult NResult;
+ NExpr->EvaluateAsInt(NResult, Context);
+ TripVal = NResult.Val.getInt();
+ if (TileVal.isStrictlyPositive() && (TripVal.srem(TileVal)).isZero())
+ continue;
+ }
+ PredicateNeeded = true;
+ break;
+ }
+
+ if (PredicateNeeded) {
+ Expr *CombinedPred = nullptr;
+ for (unsigned I = 0; I < NumLoops; ++I) {
+ auto *OrigCntVar = cast<DeclRefExpr>(LoopHelpers[I].Counters[0]);
+ QualType IVTy = LoopHelpers[I].NumIterations->getType();
+ Expr *TileIVRef = buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy,
+ OrigCntVar->getExprLoc());
+ ExprResult DimPred =
+ SemaRef.BuildBinOp(CurScope, OrigCntVar->getExprLoc(), BO_LT,
+ TileIVRef, LoopHelpers[I].NumIterations);
+ if (!DimPred.isUsable())
+ return StmtError();
+ if (CombinedPred) {
+ ExprResult Combined =
+ SemaRef.BuildBinOp(CurScope, OrigCntVar->getExprLoc(), BO_LAnd,
+ CombinedPred, DimPred.get());
+ if (!Combined.isUsable())
+ return StmtError();
+ CombinedPred = Combined.get();
+ } else {
+ CombinedPred = DimPred.get();
+ }
+ }
+ Inner = IfStmt::Create(
+ Context, SourceLocation(), IfStatementKind::Ordinary, nullptr,
+ nullptr, CombinedPred, SourceLocation(), SourceLocation(), Inner);
+ }
+ }
+
auto MakeDimTileSize = [&SemaRef = this->SemaRef, &CopyTransformer, &Context,
SizesClause, CurScope](int I) -> Expr * {
Expr *DimTileSizeExpr = SizesClause->getSizesRefs()[I];
@@ -15006,7 +15082,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
if (DimTileSizeExpr->containsErrors())
return nullptr;
- if (isa<ConstantExpr>(DimTileSizeExpr))
+ if (DimTileSizeExpr->isIntegerConstantExpr(Context))
return AssertSuccess(CopyTransformer.TransformExpr(DimTileSizeExpr));
// When the tile size is not a constant but a variable, it is possible to
@@ -15042,6 +15118,9 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
};
// Create tile loops from the inside to the outside.
+ // Each tile loop uses .tile.cnt as its counter with rectangular bounds
+ // [0, TileSize), and computes .tile.iv = .floor.iv + .tile.cnt to set
+ // the logical iteration number for LoopHelper.Updates.
for (int I = NumLoops - 1; I >= 0; --I) {
OMPLoopBasedDirective::HelperExprs &LoopHelper = LoopHelpers[I];
Expr *NumIterations = LoopHelper.NumIterations;
@@ -15052,70 +15131,65 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
// Commonly used variables. One of the constraints of an AST is that every
// node object must appear at most once, hence we define a lambda that
// creates a new AST node at every use.
+ auto MakeTileCntRef = [&SemaRef = this->SemaRef, &TileCntVars, I, IVTy,
+ OrigCntVar]() {
+ return buildDeclRefExpr(SemaRef, TileCntVars[I], IVTy,
+ OrigCntVar->getExprLoc());
+ };
auto MakeTileIVRef = [&SemaRef = this->SemaRef, &TileIndVars, I, IVTy,
OrigCntVar]() {
return buildDeclRefExpr(SemaRef, TileIndVars[I], IVTy,
OrigCntVar->getExprLoc());
};
- // For init-statement: auto .tile.iv = .floor.iv
+ // For init-statement: auto .tile.cnt = 0
SemaRef.AddInitializerToDecl(
- TileIndVars[I],
- SemaRef
- .DefaultLvalueConversion(
- makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar))
- .get(),
+ TileCntVars[I],
+ SemaRef.ActOnIntegerConstant(LoopHelper.Init->getExprLoc(), 0).get(),
/*DirectInit=*/false);
- Decl *CounterDecl = TileIndVars[I];
+ Decl *CounterDecl = TileCntVars[I];
StmtResult InitStmt = new (Context)
DeclStmt(DeclGroupRef::Create(Context, &CounterDecl, 1),
OrigCntVar->getBeginLoc(), OrigCntVar->getEndLoc());
if (!InitStmt.isUsable())
return StmtError();
- // For cond-expression:
- // .tile.iv < min(.floor.iv + DimTileSize, NumIterations)
+ // For cond-expression: .tile.cnt < DimTileSize (rectangular bound)
Expr *DimTileSize = MakeDimTileSize(I);
if (!DimTileSize)
return StmtError();
- ExprResult EndOfTile = SemaRef.BuildBinOp(
- CurScope, LoopHelper.Cond->getExprLoc(), BO_Add,
- makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar),
- DimTileSize);
- if (!EndOfTile.isUsable())
- return StmtError();
- ExprResult IsPartialTile =
- SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
- NumIterations, EndOfTile.get());
- if (!IsPartialTile.isUsable())
- return StmtError();
- ExprResult MinTileAndIterSpace = SemaRef.ActOnConditionalOp(
- LoopHelper.Cond->getBeginLoc(), LoopHelper.Cond->getEndLoc(),
- IsPartialTile.get(), NumIterations, EndOfTile.get());
- if (!MinTileAndIterSpace.isUsable())
- return StmtError();
ExprResult CondExpr =
SemaRef.BuildBinOp(CurScope, LoopHelper.Cond->getExprLoc(), BO_LT,
- MakeTileIVRef(), MinTileAndIterSpace.get());
+ MakeTileCntRef(), DimTileSize);
if (!CondExpr.isUsable())
return StmtError();
- // For incr-statement: ++.tile.iv
+ // For incr-statement: ++.tile.cnt
ExprResult IncrStmt = SemaRef.BuildUnaryOp(
- CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeTileIVRef());
+ CurScope, LoopHelper.Inc->getExprLoc(), UO_PreInc, MakeTileCntRef());
if (!IncrStmt.isUsable())
return StmtError();
- // Statements to set the original iteration variable's value from the
- // logical iteration number.
+ // Compute the logical iteration number:
+ // .tile.iv = .floor.iv + .tile.cnt
+ ExprResult FloorPlusCnt = SemaRef.BuildBinOp(
+ CurScope, OrigCntVar->getExprLoc(), BO_Add,
+ makeFloorIVRef(SemaRef, FloorIndVars, I, IVTy, OrigCntVar),
+ MakeTileCntRef());
+ if (!FloorPlusCnt.isUsable())
+ return StmtError();
+ ExprResult TileIVAssign =
+ SemaRef.BuildBinOp(CurScope, OrigCntVar->getExprLoc(), BO_Assign,
+ MakeTileIVRef(), FloorPlusCnt.get());
+ if (!TileIVAssign.isUsable())
+ return StmtError();
+
// Generated for loop is:
// \code
- // Original_for_init;
- // for (auto .tile.iv = .floor.iv;
- // .tile.iv < min(.floor.iv + DimTileSize, NumIterations);
- // ++.tile.iv) {
- // Original_Body;
- // Original_counter_update;
+ // for (auto .tile.cnt = 0; .tile.cnt < DimTileSize; ++.tile.cnt) {
+ // .tile.iv = .floor.iv + .tile.cnt;
+ // Original_counter_update; // derives orig var from .tile.iv
+ // Inner; // predicated body or inner tile loops
// }
// \endcode
// FIXME: If the innermost body is an loop itself, inserting these
@@ -15123,6 +15197,7 @@ StmtResult SemaOpenMP::ActOnOpenMPTileDirective(ArrayRef<OMPClause *> Clauses,
// for applying tiling again). If this is the case, sink the expressions
// further into the inner loop.
SmallVector<Stmt *, 4> BodyParts;
+ BodyParts.push_back(TileIVAssign.get());
BodyParts.append(LoopHelper.Updates.begin(), LoopHelper.Updates.end());
if (auto *SourceCXXFor = dyn_cast<CXXForRangeStmt>(LoopStmt))
BodyParts.push_back(SourceCXXFor->getLoopVarStmt());
diff --git a/clang/test/OpenMP/interchange_codegen.cpp b/clang/test/OpenMP/interchange_codegen.cpp
index 8e833c9df324c..b062d42c9f162 100644
--- a/clang/test/OpenMP/interchange_codegen.cpp
+++ b/clang/test/OpenMP/interchange_codegen.cpp
@@ -123,6 +123,7 @@ extern "C" void foo10() {
#endif /* HEADER */
+
// CHECK1-LABEL: define {{[^@]+}}@body
// CHECK1-SAME: (...) #[[ATTR0:[0-9]+]] {
// CHECK1-NEXT: entry:
@@ -156,7 +157,7 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP4]]
// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: ret void
//
@@ -262,14 +263,14 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add i32 [[TMP28]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP5:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND16]], !llvm.loop [[LOOP4:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: br label [[FOR_INC22:%.*]]
// CHECK1: for.inc22:
// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4
// CHECK1-NEXT: [[INC23:%.*]] = add i32 [[TMP29]], 1
// CHECK1-NEXT: store i32 [[INC23]], ptr [[DOTPERMUTED_0_IV_J]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
// CHECK1: for.end24:
// CHECK1-NEXT: ret void
//
@@ -342,7 +343,7 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
@@ -439,7 +440,7 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP14]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_1_IV_I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK1: omp.body.continue:
@@ -754,28 +755,28 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTPERMUTED_3_IV_I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTPERMUTED_3_IV_I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND11]], !llvm.loop [[LOOP9:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND11]], !llvm.loop [[LOOP8:![0-9]+]]
// CHECK1: for.end:
// CHECK1-NEXT: br label [[FOR_INC16:%.*]]
// CHECK1: for.inc16:
// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTPERMUTED_2_IV_L]], align 4
// CHECK1-NEXT: [[INC17:%.*]] = add nsw i32 [[TMP13]], 1
// CHECK1-NEXT: store i32 [[INC17]], ptr [[DOTPERMUTED_2_IV_L]], align 4
-// CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP10:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP9:![0-9]+]]
// CHECK1: for.end18:
// CHECK1-NEXT: br label [[FOR_INC19:%.*]]
// CHECK1: for.inc19:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTPERMUTED_1_IV_K]], align 4
// CHECK1-NEXT: [[INC20:%.*]] = add nsw i32 [[TMP14]], 1
// CHECK1-NEXT: store i32 [[INC20]], ptr [[DOTPERMUTED_1_IV_K]], align 4
-// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP11:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND1]], !llvm.loop [[LOOP10:![0-9]+]]
// CHECK1: for.end21:
// CHECK1-NEXT: br label [[FOR_INC22:%.*]]
// CHECK1: for.inc22:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTPERMUTED_0_IV_J]], align 4
// CHECK1-NEXT: [[INC23:%.*]] = add nsw i32 [[TMP15]], 1
// CHECK1-NEXT: store i32 [[INC23]], ptr [[DOTPERMUTED_0_IV_J]], align 4
-// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]]
// CHECK1: for.end24:
// CHECK1-NEXT: ret void
//
@@ -810,22 +811,21 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_9:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTNEW_STEP10:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_11:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTTILE_0_IV_K:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_14:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_16:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_17:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_22:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTCAPTURE_EXPR_16:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTFLOOR_0_IV_K:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTILE_0_IV_K:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTTILE_CNT_0_IV_K:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[I49:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[J50:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTFLOOR_0_IV_K51:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTTILE_0_IV_K52:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[I35:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[J36:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTFLOOR_0_IV_K37:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTTILE_CNT_0_IV_K38:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]])
// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
// CHECK1-NEXT: store i32 [[END]], ptr [[END_ADDR]], align 4
@@ -863,630 +863,452 @@ extern "C" void foo10() {
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP15]], 1
// CHECK1-NEXT: store i32 [[ADD15]], ptr [[DOTCAPTURE_EXPR_14]], align 4
-// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[_TMP2]], align 4
-// CHECK1-NEXT: store i32 [[TMP16]], ptr [[DOTCAPTURE_EXPR_16]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP17]], 1
-// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[_TMP2]], align 4
-// CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP18]], 32
-// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD18]], [[ADD19]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_11]], align 4
-// CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP19]], 1
-// CHECK1-NEXT: br label [[COND_END:%.*]]
-// CHECK1: cond.false:
-// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[_TMP2]], align 4
-// CHECK1-NEXT: [[ADD21:%.*]] = add i32 [[TMP20]], 32
-// CHECK1-NEXT: br label [[COND_END]]
-// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD20]], [[COND_TRUE]] ], [ [[ADD21]], [[COND_FALSE]] ]
-// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTCAPTURE_EXPR_17]], align 4
-// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_4]], align 4
-// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
-// CHECK1-NEXT: [[SUB23:%.*]] = sub i32 [[TMP21]], [[TMP22]]
-// CHECK1-NEXT: [[SUB24:%.*]] = sub i32 [[SUB23]], 1
-// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT: [[ADD25:%.*]] = add i32 [[SUB24]], [[TMP23]]
-// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
-// CHECK1-NEXT: [[DIV26:%.*]] = udiv i32 [[ADD25]], [[TMP24]]
-// CHECK1-NEXT: [[CONV:%.*]] = zext i32 [[DIV26]] to i64
-// CHECK1-NEXT: [[TMP25:%.*]] = load i32,...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/191114
More information about the Mlir-commits
mailing list