[Openmp-commits] [openmp] ddde069 - [OpenMP]Fix PR55970: Miscompile of collapse(3) with non-rectangular loop nest.
Alexey Bataev via Openmp-commits
openmp-commits at lists.llvm.org
Tue Feb 14 10:40:52 PST 2023
Author: Alexey Bataev
Date: 2023-02-14T10:39:04-08:00
New Revision: ddde06906be11ea540870d5c0e1b3336a4460612
URL: https://github.com/llvm/llvm-project/commit/ddde06906be11ea540870d5c0e1b3336a4460612
DIFF: https://github.com/llvm/llvm-project/commit/ddde06906be11ea540870d5c0e1b3336a4460612.diff
LOG: [OpenMP]Fix PR55970: Miscompile of collapse(3) with non-rectangular loop nest.
The calculated lower bound must be assigned back to the temporary variable;
otherwise, an incorrect value (the upper bound instead of the lower bound)
might be used.
Differential Revision: https://reviews.llvm.org/D144015
Added:
openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c
Modified:
clang/lib/Sema/SemaOpenMP.cpp
clang/test/OpenMP/for_codegen.cpp
clang/test/OpenMP/tile_codegen.cpp
Removed:
################################################################################
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 368f5ecfa23d5..20e75fb710676 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -4668,11 +4668,12 @@ static DeclRefExpr *buildCapture(Sema &S, ValueDecl *D, Expr *CaptureExpr,
CaptureExpr->getExprLoc());
}
-static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref) {
+static ExprResult buildCapture(Sema &S, Expr *CaptureExpr, DeclRefExpr *&Ref,
+ StringRef Name) {
CaptureExpr = S.DefaultLvalueConversion(CaptureExpr).get();
if (!Ref) {
OMPCapturedExprDecl *CD = buildCaptureDecl(
- S, &S.getASTContext().Idents.get(".capture_expr."), CaptureExpr,
+ S, &S.getASTContext().Idents.get(Name), CaptureExpr,
/*WithInit=*/true, S.CurContext, /*AsExpression=*/true);
Ref = buildDeclRefExpr(S, CD, CD->getType().getNonReferenceType(),
CaptureExpr->getExprLoc());
@@ -8441,7 +8442,8 @@ bool OpenMPIterationSpaceChecker::checkAndSetInc(Expr *S) {
static ExprResult
tryBuildCapture(Sema &SemaRef, Expr *Capture,
- llvm::MapVector<const Expr *, DeclRefExpr *> &Captures) {
+ llvm::MapVector<const Expr *, DeclRefExpr *> &Captures,
+ StringRef Name = ".capture_expr.") {
if (SemaRef.CurContext->isDependentContext() || Capture->containsErrors())
return Capture;
if (Capture->isEvaluatable(SemaRef.Context, Expr::SE_AllowSideEffects))
@@ -8450,9 +8452,9 @@ tryBuildCapture(Sema &SemaRef, Expr *Capture,
/*AllowExplicit=*/true);
auto I = Captures.find(Capture);
if (I != Captures.end())
- return buildCapture(SemaRef, Capture, I->second);
+ return buildCapture(SemaRef, Capture, I->second, Name);
DeclRefExpr *Ref = nullptr;
- ExprResult Res = buildCapture(SemaRef, Capture, Ref);
+ ExprResult Res = buildCapture(SemaRef, Capture, Ref, Name);
Captures[Capture] = Ref;
return Res;
}
@@ -8464,7 +8466,7 @@ calculateNumIters(Sema &SemaRef, Scope *S, SourceLocation DefaultLoc,
Expr *Lower, Expr *Upper, Expr *Step, QualType LCTy,
bool TestIsStrictOp, bool RoundToStep,
llvm::MapVector<const Expr *, DeclRefExpr *> &Captures) {
- ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures);
+ ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures, ".new_step");
if (!NewStep.isUsable())
return nullptr;
llvm::APSInt LRes, SRes;
@@ -8640,8 +8642,8 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
return nullptr;
Expr *LBVal = LB;
Expr *UBVal = UB;
- // LB = TestIsLessOp.getValue() ? min(LB(MinVal), LB(MaxVal)) :
- // max(LB(MinVal), LB(MaxVal))
+ // OuterVar = (LB = TestIsLessOp.getValue() ? min(LB(MinVal), LB(MaxVal)) :
+ // max(LB(MinVal), LB(MaxVal)))
if (InitDependOnLC) {
const LoopIterationSpace &IS = ResultIterSpaces[*InitDependOnLC - 1];
if (!IS.MinValue || !IS.MaxValue)
@@ -8686,8 +8688,10 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
if (!LBMaxVal.isUsable())
return nullptr;
- Expr *LBMin = tryBuildCapture(SemaRef, LBMinVal.get(), Captures).get();
- Expr *LBMax = tryBuildCapture(SemaRef, LBMaxVal.get(), Captures).get();
+ Expr *LBMin =
+ tryBuildCapture(SemaRef, LBMinVal.get(), Captures, ".lb_min").get();
+ Expr *LBMax =
+ tryBuildCapture(SemaRef, LBMaxVal.get(), Captures, ".lb_max").get();
if (!LBMin || !LBMax)
return nullptr;
// LB(MinVal) < LB(MaxVal)
@@ -8696,7 +8700,8 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
if (!MinLessMaxRes.isUsable())
return nullptr;
Expr *MinLessMax =
- tryBuildCapture(SemaRef, MinLessMaxRes.get(), Captures).get();
+ tryBuildCapture(SemaRef, MinLessMaxRes.get(), Captures, ".min_less_max")
+ .get();
if (!MinLessMax)
return nullptr;
if (*TestIsLessOp) {
@@ -8716,6 +8721,12 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
return nullptr;
LBVal = MaxLB.get();
}
+ // OuterVar = LB
+ LBMinVal =
+ SemaRef.BuildBinOp(S, DefaultLoc, BO_Assign, IS.CounterVar, LBVal);
+ if (!LBMinVal.isUsable())
+ return nullptr;
+ LBVal = LBMinVal.get();
}
// UB = TestIsLessOp.getValue() ? max(UB(MinVal), UB(MaxVal)) :
// min(UB(MinVal), UB(MaxVal))
@@ -8763,8 +8774,10 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
if (!UBMaxVal.isUsable())
return nullptr;
- Expr *UBMin = tryBuildCapture(SemaRef, UBMinVal.get(), Captures).get();
- Expr *UBMax = tryBuildCapture(SemaRef, UBMaxVal.get(), Captures).get();
+ Expr *UBMin =
+ tryBuildCapture(SemaRef, UBMinVal.get(), Captures, ".ub_min").get();
+ Expr *UBMax =
+ tryBuildCapture(SemaRef, UBMaxVal.get(), Captures, ".ub_max").get();
if (!UBMin || !UBMax)
return nullptr;
// UB(MinVal) > UB(MaxVal)
@@ -8772,8 +8785,9 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
SemaRef.BuildBinOp(S, DefaultLoc, BO_GT, UBMin, UBMax);
if (!MinGreaterMaxRes.isUsable())
return nullptr;
- Expr *MinGreaterMax =
- tryBuildCapture(SemaRef, MinGreaterMaxRes.get(), Captures).get();
+ Expr *MinGreaterMax = tryBuildCapture(SemaRef, MinGreaterMaxRes.get(),
+ Captures, ".min_greater_max")
+ .get();
if (!MinGreaterMax)
return nullptr;
if (*TestIsLessOp) {
@@ -8796,8 +8810,8 @@ Expr *OpenMPIterationSpaceChecker::buildNumIterations(
}
Expr *UBExpr = *TestIsLessOp ? UBVal : LBVal;
Expr *LBExpr = *TestIsLessOp ? LBVal : UBVal;
- Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures).get();
- Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures).get();
+ Expr *Upper = tryBuildCapture(SemaRef, UBExpr, Captures, ".upper").get();
+ Expr *Lower = tryBuildCapture(SemaRef, LBExpr, Captures, ".lower").get();
if (!Upper || !Lower)
return nullptr;
@@ -8891,7 +8905,7 @@ std::pair<Expr *, Expr *> OpenMPIterationSpaceChecker::buildMinMaxValues(
if (!Diff.isUsable())
return std::make_pair(nullptr, nullptr);
- ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures);
+ ExprResult NewStep = tryBuildCapture(SemaRef, Step, Captures, ".new_step");
if (!NewStep.isUsable())
return std::make_pair(nullptr, nullptr);
Diff = SemaRef.BuildBinOp(S, DefaultLoc, BO_Mul, Diff.get(), NewStep.get());
diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp
index d1710fd295194..761618c4b5420 100644
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -78,6 +78,7 @@ void loop_with_counter_collapse() {
// CHECK: br label %[[EXIT]]
// CHECK: [[EXIT]]:
// CHECK: [[J_LB_VAL:%.+]] = phi i32 [ [[J_LB_MIN_VAL]], %[[TRUE]] ], [ [[J_LB_MAX_VAL]], %[[FALSE]] ]
+ // CHECK: store i32 [[J_LB_VAL]], ptr [[I_TMP]],
// CHECK: store i32 [[J_LB_VAL]], ptr [[J_LB:%.+]],
// CHECK: [[J_UB_VAL:%.+]] = load i32, ptr [[J_UB]],
// CHECK: [[J_LB_VAL:%.+]] = load i32, ptr [[J_LB]],
diff --git a/clang/test/OpenMP/tile_codegen.cpp b/clang/test/OpenMP/tile_codegen.cpp
index adf2e540b6030..c2b26eaf6f397 100644
--- a/clang/test/OpenMP/tile_codegen.cpp
+++ b/clang/test/OpenMP/tile_codegen.cpp
@@ -179,8 +179,8 @@ extern "C" void tfoo7() {
// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
// CHECK1-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
@@ -191,56 +191,56 @@ extern "C" void tfoo7() {
// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[END_ADDR]], align 4
// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTNEW_STEP]], align 4
// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]]
-// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1
-// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], [[TMP5]]
-// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP5]]
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
// CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP6]]
-// CHECK1-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1
-// CHECK1-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK1-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK1-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4
// CHECK1-NEXT: br label [[FOR_COND:%.*]]
// CHECK1: for.cond:
// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
-// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
-// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]]
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK1-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD5]]
+// CHECK1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END17:%.*]]
// CHECK1: for.body:
// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
// CHECK1-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND7:%.*]]
-// CHECK1: for.cond7:
+// CHECK1-NEXT: br label [[FOR_COND6:%.*]]
+// CHECK1: for.cond6:
// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP11]], 1
+// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1
// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP12]], 5
-// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[ADD8]], [[ADD9]]
-// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5
+// CHECK1-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]]
+// CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK1: cond.true:
-// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP13]], 1
+// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP13]], 1
// CHECK1-NEXT: br label [[COND_END:%.*]]
// CHECK1: cond.false:
// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 5
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5
// CHECK1-NEXT: br label [[COND_END]]
// CHECK1: cond.end:
-// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ]
-// CHECK1-NEXT: [[CMP13:%.*]] = icmp ult i32 [[TMP10]], [[COND]]
-// CHECK1-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]]
-// CHECK1: for.body14:
+// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ]
+// CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP10]], [[COND]]
+// CHECK1-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label [[FOR_END:%.*]]
+// CHECK1: for.body13:
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
// CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], [[TMP17]]
-// CHECK1-NEXT: [[ADD15:%.*]] = add i32 [[TMP15]], [[MUL]]
-// CHECK1-NEXT: store i32 [[ADD15]], ptr [[I]], align 4
+// CHECK1-NEXT: [[ADD14:%.*]] = add i32 [[TMP15]], [[MUL]]
+// CHECK1-NEXT: store i32 [[ADD14]], ptr [[I]], align 4
// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4
// CHECK1-NEXT: call void (...) @body(i32 noundef [[TMP18]])
// CHECK1-NEXT: br label [[FOR_INC:%.*]]
@@ -248,15 +248,15 @@ extern "C" void tfoo7() {
// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK1-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK1-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK1-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK1: for.end:
-// CHECK1-NEXT: br label [[FOR_INC16:%.*]]
-// CHECK1: for.inc16:
+// CHECK1-NEXT: br label [[FOR_INC15:%.*]]
+// CHECK1: for.inc15:
// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 5
-// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTFLOOR_0_IV_I]], align 4
+// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5
+// CHECK1-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4
// CHECK1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK1: for.end18:
+// CHECK1: for.end17:
// CHECK1-NEXT: ret void
//
//
@@ -1173,8 +1173,8 @@ extern "C" void tfoo7() {
// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTNEW_STEP:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4
-// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTFLOOR_0_IV_I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTILE_0_IV_I:%.*]] = alloca i32, align 4
// CHECK2-NEXT: store i32 [[START]], ptr [[START_ADDR]], align 4
@@ -1185,56 +1185,56 @@ extern "C" void tfoo7() {
// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[END_ADDR]], align 4
// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[STEP_ADDR]], align 4
-// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: store i32 [[TMP2]], ptr [[DOTNEW_STEP]], align 4
// CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4
// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP3]], [[TMP4]]
-// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[SUB]], 1
-// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
-// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB4]], [[TMP5]]
-// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
+// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], [[TMP5]]
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP6]]
-// CHECK2-NEXT: [[SUB5:%.*]] = sub i32 [[DIV]], 1
-// CHECK2-NEXT: store i32 [[SUB5]], ptr [[DOTCAPTURE_EXPR_3]], align 4
+// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
+// CHECK2-NEXT: store i32 [[SUB4]], ptr [[DOTCAPTURE_EXPR_2]], align 4
// CHECK2-NEXT: store i32 0, ptr [[DOTFLOOR_0_IV_I]], align 4
// CHECK2-NEXT: br label [[FOR_COND:%.*]]
// CHECK2: for.cond:
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP8]], 1
-// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD6]]
-// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END18:%.*]]
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD5:%.*]] = add i32 [[TMP8]], 1
+// CHECK2-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP7]], [[ADD5]]
+// CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END17:%.*]]
// CHECK2: for.body:
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
// CHECK2-NEXT: store i32 [[TMP9]], ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK2-NEXT: br label [[FOR_COND7:%.*]]
-// CHECK2: for.cond7:
+// CHECK2-NEXT: br label [[FOR_COND6:%.*]]
+// CHECK2: for.cond6:
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP11]], 1
+// CHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD7:%.*]] = add i32 [[TMP11]], 1
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP12]], 5
-// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i32 [[ADD8]], [[ADD9]]
-// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 5
+// CHECK2-NEXT: [[CMP9:%.*]] = icmp ult i32 [[ADD7]], [[ADD8]]
+// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
-// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4
-// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP13]], 1
+// CHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP13]], 1
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP14]], 5
+// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP14]], 5
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
-// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD11]], [[COND_TRUE]] ], [ [[ADD12]], [[COND_FALSE]] ]
-// CHECK2-NEXT: [[CMP13:%.*]] = icmp ult i32 [[TMP10]], [[COND]]
-// CHECK2-NEXT: br i1 [[CMP13]], label [[FOR_BODY14:%.*]], label [[FOR_END:%.*]]
-// CHECK2: for.body14:
+// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[ADD10]], [[COND_TRUE]] ], [ [[ADD11]], [[COND_FALSE]] ]
+// CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP10]], [[COND]]
+// CHECK2-NEXT: br i1 [[CMP12]], label [[FOR_BODY13:%.*]], label [[FOR_END:%.*]]
+// CHECK2: for.body13:
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4
// CHECK2-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4
+// CHECK2-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTNEW_STEP]], align 4
// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP16]], [[TMP17]]
-// CHECK2-NEXT: [[ADD15:%.*]] = add i32 [[TMP15]], [[MUL]]
-// CHECK2-NEXT: store i32 [[ADD15]], ptr [[I]], align 4
+// CHECK2-NEXT: [[ADD14:%.*]] = add i32 [[TMP15]], [[MUL]]
+// CHECK2-NEXT: store i32 [[ADD14]], ptr [[I]], align 4
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[I]], align 4
// CHECK2-NEXT: call void (...) @body(i32 noundef [[TMP18]])
// CHECK2-NEXT: br label [[FOR_INC:%.*]]
@@ -1242,15 +1242,15 @@ extern "C" void tfoo7() {
// CHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTTILE_0_IV_I]], align 4
// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP19]], 1
// CHECK2-NEXT: store i32 [[INC]], ptr [[DOTTILE_0_IV_I]], align 4
-// CHECK2-NEXT: br label [[FOR_COND7]], !llvm.loop [[LOOP6:![0-9]+]]
+// CHECK2-NEXT: br label [[FOR_COND6]], !llvm.loop [[LOOP6:![0-9]+]]
// CHECK2: for.end:
-// CHECK2-NEXT: br label [[FOR_INC16:%.*]]
-// CHECK2: for.inc16:
+// CHECK2-NEXT: br label [[FOR_INC15:%.*]]
+// CHECK2: for.inc15:
// CHECK2-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTFLOOR_0_IV_I]], align 4
-// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 5
-// CHECK2-NEXT: store i32 [[ADD17]], ptr [[DOTFLOOR_0_IV_I]], align 4
+// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP20]], 5
+// CHECK2-NEXT: store i32 [[ADD16]], ptr [[DOTFLOOR_0_IV_I]], align 4
// CHECK2-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]]
-// CHECK2: for.end18:
+// CHECK2: for.end17:
// CHECK2-NEXT: ret void
//
//
diff --git a/openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c b/openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c
new file mode 100644
index 0000000000000..770e4b8218792
--- /dev/null
+++ b/openmp/runtime/test/worksharing/for/omp_for_collapse_non_rectangular.c
@@ -0,0 +1,22 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+
+#define N 3
+
+int arr[N][N][N];
+int main() { // Exit status is the number of mismatching elements; 0 == pass.
+#pragma omp for collapse(3)
+  for (unsigned int i = 0; i < N; ++i)
+    for (unsigned int j = i; j < N; ++j) // lower bound depends on i
+      for (unsigned int k = j; k < N; ++k) // lower bound depends on j
+        arr[i][j][k] = 1;
+  int num_failed = 0;
+  for (unsigned int i = 0; i < N; ++i)
+    for (unsigned int j = 0; j < N; ++j)
+      for (unsigned int k = 0; k < N; ++k)
+        if (arr[i][j][k] != (j >= i && k >= j)) // expect 1 inside the nest, else 0
+          ++num_failed;
+
+  return num_failed;
+}
More information about the Openmp-commits
mailing list