[flang-commits] [flang] [flang][OpenMP] Implement collapse for imperfectly nested loops (PR #202435)
Caroline Newcombe via flang-commits
flang-commits at lists.llvm.org
Tue Jun 23 08:26:05 PDT 2026
https://github.com/cenewcombe updated https://github.com/llvm/llvm-project/pull/202435
>From 5225e3417bf4df48c4a0fa0e353d58ba17bc8b3a Mon Sep 17 00:00:00 2001
From: Caroline Newcombe <caroline.newcombe at hpe.com>
Date: Mon, 1 Jun 2026 10:31:10 -0500
Subject: [PATCH 1/3] [flang][OpenMP] Implement collapse for imperfectly nested
loops
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 165 ++++-
flang/lib/Lower/OpenMP/Utils.cpp | 3 +-
flang/lib/Semantics/check-omp-loop.cpp | 50 ++
flang/lib/Semantics/openmp-utils.cpp | 17 +-
.../Lower/OpenMP/collapse-imperfect-nest.f90 | 604 ++++++++++++++++++
flang/test/Semantics/OpenMP/do-collapse.f90 | 8 +-
.../OpenMP/do-concurrent-collapse-60.f90 | 4 +-
.../OpenMP/do-concurrent-collapse.f90 | 4 +-
flang/test/Semantics/OpenMP/do08.f90 | 15 -
flang/test/Semantics/OpenMP/do10.f90 | 2 +-
flang/test/Semantics/OpenMP/do13.f90 | 10 +-
flang/test/Semantics/OpenMP/do15.f90 | 9 -
flang/test/Semantics/OpenMP/do16.f90 | 6 -
flang/test/Semantics/OpenMP/do22.f90 | 127 +++-
.../OpenMP/doacross-nesting-omp52.f90 | 110 ++++
15 files changed, 1076 insertions(+), 58 deletions(-)
create mode 100644 flang/test/Lower/OpenMP/collapse-imperfect-nest.f90
create mode 100644 flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index ac75277e001da..f9eb59cbe0b95 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -36,6 +36,7 @@
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Parser/characters.h"
@@ -678,6 +679,145 @@ static void genNestedEvaluations(lower::AbstractConverter &converter,
converter.genEval(e);
}
+/// Emit the body of a collapsed loop nest, including any intervening code
+/// from imperfect nesting at intermediate levels (CLN relaxation, applied
+/// retroactively for all OMP versions).
+///
+/// Because omp.loop_nest places its entire body at the innermost nesting
+/// level, each run of intervening code is wrapped in a fir.if so that it only
+/// executes on the iterations where all inner induction variables are at
+/// their initial (intervening code before the nested loop) or final
+/// (intervening code after the nested loop) values.
+///
+/// \param [in] converter - PFT to MLIR conversion interface.
+/// \param [in] outerEval - the evaluation containing the outermost loop
+/// (typically the OpenMP construct evaluation).
+/// \param [in] collapseValue - number of loops being collapsed (>= 1).
+static void genCollapsedLoopNestBody(lower::AbstractConverter &converter,
+ lower::pft::Evaluation &outerEval,
+ int collapseValue) {
+ assert(collapseValue >= 1);
+ if (collapseValue == 1) {
+ genNestedEvaluations(converter, outerEval, /*collapseValue=*/1);
+ return;
+ }
+
+ fir::FirOpBuilder &firOpBuilder{converter.getFirOpBuilder()};
+ const mlir::Location loc{converter.getCurrentLocation()};
+
+ // The insertion block's parent op must be the enclosing omp.loop_nest; it
+ // supplies the induction variables, bounds and steps used by the guards.
+ auto loopNestOp{mlir::dyn_cast<mlir::omp::LoopNestOp>(
+ firOpBuilder.getInsertionBlock()->getParentOp())};
+ assert(loopNestOp && "expected to be inside omp.loop_nest");
+
+ // Collect before/after evaluations at each level except the innermost.
+ struct LevelInfo {
+ llvm::SmallVector<lower::pft::Evaluation *> before;
+ llvm::SmallVector<lower::pft::Evaluation *> after;
+ };
+ llvm::SmallVector<LevelInfo> levels;
+
+ lower::pft::Evaluation *curEval{&outerEval};
+ for (int i{0}; i < collapseValue - 1; ++i) {
+ lower::pft::Evaluation *const doEval{getNestedDoConstruct(*curEval)};
+ LevelInfo level;
+ bool pastDo{false};
+ for (lower::pft::Evaluation &e : doEval->getNestedEvaluations()) {
+ if (e.getIf<parser::NonLabelDoStmt>() || e.getIf<parser::EndDoStmt>())
+ continue;
+ // Semantics guarantees the only DoConstruct here is the next associated
+ // loop (non-associated DO loops are rejected as intervening code).
+ if (e.getIf<parser::DoConstruct>()) {
+ pastDo = true;
+ continue;
+ }
+ if (!pastDo)
+ level.before.push_back(&e);
+ else
+ level.after.push_back(&e);
+ }
+ levels.push_back(std::move(level));
+ curEval = doEval;
+ }
+
+ // Build a guard condition: the conjunction, over the induction variables
+ // of levels startLevel..endLevel-1, of "iv == target".
+ // For "before" guards (useLowerBound=true), target is lb (first iteration).
+ // For "after" guards (useLowerBound=false), target is last_iv where
+ // last_iv = lb + ((ub - lb) / step) * step, which accounts for non-unit
+ // steps where the IV may never exactly equal the upper bound.
+ auto buildGuard = [&](const int startLevel, const int endLevel,
+ const bool useLowerBound) -> mlir::Value {
+ mlir::Value cond{};
+ const auto lbs{loopNestOp.getLoopLowerBounds()};
+ const auto ubs{loopNestOp.getLoopUpperBounds()};
+ const auto steps{loopNestOp.getLoopSteps()};
+ for (int lvl{startLevel}; lvl < endLevel; ++lvl) {
+ const mlir::Value iv{loopNestOp.getRegion().getArgument(lvl)};
+ mlir::Value target;
+ if (useLowerBound) {
+ target = lbs[lvl];
+ } else {
+ // For a constant step of +1 or -1, the last IV value always equals ub.
+ const auto constStep{fir::getIntIfConstant(steps[lvl])};
+ if (constStep && (*constStep == 1 || *constStep == -1)) {
+ target = ubs[lvl];
+ } else {
+ // Otherwise emit ops for last_iv = lb + ((ub - lb) / step) * step.
+ const mlir::Value lb{lbs[lvl]};
+ const mlir::Value ub{ubs[lvl]};
+ const mlir::Value step{steps[lvl]};
+ const mlir::Value range{
+ mlir::arith::SubIOp::create(firOpBuilder, loc, ub, lb)};
+ const mlir::Value tripMinus1{
+ mlir::arith::DivSIOp::create(firOpBuilder, loc, range, step)};
+ const mlir::Value lastOffset{
+ mlir::arith::MulIOp::create(firOpBuilder, loc, tripMinus1, step)};
+ target =
+ mlir::arith::AddIOp::create(firOpBuilder, loc, lb, lastOffset);
+ }
+ }
+ const mlir::Value cmp = mlir::arith::CmpIOp::create(
+ firOpBuilder, loc, mlir::arith::CmpIPredicate::eq, iv, target);
+ if (!cond)
+ cond = cmp;
+ else
+ cond = mlir::arith::AndIOp::create(firOpBuilder, loc, cond, cmp);
+ }
+ return cond;
+ };
+
+ // Emit "before" code, outermost level first, guarded by inner IVs == lbs.
+ for (int i{0}; i < static_cast<int>(levels.size()); ++i) {
+ if (levels[i].before.empty())
+ continue;
+ const mlir::Value guard{
+ buildGuard(i + 1, collapseValue, /*useLowerBound=*/true)};
+ auto ifOp{fir::IfOp::create(firOpBuilder, loc, guard, /*else*/ false)};
+ firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ for (auto *e : levels[i].before)
+ converter.genEval(*e);
+ firOpBuilder.setInsertionPointAfter(ifOp);
+ }
+
+ // Emit the innermost loop body, unguarded.
+ genNestedEvaluations(converter, *curEval, /*collapseValue=*/1);
+
+ // Emit "after" code at each level (innermost first), guarded by
+ // inner IVs == last iteration values (accounts for non-unit steps).
+ for (int i{static_cast<int>(levels.size()) - 1}; i >= 0; --i) {
+ if (levels[i].after.empty())
+ continue;
+ const mlir::Value guard{
+ buildGuard(i + 1, collapseValue, /*useLowerBound=*/false)};
+ auto ifOp{fir::IfOp::create(firOpBuilder, loc, guard, /*else*/ false)};
+ firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+ for (auto *e : levels[i].after)
+ converter.genEval(*e);
+ firOpBuilder.setInsertionPointAfter(ifOp);
+ }
+}
+
static fir::GlobalOp globalInitialization(lower::AbstractConverter &converter,
fir::FirOpBuilder &firOpBuilder,
const semantics::Symbol &sym,
@@ -1233,6 +1373,13 @@ struct OpWithBodyGenInfo {
return *this;
}
+ OpWithBodyGenInfo &setCollapseInfo(int value,
+ lower::pft::Evaluation &outerEval) {
+ collapseValue = value; // Collapse depth handed to the body emitter.
+ outerCollapseEval = &outerEval; // Address only; the evaluation is not copied.
+ return *this; // Lets the call be chained with the other setters.
+ }
+
/// [inout] converter to use for the clauses.
lower::AbstractConverter &converter;
/// [in] Symbol table
@@ -1261,6 +1408,10 @@ struct OpWithBodyGenInfo {
bool genSkeletonOnly = false;
/// [in] enables handling of privatized variable unless set to `false`.
bool privatize = true;
+ /// [in] if set, outermost evaluation and collapse depth for emitting
+ /// intervening code from imperfect collapsed loop nests.
+ lower::pft::Evaluation *outerCollapseEval{nullptr};
+ int collapseValue{0};
};
/// Create the body (block) for an OpenMP Operation.
@@ -1355,7 +1506,11 @@ static void createBodyOfOp(mlir::Operation &op, const OpWithBodyGenInfo &info,
firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back());
auto *temp = lower::genOpenMPTerminator(firOpBuilder, &op, info.loc);
firOpBuilder.setInsertionPointAfter(marker);
- genNestedEvaluations(info.converter, info.eval);
+ if (info.outerCollapseEval)
+ genCollapsedLoopNestBody(info.converter, *info.outerCollapseEval,
+ info.collapseValue);
+ else
+ genNestedEvaluations(info.converter, info.eval);
temp->erase();
}
}
@@ -2192,7 +2347,8 @@ genLoopNestOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
directive)
.setClauses(&item->clauses)
.setDataSharingProcessor(&dsp)
- .setGenRegionEntryCb(ivCallback),
+ .setGenRegionEntryCb(ivCallback)
+ .setCollapseInfo(nestValue, eval),
queue, item, clauseOps);
}
@@ -2282,7 +2438,7 @@ static void genCanonicalLoopNest(
// Step 1: Loop prologues
// Computing the trip count must happen before entering the outermost loop
lower::pft::Evaluation *innermostEval = nestedEval;
- for ([[maybe_unused]] auto iv : ivs) {
+ for (std::size_t i{0}; i < ivs.size(); ++i) {
if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
// OpenMP specifies DO CONCURRENT only with the `!omp loop` construct.
// Will need to add special cases for this combination.
@@ -2364,7 +2520,8 @@ static void genCanonicalLoopNest(
mlir::Value cli = newcli.getResult();
clis.push_back(cli);
- innermostEval = &*std::next(innermostEval->getNestedEvaluations().begin());
+ if (i + 1 < ivs.size())
+ innermostEval = getNestedDoConstruct(*innermostEval);
}
// Step 2: Create nested canoncial loops
diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp
index 0ef824df8455b..d75e3fb2608bb 100644
--- a/flang/lib/Lower/OpenMP/Utils.cpp
+++ b/flang/lib/Lower/OpenMP/Utils.cpp
@@ -717,7 +717,8 @@ pft::Evaluation *getNestedDoConstruct(pft::Evaluation &eval) {
// constructs between the directive and the actual do-loop nest.
if (nested.getIf<parser::OpenMPConstruct>())
return getNestedDoConstruct(nested);
- assert(false && "Unexpected construct in the nested evaluations");
+ // Skip valid intervening code in imperfect loop nests.
+ continue;
}
llvm_unreachable("Expected do loop to be in the nested evaluations");
}
diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp
index 6c816d71b35a8..a10d6e4fc93e1 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -111,6 +111,49 @@ class AssociatedLoopChecker {
std::int64_t level_;
std::map<std::string, std::int64_t> constructNamesAndLevels_;
};
+
+/// Parse-tree visitor that sets `found` when it sees an ordered directive
+/// with a doacross clause (or the pre-5.2 depend(sink/source) equivalent).
+/// Does not descend into nested OpenMP block or loop constructs, since
+/// doacross directives inside them bind to an inner worksharing-loop
+/// region, not the one being checked.
+struct DoacrossFinder {
+ bool found{false}; // Result of the walk; only ever changed to true.
+ template <typename T> bool Pre(const T &) { return !found; } // Stop descending after a hit.
+ template <typename T> void Post(const T &) {}
+
+ // Stop descent into nested OpenMP regions that create new binding contexts.
+ bool Pre(const parser::OmpBlockConstruct &) { return false; }
+ bool Pre(const parser::OpenMPLoopConstruct &) { return false; }
+
+ void Post(const parser::OpenMPSimpleStandaloneConstruct &x) {
+ if (found) {
+ return;
+ }
+ if (x.v.DirId() != llvm::omp::Directive::OMPD_ordered) { // Only ORDERED is of interest.
+ return;
+ }
+ for (const auto &clause : x.v.Clauses().v) {
+ if (std::holds_alternative<parser::OmpClause::Doacross>(clause.u)) {
+ found = true;
+ return;
+ }
+ if (const auto *depend{
+ std::get_if<parser::OmpClause::Depend>(&clause.u)}) {
+ if (std::holds_alternative<parser::OmpDoacross>(depend->v.u)) { // DEPEND holding a doacross payload.
+ found = true;
+ return;
+ }
+ }
+ }
+ }
+};
+
+bool ContainsDoacrossDirective(const parser::Block &block) {
+ DoacrossFinder finder; // Starts with found == false.
+ parser::Walk(block, finder); // Visit the block; the finder records any hit.
+ return finder.found; // True iff the walk flagged an ordered doacross.
+}
} // namespace
namespace Fortran::semantics {
@@ -326,6 +369,13 @@ void OmpStructureChecker::CheckNestedConstruct(
// Check requirements on nest depth.
auto [needDepth, needPerfect]{
GetAffectedNestDepthWithReason(beginSpec, version)};
+
+ // In OpenMP 5.2+, perfect nesting is only required for doacross loop
+ // nests (those whose body contains ordered doacross directives).
+ if (!needPerfect && version >= 52 && ContainsDoacrossDirective(body)) {
+ needPerfect = true;
+ }
+
auto &[haveSema, havePerf]{sequence.depth()};
auto haveDepth{needPerfect ? havePerf : haveSema};
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index 51dd08d0924b1..edff3cb3e9cfd 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -990,10 +990,19 @@ std::pair<WithReason<int64_t>, bool> GetAffectedNestDepthWithReason(
oreason = Reason();
}
if (ccount < ocount) {
- // `ocount` cannot be std::nullopt here (C++ std guarantee).
- return {{ocount.value_or(1), std::move(oreason)}, true};
- }
- return {{ccount.value_or(1), std::move(creason)}, true};
+ // Prior to 5.2, ordered(n) requires perfect nesting unconditionally.
+ // In 5.2+, perfect nesting is required only for doacross (checked later
+ // with ContainsDoacrossDirective).
+ return {{ocount.value_or(1), std::move(oreason)}, version < 52};
+ }
+ // Prior to 5.2, ordered(n) requires perfect nesting.
+ // In 5.2+, only doacross nests require it (checked separately).
+ // CLN relaxation for collapse is applied retroactively for all versions.
+ bool needPerfect{false};
+ if (version < 52) {
+ needPerfect = ocount.has_value();
+ }
+ return {{ccount.value_or(1), std::move(creason)}, needPerfect};
}
if (IsLoopTransforming(dir)) {
diff --git a/flang/test/Lower/OpenMP/collapse-imperfect-nest.f90 b/flang/test/Lower/OpenMP/collapse-imperfect-nest.f90
new file mode 100644
index 0000000000000..9fe0582ce66cd
--- /dev/null
+++ b/flang/test/Lower/OpenMP/collapse-imperfect-nest.f90
@@ -0,0 +1,604 @@
+! Test lowering of imperfectly nested collapse loops (CLN relaxation).
+! Intervening code is guarded by IV comparisons to restore correct
+! execution frequency and ordering within the flat omp.loop_nest body.
+
+! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s
+
+! CHECK-LABEL: func.func @_QPcollapse2_imperfect
+subroutine collapse2_imperfect(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ x = x + 1
+ do j = 1, n
+ x = x + j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{.*}}, %[[LB_J:.*]]) to
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Guard: j == lower_bound (before code executes once per i)
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LB_J]] : i32
+! CHECK: fir.if %[[CMP]] {
+! Intervening code: x = x + 1
+! CHECK: %[[X1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[C1:.*]] = arith.constant 1 : i32
+! CHECK: %[[ADD1:.*]] = arith.addi %[[X1]], %[[C1]] : i32
+! CHECK: hlfir.assign %[[ADD1]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Innermost body: x = x + j
+! CHECK: %[[X2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[JVAL:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADD2:.*]] = arith.addi %[[X2]], %[[JVAL]] : i32
+! CHECK: hlfir.assign %[[ADD2]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: omp.yield
+
+! CHECK-LABEL: func.func @_QPcollapse3_imperfect
+subroutine collapse3_imperfect(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j, k
+
+ !$omp do collapse(3)
+ do i = 1, n
+ x = x + i
+ do j = 1, n
+ x = x + j
+ do k = 1, n
+ x = x + k
+ end do
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I3:.*]], %[[J3:.*]], %[[K3:.*]]) : i32 =
+! CHECK-SAME: (%{{.*}}, %[[LB_J3:.*]], %[[LB_K3:.*]]) to
+! CHECK: hlfir.assign %[[I3]]
+! CHECK: hlfir.assign %[[J3]]
+! CHECK: hlfir.assign %[[K3]]
+! Guard: j == lb_j AND k == lb_k (level 0 before code, once per i)
+! CHECK: %[[CMP_J:.*]] = arith.cmpi eq, %[[J3]], %[[LB_J3]] : i32
+! CHECK: %[[CMP_K1:.*]] = arith.cmpi eq, %[[K3]], %[[LB_K3]] : i32
+! CHECK: %[[AND1:.*]] = arith.andi %[[CMP_J]], %[[CMP_K1]] : i1
+! CHECK: fir.if %[[AND1]] {
+! Intervening code at level 0: x = x + i
+! CHECK: %[[XI:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[IVAL:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDI:.*]] = arith.addi %[[XI]], %[[IVAL]] : i32
+! CHECK: hlfir.assign %[[ADDI]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Guard: k == lb_k (level 1 before code, once per (i,j))
+! CHECK: %[[CMP_K2:.*]] = arith.cmpi eq, %[[K3]], %[[LB_K3]] : i32
+! CHECK: fir.if %[[CMP_K2]] {
+! Intervening code at level 1: x = x + j
+! CHECK: %[[XJ:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[JVAL3:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDJ:.*]] = arith.addi %[[XJ]], %[[JVAL3]] : i32
+! CHECK: hlfir.assign %[[ADDJ]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Innermost body: x = x + k
+! CHECK: %[[XK:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[KVAL:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDK:.*]] = arith.addi %[[XK]], %[[KVAL]] : i32
+! CHECK: hlfir.assign %[[ADDK]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: omp.yield
+
+! CHECK-LABEL: func.func @_QPcollapse2_both_sides
+subroutine collapse2_both_sides(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp simd collapse(2)
+ do i = 1, n
+ x = x + 1
+ do j = 1, n
+ x = x + j
+ end do
+ call ext_sub(x)
+ end do
+ !$omp end simd
+end subroutine
+
+! CHECK: omp.simd
+! CHECK-NEXT: omp.loop_nest (%[[I4:.*]], %[[J4:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J4:[^)]*]]) to (%{{[^)]*}}, %[[UB_J4:[^)]*]])
+! CHECK: hlfir.assign %[[I4]]
+! CHECK: hlfir.assign %[[J4]]
+! Guard: j == lower_bound (before code)
+! CHECK: %[[CMP_B:.*]] = arith.cmpi eq, %[[J4]], %[[LB_J4]] : i32
+! CHECK: fir.if %[[CMP_B]] {
+! Intervening code before inner loop: x = x + 1
+! CHECK: %[[XB:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[CB:.*]] = arith.constant 1 : i32
+! CHECK: %[[ADDB:.*]] = arith.addi %[[XB]], %[[CB]] : i32
+! CHECK: hlfir.assign %[[ADDB]] to %{{.*}} : i32, !fir.ref<i32>
+! CHECK: }
+! Innermost body: x = x + j
+! CHECK: %[[XIN:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[JIN:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[ADDIN:.*]] = arith.addi %[[XIN]], %[[JIN]] : i32
+! CHECK: hlfir.assign %[[ADDIN]] to %{{.*}} : i32, !fir.ref<i32>
+! Guard: j == upper_bound (after code)
+! CHECK: %[[CMP_A:.*]] = arith.cmpi eq, %[[J4]], %[[UB_J4]] : i32
+! CHECK: fir.if %[[CMP_A]] {
+! Intervening code after inner loop: call ext_sub(x)
+! CHECK: fir.call @_QPext_sub
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(3) with both before and after code at multiple levels.
+! CHECK-LABEL: func.func @_QPcollapse3_both_sides
+subroutine collapse3_both_sides(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j, k
+
+ !$omp do collapse(3)
+ do i = 1, n
+ x = x + i
+ do j = 1, n
+ x = x + j
+ do k = 1, n
+ x = x + k
+ end do
+ x = x - j
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]], %[[K:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]], %[[LB_K:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]], %[[UB_K:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! CHECK: hlfir.assign %[[K]]
+!
+! --- "before" guards (outermost level first) ---
+!
+! Guard level 0 before: j == lb_j AND k == lb_k (once per i)
+! CHECK: %[[CJ1:.*]] = arith.cmpi eq, %[[J]], %[[LB_J]] : i32
+! CHECK: %[[CK1:.*]] = arith.cmpi eq, %[[K]], %[[LB_K]] : i32
+! CHECK: %[[AND1:.*]] = arith.andi %[[CJ1]], %[[CK1]] : i1
+! CHECK: fir.if %[[AND1]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Guard level 1 before: k == lb_k (once per (i,j))
+! CHECK: %[[CK2:.*]] = arith.cmpi eq, %[[K]], %[[LB_K]] : i32
+! CHECK: fir.if %[[CK2]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+!
+! --- innermost body: x = x + k ---
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+!
+! --- "after" guards (innermost level first) ---
+!
+! Guard level 1 after: k == ub_k (once per (i,j))
+! CHECK: %[[CK3:.*]] = arith.cmpi eq, %[[K]], %[[UB_K]] : i32
+! CHECK: fir.if %[[CK3]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! Guard level 0 after: j == ub_j AND k == ub_k (once per i)
+! CHECK: %[[CJ2:.*]] = arith.cmpi eq, %[[J]], %[[UB_J]] : i32
+! CHECK: %[[CK4:.*]] = arith.cmpi eq, %[[K]], %[[UB_K]] : i32
+! CHECK: %[[AND2:.*]] = arith.andi %[[CJ2]], %[[CK4]] : i1
+! CHECK: fir.if %[[AND2]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(4) with imperfect nesting at some levels and perfectly nested
+! innermost loops. Level 0 (i->j) has before+after, level 1 (j->k) has before
+! only, level 2 (k->l) is perfectly nested. This exercises skipping empty levels.
+! CHECK-LABEL: func.func @_QPcollapse4_mixed
+subroutine collapse4_mixed(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j, k, l
+
+ !$omp do collapse(4)
+ do i = 1, n
+ x = x + i
+ do j = 1, n
+ x = x + j
+ do k = 1, n
+ do l = 1, n
+ x = x + l
+ end do
+ end do
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]], %[[K:.*]], %[[L:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]], %[[LB_K:[^)]*]], %[[LB_L:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]], %[[UB_K:[^)]*]], %[[UB_L:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! CHECK: hlfir.assign %[[K]]
+! CHECK: hlfir.assign %[[L]]
+!
+! --- "before" guards ---
+!
+! Guard level 0 before: j == lb_j AND k == lb_k AND l == lb_l (once per i)
+! CHECK: %[[CJ1:.*]] = arith.cmpi eq, %[[J]], %[[LB_J]] : i32
+! CHECK: %[[CK1:.*]] = arith.cmpi eq, %[[K]], %[[LB_K]] : i32
+! CHECK: %[[A1:.*]] = arith.andi %[[CJ1]], %[[CK1]] : i1
+! CHECK: %[[CL1:.*]] = arith.cmpi eq, %[[L]], %[[LB_L]] : i32
+! CHECK: %[[A2:.*]] = arith.andi %[[A1]], %[[CL1]] : i1
+! CHECK: fir.if %[[A2]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Guard level 1 before: k == lb_k AND l == lb_l (once per (i,j))
+! CHECK: %[[CK2:.*]] = arith.cmpi eq, %[[K]], %[[LB_K]] : i32
+! CHECK: %[[CL2:.*]] = arith.cmpi eq, %[[L]], %[[LB_L]] : i32
+! CHECK: %[[A3:.*]] = arith.andi %[[CK2]], %[[CL2]] : i1
+! CHECK: fir.if %[[A3]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Level 2 (k->l) is perfectly nested: no guard emitted.
+!
+! --- innermost body: x = x + l ---
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+!
+! --- "after" guards (innermost first) ---
+!
+! Level 2 after: empty (perfectly nested), no guard emitted.
+! Level 1 after: empty, no guard emitted.
+! Guard level 0 after: j == ub_j AND k == ub_k AND l == ub_l (once per i)
+! CHECK: %[[CJ2:.*]] = arith.cmpi eq, %[[J]], %[[UB_J]] : i32
+! CHECK: %[[CK3:.*]] = arith.cmpi eq, %[[K]], %[[UB_K]] : i32
+! CHECK: %[[A4:.*]] = arith.andi %[[CJ2]], %[[CK3]] : i1
+! CHECK: %[[CL3:.*]] = arith.cmpi eq, %[[L]], %[[UB_L]] : i32
+! CHECK: %[[A5:.*]] = arith.andi %[[A4]], %[[CL3]] : i1
+! CHECK: fir.if %[[A5]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(2) with only after-code (no before-code). Exercises the path
+! where levels[i].before.empty() is true and the "before" loop is entirely skipped.
+! CHECK-LABEL: func.func @_QPcollapse2_after_only
+subroutine collapse2_after_only(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ do j = 1, n
+ x = x + j
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %{{[^)]*}}) to (%{{[^)]*}}, %[[UB_J:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! No "before" guard emitted (level 0 before is empty).
+! Innermost body: x = x + j
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! Guard: j == upper_bound (after code)
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[UB_J]] : i32
+! CHECK: fir.if %[[CMP]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(2) with multiple statements inside a single guard. Verifies
+! that all evals in level.before land inside the same fir.if block.
+! CHECK-LABEL: func.func @_QPcollapse2_multi_stmt
+subroutine collapse2_multi_stmt(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ x = x + 1
+ x = x + i
+ do j = 1, n
+ x = x + j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]]) to
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Guard: j == lower_bound (before code, multiple statements in one guard)
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LB_J]] : i32
+! CHECK: fir.if %[[CMP]] {
+! First intervening statement: x = x + 1
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! Second intervening statement: x = x + i
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Innermost body: x = x + j
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: omp.yield
+
+! Test collapse(2) with non-unit lower bound on inner loop. Verifies the guard
+! compares against the actual loop lower bound operand (3, not 1).
+! CHECK-LABEL: func.func @_QPcollapse2_nonunit_lb
+subroutine collapse2_nonunit_lb(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ x = x + i
+ do j = 3, n
+ x = x + j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]]) to
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Guard: j == lb_j (lb_j is 3, not 1)
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LB_J]] : i32
+! CHECK: fir.if %[[CMP]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Innermost body: x = x + j
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: omp.yield
+
+! Test collapse(3) with after-only at level 0 and before-only at level 1.
+! Exercises the independent skip logic at each level in both emission loops.
+! CHECK-LABEL: func.func @_QPcollapse3_mixed_sides
+subroutine collapse3_mixed_sides(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j, k
+
+ !$omp do collapse(3)
+ do i = 1, n
+ do j = 1, n
+ x = x + j
+ do k = 1, n
+ x = x + k
+ end do
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]], %[[K:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %{{[^)]*}}, %[[LB_K:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]], %[[UB_K:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! CHECK: hlfir.assign %[[K]]
+! Level 0 before: empty (skipped).
+! Guard level 1 before: k == lb_k (once per (i,j))
+! CHECK: %[[CK:.*]] = arith.cmpi eq, %[[K]], %[[LB_K]] : i32
+! CHECK: fir.if %[[CK]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Innermost body: x = x + k
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! Level 1 after: empty (skipped).
+! Guard level 0 after: j == ub_j AND k == ub_k (once per i)
+! CHECK: %[[CJ:.*]] = arith.cmpi eq, %[[J]], %[[UB_J]] : i32
+! CHECK: %[[CK2:.*]] = arith.cmpi eq, %[[K]], %[[UB_K]] : i32
+! CHECK: %[[AND:.*]] = arith.andi %[[CJ]], %[[CK2]] : i1
+! CHECK: fir.if %[[AND]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(2) with non-unit positive step and after-code.
+! The after guard must compare iv against the last executed value
+! (lb + ((ub - lb) / step) * step), not the upper bound directly.
+! For do j = 1, 10, 4: last_iv = 1 + ((10-1)/4)*4 = 1 + 8 = 9.
+! CHECK-LABEL: func.func @_QPcollapse2_nonunit_step_after
+subroutine collapse2_nonunit_step_after(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ do j = 1, 10, 4
+ x = x + j
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]]) inclusive step (%{{[^)]*}}, %[[STEP_J:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Innermost body: x = x + j
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! After guard: compute last_iv = lb + ((ub - lb) / step) * step
+! CHECK: %[[RANGE:.*]] = arith.subi %[[UB_J]], %[[LB_J]] : i32
+! CHECK: %[[DIV:.*]] = arith.divsi %[[RANGE]], %[[STEP_J]] : i32
+! CHECK: %[[MUL:.*]] = arith.muli %[[DIV]], %[[STEP_J]] : i32
+! CHECK: %[[LAST:.*]] = arith.addi %[[LB_J]], %[[MUL]] : i32
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LAST]] : i32
+! CHECK: fir.if %[[CMP]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(2) with negative step and after-code.
+! For do j = 10, 1, -4: last_iv = 10 + ((1-10)/(-4))*(-4) = 10 + (2*-4) = 2.
+! CHECK-LABEL: func.func @_QPcollapse2_negative_step_after
+subroutine collapse2_negative_step_after(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ do j = 10, 1, -4
+ x = x + j
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]]) inclusive step (%{{[^)]*}}, %[[STEP_J:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Innermost body: x = x + j
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! After guard: compute last_iv for negative step
+! CHECK: %[[RANGE:.*]] = arith.subi %[[UB_J]], %[[LB_J]] : i32
+! CHECK: %[[DIV:.*]] = arith.divsi %[[RANGE]], %[[STEP_J]] : i32
+! CHECK: %[[MUL:.*]] = arith.muli %[[DIV]], %[[STEP_J]] : i32
+! CHECK: %[[LAST:.*]] = arith.addi %[[LB_J]], %[[MUL]] : i32
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LAST]] : i32
+! CHECK: fir.if %[[CMP]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(3) with non-unit step on the middle loop (not innermost).
+! For do j = 1, n, 3: last_iv = lb + ((ub - lb) / step) * step (runtime).
+! CHECK-LABEL: func.func @_QPcollapse3_nonunit_step_middle
+subroutine collapse3_nonunit_step_middle(n, x)
+ integer, intent(in) :: n
+ integer, intent(inout) :: x
+ integer :: i, j, k
+
+ !$omp do collapse(3)
+ do i = 1, n
+ do j = 1, n, 3
+ x = x + j
+ do k = 1, n
+ x = x + k
+ end do
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]], %[[K:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]], %[[LB_K:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]], %[[UB_K:[^)]*]]) inclusive step (%{{[^)]*}}, %[[STEP_J:[^)]*]], %{{[^)]*}})
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! CHECK: hlfir.assign %[[K]]
+! Guard level 1 before: k == lb_k (once per (i,j))
+! CHECK: %[[CK1:.*]] = arith.cmpi eq, %[[K]], %[[LB_K]] : i32
+! CHECK: fir.if %[[CK1]] {
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! CHECK: }
+! Innermost body: x = x + k
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! Guard level 0 after: must compute last_iv for j (non-unit step) AND k == ub_k
+! CHECK: %[[RANGE:.*]] = arith.subi %[[UB_J]], %[[LB_J]] : i32
+! CHECK: %[[DIV:.*]] = arith.divsi %[[RANGE]], %[[STEP_J]] : i32
+! CHECK: %[[MUL:.*]] = arith.muli %[[DIV]], %[[STEP_J]] : i32
+! CHECK: %[[LASTJ:.*]] = arith.addi %[[LB_J]], %[[MUL]] : i32
+! CHECK: %[[CJ:.*]] = arith.cmpi eq, %[[J]], %[[LASTJ]] : i32
+! CHECK: %[[CK2:.*]] = arith.cmpi eq, %[[K]], %[[UB_K]] : i32
+! CHECK: %[[AND:.*]] = arith.andi %[[CJ]], %[[CK2]] : i1
+! CHECK: fir.if %[[AND]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
+
+! Test collapse(2) with a dynamic (runtime) step value.
+! The step is not a compile-time constant, so the last_iv computation
+! cannot be folded away and must remain as arith ops in the IR.
+! CHECK-LABEL: func.func @_QPcollapse2_dynamic_step_after
+subroutine collapse2_dynamic_step_after(n, s, x)
+ integer, intent(in) :: n, s
+ integer, intent(inout) :: x
+ integer :: i, j
+
+ !$omp do collapse(2)
+ do i = 1, n
+ do j = 1, n, s
+ x = x + j
+ end do
+ x = x - i
+ end do
+ !$omp end do
+end subroutine
+
+! CHECK: omp.wsloop
+! CHECK-NEXT: omp.loop_nest (%[[I:.*]], %[[J:.*]]) : i32 =
+! CHECK-SAME: (%{{[^)]*}}, %[[LB_J:[^)]*]]) to (%{{[^)]*}}, %[[UB_J:[^)]*]]) inclusive step (%{{[^)]*}}, %[[STEP_J:[^)]*]])
+! CHECK: hlfir.assign %[[I]]
+! CHECK: hlfir.assign %[[J]]
+! Innermost body: x = x + j
+! CHECK: arith.addi
+! CHECK: hlfir.assign
+! After guard: dynamic step forces last_iv computation to stay in IR
+! CHECK: %[[RANGE:.*]] = arith.subi %[[UB_J]], %[[LB_J]] : i32
+! CHECK: %[[DIV:.*]] = arith.divsi %[[RANGE]], %[[STEP_J]] : i32
+! CHECK: %[[MUL:.*]] = arith.muli %[[DIV]], %[[STEP_J]] : i32
+! CHECK: %[[LAST:.*]] = arith.addi %[[LB_J]], %[[MUL]] : i32
+! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[J]], %[[LAST]] : i32
+! CHECK: fir.if %[[CMP]] {
+! CHECK: arith.subi
+! CHECK: hlfir.assign
+! CHECK: }
+! CHECK: omp.yield
diff --git a/flang/test/Semantics/OpenMP/do-collapse.f90 b/flang/test/Semantics/OpenMP/do-collapse.f90
index 86354d6a61a31..29839a7e0b93d 100644
--- a/flang/test/Semantics/OpenMP/do-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-collapse.f90
@@ -3,7 +3,7 @@
! 2.7.1 Collapse Clause
program omp_doCollapse
integer:: i,j
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
+ !ERROR: This construct requires a nest of depth 3, but the associated nest is a nest of depth 2
!BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 1,10
@@ -15,7 +15,7 @@ program omp_doCollapse
do i = 1,10
do j = 1, 10
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!$omp do collapse(2)
do k = 1, 10
@@ -25,7 +25,7 @@ program omp_doCollapse
end do
end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!$omp parallel do collapse(2)
do i = 1, 3
@@ -35,7 +35,7 @@ program omp_doCollapse
end do
end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!ERROR: At most one COLLAPSE clause can appear on the SIMD directive
!$omp simd collapse(2) collapse(1)
diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse-60.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse-60.f90
index 4ba8e71a26323..75351cdf7190c 100644
--- a/flang/test/Semantics/OpenMP/do-concurrent-collapse-60.f90
+++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse-60.f90
@@ -3,7 +3,7 @@
subroutine f
integer :: i
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!$omp parallel do collapse(2)
do i = 1, 1
@@ -42,7 +42,7 @@ subroutine f
print *, j
end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!$omp loop collapse(2)
do i = 1, 1
diff --git a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90 b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
index 7cd10518d845a..0a6906b0e2e63 100644
--- a/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
+++ b/flang/test/Semantics/OpenMP/do-concurrent-collapse.f90
@@ -1,7 +1,7 @@
!RUN: %python %S/../test_errors.py %s %flang -fopenmp
integer :: i, j
-! ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+! ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
! BECAUSE: COLLAPSE clause was specified with argument 2
!$omp parallel do collapse(2)
do i = 1, 1
@@ -33,7 +33,7 @@
print *, j
end do
-! ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+! ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
! BECAUSE: COLLAPSE clause was specified with argument 2
!$omp loop collapse(2)
do i = 1, 1
diff --git a/flang/test/Semantics/OpenMP/do08.f90 b/flang/test/Semantics/OpenMP/do08.f90
index 300485f067b1d..24755022f0296 100644
--- a/flang/test/Semantics/OpenMP/do08.f90
+++ b/flang/test/Semantics/OpenMP/do08.f90
@@ -7,11 +7,8 @@ program omp
logical cond(10,10,10)
cond = .false.
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
- !BECAUSE: This code prevents perfect nesting
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
if (i .lt. 1) cycle
do j = 0, 10
@@ -22,12 +19,9 @@ program omp
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
do j = 0, 10
- !BECAUSE: This code prevents perfect nesting
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
if (i .lt. 1) cycle
do k = 0, 10
@@ -37,11 +31,8 @@ program omp
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 2
!$omp do collapse(2)
do i = 0, 10
- !BECAUSE: This code prevents perfect nesting
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
if (i .lt. 1) cycle
do j = 0, 10
@@ -53,11 +44,8 @@ program omp
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 2
!$omp do collapse(2)
foo: do i = 0, 10
- !BECAUSE: This code prevents perfect nesting
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
if (i .lt. 1) cycle foo
do j = 0, 10
@@ -69,12 +57,9 @@ program omp
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do 60 i=2,200,2
do j=1,10
- !BECAUSE: This code prevents perfect nesting
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
if (i == 100) cycle
do k = 1, 10
diff --git a/flang/test/Semantics/OpenMP/do10.f90 b/flang/test/Semantics/OpenMP/do10.f90
index 1878864a4a5db..1fae5e098dc56 100644
--- a/flang/test/Semantics/OpenMP/do10.f90
+++ b/flang/test/Semantics/OpenMP/do10.f90
@@ -14,7 +14,7 @@ program omp_do
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
+ !ERROR: This construct requires a nest of depth 3, but the associated nest is a nest of depth 2
!BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 1, 10
diff --git a/flang/test/Semantics/OpenMP/do13.f90 b/flang/test/Semantics/OpenMP/do13.f90
index 6d5e799e951b0..dd9f4be03e7e1 100644
--- a/flang/test/Semantics/OpenMP/do13.f90
+++ b/flang/test/Semantics/OpenMP/do13.f90
@@ -5,7 +5,7 @@
program omp
integer i, j, k
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 3, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
@@ -20,7 +20,7 @@ program omp
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
+ !ERROR: This construct requires a nest of depth 3, but the associated nest is a nest of depth 2
!BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
@@ -35,7 +35,7 @@ program omp
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!$omp do collapse(2)
do i = 0, 10
@@ -51,7 +51,7 @@ program omp
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
!BECAUSE: COLLAPSE clause was specified with argument 2
!$omp do collapse(2)
foo: do i = 0, 10
@@ -67,7 +67,7 @@ program omp
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
+ !ERROR: This construct requires a nest of depth 3, but the associated nest is a nest of depth 2
!BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do 60 i=1,10
diff --git a/flang/test/Semantics/OpenMP/do15.f90 b/flang/test/Semantics/OpenMP/do15.f90
index 00baa0c431c5f..96c195b2d1306 100644
--- a/flang/test/Semantics/OpenMP/do15.f90
+++ b/flang/test/Semantics/OpenMP/do15.f90
@@ -5,11 +5,8 @@
program omp
integer i, j, k
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
- !BECAUSE: This code prevents perfect nesting
if (i .lt. 1) then
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
cycle
@@ -22,12 +19,9 @@ program omp
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
do j = 0, 10
- !BECAUSE: This code prevents perfect nesting
if (i .lt. 1) then
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
cycle
@@ -56,12 +50,9 @@ program omp
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
foo: do i = 0, 10
foo1: do j = 0, 10
- !BECAUSE: This code prevents perfect nesting
if (i .lt. 1) then
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
cycle foo
diff --git a/flang/test/Semantics/OpenMP/do16.f90 b/flang/test/Semantics/OpenMP/do16.f90
index 35d94b17f3c68..2a871156de75f 100644
--- a/flang/test/Semantics/OpenMP/do16.f90
+++ b/flang/test/Semantics/OpenMP/do16.f90
@@ -5,11 +5,8 @@
program omp
integer i, j, k
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
- !BECAUSE: This code prevents perfect nesting
select case (i)
case(1)
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
@@ -23,12 +20,9 @@ program omp
end do
!$omp end do
- !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 2
- !BECAUSE: COLLAPSE clause was specified with argument 3
!$omp do collapse(3)
do i = 0, 10
do j = 0, 10
- !BECAUSE: This code prevents perfect nesting
select case (i)
case(1)
!ERROR: CYCLE statement to non-innermost associated loop of an OpenMP DO construct
diff --git a/flang/test/Semantics/OpenMP/do22.f90 b/flang/test/Semantics/OpenMP/do22.f90
index 2ced881a2af8b..818de56aa1093 100644
--- a/flang/test/Semantics/OpenMP/do22.f90
+++ b/flang/test/Semantics/OpenMP/do22.f90
@@ -4,11 +4,9 @@
subroutine do_imperfectly_nested_before
integer i, j
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 2
+ ! Valid: print is allowed as CLN intervening code with collapse
!$omp do collapse(2)
do i = 1, 10
- !BECAUSE: This code prevents perfect nesting
print *, i
do j = 1, 10
print *, i, j
@@ -21,15 +19,134 @@ subroutine do_imperfectly_nested_before
subroutine do_imperfectly_nested_behind
integer i, j
- !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
- !BECAUSE: COLLAPSE clause was specified with argument 2
+ ! Valid: print is allowed as CLN intervening code with collapse
+ !$omp do collapse(2)
+ do i = 1, 10
+ do j = 1, 10
+ print *, i, j
+ end do
+ print *, i
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfectly_nested_scalar_assign
+ integer i, j, x
+
+ ! Valid: scalar assignment is allowed as CLN intervening code with collapse
+ !$omp do collapse(2)
+ do i = 1, 10
+ x = i + 1
+ do j = 1, 10
+ print *, i, j, x
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfectly_nested_call
+ integer i, j
+
+ ! Valid: subroutine call is allowed as CLN intervening code with collapse
!$omp do collapse(2)
do i = 1, 10
+ call sub(i)
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfectly_nested_multiple
+ integer i, j, x
+
+ ! Valid: multiple scalar statements are allowed as CLN intervening code
+ !$omp do collapse(2)
+ do i = 1, 10
+ x = i * 2
+ print *, x
+ call sub(x)
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfect_collapse_bare_ordered
+ integer i, j, x
+
+ ! Valid: bare ORDERED does not require a perfect nest.
+ !$omp do collapse(2) ordered
+ do i = 1, 10
+ x = 0
+ do j = 1, 10
+ !$omp ordered
+ print *, i, j, x
+ !$omp end ordered
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfect_ordered_requires_perfect
+ integer i, j
+
+ ! ordered(2) still requires perfect nesting at default OpenMP version
+ !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: ORDERED clause was specified with argument 2
+ !$omp do ordered(2)
+ do i = 1, 10
+ !BECAUSE: This code prevents perfect nesting
+ print *, i
do j = 1, 10
print *, i, j
end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfect_collapse_ordered_requires_perfect
+ integer i, j, k
+
+ ! collapse(2) ordered(3) requires perfect nesting at default OpenMP version because ordered(3) > collapse(2)
+ !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: ORDERED clause was specified with argument 3
+ !$omp do collapse(2) ordered(3)
+ do i = 1, 10
!BECAUSE: This code prevents perfect nesting
print *, i
+ do j = 1, 10
+ do k = 1, 10
+ print *, i, j, k
+ end do
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+
+subroutine do_imperfect_array_assign_invalid
+ integer i, j
+ integer :: a(10)
+
+ ! Array assignment is invalid CLN intervening code
+ !ERROR: This construct requires a nest of depth 2, but the associated nest is a nest of depth 1
+ !BECAUSE: COLLAPSE clause was specified with argument 2
+ !$omp do collapse(2)
+ do i = 1, 10
+ !BECAUSE: The nest contains code that prevents it from being canonical at this nesting level
+ a(:) = 0
+ do j = 1, 10
+ print *, i, j
+ end do
end do
!$omp end do
end subroutine
diff --git a/flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90 b/flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90
new file mode 100644
index 0000000000000..492a961fa997f
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90
@@ -0,0 +1,110 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52
+! OpenMP 5.2: doacross loop nests (those with ordered doacross(sink/source)
+! constructs) require perfect nesting.
+
+! ordered(2) without doacross directives: imperfect nesting is valid in 5.2.
+subroutine ordered_no_doacross_imperfect
+ integer i, j
+
+ !$omp do ordered(2)
+ do i = 1, 10
+ print *, i
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! Perfectly nested doacross: valid.
+subroutine doacross_perfect
+ integer i, j
+
+ !$omp do ordered(2)
+ do i = 1, 10
+ do j = 1, 10
+ !$omp ordered doacross(sink: i-1, j)
+ print *, i, j
+ !$omp ordered doacross(source)
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! Imperfectly nested doacross: invalid in 5.2.
+subroutine doacross_imperfect
+ integer i, j
+
+ !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: ORDERED clause was specified with argument 2
+ !$omp do ordered(2)
+ do i = 1, 10
+ !BECAUSE: This code prevents perfect nesting
+ print *, i
+ do j = 1, 10
+ !$omp ordered doacross(sink: i-1, j)
+ print *, i, j
+ !$omp ordered doacross(source)
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! collapse(2) ordered(3) without doacross: imperfect nesting is valid.
+subroutine collapse_ordered_no_doacross_imperfect
+ integer i, j, k
+
+ !$omp do collapse(2) ordered(3)
+ do i = 1, 10
+ print *, i
+ do j = 1, 10
+ do k = 1, 10
+ print *, i, j, k
+ end do
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! Doacross with collapse: ordered(N) controls depth when N > collapse.
+subroutine doacross_collapse
+ integer i, j, k
+
+ !ERROR: This construct requires a perfect nest of depth 3, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: ORDERED clause was specified with argument 3
+ !$omp do collapse(2) ordered(3)
+ do i = 1, 10
+ !BECAUSE: This code prevents perfect nesting
+ print *, i
+ do j = 1, 10
+ do k = 1, 10
+ !$omp ordered doacross(sink: i-1, j, k)
+ print *, i, j, k
+ !$omp ordered doacross(source)
+ end do
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! Doacross inside a nested OpenMP region should not force perfect nesting on
+! the outer loop. The doacross binds to the inner loop, not the outer one.
+subroutine doacross_in_nested_region
+ integer i, j, k
+
+ !$omp do collapse(2)
+ do i = 1, 10
+ print *, i
+ do j = 1, 10
+ !$omp parallel
+ !$omp do ordered(1)
+ do k = 1, 10
+ !$omp ordered doacross(source)
+ print *, k
+ end do
+ !$omp end do
+ !$omp end parallel
+ end do
+ end do
+ !$omp end do
+end subroutine
>From 2639171cf3c72a4c62d928daa48075144757cd30 Mon Sep 17 00:00:00 2001
From: Caroline Newcombe <caroline.newcombe at hpe.com>
Date: Tue, 16 Jun 2026 10:53:59 -0500
Subject: [PATCH 2/3] Remove brace initializations to match lowering style
---
flang/lib/Lower/OpenMP/OpenMP.cpp | 70 +++++++++++++++----------------
1 file changed, 35 insertions(+), 35 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index f9eb59cbe0b95..3c0fce0703aa5 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -702,12 +702,12 @@ static void genCollapsedLoopNestBody(lower::AbstractConverter &converter,
return;
}
- fir::FirOpBuilder &firOpBuilder{converter.getFirOpBuilder()};
- const mlir::Location loc{converter.getCurrentLocation()};
+ fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
+ const mlir::Location loc = converter.getCurrentLocation();
// Get the enclosing omp.loop_nest to access induction variables and bounds.
- auto loopNestOp{mlir::dyn_cast<mlir::omp::LoopNestOp>(
- firOpBuilder.getInsertionBlock()->getParentOp())};
+ auto loopNestOp = mlir::dyn_cast<mlir::omp::LoopNestOp>(
+ firOpBuilder.getInsertionBlock()->getParentOp());
assert(loopNestOp && "expected to be inside omp.loop_nest");
// Collect before/after evaluations at each intermediate level.
@@ -717,11 +717,11 @@ static void genCollapsedLoopNestBody(lower::AbstractConverter &converter,
};
llvm::SmallVector<LevelInfo> levels;
- lower::pft::Evaluation *curEval{&outerEval};
- for (int i{0}; i < collapseValue - 1; ++i) {
- lower::pft::Evaluation *const doEval{getNestedDoConstruct(*curEval)};
+ lower::pft::Evaluation *curEval = &outerEval;
+ for (int i = 0; i < collapseValue - 1; ++i) {
+ lower::pft::Evaluation *doEval = getNestedDoConstruct(*curEval);
LevelInfo level;
- bool pastDo{false};
+ bool pastDo = false;
for (lower::pft::Evaluation &e : doEval->getNestedEvaluations()) {
if (e.getIf<parser::NonLabelDoStmt>() || e.getIf<parser::EndDoStmt>())
continue;
@@ -748,31 +748,31 @@ static void genCollapsedLoopNestBody(lower::AbstractConverter &converter,
// steps where the IV may never exactly equal the upper bound.
auto buildGuard = [&](const int startLevel, const int endLevel,
const bool useLowerBound) -> mlir::Value {
- mlir::Value cond{};
- const auto lbs{loopNestOp.getLoopLowerBounds()};
- const auto ubs{loopNestOp.getLoopUpperBounds()};
- const auto steps{loopNestOp.getLoopSteps()};
- for (int lvl{startLevel}; lvl < endLevel; ++lvl) {
- const mlir::Value iv{loopNestOp.getRegion().getArgument(lvl)};
+ mlir::Value cond;
+ const auto lbs = loopNestOp.getLoopLowerBounds();
+ const auto ubs = loopNestOp.getLoopUpperBounds();
+ const auto steps = loopNestOp.getLoopSteps();
+ for (int lvl = startLevel; lvl < endLevel; ++lvl) {
+ const mlir::Value iv = loopNestOp.getRegion().getArgument(lvl);
mlir::Value target;
if (useLowerBound) {
target = lbs[lvl];
} else {
// For unit steps, the last induction variable always equals ub.
- const auto constStep{fir::getIntIfConstant(steps[lvl])};
+ const auto constStep = fir::getIntIfConstant(steps[lvl]);
if (constStep && (*constStep == 1 || *constStep == -1)) {
target = ubs[lvl];
} else {
// Compute last_iv = lb + ((ub - lb) / step) * step.
- const mlir::Value lb{lbs[lvl]};
- const mlir::Value ub{ubs[lvl]};
- const mlir::Value step{steps[lvl]};
- const mlir::Value range{
- mlir::arith::SubIOp::create(firOpBuilder, loc, ub, lb)};
- const mlir::Value tripMinus1{
- mlir::arith::DivSIOp::create(firOpBuilder, loc, range, step)};
- const mlir::Value lastOffset{
- mlir::arith::MulIOp::create(firOpBuilder, loc, tripMinus1, step)};
+ const mlir::Value lb = lbs[lvl];
+ const mlir::Value ub = ubs[lvl];
+ const mlir::Value step = steps[lvl];
+ const mlir::Value range =
+ mlir::arith::SubIOp::create(firOpBuilder, loc, ub, lb);
+ const mlir::Value tripMinus1 =
+ mlir::arith::DivSIOp::create(firOpBuilder, loc, range, step);
+ const mlir::Value lastOffset =
+ mlir::arith::MulIOp::create(firOpBuilder, loc, tripMinus1, step);
target =
mlir::arith::AddIOp::create(firOpBuilder, loc, lb, lastOffset);
}
@@ -788,12 +788,12 @@ static void genCollapsedLoopNestBody(lower::AbstractConverter &converter,
};
// Emit "before" code at each level, guarded by inner IVs == lower bounds.
- for (int i{0}; i < static_cast<int>(levels.size()); ++i) {
+ for (int i = 0; i < static_cast<int>(levels.size()); ++i) {
if (levels[i].before.empty())
continue;
- const mlir::Value guard{
- buildGuard(i + 1, collapseValue, /*useLowerBound=*/true)};
- auto ifOp{fir::IfOp::create(firOpBuilder, loc, guard, /*else*/ false)};
+ const mlir::Value guard =
+ buildGuard(i + 1, collapseValue, /*useLowerBound=*/true);
+ auto ifOp = fir::IfOp::create(firOpBuilder, loc, guard, /*else*/ false);
firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
for (auto *e : levels[i].before)
converter.genEval(*e);
@@ -805,12 +805,12 @@ static void genCollapsedLoopNestBody(lower::AbstractConverter &converter,
// Emit "after" code at each level (innermost first), guarded by
// inner IVs == last iteration values (accounts for non-unit steps).
- for (int i{static_cast<int>(levels.size()) - 1}; i >= 0; --i) {
+ for (int i = static_cast<int>(levels.size()) - 1; i >= 0; --i) {
if (levels[i].after.empty())
continue;
- const mlir::Value guard{
- buildGuard(i + 1, collapseValue, /*useLowerBound=*/false)};
- auto ifOp{fir::IfOp::create(firOpBuilder, loc, guard, /*else*/ false)};
+ const mlir::Value guard =
+ buildGuard(i + 1, collapseValue, /*useLowerBound=*/false);
+ auto ifOp = fir::IfOp::create(firOpBuilder, loc, guard, /*else*/ false);
firOpBuilder.setInsertionPointToStart(&ifOp.getThenRegion().front());
for (auto *e : levels[i].after)
converter.genEval(*e);
@@ -1410,8 +1410,8 @@ struct OpWithBodyGenInfo {
bool privatize = true;
/// [in] if set, outermost evaluation and collapse depth for emitting
/// intervening code from imperfect collapsed loop nests.
- lower::pft::Evaluation *outerCollapseEval{nullptr};
- int collapseValue{0};
+ lower::pft::Evaluation *outerCollapseEval = nullptr;
+ int collapseValue = 0;
};
/// Create the body (block) for an OpenMP Operation.
@@ -2438,7 +2438,7 @@ static void genCanonicalLoopNest(
// Step 1: Loop prologues
// Computing the trip count must happen before entering the outermost loop
lower::pft::Evaluation *innermostEval = nestedEval;
- for (std::size_t i{0}; i < ivs.size(); ++i) {
+ for (std::size_t i = 0; i < ivs.size(); ++i) {
if (innermostEval->getIf<parser::DoConstruct>()->IsDoConcurrent()) {
// OpenMP specifies DO CONCURRENT only with the `!omp loop` construct.
// Will need to add special cases for this combination.
>From 323db59c8b17f03afdcec6e021a3d23cca0ecba0 Mon Sep 17 00:00:00 2001
From: Caroline Newcombe <caroline.newcombe at hpe.com>
Date: Thu, 18 Jun 2026 15:08:14 -0500
Subject: [PATCH 3/3] [flang][OpenMP] Refine perfect-nesting requirement for
doacross loop nests
Fix the perfect-nesting requirement for doacross loop nests by version: in
5.0 any ORDERED clause requires a perfect nest, while in 5.1 and 5.2 only an
ORDERED clause with an argument requires a perfect nest. In 6.0 the
requirement is keyed off the body containing an ORDERED directive with a
doacross dependence rather than off the ORDERED clause.
Add a public IsDoacrossAffected(const OpenMPLoopConstruct &) utility in
openmp-utils (with the DoacrossFinder body scan), used by check-omp-loop only
for OpenMP versions after 5.2. Expand the test coverage (renamed omp52 ->
omp60 and added ordered-nesting tests), including ORDERED with no argument defaulting to the COLLAPSE value.
---
flang/include/flang/Semantics/openmp-utils.h | 6 ++
flang/lib/Semantics/check-omp-loop.cpp | 52 ++---------
flang/lib/Semantics/openmp-utils.cpp | 92 +++++++++++++++++--
...g-omp52.f90 => doacross-nesting-omp60.f90} | 46 +++++++++-
.../OpenMP/ordered-nesting-omp50.f90 | 50 ++++++++++
.../OpenMP/ordered-nesting-omp51.f90 | 36 ++++++++
6 files changed, 222 insertions(+), 60 deletions(-)
rename flang/test/Semantics/OpenMP/{doacross-nesting-omp52.f90 => doacross-nesting-omp60.f90} (68%)
create mode 100644 flang/test/Semantics/OpenMP/ordered-nesting-omp50.f90
create mode 100644 flang/test/Semantics/OpenMP/ordered-nesting-omp51.f90
diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index c2e89fe829ce0..4a519dbde6f33 100644
--- a/flang/include/flang/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -270,6 +270,12 @@ std::optional<std::vector<const parser::DoConstruct *>> CollectAffectedDoLoops(
const parser::OpenMPLoopConstruct &x, unsigned version,
SemanticsContext *semaCtx = nullptr);
+/// Returns whether the loop nest associated with `x` is a doacross loop nest,
+/// i.e. its body contains an `ordered` directive carrying a doacross
+/// dependence (the `doacross` clause, or the pre-5.2 `depend(sink/source)`
+/// equivalent) that binds to `x`. Such a nest must be perfectly nested.
+bool IsDoacrossAffected(const parser::OpenMPLoopConstruct &x);
+
struct LoopSequence {
LoopSequence(const parser::ExecutionPartConstruct &root, unsigned version,
bool allowAllLoops = false, SemanticsContext *semaCtx = nullptr);
diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp
index a10d6e4fc93e1..87f6f28f2dda2 100644
--- a/flang/lib/Semantics/check-omp-loop.cpp
+++ b/flang/lib/Semantics/check-omp-loop.cpp
@@ -111,49 +111,6 @@ class AssociatedLoopChecker {
std::int64_t level_;
std::map<std::string, std::int64_t> constructNamesAndLevels_;
};
-
-/// Visitor that detects an ordered directive with a doacross clause
-/// (or the pre-5.2 depend(sink/source) equivalent).
-/// Does not descend into nested OpenMP block or loop constructs, since
-/// doacross directives inside them bind to an inner worksharing-loop
-/// region, not the one being checked.
-struct DoacrossFinder {
- bool found{false};
- template <typename T> bool Pre(const T &) { return !found; }
- template <typename T> void Post(const T &) {}
-
- // Stop descent into nested OpenMP regions that create new binding contexts.
- bool Pre(const parser::OmpBlockConstruct &) { return false; }
- bool Pre(const parser::OpenMPLoopConstruct &) { return false; }
-
- void Post(const parser::OpenMPSimpleStandaloneConstruct &x) {
- if (found) {
- return;
- }
- if (x.v.DirId() != llvm::omp::Directive::OMPD_ordered) {
- return;
- }
- for (const auto &clause : x.v.Clauses().v) {
- if (std::holds_alternative<parser::OmpClause::Doacross>(clause.u)) {
- found = true;
- return;
- }
- if (const auto *depend{
- std::get_if<parser::OmpClause::Depend>(&clause.u)}) {
- if (std::holds_alternative<parser::OmpDoacross>(depend->v.u)) {
- found = true;
- return;
- }
- }
- }
- }
-};
-
-bool ContainsDoacrossDirective(const parser::Block &block) {
- DoacrossFinder finder;
- parser::Walk(block, finder);
- return finder.found;
-}
} // namespace
namespace Fortran::semantics {
@@ -370,9 +327,12 @@ void OmpStructureChecker::CheckNestedConstruct(
auto [needDepth, needPerfect]{
GetAffectedNestDepthWithReason(beginSpec, version)};
- // In OpenMP 5.2+, perfect nesting is only required for doacross loop
- // nests (those whose body contains ordered doacross directives).
- if (!needPerfect && version >= 52 && ContainsDoacrossDirective(body)) {
+ // Perfect nesting for doacross loop nests is handled differently across
+ // versions. Only in 6.0+ is the requirement keyed off the body
+ // actually containing an ORDERED directive with a doacross dependence
+ // rather than the ORDERED clause, so the body scan applies only to those
+ // later versions.
+ if (!needPerfect && version > 52 && IsDoacrossAffected(x)) {
needPerfect = true;
}
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index edff3cb3e9cfd..e93d9109e7b04 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -989,18 +989,31 @@ std::pair<WithReason<int64_t>, bool> GetAffectedNestDepthWithReason(
ocount = std::nullopt;
oreason = Reason();
}
+ bool hasOrdered{parser::omp::FindClause(
+ spec, llvm::omp::Clause::OMPC_ordered) != nullptr};
+ // Perfect-nesting requirement for the ORDERED clause, by version:
+ //
+ // 5.0: Any ORDERED clause makes the associated loops a doacross loop
+ // nest that must be perfectly nested, whether or not the clause
+ // has an argument.
+ // 5.1/5.2: Only an ORDERED clause *with* an argument requires perfect
+ // nesting; a bare ORDERED clause does not.
+ // 6.0: Perfect nesting is required only when the body actually
+ // contains an ORDERED directive with a doacross dependence;
+ // that is detected separately by the caller via
+ // IsDoacrossAffected, so ORDERED(n) alone does not force
+ // perfect nesting here.
if (ccount < ocount) {
- // Prior to 5.2, ordered(n) requires perfect nesting unconditionally.
- // In 5.2+, perfect nesting is required only for doacross (checked later
- // with ContainsDoacrossDirective).
- return {{ocount.value_or(1), std::move(oreason)}, version < 52};
- }
- // Prior to 5.2, ordered(n) requires perfect nesting.
- // In 5.2+, only doacross nests require it (checked separately).
- // CLN relaxation for collapse is applied retroactively for all versions.
+ return {{ocount.value_or(1), std::move(oreason)}, version <= 52};
+ }
+ // Same rule as above when COLLAPSE drives the depth: ORDERED(n) requires a
+ // perfect nest through 5.2, while > 5.2 defers to IsDoacrossAffected. In
+ // 5.0, an ORDERED clause without argument also requires perfect nesting.
+ // The CLN relaxation for COLLAPSE is applied retroactively for all
+ // versions.
bool needPerfect{false};
- if (version < 52) {
- needPerfect = ocount.has_value();
+ if (version <= 52) {
+ needPerfect = ocount.has_value() || (version == 50 && hasOrdered);
}
return {{ccount.value_or(1), std::move(creason)}, needPerfect};
}
@@ -1220,6 +1233,65 @@ std::optional<int64_t> GetMinimumSequenceCount(
return GetMinimumSequenceCount(std::nullopt, std::nullopt);
}
+namespace {
+/// Visitor that detects an `ordered` directive carrying a doacross dependence
+/// (the `doacross` clause, or the pre-5.2 `depend(sink/source)` equivalent)
+/// that binds to the loop construct being checked. Prunes nested constructs
+/// that start their own associated loop nest, but descends into
+/// loop-transforming constructs (e.g. tile, unroll), whose generated loops
+/// extend the current nest.
+struct DoacrossFinder {
+ bool found{false};
+ bool inOrdered{false};
+ template <typename T> bool Pre(const T &) { return !found; }
+ template <typename T> void Post(const T &) {}
+
+ // Prune nested constructs that start their own associated loop nest; a
+ // doacross inside them binds there, not here. Loop-transforming constructs
+ // are the exception: their generated loops extend the current nest, so a
+ // doacross inside one still binds to the construct being checked.
+ bool Pre(const parser::OmpBlockConstruct &) { return false; }
+ bool Pre(const parser::OpenMPLoopConstruct &x) {
+ if (IsLoopTransforming(x.BeginDir().DirId())) {
+ return !found;
+ }
+ return false;
+ }
+
+ bool Pre(const parser::OpenMPSimpleStandaloneConstruct &x) {
+ inOrdered = x.v.DirId() == llvm::omp::Directive::OMPD_ordered;
+ return !found;
+ }
+ void Post(const parser::OpenMPSimpleStandaloneConstruct &) {
+ inOrdered = false;
+ }
+
+ bool Pre(const parser::OmpDoacross &) {
+ if (inOrdered) {
+ found = true;
+ }
+ return false;
+ }
+};
+
+static bool ContainsOrderedDoacross(const parser::Block &block) {
+ DoacrossFinder finder;
+ parser::Walk(block, finder);
+ return finder.found;
+}
+} // namespace
+
+bool IsDoacrossAffected(const parser::OpenMPLoopConstruct &x) {
+ // A loop nest is doacross-affected when it has an `ordered` clause and a
+ // stand-alone `ordered` construct carrying a doacross dependence is closely
+ // nested in its body.
+ const parser::OmpDirectiveSpecification &spec{x.BeginDir()};
+ if (!parser::omp::FindClause(spec, llvm::omp::Clause::OMPC_ordered)) {
+ return false;
+ }
+ return ContainsOrderedDoacross(std::get<parser::Block>(x.t));
+}
+
/// Collect the DO loops that are affected directly by the given loop
/// transformation. Not all DO loops nested in the associated nest are
/// affected by the top-level loop transformation, e.g.
diff --git a/flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90 b/flang/test/Semantics/OpenMP/doacross-nesting-omp60.f90
similarity index 68%
rename from flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90
rename to flang/test/Semantics/OpenMP/doacross-nesting-omp60.f90
index 492a961fa997f..d11be5a60e072 100644
--- a/flang/test/Semantics/OpenMP/doacross-nesting-omp52.f90
+++ b/flang/test/Semantics/OpenMP/doacross-nesting-omp60.f90
@@ -1,8 +1,8 @@
-! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=52
-! OpenMP 5.2: doacross loop nests (those with ordered doacross(sink/source)
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60
+! OpenMP 6.0: doacross loop nests (those with ordered doacross(sink/source)
! constructs) require perfect nesting.
-! ordered(2) without doacross directives: imperfect nesting is valid in 5.2.
+! ordered(2) without doacross directives: imperfect nesting is valid in 6.0.
subroutine ordered_no_doacross_imperfect
integer i, j
@@ -16,6 +16,23 @@ subroutine ordered_no_doacross_imperfect
!$omp end do
end subroutine
+! Bare ORDERED (no argument) carries no doacross intent and does not require
+! perfect nesting.
+subroutine bare_ordered_no_doacross
+ integer i, j, x
+
+ !$omp do ordered
+ do i = 1, 10
+ x = 0
+ do j = 1, 10
+ !$omp ordered
+ print *, i, j, x
+ !$omp end ordered
+ end do
+ end do
+ !$omp end do
+end subroutine
+
! Perfectly nested doacross: valid.
subroutine doacross_perfect
integer i, j
@@ -31,7 +48,7 @@ subroutine doacross_perfect
!$omp end do
end subroutine
-! Imperfectly nested doacross: invalid in 5.2.
+! Imperfectly nested doacross: invalid in 6.0.
subroutine doacross_imperfect
integer i, j
@@ -87,6 +104,27 @@ subroutine doacross_collapse
!$omp end do
end subroutine
+! ORDERED with no argument: the number of doacross-affected loops defaults to
+! the COLLAPSE value (2 here). An imperfectly nested doacross is therefore
+! invalid, and the diagnostic is keyed off COLLAPSE rather than ORDERED.
+subroutine doacross_collapse_ordered_default
+ integer i, j
+
+ !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: COLLAPSE clause was specified with argument 2
+ !$omp do collapse(2) ordered
+ do i = 1, 10
+ !BECAUSE: This code prevents perfect nesting
+ print *, i
+ do j = 1, 10
+ !$omp ordered doacross(sink: i-1, j)
+ print *, i, j
+ !$omp ordered doacross(source)
+ end do
+ end do
+ !$omp end do
+end subroutine
+
! Doacross inside a nested OpenMP region should not force perfect nesting on
! the outer loop. The doacross binds to the inner loop, not the outer one.
subroutine doacross_in_nested_region
diff --git a/flang/test/Semantics/OpenMP/ordered-nesting-omp50.f90 b/flang/test/Semantics/OpenMP/ordered-nesting-omp50.f90
new file mode 100644
index 0000000000000..f2b82c343cf9c
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/ordered-nesting-omp50.f90
@@ -0,0 +1,50 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50
+! OpenMP 5.0: an ORDERED clause requires perfect nesting, whether or not the
+! clause has an argument. This is stricter than 5.1/5.2, where only ORDERED(n)
+! (with an argument) requires perfect nesting (see ordered-nesting-omp51.f90).
+
+! Bare ORDERED with COLLAPSE(2): the two associated loops must be perfectly
+! nested in 5.0, so intervening code between them is an error.
+subroutine bare_ordered_collapse_imperfect
+ integer i, j
+
+ !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: COLLAPSE clause was specified with argument 2
+ !$omp do collapse(2) ordered
+ do i = 1, 10
+ !BECAUSE: This code prevents perfect nesting
+ print *, i
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! Bare ORDERED with COLLAPSE(2), perfectly nested: valid.
+subroutine bare_ordered_collapse_perfect
+ integer i, j
+
+ !$omp do collapse(2) ordered
+ do i = 1, 10
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! Bare ORDERED with no COLLAPSE: only one loop is associated (depth 1), so
+! perfect nesting is trivially satisfied and intervening code is allowed.
+subroutine bare_ordered_no_collapse
+ integer i, j
+
+ !$omp do ordered
+ do i = 1, 10
+ print *, i
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/ordered-nesting-omp51.f90 b/flang/test/Semantics/OpenMP/ordered-nesting-omp51.f90
new file mode 100644
index 0000000000000..a5f282dbdcd24
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/ordered-nesting-omp51.f90
@@ -0,0 +1,36 @@
+! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=51
+! OpenMP 5.1/5.2: only an ORDERED clause *with* an argument requires perfect
+! nesting. A bare ORDERED clause does not, unlike in 5.0 (see
+! ordered-nesting-omp50.f90).
+
+! Bare ORDERED with COLLAPSE(2), imperfectly nested: valid in 5.1, since the
+! bare ORDERED clause does not impose perfect nesting.
+subroutine bare_ordered_collapse_imperfect
+ integer i, j
+
+ !$omp do collapse(2) ordered
+ do i = 1, 10
+ print *, i
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
+
+! ORDERED(2) with an argument still requires perfect nesting.
+subroutine ordered_arg_imperfect
+ integer i, j
+
+ !ERROR: This construct requires a perfect nest of depth 2, but the associated nest is a perfect nest of depth 1
+ !BECAUSE: ORDERED clause was specified with argument 2
+ !$omp do ordered(2)
+ do i = 1, 10
+ !BECAUSE: This code prevents perfect nesting
+ print *, i
+ do j = 1, 10
+ print *, i, j
+ end do
+ end do
+ !$omp end do
+end subroutine
More information about the flang-commits
mailing list