[flang-commits] [flang] [flang][semantics][openacc] Allow collapse clauses on do concurrent (PR #192488)
Andre Kuhlenschmidt via flang-commits
flang-commits at lists.llvm.org
Thu Apr 16 09:51:59 PDT 2026
https://github.com/akuhlens created https://github.com/llvm/llvm-project/pull/192488
This PR generalizes the semantic checking for collapse clauses to work on `do concurrent` and fixes two bugs exposed along the way:
- The first was that `collapse(n)` with n greater than the number of perfectly nested loops triggered an assertion failure instead of a diagnostic.
- The second was that `do concurrent` index variables were reported as missing because they had not been declared yet.
Lowering is not implemented yet: it currently emits a TODO and will be added in a follow-up patch.
>From 0ab163c07683f875e78b399bbf5d975d0e98fbe7 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Thu, 16 Apr 2026 09:23:37 -0700
Subject: [PATCH] initial commit
---
flang/lib/Lower/OpenACC.cpp | 13 +++
flang/lib/Semantics/canonicalize-acc.cpp | 5 -
flang/lib/Semantics/resolve-directives.cpp | 75 ++++++++++-----
.../Todo/do-loops-to-acc-loops-todo.f90 | 16 ++++
flang/test/Lower/OpenACC/acc-loop.f90 | 1 +
.../OpenACC/acc-canonicalization-validity.f90 | 2 -
flang/test/Semantics/OpenACC/acc-loop.f90 | 91 +++++++++++++++++++
7 files changed, 175 insertions(+), 28 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5a7fe899b372f..af2d2db1b68a8 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -15,6 +15,7 @@
#include "flang/Common/idioms.h"
#include "flang/Lower/Bridge.h"
#include "flang/Lower/ConvertType.h"
+#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/PFTBuilder.h"
@@ -1414,6 +1415,13 @@ static void privatizeIv(
builder.setInsertionPointToStart(builder.getAllocaBlock());
ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
builder.restoreInsertionPoint(insPt);
+ // Register an hlfir.declare so that remapDataOperandSymbols can find this
+ // locally-scoped IV and remap it to the privatized copy inside the
+ // acc.loop region. Without this, the symbolMap lookup in
+ // remapDataOperandSymbols fails because the DO CONCURRENT body (which
+ // normally binds the IV) has not been lowered yet at this point.
+ Fortran::lower::genDeclareSymbol(converter, converter.getSymbolMap(), sym,
+ ivValue);
}
mlir::Operation *privateOp = nullptr;
@@ -2240,6 +2248,11 @@ static mlir::acc::LoopOp createLoopOp(
uint64_t loopsToProcess =
Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
+
+ if (outerDoConstruct.IsDoConcurrent() &&
+ Fortran::lower::getCollapseSizeAndForce(accClauseList).first > 1)
+ TODO(currentLocation, "collapse on acc loop with do concurrent");
+
auto loopOp = buildACCLoopOp(
converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct,
eval, privateOperands, dataMap, gangOperands, workerNumOperands,
diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp
index 9d2d2ce3a82fb..b878b500963fa 100644
--- a/flang/lib/Semantics/canonicalize-acc.cpp
+++ b/flang/lib/Semantics/canonicalize-acc.cpp
@@ -108,11 +108,6 @@ class CanonicalizationOfAcc {
"TILE clause may not appear on loop construct "
"associated with DO CONCURRENT"_err_en_US);
}
- if (std::holds_alternative<parser::AccClause::Collapse>(clause.u)) {
- messages_.Say(beginLoopDirective.source,
- "COLLAPSE clause may not appear on loop construct "
- "associated with DO CONCURRENT"_err_en_US);
- }
}
}
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 6b42a7290e260..c58cdef547a6c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1751,34 +1751,67 @@ void AccAttributeVisitor::CheckAssociatedLoop(
Symbol::Flag flag = Symbol::Flag::AccPrivate;
llvm::SmallVector<Symbol *> ivs;
- using Bounds = parser::LoopControl::Bounds;
+
+ // Iterate the index variables of one DoConstruct, calling fn(name, lower,
+ // upper) for each: once for a regular do loop, once per control variable for
+ // a do concurrent loop. Null pointers signal a loop without valid bounds
+ // (e.g. do while); the level must still be consumed.
+ auto forEachIndex = [this](const parser::DoConstruct &loop, auto &&fn) {
+ if (loop.IsDoConcurrent()) {
+ const auto &loopControl{*loop.GetLoopControl()};
+ const auto &concurrent{
+ std::get<parser::LoopControl::Concurrent>(loopControl.u)};
+ const auto &header{std::get<parser::ConcurrentHeader>(concurrent.t)};
+ for (const auto &control :
+ std::get<std::list<parser::ConcurrentControl>>(header.t)) {
+ fn(&std::get<parser::Name>(control.t),
+ &parser::UnwrapRef<parser::Expr>(std::get<1>(control.t)),
+ &parser::UnwrapRef<parser::Expr>(std::get<2>(control.t)));
+ }
+ } else {
+ auto bounds{GetLoopBounds(loop)};
+ const parser::ScalarExpr *lower{std::get<1>(bounds)};
+ const parser::ScalarExpr *upper{std::get<2>(bounds)};
+ fn(std::get<0>(bounds),
+ lower ? &parser::UnwrapRef<parser::Expr>(*lower) : nullptr,
+ upper ? &parser::UnwrapRef<parser::Expr>(*upper) : nullptr);
+ }
+ };
+
for (const parser::DoConstruct *loop{&outerDoConstruct}; loop && level > 0;) {
- // Go through all nested loops to ensure index variable exists.
- if (const parser::Name *ivName{GetLoopIndex(*loop)}) {
- if (auto *symbol{ResolveAcc(*ivName, flag, currScope())}) {
- if (auto &control{loop->GetLoopControl()}) {
- if (const Bounds *b{std::get_if<Bounds>(&control->u)}) {
- if (auto lowerExpr{semantics::AnalyzeExpr(context_, b->Lower())}) {
- semantics::UnorderedSymbolSet lowerSyms =
- evaluate::CollectSymbols(*lowerExpr);
- checkExprHasSymbols(ivs, lowerSyms);
- }
- if (auto upperExpr{semantics::AnalyzeExpr(context_, b->Upper())}) {
- semantics::UnorderedSymbolSet upperSyms =
- evaluate::CollectSymbols(*upperExpr);
- checkExprHasSymbols(ivs, upperSyms);
+ forEachIndex(*loop,
+ [&](const parser::Name *ivName, const parser::Expr *lower,
+ const parser::Expr *upper) {
+ if (level <= 0)
+ return;
+ if (ivName && lower && upper) {
+ if (auto *symbol{ResolveAcc(*ivName, flag, currScope())}) {
+ if (auto lowerExpr{semantics::AnalyzeExpr(context_, *lower)}) {
+ semantics::UnorderedSymbolSet lowerSyms =
+ evaluate::CollectSymbols(*lowerExpr);
+ checkExprHasSymbols(ivs, lowerSyms);
+ }
+ if (auto upperExpr{semantics::AnalyzeExpr(context_, *upper)}) {
+ semantics::UnorderedSymbolSet upperSyms =
+ evaluate::CollectSymbols(*upperExpr);
+ checkExprHasSymbols(ivs, upperSyms);
+ }
+ ivs.push_back(symbol);
}
}
- }
- ivs.push_back(symbol);
- }
- }
+ --level;
+ });
const auto &block{std::get<parser::Block>(loop->t)};
- --level;
loop = getNextDoConstruct(block, level);
}
- CHECK(level == 0);
+
+ if (level != 0) {
+ context_.Say(GetContext().directiveSource,
+ "Not enough perfectly nested loops for COLLAPSE(%jd) clause, found %jd, expected %jd more"_err_en_US,
+ GetContext().associatedLoopLevel,
+ GetContext().associatedLoopLevel - level, level);
+ }
}
void AccAttributeVisitor::EnsureAllocatableOrPointer(
diff --git a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
index 3f2b77a9a1484..f8243105b832b 100644
--- a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
+++ b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
@@ -3,6 +3,7 @@
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/do_loop_with_cycle_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK2
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_goto_loop.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK3
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_loop_with_inner_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK4
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/collapse.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK5
//--- do_loop_with_stop.f90
@@ -89,3 +90,18 @@ subroutine nested_loop_with_inner_goto()
! CHECK4: not yet implemented: unstructured do loop in acc kernels
end subroutine
+
+//--- collapse.f90
+
+! !$acc parallel loop collapse(N) over a do concurrent.
+subroutine combined(i, j, k)
+ integer :: i, j, k
+ integer :: a(i,j,k)
+ !$acc parallel loop collapse(3)
+ do concurrent (i=1:10, j=1:100, k=1:200)
+ a(i,j,k) = a(i,j,k) + 1
+ end do
+ ! CHECK5: not yet implemented: collapse on acc loop with do concurrent
+end subroutine
+
+
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index ed87cf76038b5..3fae0332052a8 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -400,3 +400,4 @@ subroutine sub1(i, j, k)
! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]]#0 : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
! CHECK: acc.loop combined(parallel) private(%[[P_I]], %[[P_J]], %[[P_K]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%c10{{.*}}, %c100{{.*}}, %c200{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
! CHECK: } attributes {inclusiveUpperbound = array<i1: true, true, true>, independent = [#acc.device_type<none>]}
+
diff --git a/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90 b/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
index a92be44c60b74..3151d726380f1 100644
--- a/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
@@ -85,7 +85,6 @@ program openacc_clause_validity
end do
!$acc parallel
- !ERROR: COLLAPSE clause may not appear on loop construct associated with DO CONCURRENT
!$acc loop collapse(2)
do concurrent (i = 1:N, j = 1:N)
aa(i, j) = 3.14
@@ -102,7 +101,6 @@ program openacc_clause_validity
!$acc parallel
!ERROR: TILE clause may not appear on loop construct associated with DO CONCURRENT
- !ERROR: COLLAPSE clause may not appear on loop construct associated with DO CONCURRENT
!$acc loop tile(2, 2) collapse(2)
do concurrent (i = 1:N, j = 1:N)
aa(i, j) = 3.14
diff --git a/flang/test/Semantics/OpenACC/acc-loop.f90 b/flang/test/Semantics/OpenACC/acc-loop.f90
index 635dbb04cd666..0358b2fa4e1c6 100644
--- a/flang/test/Semantics/OpenACC/acc-loop.f90
+++ b/flang/test/Semantics/OpenACC/acc-loop.f90
@@ -447,6 +447,97 @@ program openacc_loop_validity
END DO
END DO
+ ! do concurrent: each index variable counts as one collapse level.
+
+ ! Valid: collapse(2) covers both indices of a 2-index do concurrent.
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
+ ! Valid: collapse(3) covers both concurrent indices then one nested do.
+ !$acc loop collapse(3)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ DO k = 1, n
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Valid: collapse(2) with single-index do concurrent followed by a nested do.
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n)
+ DO j = 1, n
+ aa(i, j) = 3.14d0
+ END DO
+ END DO
+
+ ! Valid: combined directive, collapse(2) with do concurrent.
+ !$acc parallel loop collapse(2)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
+ ! Valid: outer regular do followed by inner do concurrent covering the
+ ! remaining collapse levels.
+ !$acc loop collapse(3)
+ DO i = 1, n
+ DO CONCURRENT (j = 1:n, k = 1:n)
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Valid (more concurrent indices than collapse levels): collapse(2) consumes
+ ! only the first two indices of a 3-index do concurrent; the third is outside
+ ! the collapsed nest.
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n, j = 1:n, k = 1:n)
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+
+ ! Valid (more loops than collapse levels): collapse(1) consumes only the
+ ! first index of a 2-index do concurrent; the second index is outside the
+ ! collapsed nest.
+ !$acc loop collapse(1)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
+ ! Invalid: nested do's upper bound depends on a collapsed concurrent index.
+ !ERROR: Trip count must be computable and invariant
+ !$acc loop collapse(3)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ DO k = 1, i
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Invalid: nested do's upper bound depends on a collapsed concurrent index.
+ !ERROR: Trip count must be computable and invariant
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n)
+ DO j = 1, i
+ aa(i, j) = 3.14d0
+ END DO
+ END DO
+
+ ! Invalid: inner concurrent index bound depends on the outer collapsed regular
+ ! do index.
+ !ERROR: Trip count must be computable and invariant
+ !$acc loop collapse(3)
+ DO i = 1, n
+ DO CONCURRENT (j = 1:n, k = 1:i)
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Fewer loops than collapse(n): collapse(3) but only 2 levels exist.
+ ! This exercises the loop-nest depth check.
+ !ERROR: Not enough perfectly nested loops for COLLAPSE(3) clause, found 2, expected 1 more
+ !$acc loop collapse(3)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
contains
subroutine sub1()
More information about the flang-commits
mailing list