[flang-commits] [flang] [flang][semantics][openacc] Allow collapse clauses on do concurrent (PR #192488)
Andre Kuhlenschmidt via flang-commits
flang-commits at lists.llvm.org
Wed Apr 22 19:54:30 PDT 2026
https://github.com/akuhlens updated https://github.com/llvm/llvm-project/pull/192488
>From 0ab163c07683f875e78b399bbf5d975d0e98fbe7 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Thu, 16 Apr 2026 09:23:37 -0700
Subject: [PATCH 1/3] initial commit
---
flang/lib/Lower/OpenACC.cpp | 13 +++
flang/lib/Semantics/canonicalize-acc.cpp | 5 -
flang/lib/Semantics/resolve-directives.cpp | 75 ++++++++++-----
.../Todo/do-loops-to-acc-loops-todo.f90 | 16 ++++
flang/test/Lower/OpenACC/acc-loop.f90 | 1 +
.../OpenACC/acc-canonicalization-validity.f90 | 2 -
flang/test/Semantics/OpenACC/acc-loop.f90 | 91 +++++++++++++++++++
7 files changed, 175 insertions(+), 28 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5a7fe899b372f..af2d2db1b68a8 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -15,6 +15,7 @@
#include "flang/Common/idioms.h"
#include "flang/Lower/Bridge.h"
#include "flang/Lower/ConvertType.h"
+#include "flang/Lower/ConvertVariable.h"
#include "flang/Lower/DirectivesCommon.h"
#include "flang/Lower/Mangler.h"
#include "flang/Lower/PFTBuilder.h"
@@ -1414,6 +1415,13 @@ static void privatizeIv(
builder.setInsertionPointToStart(builder.getAllocaBlock());
ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
builder.restoreInsertionPoint(insPt);
+ // Register an hlfir.declare so that remapDataOperandSymbols can find this
+ // locally-scoped IV and remap it to the privatized copy inside the
+ // acc.loop region. Without this, the symbolMap lookup in
+ // remapDataOperandSymbols fails because the DO CONCURRENT body (which
+ // normally binds the IV) has not been lowered yet at this point.
+ Fortran::lower::genDeclareSymbol(converter, converter.getSymbolMap(), sym,
+ ivValue);
}
mlir::Operation *privateOp = nullptr;
@@ -2240,6 +2248,11 @@ static mlir::acc::LoopOp createLoopOp(
uint64_t loopsToProcess =
Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
+
+ if (outerDoConstruct.IsDoConcurrent() &&
+ Fortran::lower::getCollapseSizeAndForce(accClauseList).first > 1)
+ TODO(currentLocation, "collapse on acc loop with do concurrent");
+
auto loopOp = buildACCLoopOp(
converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct,
eval, privateOperands, dataMap, gangOperands, workerNumOperands,
diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp
index 9d2d2ce3a82fb..b878b500963fa 100644
--- a/flang/lib/Semantics/canonicalize-acc.cpp
+++ b/flang/lib/Semantics/canonicalize-acc.cpp
@@ -108,11 +108,6 @@ class CanonicalizationOfAcc {
"TILE clause may not appear on loop construct "
"associated with DO CONCURRENT"_err_en_US);
}
- if (std::holds_alternative<parser::AccClause::Collapse>(clause.u)) {
- messages_.Say(beginLoopDirective.source,
- "COLLAPSE clause may not appear on loop construct "
- "associated with DO CONCURRENT"_err_en_US);
- }
}
}
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 6b42a7290e260..c58cdef547a6c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1751,34 +1751,67 @@ void AccAttributeVisitor::CheckAssociatedLoop(
Symbol::Flag flag = Symbol::Flag::AccPrivate;
llvm::SmallVector<Symbol *> ivs;
- using Bounds = parser::LoopControl::Bounds;
+
+ // Iterate the index variables of one DoConstruct, calling fn(name, lower,
+ // upper) for each: once for a regular do loop, once per control variable for
+ // a do concurrent loop. Null pointers signal a loop without valid bounds
+ // (e.g. do while); the level must still be consumed.
+ auto forEachIndex = [this](const parser::DoConstruct &loop, auto &&fn) {
+ if (loop.IsDoConcurrent()) {
+ const auto &loopControl{*loop.GetLoopControl()};
+ const auto &concurrent{
+ std::get<parser::LoopControl::Concurrent>(loopControl.u)};
+ const auto &header{std::get<parser::ConcurrentHeader>(concurrent.t)};
+ for (const auto &control :
+ std::get<std::list<parser::ConcurrentControl>>(header.t)) {
+ fn(&std::get<parser::Name>(control.t),
+ &parser::UnwrapRef<parser::Expr>(std::get<1>(control.t)),
+ &parser::UnwrapRef<parser::Expr>(std::get<2>(control.t)));
+ }
+ } else {
+ auto bounds{GetLoopBounds(loop)};
+ const parser::ScalarExpr *lower{std::get<1>(bounds)};
+ const parser::ScalarExpr *upper{std::get<2>(bounds)};
+ fn(std::get<0>(bounds),
+ lower ? &parser::UnwrapRef<parser::Expr>(*lower) : nullptr,
+ upper ? &parser::UnwrapRef<parser::Expr>(*upper) : nullptr);
+ }
+ };
+
for (const parser::DoConstruct *loop{&outerDoConstruct}; loop && level > 0;) {
- // Go through all nested loops to ensure index variable exists.
- if (const parser::Name *ivName{GetLoopIndex(*loop)}) {
- if (auto *symbol{ResolveAcc(*ivName, flag, currScope())}) {
- if (auto &control{loop->GetLoopControl()}) {
- if (const Bounds *b{std::get_if<Bounds>(&control->u)}) {
- if (auto lowerExpr{semantics::AnalyzeExpr(context_, b->Lower())}) {
- semantics::UnorderedSymbolSet lowerSyms =
- evaluate::CollectSymbols(*lowerExpr);
- checkExprHasSymbols(ivs, lowerSyms);
- }
- if (auto upperExpr{semantics::AnalyzeExpr(context_, b->Upper())}) {
- semantics::UnorderedSymbolSet upperSyms =
- evaluate::CollectSymbols(*upperExpr);
- checkExprHasSymbols(ivs, upperSyms);
+ forEachIndex(*loop,
+ [&](const parser::Name *ivName, const parser::Expr *lower,
+ const parser::Expr *upper) {
+ if (level <= 0)
+ return;
+ if (ivName && lower && upper) {
+ if (auto *symbol{ResolveAcc(*ivName, flag, currScope())}) {
+ if (auto lowerExpr{semantics::AnalyzeExpr(context_, *lower)}) {
+ semantics::UnorderedSymbolSet lowerSyms =
+ evaluate::CollectSymbols(*lowerExpr);
+ checkExprHasSymbols(ivs, lowerSyms);
+ }
+ if (auto upperExpr{semantics::AnalyzeExpr(context_, *upper)}) {
+ semantics::UnorderedSymbolSet upperSyms =
+ evaluate::CollectSymbols(*upperExpr);
+ checkExprHasSymbols(ivs, upperSyms);
+ }
+ ivs.push_back(symbol);
}
}
- }
- ivs.push_back(symbol);
- }
- }
+ --level;
+ });
const auto &block{std::get<parser::Block>(loop->t)};
- --level;
loop = getNextDoConstruct(block, level);
}
- CHECK(level == 0);
+
+ if (level != 0) {
+ context_.Say(GetContext().directiveSource,
+ "Not enough perfectly nested loops for COLLAPSE(%jd) clause, found %jd, expected %jd more"_err_en_US,
+ GetContext().associatedLoopLevel,
+ GetContext().associatedLoopLevel - level, level);
+ }
}
void AccAttributeVisitor::EnsureAllocatableOrPointer(
diff --git a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
index 3f2b77a9a1484..f8243105b832b 100644
--- a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
+++ b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
@@ -3,6 +3,7 @@
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/do_loop_with_cycle_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK2
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_goto_loop.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK3
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_loop_with_inner_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK4
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/collapse.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK5
//--- do_loop_with_stop.f90
@@ -89,3 +90,18 @@ subroutine nested_loop_with_inner_goto()
! CHECK4: not yet implemented: unstructured do loop in acc kernels
end subroutine
+
+//--- collapse.f90
+
+! !$acc parallel loop collapse(N) over a do concurrent.
+subroutine combined(i, j, k)
+ integer :: i, j, k
+ integer :: a(i,j,k)
+ !$acc parallel loop collapse(3)
+ do concurrent (i=1:10, j=1:100, k=1:200)
+ a(i,j,k) = a(i,j,k) + 1
+ end do
+ ! CHECK5: not yet implemented: collapse on acc loop with do concurrent
+end subroutine
+
+
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index ed87cf76038b5..3fae0332052a8 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -400,3 +400,4 @@ subroutine sub1(i, j, k)
! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]]#0 : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
! CHECK: acc.loop combined(parallel) private(%[[P_I]], %[[P_J]], %[[P_K]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%c10{{.*}}, %c100{{.*}}, %c200{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
! CHECK: } attributes {inclusiveUpperbound = array<i1: true, true, true>, independent = [#acc.device_type<none>]}
+
diff --git a/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90 b/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
index a92be44c60b74..3151d726380f1 100644
--- a/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
@@ -85,7 +85,6 @@ program openacc_clause_validity
end do
!$acc parallel
- !ERROR: COLLAPSE clause may not appear on loop construct associated with DO CONCURRENT
!$acc loop collapse(2)
do concurrent (i = 1:N, j = 1:N)
aa(i, j) = 3.14
@@ -102,7 +101,6 @@ program openacc_clause_validity
!$acc parallel
!ERROR: TILE clause may not appear on loop construct associated with DO CONCURRENT
- !ERROR: COLLAPSE clause may not appear on loop construct associated with DO CONCURRENT
!$acc loop tile(2, 2) collapse(2)
do concurrent (i = 1:N, j = 1:N)
aa(i, j) = 3.14
diff --git a/flang/test/Semantics/OpenACC/acc-loop.f90 b/flang/test/Semantics/OpenACC/acc-loop.f90
index 635dbb04cd666..0358b2fa4e1c6 100644
--- a/flang/test/Semantics/OpenACC/acc-loop.f90
+++ b/flang/test/Semantics/OpenACC/acc-loop.f90
@@ -447,6 +447,97 @@ program openacc_loop_validity
END DO
END DO
+ ! do concurrent: each index variable counts as one collapse level.
+
+ ! Valid: collapse(2) covers both indices of a 2-index do concurrent.
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
+ ! Valid: collapse(3) covers both concurrent indices then one nested do.
+ !$acc loop collapse(3)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ DO k = 1, n
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Valid: collapse(2) with single-index do concurrent followed by a nested do.
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n)
+ DO j = 1, n
+ aa(i, j) = 3.14d0
+ END DO
+ END DO
+
+ ! Valid: combined directive, collapse(2) with do concurrent.
+ !$acc parallel loop collapse(2)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
+ ! Valid: outer regular do followed by inner do concurrent covering the
+ ! remaining collapse levels.
+ !$acc loop collapse(3)
+ DO i = 1, n
+ DO CONCURRENT (j = 1:n, k = 1:n)
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Valid (more concurrent indices than collapse levels): collapse(2) consumes
+ ! only the first two indices of a 3-index do concurrent; the third is outside
+ ! the collapsed nest.
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n, j = 1:n, k = 1:n)
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+
+ ! Valid (more loops than collapse levels): collapse(1) consumes only the
+ ! first index of a 2-index do concurrent; the second index is outside the
+ ! collapsed nest.
+ !$acc loop collapse(1)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
+ ! Invalid: nested do's upper bound depends on a collapsed concurrent index.
+ !ERROR: Trip count must be computable and invariant
+ !$acc loop collapse(3)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ DO k = 1, i
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Invalid: nested do's upper bound depends on the index of a single-index do concurrent.
+ !ERROR: Trip count must be computable and invariant
+ !$acc loop collapse(2)
+ DO CONCURRENT (i = 1:n)
+ DO j = 1, i
+ aa(i, j) = 3.14d0
+ END DO
+ END DO
+
+ ! Invalid: inner concurrent index bound depends on the outer collapsed regular
+ ! do index.
+ !ERROR: Trip count must be computable and invariant
+ !$acc loop collapse(3)
+ DO i = 1, n
+ DO CONCURRENT (j = 1:n, k = 1:i)
+ aa(i, j) = aa(i, j) + a(k)
+ END DO
+ END DO
+
+ ! Fewer loops than collapse(n): collapse(3) but only 2 levels exist.
+ ! This exercises the loop-nest depth check.
+ !ERROR: Not enough perfectly nested loops for COLLAPSE(3) clause, found 2, expected 1 more
+ !$acc loop collapse(3)
+ DO CONCURRENT (i = 1:n, j = 1:n)
+ aa(i, j) = 3.14d0
+ END DO
+
contains
subroutine sub1()
>From d93569ee265aeccb7074b49215ccd8a571911297 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Wed, 22 Apr 2026 19:53:03 -0700
Subject: [PATCH 2/3] addressing feedback
---
flang/lib/Lower/OpenACC.cpp | 18 ++++++------------
flang/lib/Semantics/resolve-directives.cpp | 3 ++-
.../Todo/do-loops-to-acc-loops-todo.f90 | 19 ++++++++++++++++++-
3 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index af2d2db1b68a8..e32655c3c9c2e 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1415,13 +1415,6 @@ static void privatizeIv(
builder.setInsertionPointToStart(builder.getAllocaBlock());
ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
builder.restoreInsertionPoint(insPt);
- // Register an hlfir.declare so that remapDataOperandSymbols can find this
- // locally-scoped IV and remap it to the privatized copy inside the
- // acc.loop region. Without this, the symbolMap lookup in
- // remapDataOperandSymbols fails because the DO CONCURRENT body (which
- // normally binds the IV) has not been lowered yet at this point.
- Fortran::lower::genDeclareSymbol(converter, converter.getSymbolMap(), sym,
- ivValue);
}
mlir::Operation *privateOp = nullptr;
@@ -1551,10 +1544,11 @@ static void visitLoopControl(
}
if (!innerDo)
break; // No deeper loop; stop collecting collapsed bounds.
-
- loopControl = &*innerDo->GetLoopControl();
mlir::Location loc =
- converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
+ converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
+ if (innerDo->IsDoConcurrent())
+ TODO(loc, "OpenACC LOOP with nested DO CONCURRENT");
+ loopControl = &*innerDo->GetLoopControl();
callback(std::get<Fortran::parser::LoopControl::Bounds>(loopControl->u),
loc);
}
@@ -1930,7 +1924,7 @@ static void privatizeInductionVariables(
llvm::SmallVector<mlir::Type> ivTypes;
llvm::SmallVector<mlir::Location> ivLocs;
assert(!outerDoConstruct.IsDoConcurrent() &&
- "do concurrent loops are not expected to contained earlty exits");
+ "do concurrent loops are not expected to contained early exits");
visitLoopControl(converter, outerDoConstruct, loopsToProcess, eval,
[&](const Fortran::parser::LoopControl::Bounds &bounds,
mlir::Location loc) {
@@ -2251,7 +2245,7 @@ static mlir::acc::LoopOp createLoopOp(
if (outerDoConstruct.IsDoConcurrent() &&
Fortran::lower::getCollapseSizeAndForce(accClauseList).first > 1)
- TODO(currentLocation, "collapse on acc loop with do concurrent");
+ TODO(currentLocation, "OpenACC LOOP COLLAPSE with DO CONCURRENT");
auto loopOp = buildACCLoopOp(
converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct,
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index c58cdef547a6c..b3cc45dd0d809 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1808,7 +1808,8 @@ void AccAttributeVisitor::CheckAssociatedLoop(
if (level != 0) {
context_.Say(GetContext().directiveSource,
- "Not enough perfectly nested loops for COLLAPSE(%jd) clause, found %jd, expected %jd more"_err_en_US,
+ "Not enough %s for COLLAPSE(%jd) clause, found %jd, expected %jd more"_err_en_US,
+ forceCollapsed ? "nested loops" : "perfectly nested loops",
GetContext().associatedLoopLevel,
GetContext().associatedLoopLevel - level, level);
}
diff --git a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
index f8243105b832b..aa293642b0865 100644
--- a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
+++ b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
@@ -4,6 +4,7 @@
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_goto_loop.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK3
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_loop_with_inner_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK4
! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/collapse.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK5
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/collapse_nested.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK6
//--- do_loop_with_stop.f90
@@ -101,7 +102,23 @@ subroutine combined(i, j, k)
do concurrent (i=1:10, j=1:100, k=1:200)
a(i,j,k) = a(i,j,k) + 1
end do
- ! CHECK5: not yet implemented: collapse on acc loop with do concurrent
+ ! CHECK5: not yet implemented: OpenACC LOOP COLLAPSE with DO CONCURRENT
end subroutine
+//--- collapse_nested.f90
+
+! !$acc parallel loop collapse(N) over a nested do concurrent.
+subroutine combined(i, j, k)
+ integer :: i, j, k
+ integer :: a(i,j,k)
+ !$acc parallel loop collapse(3)
+ do i = 1, 10
+ do concurrent (j=1:100, k=1:200)
+ a(i,j,k) = a(i,j,k) + 1
+ end do
+ end do
+ ! CHECK6: not yet implemented: OpenACC LOOP with nested DO CONCURRENT
+
+end subroutine
+
>From b5a33cc554fdfae97be2e95dc35caa6047719011 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Wed, 22 Apr 2026 19:54:16 -0700
Subject: [PATCH 3/3] clang-format
---
flang/lib/Lower/OpenACC.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index e32655c3c9c2e..cacc59ab8a5f6 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1545,7 +1545,7 @@ static void visitLoopControl(
if (!innerDo)
break; // No deeper loop; stop collecting collapsed bounds.
mlir::Location loc =
- converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
+ converter.genLocation(Fortran::parser::FindSourceLocation(*innerDo));
if (innerDo->IsDoConcurrent())
TODO(loc, "OpenACC LOOP with nested DO CONCURRENT");
loopControl = &*innerDo->GetLoopControl();
More information about the flang-commits
mailing list