[flang-commits] [flang] [flang][semantics][openacc] Allow collapse clauses on do concurrent (PR #192488)

Andre Kuhlenschmidt via flang-commits flang-commits at lists.llvm.org
Thu Apr 16 09:51:59 PDT 2026


https://github.com/akuhlens created https://github.com/llvm/llvm-project/pull/192488

This PR generalizes the semantic checking for collapse clauses to work on `do concurrent` and fixes two bugs exposed along the way:
- The first was that `collapse (n)` where n > the number of perfectly nested loops triggered an assertion failure instead of a diagnostic.
- The second was that `do concurrent` index variables were reported as missing during lowering because they hadn't been declared yet.
Lowering of `collapse` on `do concurrent` is not implemented yet; for now it emits a "not yet implemented" TODO, and the real lowering will come in a follow-up patch.

>From 0ab163c07683f875e78b399bbf5d975d0e98fbe7 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Thu, 16 Apr 2026 09:23:37 -0700
Subject: [PATCH] initial commit

---
 flang/lib/Lower/OpenACC.cpp                   | 13 +++
 flang/lib/Semantics/canonicalize-acc.cpp      |  5 -
 flang/lib/Semantics/resolve-directives.cpp    | 75 ++++++++++-----
 .../Todo/do-loops-to-acc-loops-todo.f90       | 16 ++++
 flang/test/Lower/OpenACC/acc-loop.f90         |  1 +
 .../OpenACC/acc-canonicalization-validity.f90 |  2 -
 flang/test/Semantics/OpenACC/acc-loop.f90     | 91 +++++++++++++++++++
 7 files changed, 175 insertions(+), 28 deletions(-)

diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 5a7fe899b372f..af2d2db1b68a8 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -15,6 +15,7 @@
 #include "flang/Common/idioms.h"
 #include "flang/Lower/Bridge.h"
 #include "flang/Lower/ConvertType.h"
+#include "flang/Lower/ConvertVariable.h"
 #include "flang/Lower/DirectivesCommon.h"
 #include "flang/Lower/Mangler.h"
 #include "flang/Lower/PFTBuilder.h"
@@ -1414,6 +1415,13 @@ static void privatizeIv(
     builder.setInsertionPointToStart(builder.getAllocaBlock());
     ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
     builder.restoreInsertionPoint(insPt);
+    // Register an hlfir.declare so that remapDataOperandSymbols can find this
+    // locally-scoped IV and remap it to the privatized copy inside the
+    // acc.loop region. Without this, the symbolMap lookup in
+    // remapDataOperandSymbols fails because the DO CONCURRENT body (which
+    // normally binds the IV) has not been lowered yet at this point.
+    Fortran::lower::genDeclareSymbol(converter, converter.getSymbolMap(), sym,
+                                     ivValue);
   }
 
   mlir::Operation *privateOp = nullptr;
@@ -2240,6 +2248,11 @@ static mlir::acc::LoopOp createLoopOp(
 
   uint64_t loopsToProcess =
       Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
+
+  if (outerDoConstruct.IsDoConcurrent() &&
+      Fortran::lower::getCollapseSizeAndForce(accClauseList).first > 1)
+    TODO(currentLocation, "collapse on acc loop with do concurrent");
+
   auto loopOp = buildACCLoopOp(
       converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct,
       eval, privateOperands, dataMap, gangOperands, workerNumOperands,
diff --git a/flang/lib/Semantics/canonicalize-acc.cpp b/flang/lib/Semantics/canonicalize-acc.cpp
index 9d2d2ce3a82fb..b878b500963fa 100644
--- a/flang/lib/Semantics/canonicalize-acc.cpp
+++ b/flang/lib/Semantics/canonicalize-acc.cpp
@@ -108,11 +108,6 @@ class CanonicalizationOfAcc {
             "TILE clause may not appear on loop construct "
             "associated with DO CONCURRENT"_err_en_US);
       }
-      if (std::holds_alternative<parser::AccClause::Collapse>(clause.u)) {
-        messages_.Say(beginLoopDirective.source,
-            "COLLAPSE clause may not appear on loop construct "
-            "associated with DO CONCURRENT"_err_en_US);
-      }
     }
   }
 
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 6b42a7290e260..c58cdef547a6c 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -1751,34 +1751,67 @@ void AccAttributeVisitor::CheckAssociatedLoop(
 
   Symbol::Flag flag = Symbol::Flag::AccPrivate;
   llvm::SmallVector<Symbol *> ivs;
-  using Bounds = parser::LoopControl::Bounds;
+
+  // Iterate the index variables of one DoConstruct, calling fn(name, lower,
+  // upper) for each: once for a regular do loop, once per control variable for
+  // a do concurrent loop.  Null pointers signal a loop without valid bounds
+  // (e.g. do while); the level must still be consumed.
+  auto forEachIndex = [this](const parser::DoConstruct &loop, auto &&fn) {
+    if (loop.IsDoConcurrent()) {
+      const auto &loopControl{*loop.GetLoopControl()};
+      const auto &concurrent{
+          std::get<parser::LoopControl::Concurrent>(loopControl.u)};
+      const auto &header{std::get<parser::ConcurrentHeader>(concurrent.t)};
+      for (const auto &control :
+          std::get<std::list<parser::ConcurrentControl>>(header.t)) {
+        fn(&std::get<parser::Name>(control.t),
+            &parser::UnwrapRef<parser::Expr>(std::get<1>(control.t)),
+            &parser::UnwrapRef<parser::Expr>(std::get<2>(control.t)));
+      }
+    } else {
+      auto bounds{GetLoopBounds(loop)};
+      const parser::ScalarExpr *lower{std::get<1>(bounds)};
+      const parser::ScalarExpr *upper{std::get<2>(bounds)};
+      fn(std::get<0>(bounds),
+          lower ? &parser::UnwrapRef<parser::Expr>(*lower) : nullptr,
+          upper ? &parser::UnwrapRef<parser::Expr>(*upper) : nullptr);
+    }
+  };
+
   for (const parser::DoConstruct *loop{&outerDoConstruct}; loop && level > 0;) {
-    // Go through all nested loops to ensure index variable exists.
-    if (const parser::Name *ivName{GetLoopIndex(*loop)}) {
-      if (auto *symbol{ResolveAcc(*ivName, flag, currScope())}) {
-        if (auto &control{loop->GetLoopControl()}) {
-          if (const Bounds *b{std::get_if<Bounds>(&control->u)}) {
-            if (auto lowerExpr{semantics::AnalyzeExpr(context_, b->Lower())}) {
-              semantics::UnorderedSymbolSet lowerSyms =
-                  evaluate::CollectSymbols(*lowerExpr);
-              checkExprHasSymbols(ivs, lowerSyms);
-            }
-            if (auto upperExpr{semantics::AnalyzeExpr(context_, b->Upper())}) {
-              semantics::UnorderedSymbolSet upperSyms =
-                  evaluate::CollectSymbols(*upperExpr);
-              checkExprHasSymbols(ivs, upperSyms);
+    forEachIndex(*loop,
+        [&](const parser::Name *ivName, const parser::Expr *lower,
+            const parser::Expr *upper) {
+          if (level <= 0)
+            return;
+          if (ivName && lower && upper) {
+            if (auto *symbol{ResolveAcc(*ivName, flag, currScope())}) {
+              if (auto lowerExpr{semantics::AnalyzeExpr(context_, *lower)}) {
+                semantics::UnorderedSymbolSet lowerSyms =
+                    evaluate::CollectSymbols(*lowerExpr);
+                checkExprHasSymbols(ivs, lowerSyms);
+              }
+              if (auto upperExpr{semantics::AnalyzeExpr(context_, *upper)}) {
+                semantics::UnorderedSymbolSet upperSyms =
+                    evaluate::CollectSymbols(*upperExpr);
+                checkExprHasSymbols(ivs, upperSyms);
+              }
+              ivs.push_back(symbol);
             }
           }
-        }
-        ivs.push_back(symbol);
-      }
-    }
+          --level;
+        });
 
     const auto &block{std::get<parser::Block>(loop->t)};
-    --level;
     loop = getNextDoConstruct(block, level);
   }
-  CHECK(level == 0);
+
+  if (level != 0) {
+    context_.Say(GetContext().directiveSource,
+        "Not enough perfectly nested loops for COLLAPSE(%jd) clause, found %jd, expected %jd more"_err_en_US,
+        GetContext().associatedLoopLevel,
+        GetContext().associatedLoopLevel - level, level);
+  }
 }
 
 void AccAttributeVisitor::EnsureAllocatableOrPointer(
diff --git a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90 b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
index 3f2b77a9a1484..f8243105b832b 100644
--- a/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
+++ b/flang/test/Lower/OpenACC/Todo/do-loops-to-acc-loops-todo.f90
@@ -3,6 +3,7 @@
 ! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/do_loop_with_cycle_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK2
 ! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_goto_loop.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK3
 ! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/nested_loop_with_inner_goto.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK4
+! RUN: %not_todo_cmd bbc -fopenacc -emit-hlfir %t/collapse.f90 -o - 2>&1 | FileCheck %s --check-prefix=CHECK5
 
 //--- do_loop_with_stop.f90
 
@@ -89,3 +90,18 @@ subroutine nested_loop_with_inner_goto()
 ! CHECK4: not yet implemented: unstructured do loop in acc kernels
 
 end subroutine
+
+//--- collapse.f90
+
+! !$acc parallel loop collapse(N) over a do concurrent.
+subroutine combined(i, j, k)
+  integer :: i, j, k
+  integer :: a(i,j,k)
+  !$acc parallel loop collapse(3)
+  do concurrent (i=1:10, j=1:100, k=1:200)
+    a(i,j,k) = a(i,j,k) + 1
+  end do
+  ! CHECK5: not yet implemented: collapse on acc loop with do concurrent
+end subroutine
+
+
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index ed87cf76038b5..3fae0332052a8 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -400,3 +400,4 @@ subroutine sub1(i, j, k)
 ! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]]#0 : !fir.ref<i32>) recipe(@privatization_ref_i32) -> !fir.ref<i32> {implicit = true, name = "k"}
 ! CHECK: acc.loop combined(parallel) private(%[[P_I]], %[[P_J]], %[[P_K]] : !fir.ref<i32>, !fir.ref<i32>, !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%c10{{.*}}, %c100{{.*}}, %c200{{.*}} : i32, i32, i32)  step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
 ! CHECK: } attributes {inclusiveUpperbound = array<i1: true, true, true>, independent = [#acc.device_type<none>]}
+
diff --git a/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90 b/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
index a92be44c60b74..3151d726380f1 100644
--- a/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
+++ b/flang/test/Semantics/OpenACC/acc-canonicalization-validity.f90
@@ -85,7 +85,6 @@ program openacc_clause_validity
   end do
 
   !$acc parallel
-  !ERROR: COLLAPSE clause may not appear on loop construct associated with DO CONCURRENT
   !$acc loop collapse(2)
   do concurrent (i = 1:N, j = 1:N)
     aa(i, j) = 3.14
@@ -102,7 +101,6 @@ program openacc_clause_validity
 
   !$acc parallel
   !ERROR: TILE clause may not appear on loop construct associated with DO CONCURRENT
-  !ERROR: COLLAPSE clause may not appear on loop construct associated with DO CONCURRENT
   !$acc loop tile(2, 2) collapse(2)
   do concurrent (i = 1:N, j = 1:N)
     aa(i, j) = 3.14
diff --git a/flang/test/Semantics/OpenACC/acc-loop.f90 b/flang/test/Semantics/OpenACC/acc-loop.f90
index 635dbb04cd666..0358b2fa4e1c6 100644
--- a/flang/test/Semantics/OpenACC/acc-loop.f90
+++ b/flang/test/Semantics/OpenACC/acc-loop.f90
@@ -447,6 +447,97 @@ program openacc_loop_validity
     END DO
   END DO
 
+  ! do concurrent: each index variable counts as one collapse level.
+
+  ! Valid: collapse(2) covers both indices of a 2-index do concurrent.
+  !$acc loop collapse(2)
+  DO CONCURRENT (i = 1:n, j = 1:n)
+    aa(i, j) = 3.14d0
+  END DO
+
+  ! Valid: collapse(3) covers both concurrent indices then one nested do.
+  !$acc loop collapse(3)
+  DO CONCURRENT (i = 1:n, j = 1:n)
+    DO k = 1, n
+      aa(i, j) = aa(i, j) + a(k)
+    END DO
+  END DO
+
+  ! Valid: collapse(2) with single-index do concurrent followed by a nested do.
+  !$acc loop collapse(2)
+  DO CONCURRENT (i = 1:n)
+    DO j = 1, n
+      aa(i, j) = 3.14d0
+    END DO
+  END DO
+
+  ! Valid: combined directive, collapse(2) with do concurrent.
+  !$acc parallel loop collapse(2)
+  DO CONCURRENT (i = 1:n, j = 1:n)
+    aa(i, j) = 3.14d0
+  END DO
+
+  ! Valid: outer regular do followed by inner do concurrent covering the
+  ! remaining collapse levels.
+  !$acc loop collapse(3)
+  DO i = 1, n
+    DO CONCURRENT (j = 1:n, k = 1:n)
+      aa(i, j) = aa(i, j) + a(k)
+    END DO
+  END DO
+
+  ! Valid (more concurrent indices than collapse levels): collapse(2) consumes
+  ! only the first two indices of a 3-index do concurrent; the third is outside
+  ! the collapsed nest.
+  !$acc loop collapse(2)
+  DO CONCURRENT (i = 1:n, j = 1:n, k = 1:n)
+    aa(i, j) = aa(i, j) + a(k)
+  END DO
+
+  ! Valid (more loops than collapse levels): collapse(1) consumes only the
+  ! first index of a 2-index do concurrent; the second index is outside the
+  ! collapsed nest.
+  !$acc loop collapse(1)
+  DO CONCURRENT (i = 1:n, j = 1:n)
+    aa(i, j) = 3.14d0
+  END DO
+
+  ! Invalid: nested do's upper bound depends on a collapsed concurrent index.
+  !ERROR: Trip count must be computable and invariant
+  !$acc loop collapse(3)
+  DO CONCURRENT (i = 1:n, j = 1:n)
+    DO k = 1, i
+      aa(i, j) = aa(i, j) + a(k)
+    END DO
+  END DO
+
+  ! Invalid: nested do's upper bound depends on a collapsed concurrent index.
+  !ERROR: Trip count must be computable and invariant
+  !$acc loop collapse(2)
+  DO CONCURRENT (i = 1:n)
+    DO j = 1, i
+      aa(i, j) = 3.14d0
+    END DO
+  END DO
+
+  ! Invalid: inner concurrent index bound depends on the outer collapsed regular
+  ! do index.
+  !ERROR: Trip count must be computable and invariant
+  !$acc loop collapse(3)
+  DO i = 1, n
+    DO CONCURRENT (j = 1:n, k = 1:i)
+      aa(i, j) = aa(i, j) + a(k)
+    END DO
+  END DO
+
+  ! Fewer loops than collapse(n): collapse(3) but only 2 levels exist.
+  ! This exercises the loop-nest depth check.
+  !ERROR: Not enough perfectly nested loops for COLLAPSE(3) clause, found 2, expected 1 more
+  !$acc loop collapse(3)
+  DO CONCURRENT (i = 1:n, j = 1:n)
+    aa(i, j) = 3.14d0
+  END DO
+
 contains
 
   subroutine sub1()



More information about the flang-commits mailing list