[flang-commits] [flang] c4138a2 - [mlir][acc][flang] Lower nested ACC loops with tile clause as collapsed loops (#147801)

via flang-commits flang-commits at lists.llvm.org
Wed Jul 9 13:47:15 PDT 2025


Author: Vijay Kandiah
Date: 2025-07-09T15:47:11-05:00
New Revision: c4138a24dc254783697f512f053e794fb1c68f88

URL: https://github.com/llvm/llvm-project/commit/c4138a24dc254783697f512f053e794fb1c68f88
DIFF: https://github.com/llvm/llvm-project/commit/c4138a24dc254783697f512f053e794fb1c68f88.diff

LOG: [mlir][acc][flang] Lower nested ACC loops with tile clause as collapsed loops (#147801)

In the case of nested loops, `acc.loop` is meant to subsume all of the
loops that it applies to (when explicitly described as doing so in the
OpenACC specification). So when there is a `acc loop tile(...)` present
on nested Fortran DO loops, `acc.loop` should apply to the `n` loops
that `tile` applies to. This change lowers such nested Fortran loops
with tile clause into a collapsed `acc.loop` with `n` IVs, loop bounds,
and step, in a similar fashion to the current lowering for acc loops
with `collapse` clause.

Added: 
    

Modified: 
    flang/include/flang/Lower/OpenACC.h
    flang/lib/Lower/Bridge.cpp
    flang/lib/Lower/OpenACC.cpp
    flang/test/Lower/OpenACC/acc-kernels-loop.f90
    flang/test/Lower/OpenACC/acc-loop.f90
    flang/test/Lower/OpenACC/acc-parallel-loop.f90
    flang/test/Lower/OpenACC/acc-serial-loop.f90

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index dad841863ac00..af3451023e3df 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -114,7 +114,7 @@ void attachDeclarePostDeallocAction(AbstractConverter &, fir::FirOpBuilder &,
 void genOpenACCTerminator(fir::FirOpBuilder &, mlir::Operation *,
                           mlir::Location);
 
-int64_t getCollapseValue(const Fortran::parser::AccClauseList &);
+int64_t getLoopCountForCollapseAndTile(const Fortran::parser::AccClauseList &);
 
 bool isInOpenACCLoop(fir::FirOpBuilder &);
 

diff  --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ed062ecf07166..2c8b3fb95e255 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3083,25 +3083,25 @@ class FirConverter : public Fortran::lower::AbstractConverter {
     Fortran::lower::pft::Evaluation *curEval = &getEval();
 
     if (accLoop || accCombined) {
-      int64_t collapseValue;
+      int64_t loopCount;
       if (accLoop) {
         const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
             std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
         const Fortran::parser::AccClauseList &clauseList =
             std::get<Fortran::parser::AccClauseList>(beginLoopDir.t);
-        collapseValue = Fortran::lower::getCollapseValue(clauseList);
+        loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList);
       } else if (accCombined) {
         const Fortran::parser::AccBeginCombinedDirective &beginCombinedDir =
             std::get<Fortran::parser::AccBeginCombinedDirective>(
                 accCombined->t);
         const Fortran::parser::AccClauseList &clauseList =
             std::get<Fortran::parser::AccClauseList>(beginCombinedDir.t);
-        collapseValue = Fortran::lower::getCollapseValue(clauseList);
+        loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList);
       }
 
       if (curEval->lowerAsStructured()) {
         curEval = &curEval->getFirstNestedEvaluation();
-        for (int64_t i = 1; i < collapseValue; i++)
+        for (int64_t i = 1; i < loopCount; i++)
           curEval = &*std::next(curEval->getNestedEvaluations().begin());
       }
     }
@@ -6155,8 +6155,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
 
       Fortran::lower::defineModuleVariable(*this, var);
     }
-      for (auto &eval : mod.evaluationList)
-        genFIR(eval);
+    for (auto &eval : mod.evaluationList)
+      genFIR(eval);
   }
 
   /// Lower functions contained in a module.

diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index e56d7f7ed9b6f..42842bcb41a74 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -2442,8 +2442,9 @@ static mlir::acc::LoopOp createLoopOp(
       inclusiveBounds.push_back(true);
     }
   } else {
-    int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
-    for (unsigned i = 0; i < collapseValue; ++i) {
+    int64_t loopCount =
+        Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
+    for (unsigned i = 0; i < loopCount; ++i) {
       const Fortran::parser::LoopControl *loopControl;
       if (i == 0) {
         loopControl = &*outerDoConstruct.GetLoopControl();
@@ -2478,7 +2479,7 @@ static mlir::acc::LoopOp createLoopOp(
 
       inclusiveBounds.push_back(true);
 
-      if (i < collapseValue - 1)
+      if (i < loopCount - 1)
         crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
     }
   }
@@ -4940,15 +4941,25 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder,
   builder.create<mlir::acc::YieldOp>(loc, yieldValue);
 }
 
-int64_t Fortran::lower::getCollapseValue(
+int64_t Fortran::lower::getLoopCountForCollapseAndTile(
     const Fortran::parser::AccClauseList &clauseList) {
+  int64_t collapseLoopCount = 1;
+  int64_t tileLoopCount = 1;
   for (const Fortran::parser::AccClause &clause : clauseList.v) {
     if (const auto *collapseClause =
             std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
       const parser::AccCollapseArg &arg = collapseClause->v;
       const auto &collapseValue{std::get<parser::ScalarIntConstantExpr>(arg.t)};
-      return *Fortran::semantics::GetIntValue(collapseValue);
+      collapseLoopCount = *Fortran::semantics::GetIntValue(collapseValue);
+    }
+    if (const auto *tileClause =
+            std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
+      const parser::AccTileExprList &tileExprList = tileClause->v;
+      const std::list<parser::AccTileExpr> &listTileExpr = tileExprList.v;
+      tileLoopCount = listTileExpr.size();
     }
   }
-  return 1;
+  if (tileLoopCount > collapseLoopCount)
+    return tileLoopCount;
+  return collapseLoopCount;
 }

diff  --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
index 4e968144399a8..8d95f35b186ee 100644
--- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -663,7 +663,7 @@ subroutine acc_kernels_loop
 ! CHECK:      acc.kernels {{.*}} {
 ! CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) {{.*}} {
+! CHECK:        acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator
@@ -689,7 +689,7 @@ subroutine acc_kernels_loop
   END DO
 
 ! CHECK:      acc.kernels {{.*}} {
-! CHECK:        acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) {{.*}} {
+! CHECK:        acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.terminator

diff  --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index 5baa485534b2a..c6df28ec5e000 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -9,7 +9,7 @@
 
 program acc_loop
 
-  integer :: i, j
+  integer :: i, j, k
   integer, parameter :: n = 10
   real, dimension(n) :: a, b
   real, dimension(n, n) :: c, d
@@ -209,9 +209,9 @@ program acc_loop
 
 ! CHECK:      [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:      [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:      acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:      acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
 ! CHECK:        acc.yield
-! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true, true>, independent = [#acc.device_type<none>]}
 
   !$acc loop tile(tileSize)
   DO i = 1, n
@@ -229,9 +229,9 @@ program acc_loop
     END DO
   END DO
 
-! CHECK:      acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK:      acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
 ! CHECK:        acc.yield
-! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true, true>, independent = [#acc.device_type<none>]}
 
   !$acc loop collapse(2)
   DO i = 1, n
@@ -246,6 +246,51 @@ program acc_loop
 ! CHECK:        acc.yield
 ! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
 
+  !$acc loop collapse(2) tile(tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      c(i, j) = d(i, j)
+    END DO
+  END DO
+
+! CHECK:      acc.loop {{.*}} tile({%{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
+! CHECK:        fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK:        fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+
+  !$acc loop collapse(2) tile(tileSize, tileSize, tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      DO k = 1, n
+        c(i, j) = d(i, j)
+      END DO
+    END DO
+  END DO
+
+! CHECK:      acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32, %arg2 : i32) = (%{{.*}} : i32, i32, i32) to (%{{.*}} : i32, i32, i32) step (%{{.*}} : i32, i32, i32) {
+! CHECK:        fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK:        fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK:        fir.store %arg2 to %{{.*}} : !fir.ref<i32>
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+
+!$acc loop collapse(3) tile(tileSize, tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      DO k = 1, n
+        c(i, j) = d(i, j)
+      END DO
+    END DO
+  END DO
+
+! CHECK:      acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32, %arg2 : i32) = (%{{.*}} : i32, i32, i32) to (%{{.*}} : i32, i32, i32) step (%{{.*}} : i32, i32, i32) {
+! CHECK:        fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK:        fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK:        fir.store %arg2 to %{{.*}} : !fir.ref<i32>
+! CHECK:        acc.yield
+! CHECK-NEXT: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+
   !$acc loop
   DO i = 1, n
     !$acc loop

diff  --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index 32060179acdf1..8086080bd3797 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -681,7 +681,7 @@ subroutine acc_parallel_loop
 ! CHECK:      acc.parallel {{.*}} {
 ! CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) {{.*}} {
+! CHECK:        acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -707,7 +707,7 @@ subroutine acc_parallel_loop
   END DO
 
 ! CHECK:      acc.parallel {{.*}} {
-! CHECK:        acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) {{.*}} {
+! CHECK:        acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield

diff  --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index af7bb0fac158c..cad0ee73f6cc5 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -620,7 +620,7 @@ subroutine acc_serial_loop
 ! CHECK:      acc.serial {{.*}} {
 ! CHECK:        [[TILESIZE1:%.*]] = arith.constant 2 : i32
 ! CHECK:        [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK:        acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) {{.*}} {
+! CHECK:        acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield
@@ -646,7 +646,7 @@ subroutine acc_serial_loop
   END DO
 
 ! CHECK:      acc.serial {{.*}} {
-! CHECK:        acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) {{.*}} {
+! CHECK:        acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
 ! CHECK:          acc.yield
 ! CHECK-NEXT:   }{{$}}
 ! CHECK:        acc.yield


        


More information about the flang-commits mailing list