[flang-commits] [flang] c4138a2 - [mlir][acc][flang] Lower nested ACC loops with tile clause as collapsed loops (#147801)
via flang-commits
flang-commits at lists.llvm.org
Wed Jul 9 13:47:15 PDT 2025
Author: Vijay Kandiah
Date: 2025-07-09T15:47:11-05:00
New Revision: c4138a24dc254783697f512f053e794fb1c68f88
URL: https://github.com/llvm/llvm-project/commit/c4138a24dc254783697f512f053e794fb1c68f88
DIFF: https://github.com/llvm/llvm-project/commit/c4138a24dc254783697f512f053e794fb1c68f88.diff
LOG: [mlir][acc][flang] Lower nested ACC loops with tile clause as collapsed loops (#147801)
In the case of nested loops, `acc.loop` is meant to subsume all of the
loops that it applies to (when explicitly described as doing so in the
OpenACC specification). So when there is an `acc loop tile(...)` present
on nested Fortran DO loops, `acc.loop` should apply to the `n` loops
that `tile` applies to. This change lowers such nested Fortran loops
with a tile clause into a collapsed `acc.loop` with `n` induction variables (IVs), loop bounds,
and steps, in a similar fashion to the current lowering for acc loops
with `collapse` clause.
Added:
Modified:
flang/include/flang/Lower/OpenACC.h
flang/lib/Lower/Bridge.cpp
flang/lib/Lower/OpenACC.cpp
flang/test/Lower/OpenACC/acc-kernels-loop.f90
flang/test/Lower/OpenACC/acc-loop.f90
flang/test/Lower/OpenACC/acc-parallel-loop.f90
flang/test/Lower/OpenACC/acc-serial-loop.f90
Removed:
################################################################################
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index dad841863ac00..af3451023e3df 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -114,7 +114,7 @@ void attachDeclarePostDeallocAction(AbstractConverter &, fir::FirOpBuilder &,
void genOpenACCTerminator(fir::FirOpBuilder &, mlir::Operation *,
mlir::Location);
-int64_t getCollapseValue(const Fortran::parser::AccClauseList &);
+int64_t getLoopCountForCollapseAndTile(const Fortran::parser::AccClauseList &);
bool isInOpenACCLoop(fir::FirOpBuilder &);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index ed062ecf07166..2c8b3fb95e255 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3083,25 +3083,25 @@ class FirConverter : public Fortran::lower::AbstractConverter {
Fortran::lower::pft::Evaluation *curEval = &getEval();
if (accLoop || accCombined) {
- int64_t collapseValue;
+ int64_t loopCount;
if (accLoop) {
const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
const Fortran::parser::AccClauseList &clauseList =
std::get<Fortran::parser::AccClauseList>(beginLoopDir.t);
- collapseValue = Fortran::lower::getCollapseValue(clauseList);
+ loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList);
} else if (accCombined) {
const Fortran::parser::AccBeginCombinedDirective &beginCombinedDir =
std::get<Fortran::parser::AccBeginCombinedDirective>(
accCombined->t);
const Fortran::parser::AccClauseList &clauseList =
std::get<Fortran::parser::AccClauseList>(beginCombinedDir.t);
- collapseValue = Fortran::lower::getCollapseValue(clauseList);
+ loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList);
}
if (curEval->lowerAsStructured()) {
curEval = &curEval->getFirstNestedEvaluation();
- for (int64_t i = 1; i < collapseValue; i++)
+ for (int64_t i = 1; i < loopCount; i++)
curEval = &*std::next(curEval->getNestedEvaluations().begin());
}
}
@@ -6155,8 +6155,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
Fortran::lower::defineModuleVariable(*this, var);
}
- for (auto &eval : mod.evaluationList)
- genFIR(eval);
+ for (auto &eval : mod.evaluationList)
+ genFIR(eval);
}
/// Lower functions contained in a module.
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index e56d7f7ed9b6f..42842bcb41a74 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -2442,8 +2442,9 @@ static mlir::acc::LoopOp createLoopOp(
inclusiveBounds.push_back(true);
}
} else {
- int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
- for (unsigned i = 0; i < collapseValue; ++i) {
+ int64_t loopCount =
+ Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
+ for (unsigned i = 0; i < loopCount; ++i) {
const Fortran::parser::LoopControl *loopControl;
if (i == 0) {
loopControl = &*outerDoConstruct.GetLoopControl();
@@ -2478,7 +2479,7 @@ static mlir::acc::LoopOp createLoopOp(
inclusiveBounds.push_back(true);
- if (i < collapseValue - 1)
+ if (i < loopCount - 1)
crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
}
}
@@ -4940,15 +4941,25 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder,
builder.create<mlir::acc::YieldOp>(loc, yieldValue);
}
-int64_t Fortran::lower::getCollapseValue(
+int64_t Fortran::lower::getLoopCountForCollapseAndTile(
const Fortran::parser::AccClauseList &clauseList) {
+ int64_t collapseLoopCount = 1;
+ int64_t tileLoopCount = 1;
for (const Fortran::parser::AccClause &clause : clauseList.v) {
if (const auto *collapseClause =
std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
const parser::AccCollapseArg &arg = collapseClause->v;
const auto &collapseValue{std::get<parser::ScalarIntConstantExpr>(arg.t)};
- return *Fortran::semantics::GetIntValue(collapseValue);
+ collapseLoopCount = *Fortran::semantics::GetIntValue(collapseValue);
+ }
+ if (const auto *tileClause =
+ std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
+ const parser::AccTileExprList &tileExprList = tileClause->v;
+ const std::list<parser::AccTileExpr> &listTileExpr = tileExprList.v;
+ tileLoopCount = listTileExpr.size();
}
}
- return 1;
+ if (tileLoopCount > collapseLoopCount)
+ return tileLoopCount;
+ return collapseLoopCount;
}
diff --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
index 4e968144399a8..8d95f35b186ee 100644
--- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90
@@ -663,7 +663,7 @@ subroutine acc_kernels_loop
! CHECK: acc.kernels {{.*}} {
! CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32
! CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) {{.*}} {
+! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
@@ -689,7 +689,7 @@ subroutine acc_kernels_loop
END DO
! CHECK: acc.kernels {{.*}} {
-! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) {{.*}} {
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.terminator
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index 5baa485534b2a..c6df28ec5e000 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -9,7 +9,7 @@
program acc_loop
- integer :: i, j
+ integer :: i, j, k
integer, parameter :: n = 10
real, dimension(n) :: a, b
real, dimension(n, n) :: c, d
@@ -209,9 +209,9 @@ program acc_loop
! CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32
! CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
! CHECK: acc.yield
-! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true, true>, independent = [#acc.device_type<none>]}
!$acc loop tile(tileSize)
DO i = 1, n
@@ -229,9 +229,9 @@ program acc_loop
END DO
END DO
-! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) {
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
! CHECK: acc.yield
-! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+! CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true, true>, independent = [#acc.device_type<none>]}
!$acc loop collapse(2)
DO i = 1, n
@@ -246,6 +246,51 @@ program acc_loop
! CHECK: acc.yield
! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+ !$acc loop collapse(2) tile(tileSize)
+ DO i = 1, n
+ DO j = 1, n
+ c(i, j) = d(i, j)
+ END DO
+ END DO
+
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) = (%{{.*}} : i32, i32) to (%{{.*}} : i32, i32) step (%{{.*}} : i32, i32) {
+! CHECK: fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK: fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+
+ !$acc loop collapse(2) tile(tileSize, tileSize, tileSize)
+ DO i = 1, n
+ DO j = 1, n
+ DO k = 1, n
+ c(i, j) = d(i, j)
+ END DO
+ END DO
+ END DO
+
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32, %arg2 : i32) = (%{{.*}} : i32, i32, i32) to (%{{.*}} : i32, i32, i32) step (%{{.*}} : i32, i32, i32) {
+! CHECK: fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK: fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK: fir.store %arg2 to %{{.*}} : !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK-NEXT: } attributes {collapse = [2], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+
+!$acc loop collapse(3) tile(tileSize, tileSize)
+ DO i = 1, n
+ DO j = 1, n
+ DO k = 1, n
+ c(i, j) = d(i, j)
+ END DO
+ END DO
+ END DO
+
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32, %arg2 : i32) = (%{{.*}} : i32, i32, i32) to (%{{.*}} : i32, i32, i32) step (%{{.*}} : i32, i32, i32) {
+! CHECK: fir.store %arg0 to %{{.*}} : !fir.ref<i32>
+! CHECK: fir.store %arg1 to %{{.*}} : !fir.ref<i32>
+! CHECK: fir.store %arg2 to %{{.*}} : !fir.ref<i32>
+! CHECK: acc.yield
+! CHECK-NEXT: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>]{{.*}}}
+
!$acc loop
DO i = 1, n
!$acc loop
diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
index 32060179acdf1..8086080bd3797 100644
--- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90
@@ -681,7 +681,7 @@ subroutine acc_parallel_loop
! CHECK: acc.parallel {{.*}} {
! CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32
! CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) {{.*}} {
+! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
@@ -707,7 +707,7 @@ subroutine acc_parallel_loop
END DO
! CHECK: acc.parallel {{.*}} {
-! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) {{.*}} {
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90
index af7bb0fac158c..cad0ee73f6cc5 100644
--- a/flang/test/Lower/OpenACC/acc-serial-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90
@@ -620,7 +620,7 @@ subroutine acc_serial_loop
! CHECK: acc.serial {{.*}} {
! CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32
! CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32
-! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) {{.*}} {
+! CHECK: acc.loop {{.*}} tile({[[TILESIZE1]] : i32, [[TILESIZE2]] : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
@@ -646,7 +646,7 @@ subroutine acc_serial_loop
END DO
! CHECK: acc.serial {{.*}} {
-! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) {{.*}} {
+! CHECK: acc.loop {{.*}} tile({%{{.*}} : i32, %{{.*}} : i32}) control(%arg0 : i32, %arg1 : i32) {{.*}} {
! CHECK: acc.yield
! CHECK-NEXT: }{{$}}
! CHECK: acc.yield
More information about the flang-commits
mailing list