[flang-commits] [flang] [FIR][OpenACC] fix loop order with generatePrivateInit (PR #155002)

Scott Manley via flang-commits flang-commits at lists.llvm.org
Fri Aug 22 11:13:39 PDT 2025


https://github.com/rscottmanley created https://github.com/llvm/llvm-project/pull/155002

When generating a loop nest to initialize a private array, the loop nest should run from the slowest dimension to the fastest dimension. When you get a shape from a SequenceType it is from fastest to slowest dimension. Reverse the order.

>From d11ffb9e07fb5534e41c4d0612875a8802bb228c Mon Sep 17 00:00:00 2001
From: Scott Manley <scmanley at nvidia.com>
Date: Fri, 22 Aug 2025 11:06:10 -0700
Subject: [PATCH] [FIR][OpenACC] fix loop order with generatePrivateInit

When generating a loop nest to initialize a private array, the loop nest
should run from the slowest dimension to the fastest dimension. When you
get a shape from a SequenceType it is from fastest to slowest dimension.
Reverse the order.
---
 .../Support/FIROpenACCTypeInterfaces.cpp      |  3 +-
 flang/test/Lower/OpenACC/acc-reduction.f90    | 31 +++++++++++++++++++
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 8fefda59806c8..5b6d904fb0d59 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -591,7 +591,8 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
           hlfir::AssignOp::create(firBuilder, loc, initVal,
                                   declareOp.getBase());
         } else {
-          for (auto ext : seqTy.getShape()) {
+          // Generate loop nest from slowest to fastest running dimension
+          for (auto ext : llvm::reverse(seqTy.getShape())) {
             auto lb = firBuilder.createIntegerConstant(loc, idxTy, 0);
             auto ub = firBuilder.createIntegerConstant(loc, idxTy, ext - 1);
             auto step = firBuilder.createIntegerConstant(loc, idxTy, 1);
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 20b5ad28f78a1..035b38b8a4da4 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -189,6 +189,14 @@
 ! CHECK:   acc.yield %arg0 : !fir.box<!fir.array<?xi32>>
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 : !fir.ref<!fir.array<10x20xi32>> reduction_operator <add> init {
+! CHECK:     fir.do_loop %arg1 = %c0 to %c19 step %c1 {
+! CHECK:       fir.do_loop %arg2 = %c0_0 to %c9 step %c1_1 {
+! CHECK: } combiner {
+! CHECK:     fir.do_loop %arg2 = %c0 to %c19 step %c1 {
+! CHECK:       fir.do_loop %arg3 = %c0_0 to %c9 step %c1_1 {
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref<complex<f32>> reduction_operator <mul> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<complex<f32>>):
 ! CHECK:   %[[REAL:.*]] = arith.constant 1.000000e+00 : f32
@@ -1167,6 +1175,29 @@ subroutine acc_reduction_add_static_slice(a)
 ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<100xi32>>) bounds(%[[BOUND]]) -> !fir.ref<!fir.array<100xi32>> {name = "a(11:20)"}
 ! CHECK: acc.parallel reduction(@reduction_add_section_lb10.ub19_ref_100xi32 -> %[[RED]] : !fir.ref<!fir.array<100xi32>>)
 
+subroutine acc_reduction_add_static_slice_2d(a)
+  integer :: a(10,20)
+  !$acc parallel reduction(+:a(:10,:20))
+  !$acc end parallel
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_add_static_slice_2d(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10x20xi32>> {fir.bindc_name = "a"})
+! CHECK: %[[C10:.*]] = arith.constant 10 : index
+! CHECK: %[[C20:.*]] = arith.constant 20 : index
+! CHECK: %[[DECLARG0:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK: %[[LB:.*]] = arith.constant 0 : index
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+! CHECK: %[[UB9:.*]] = arith.constant 9 : index
+! CHECK: %[[STRIDE1:.*]] = arith.constant 10 : index
+! CHECK: %[[BOUND0:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB9]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[UB19:.*]] = arith.constant 19 : index
+! CHECK: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB19]] : index) extent(%[[C20]] : index)
+! stride(%[[STRIDE1]] : index) startIdx(%[[C1]] : index)
+! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[DECLARG0]]#0 : !fir.ref<!fir.array<10x20xi32>>) bounds(%[[BOUND0]], %[[BOUND1]]) ->
+! !fir.ref<!fir.array<10x20xi32>> {name = "a(:10,:20)"}
+! CHECK: acc.parallel reduction(@reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 -> %[[RED]] : !fir.ref<!fir.array<10x20xi32>>)
+
 subroutine acc_reduction_add_dynamic_extent_add(a)
   integer :: a(:)
   !$acc parallel reduction(+:a)



More information about the flang-commits mailing list