[flang-commits] [flang] 9a81d85 - [flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays (#155536)

via flang-commits flang-commits at lists.llvm.org
Tue Aug 26 21:27:02 PDT 2025


Author: khaki3
Date: 2025-08-26T21:26:58-07:00
New Revision: 9a81d853cee219af05ee58959ea9c5e9ac69980e

URL: https://github.com/llvm/llvm-project/commit/9a81d853cee219af05ee58959ea9c5e9ac69980e
DIFF: https://github.com/llvm/llvm-project/commit/9a81d853cee219af05ee58959ea9c5e9ac69980e.diff

LOG: [flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays (#155536)

In the following example of reducing a static 2D array, we have
incorrect coordinates for array access in the reduction combiner. This
PR reverses the order of the induction variables used for such array
indexing. For other cases of static arrays, we reverse the loop order as
well so that the innermost loop can handle the innermost dimension.

```Fortran
program main
  implicit none
  integer, parameter :: m = 2
  integer, parameter :: n = 10
  integer :: r(n,m), i

  r = 0

  !$acc parallel loop reduction(+:r(:n,:m))
  do i = 1, n
     r(i, 1) = i
  enddo

  print *, r
end program main
```

Currently, we have:
```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
  fir.do_loop %arg3 = %c0 to %c9 step %c1 {
    %0 = fir.coordinate_of %arg0, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
    %1 = fir.coordinate_of %arg1, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```

We'll obtain:
```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
  fir.do_loop %arg3 = %c0 to %c9 step %c1 {
    %0 = fir.coordinate_of %arg0, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
    %1 = fir.coordinate_of %arg1, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```

Added: 
    

Modified: 
    flang/lib/Lower/OpenACC.cpp
    flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
    flang/test/Lower/OpenACC/acc-reduction.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 35edcb0926b69..7a84b21913bae 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
     if (bounds.empty()) {
       llvm::SmallVector<mlir::Value> extents;
       mlir::Type idxTy = builder.getIndexType();
-      for (auto extent : seqTy.getShape()) {
+      for (auto extent : llvm::reverse(seqTy.getShape())) {
         mlir::Value lb = mlir::arith::ConstantOp::create(
             builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0));
         mlir::Value ub = mlir::arith::ConstantOp::create(
@@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
       }
     } else {
       // Lowerbound, upperbound and step are passed as block arguments.
-      [[maybe_unused]] unsigned nbRangeArgs =
+      unsigned nbRangeArgs =
           recipe.getCombinerRegion().getArguments().size() - 2;
       assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
              "Expect 3 block arguments per dimension");
-      for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
-           i += 3) {
+      for (int i = nbRangeArgs - 1; i >= 2; i -= 3) {
         mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
         mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
         mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
@@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
         ivs.push_back(loop.getInductionVar());
       }
     }
-    auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs);
-    auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs);
+    llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend());
+    auto addr1 =
+        fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs);
+    auto addr2 =
+        fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs);
     auto load1 = fir::LoadOp::create(builder, loc, addr1);
     auto load2 = fir::LoadOp::create(builder, loc, addr2);
     mlir::Value res =

diff  --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb751678ed53..02a152c9d7ae2 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -381,8 +381,8 @@
 ! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -427,8 +427,8 @@
 ! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -612,8 +612,8 @@
 ! CHECK:       %[[UB2:.*]] = arith.constant 99 : index
 ! CHECK:       %[[STEP2:.*]] = arith.constant 1 : index
 ! CHECK:       fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
 ! CHECK:         %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:         %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:         %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -641,8 +641,8 @@
 ! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32

diff  --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 035b38b8a4da4..2a896c6b8d771 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -423,15 +423,15 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
-! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -469,15 +469,15 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
-! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
 ! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
 ! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
 ! CHECK:       %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -650,7 +650,7 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 1 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
@@ -658,11 +658,11 @@
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
 ! CHECK:       %[[LB2:.*]] = arith.constant 0 : index
-! CHECK:       %[[UB2:.*]] = arith.constant 1 : index
+! CHECK:       %[[UB2:.*]] = arith.constant 99 : index
 ! CHECK:       %[[STEP2:.*]] = arith.constant 1 : index
 ! CHECK:       fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
 ! CHECK:         %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:         %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:         %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -683,15 +683,15 @@
 ! CHECK: } combiner {
 ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
 ! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
-! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 9 : index
 ! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
 ! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
 ! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
-! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 99 : index
 ! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
 ! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
 ! CHECK:       %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
 ! CHECK:       %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
 ! CHECK:       %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32


        


More information about the flang-commits mailing list