[flang-commits] [flang] 9a81d85 - [flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays (#155536)
via flang-commits
flang-commits at lists.llvm.org
Tue Aug 26 21:27:02 PDT 2025
Author: khaki3
Date: 2025-08-26T21:26:58-07:00
New Revision: 9a81d853cee219af05ee58959ea9c5e9ac69980e
URL: https://github.com/llvm/llvm-project/commit/9a81d853cee219af05ee58959ea9c5e9ac69980e
DIFF: https://github.com/llvm/llvm-project/commit/9a81d853cee219af05ee58959ea9c5e9ac69980e.diff
LOG: [flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays (#155536)
In the following example of reducing a static 2D array, we have
incorrect coordinates for array access in the reduction combiner. This
PR reverses the order of the induction variables used for such array
indexing. For other cases of static arrays, we reverse the loop order as
well so that the innermost loop can handle the innermost dimension.
```Fortran
program main
implicit none
integer, parameter :: m = 2
integer, parameter :: n = 10
integer :: r(n,m), i
r = 0
!$acc parallel loop reduction(+:r(:n,:m))
do i = 1, n
r(i, 1) = i
enddo
print *, r
end program main
```
Currently, we have:
```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```
We'll obtain:
```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```
Added:
Modified:
flang/lib/Lower/OpenACC.cpp
flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
flang/test/Lower/OpenACC/acc-reduction.f90
Removed:
################################################################################
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 35edcb0926b69..7a84b21913bae 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
if (bounds.empty()) {
llvm::SmallVector<mlir::Value> extents;
mlir::Type idxTy = builder.getIndexType();
- for (auto extent : seqTy.getShape()) {
+ for (auto extent : llvm::reverse(seqTy.getShape())) {
mlir::Value lb = mlir::arith::ConstantOp::create(
builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0));
mlir::Value ub = mlir::arith::ConstantOp::create(
@@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
}
} else {
// Lowerbound, upperbound and step are passed as block arguments.
- [[maybe_unused]] unsigned nbRangeArgs =
+ unsigned nbRangeArgs =
recipe.getCombinerRegion().getArguments().size() - 2;
assert((nbRangeArgs / 3 == seqTy.getDimension()) &&
"Expect 3 block arguments per dimension");
- for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size();
- i += 3) {
+ for (int i = nbRangeArgs - 1; i >= 2; i -= 3) {
mlir::Value lb = recipe.getCombinerRegion().getArgument(i);
mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1);
mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2);
@@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
ivs.push_back(loop.getInductionVar());
}
}
- auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs);
- auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs);
+ llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend());
+ auto addr1 =
+ fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs);
+ auto addr2 =
+ fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs);
auto load1 = fir::LoadOp::create(builder, loc, addr1);
auto load2 = fir::LoadOp::create(builder, loc, addr2);
mlir::Value res =
diff --git a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
index 5bb751678ed53..02a152c9d7ae2 100644
--- a/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction-unwrap-defaultbounds.f90
@@ -381,8 +381,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -427,8 +427,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -612,8 +612,8 @@
! CHECK: %[[UB2:.*]] = arith.constant 99 : index
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -641,8 +641,8 @@
! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 035b38b8a4da4..2a896c6b8d771 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -423,15 +423,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1:.*]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD1]], %[[LOAD2]] : i32
@@ -469,15 +469,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xf32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xf32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<f32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<f32>
! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] {{.*}} : f32
@@ -650,7 +650,7 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 1 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
@@ -658,11 +658,11 @@
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
! CHECK: %[[LB2:.*]] = arith.constant 0 : index
-! CHECK: %[[UB2:.*]] = arith.constant 1 : index
+! CHECK: %[[UB2:.*]] = arith.constant 99 : index
! CHECK: %[[STEP2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV2]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
@@ -683,15 +683,15 @@
! CHECK: } combiner {
! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
! CHECK: %[[LB0:.*]] = arith.constant 0 : index
-! CHECK: %[[UB0:.*]] = arith.constant 99 : index
+! CHECK: %[[UB0:.*]] = arith.constant 9 : index
! CHECK: %[[STEP0:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
! CHECK: %[[LB1:.*]] = arith.constant 0 : index
-! CHECK: %[[UB1:.*]] = arith.constant 9 : index
+! CHECK: %[[UB1:.*]] = arith.constant 99 : index
! CHECK: %[[STEP1:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
-! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV1]], %[[IV0]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
! CHECK: %[[LOAD1]] = fir.load %[[COORD1]] : !fir.ref<i32>
! CHECK: %[[LOAD2]] = fir.load %[[COORD2]] : !fir.ref<i32>
! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
More information about the flang-commits
mailing list