[flang-commits] [flang] d5a50ed - [flang][openacc] Add lowering support for multi-dimensional arrays reduction

Thu Jun 22 13:11:37 PDT 2023

Author: Valentin Clement
Date: 2023-06-22T13:11:29-07:00
New Revision: d5a50ed9bd3901bb54503b032b3bb6da01807d64

URL: https://github.com/llvm/llvm-project/commit/d5a50ed9bd3901bb54503b032b3bb6da01807d64
DIFF: https://github.com/llvm/llvm-project/commit/d5a50ed9bd3901bb54503b032b3bb6da01807d64.diff

LOG: [flang][openacc] Add lowering support for multi-dimensional arrays reduction

Lower multi-dimensional array reductions for the add and mul operators.

Depends on D153448

Reviewed By: razvanlupusoru

Differential Revision: https://reviews.llvm.org/D153455

Added: 
    

Modified: 
    flang/lib/Lower/OpenACC.cpp
    flang/test/Lower/OpenACC/acc-reduction.f90

Removed: 
    


################################################################################
diff  --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index bc5ab46aca726..55365a77edf3b 100644

--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -640,31 +640,34 @@ static mlir::Value genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
   // Handle combiner on arrays.
   if (auto refTy = mlir::dyn_cast<fir::ReferenceType>(ty)) {
     if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(refTy.getEleTy())) {
-      if (seqTy.getShape().size() > 1)
-        TODO(loc, "OpenACC reduction on array with more than one dimension");
       if (seqTy.hasDynamicExtents())
         TODO(loc, "OpenACC reduction on array with dynamic extents");
       mlir::Type idxTy = builder.getIndexType();
       mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy());
-      auto lb = builder.create<mlir::arith::ConstantOp>(
-          loc, idxTy, builder.getIntegerAttr(idxTy, 0));
-      auto ub = builder.create<mlir::arith::ConstantOp>(
-          loc, idxTy, builder.getIntegerAttr(idxTy, seqTy.getShape()[0] - 1));
-      auto step = builder.create<mlir::arith::ConstantOp>(
-          loc, idxTy, builder.getIntegerAttr(idxTy, 1));
-      auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
-                                                /*unordered=*/false);
-      builder.setInsertionPointToStart(loop.getBody());
-      auto addr1 = builder.create<fir::CoordinateOp>(
-          loc, refTy, value1, mlir::ValueRange{loop.getInductionVar()});
-      auto addr2 = builder.create<fir::CoordinateOp>(
-          loc, refTy, value2, mlir::ValueRange{loop.getInductionVar()});
+
+      llvm::SmallVector<fir::DoLoopOp> loops;
+      llvm::SmallVector<mlir::Value> ivs;
+      for (auto ext : seqTy.getShape()) {
+        auto lb = builder.create<mlir::arith::ConstantOp>(
+            loc, idxTy, builder.getIntegerAttr(idxTy, 0));
+        auto ub = builder.create<mlir::arith::ConstantOp>(
+            loc, idxTy, builder.getIntegerAttr(idxTy, ext - 1));
+        auto step = builder.create<mlir::arith::ConstantOp>(
+            loc, idxTy, builder.getIntegerAttr(idxTy, 1));
+        auto loop = builder.create<fir::DoLoopOp>(loc, lb, ub, step,
+                                                  /*unordered=*/false);
+        builder.setInsertionPointToStart(loop.getBody());
+        loops.push_back(loop);
+        ivs.push_back(loop.getInductionVar());
+      }
+      auto addr1 = builder.create<fir::CoordinateOp>(loc, refTy, value1, ivs);
+      auto addr2 = builder.create<fir::CoordinateOp>(loc, refTy, value2, ivs);
       auto load1 = builder.create<fir::LoadOp>(loc, addr1);
       auto load2 = builder.create<fir::LoadOp>(loc, addr2);
       auto combined =
           genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2);
       builder.create<fir::StoreOp>(loc, combined, addr1);
-      builder.setInsertionPointAfter(loop);
+      builder.setInsertionPointAfter(loops[0]);
       return value1;
     }
   }

diff  --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 88fec63552b90..13e4ad4bcb1c0 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -116,6 +116,61 @@
 ! CHECK:   acc.yield %[[COMBINED]] : f32
 ! CHECK: }
 
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100x10x2xi32 : !fir.ref<!fir.array<100x10x2xi32>> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100x10x2xi32>>):
+! CHECK:   %[[CST:.*]] = arith.constant dense<0> : vector<100x10x2xi32>
+! CHECK:   acc.yield %[[CST]] : vector<100x10x2xi32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10x2xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>>):
+! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
+! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+! CHECK:       %[[LB2:.*]] = arith.constant 0 : index
+! CHECK:       %[[UB2:.*]] = arith.constant 1 : index
+! CHECK:       %[[STEP2:.*]] = arith.constant 1 : index
+! CHECK:       fir.do_loop %[[IV2:.*]] = %[[LB2]] to %[[UB2]] step %[[STEP2]] {
+! CHECK:         %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]], %[[IV2]] : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK:         %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
+! CHECK:         %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
+! CHECK:         %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:         fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref<i32>
+! CHECK:       }
+! CHECK:     }
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100x10x2xi32>>
+! CHECK: }
+
+! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100x10xi32 : !fir.ref<!fir.array<100x10xi32>> reduction_operator <add> init {
+! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100x10xi32>>):
+! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<100x10xi32>
+! CHECK: acc.yield %[[CST]] : vector<100x10xi32>
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>>, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>>):
+! CHECK:   %[[LB0:.*]] = arith.constant 0 : index
+! CHECK:   %[[UB0:.*]] = arith.constant 99 : index
+! CHECK:   %[[STEP0:.*]] = arith.constant 1 : index
+! CHECK:   fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] {
+! CHECK:     %[[LB1:.*]] = arith.constant 0 : index
+! CHECK:     %[[UB1:.*]] = arith.constant 9 : index
+! CHECK:     %[[STEP1:.*]] = arith.constant 1 : index
+! CHECK:     fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] {
+! CHECK:       %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK:       %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref<i32>
+! CHECK:       %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref<i32>
+! CHECK:       %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32
+! CHECK:       fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref<i32>
+! CHECK:     }
+! CHECK:   }
+! CHECK:   acc.yield %[[ARG0]] : !fir.ref<!fir.array<100x10xi32>>
+! CHECK: }
+
 ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <add> init {
 ! CHECK: ^bb0(%{{.*}}: !fir.ref<!fir.array<100xi32>>):
 ! CHECK:   %[[CST:.*]] = arith.constant dense<0> : vector<100xi32>
@@ -174,6 +229,42 @@ subroutine acc_reduction_add_int_array_1d(a, b)
 ! CHECK-SAME:  %{{.*}}: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref<!fir.array<100xi32>> {fir.bindc_name = "b"})
 ! CHECK:       acc.loop reduction(@reduction_add_ref_100xi32 -> %[[B]] : !fir.ref<!fir.array<100xi32>>)
 
+subroutine acc_reduction_add_int_array_2d(a, b)
+  integer :: a(100, 10), b(100, 10)
+  integer :: i, j
+
+  !$acc loop collapse(2) reduction(+:b)
+  do i = 1, 100
+    do j = 1, 10
+      b(i, j) = b(i, j) + a(i, j)
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_add_int_array_2d(
+! CHECK-SAME:  %[[ARG0:.*]]: !fir.ref<!fir.array<100x10xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10xi32>> {fir.bindc_name = "b"}) {
+! CHECK:       acc.loop reduction(@reduction_add_ref_100x10xi32 -> %[[ARG1]] : !fir.ref<!fir.array<100x10xi32>>) {
+! CHECK: } attributes {collapse = 2 : i64}
+
+subroutine acc_reduction_add_int_array_3d(a, b)
+  integer :: a(100, 10, 2), b(100, 10, 2)
+  integer :: i, j, k
+
+  !$acc loop collapse(3) reduction(+:b)
+  do i = 1, 100
+    do j = 1, 10
+      do k = 1, 2
+        b(i, j, k) = b(i, j, k) + a(i, j, k)
+      end do
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPacc_reduction_add_int_array_3d(
+! CHECK-SAME: %{{.*}}: !fir.ref<!fir.array<100x10x2xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<!fir.array<100x10x2xi32>> {fir.bindc_name = "b"})
+! CHECK: acc.loop reduction(@reduction_add_ref_100x10x2xi32 -> %[[ARG1]] : !fir.ref<!fir.array<100x10x2xi32>>)
+! CHECK: } attributes {collapse = 3 : i64}
+
 subroutine acc_reduction_add_float(a, b)
   real :: a(100), b
   integer :: i