[llvm-branch-commits] [flang] [flang][OpenMP] lower reductions of assumed shape arrays (PR #86982)

Thu Mar 28 10:22:48 PDT 2024

https://github.com/tblah created https://github.com/llvm/llvm-project/pull/86982

Patch 1: https://github.com/llvm/llvm-project/pull/86978
Patch 2: https://github.com/llvm/llvm-project/pull/86979

>From 9f68c844b6f4c4a52002cd9d90cd158b10e64bf2 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Tue, 19 Mar 2024 15:41:59 +0000
Subject: [PATCH] [flang][OpenMP] lower reductions of assumed shape arrays

---
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 25 +++++-
 .../wsloop-reduction-array-assumed-shape.f90  | 90 +++++++++++++++++++
 2 files changed, 112 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90

diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 0d05ca5aee658b..afb1a6e7107641 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -522,11 +522,16 @@ void ReductionProcessor::addDeclareReduction(
     if (reductionSymbols)
       reductionSymbols->push_back(symbol);
     mlir::Value symVal = converter.getSymbolAddress(*symbol);
-    auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+    mlir::Type eleType;
+    auto refType = mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType());
+    if (refType)
+      eleType = refType.getEleTy();
+    else
+      eleType = symVal.getType();
 
     // all arrays must be boxed so that we have convenient access to all the
     // information needed to iterate over the array
-    if (mlir::isa<fir::SequenceType>(redType.getEleTy())) {
+    if (mlir::isa<fir::SequenceType>(eleType)) {
       hlfir::Entity entity{symVal};
       entity = genVariableBox(currentLocation, builder, entity);
       mlir::Value box = entity.getBase();
@@ -538,11 +543,25 @@ void ReductionProcessor::addDeclareReduction(
       builder.create<fir::StoreOp>(currentLocation, box, alloca);
 
       symVal = alloca;
-      redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+    } else if (mlir::isa<fir::BaseBoxType>(symVal.getType())) {
+      // boxed arrays are passed as values not by reference. Unfortunately,
+      // we can't pass a box by value to omp.declare_reduction, so turn it
+      // into a reference
+
+      auto alloca =
+          builder.create<fir::AllocaOp>(currentLocation, symVal.getType());
+      builder.create<fir::StoreOp>(currentLocation, symVal, alloca);
+      symVal = alloca;
     } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) {
       symVal = declOp.getBase();
     }
 
+    // this isn't the same as the by-val and by-ref passing later in the
+    // pipeline. Both styles assume that the variable is a reference at
+    // this point
+    assert(mlir::isa<fir::ReferenceType>(symVal.getType()) &&
+           "reduction input var is a reference");
+
     reductionVars.push_back(symVal);
   }
   const bool isByRef = doReductionByRef(reductionVars);
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
new file mode 100644
index 00000000000000..a1f339faea5cd5
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90
@@ -0,0 +1,90 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s
+
+program reduce_assumed_shape ! driver: zeroes a 2-element array, reduces into it, prints it
+real(8), dimension(2) :: r
+r = 0
+call reduce(r)
+print *, r
+
+contains
+subroutine reduce(r) ! internal procedure containing the OpenMP array reduction under test
+  implicit none
+  real(8),intent(inout) :: r(:) ! assumed-shape dummy: extent is not declared here
+  integer :: i = 0 ! initialized in its declaration; expected IR below reaches it via fir.address_of
+
+  !$omp parallel do reduction(+:r)
+  do i=0,10
+    r(1) = i ! only elements 1 and 2 are written; the caller above passes 2 elements
+    r(2) = 1
+  enddo
+  !$omp end parallel do
+end subroutine
+end program
+
+! CHECK-LABEL:   omp.declare_reduction @add_reduction_byref_box_Uxf64 : !fir.ref<!fir.box<!fir.array<?xf64>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf64>>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_6:.*]] = fir.alloca !fir.array<?xf64>, %[[VAL_4]]#1 {bindc_name = ".tmp"}
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.array<?xf64>>)
+! CHECK:           hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : f64, !fir.box<!fir.array<?xf64>>
+! CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
+! CHECK:           fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:           omp.yield(%[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf64>>>)
+
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf64>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xf64>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<?xf64>>, !fir.shapeshift<1>, index) -> !fir.ref<f64>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<?xf64>>, !fir.shapeshift<1>, index) -> !fir.ref<f64>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<f64>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<f64>
+! CHECK:             %[[VAL_13:.*]] = arith.addf %[[VAL_11]], %[[VAL_12]] fastmath<contract> : f64
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<f64>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf64>>>)
+! CHECK:         }
+
+! CHECK-LABEL:   func.func private @_QFPreduce(
+! CHECK-SAME:                                  %[[VAL_0:.*]]: !fir.box<!fir.array<?xf64>> {fir.bindc_name = "r"}) attributes {{.*}} {
+! CHECK:           %[[VAL_1:.*]] = fir.address_of(@_QFFreduceEi) : !fir.ref<i32>
+! CHECK:           %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box<!fir.array<?xf64>>) -> (!fir.box<!fir.array<?xf64>>, !fir.box<!fir.array<?xf64>>)
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
+! CHECK:             %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK:             %[[VAL_7:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_8:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_9:.*]] = fir.alloca !fir.box<!fir.array<?xf64>>
+! CHECK:             fir.store %[[VAL_3]]#1 to %[[VAL_9]] : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:             omp.wsloop byref reduction(@add_reduction_byref_box_Uxf64 %[[VAL_9]] -> %[[VAL_10:.*]] : !fir.ref<!fir.box<!fir.array<?xf64>>>)  for  (%[[VAL_11:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) {
+! CHECK:               fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref<i32>
+! CHECK:               %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref<!fir.box<!fir.array<?xf64>>>) -> (!fir.ref<!fir.box<!fir.array<?xf64>>>, !fir.ref<!fir.box<!fir.array<?xf64>>>)
+! CHECK:               %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f64
+! CHECK:               %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:               %[[VAL_16:.*]] = arith.constant 1 : index
+! CHECK:               %[[VAL_17:.*]] = hlfir.designate %[[VAL_15]] (%[[VAL_16]])  : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+! CHECK:               hlfir.assign %[[VAL_14]] to %[[VAL_17]] : f64, !fir.ref<f64>
+! CHECK:               %[[VAL_18:.*]] = arith.constant 1.000000e+00 : f64
+! CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref<!fir.box<!fir.array<?xf64>>>
+! CHECK:               %[[VAL_20:.*]] = arith.constant 2 : index
+! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_19]] (%[[VAL_20]])  : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+! CHECK:               hlfir.assign %[[VAL_18]] to %[[VAL_21]] : f64, !fir.ref<f64>
+! CHECK:               omp.yield
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }