[flang-commits] [flang] [flang][OpenMP] support more reduction types for procedure designators (PR #96057)

Tom Eccles via flang-commits flang-commits at lists.llvm.org
Wed Jun 19 04:05:49 PDT 2024


https://github.com/tblah created https://github.com/llvm/llvm-project/pull/96057

This re-uses the reduction declarations generated for intrinsic operators to add support for reductions of allocatables, pointers, and arrays when the reduction identifier is a procedure designator (e.g. min/max).

I have split this into two commits to make it easier to review. The first one makes the functional change. The second cleans things up now that we can share much more code between intrinsic operators and procedure designators.

From daef048e7647c30acd7007340a894f3b063bbf56 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 12 Jun 2024 13:59:58 +0000
Subject: [PATCH 1/2] [flang][OpenMP] support more reduction types for
 procedure designators

This re-uses reduction declarations from intrinsic operators to add
support for reductions of allocatables, pointers, and arrays with
procedure designators (e.g. min/max).
---
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp |  28 +-
 .../OpenMP/Todo/reduction-array-intrinsic.f90 |  11 -
 .../OpenMP/reduction-array-intrinsic.f90      |  96 ++++++
 ...oop-reduction-allocatable-array-minmax.f90 | 299 ++++++++++++++++++
 4 files changed, 413 insertions(+), 21 deletions(-)
 delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90
 create mode 100644 flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
 create mode 100644 flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90

diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 237f9cdf22a1b..5c6a105b9b6ea 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -709,8 +709,8 @@ void ReductionProcessor::addDeclareReduction(
     }
   }
 
-  // initial pass to collect all reduction vars so we can figure out if this
-  // should happen byref
+  // Reduction variable processing common to both intrinsic operators and
+  // procedure designators
   fir::FirOpBuilder &builder = converter.getFirOpBuilder();
   for (const Object &object : objectList) {
     const semantics::Symbol *symbol = object.sym();
@@ -808,15 +808,23 @@ void ReductionProcessor::addDeclareReduction(
       ReductionProcessor::ReductionIdentifier redId =
           ReductionProcessor::getReductionType(*reductionIntrinsic);
       for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
+        // TODO: share code with above
         auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
-        if (!redType.getEleTy().isIntOrIndexOrFloat())
-          TODO(currentLocation,
-               "Reduction of some types is not supported for intrinsics");
-        decl = createDeclareReduction(
-            firOpBuilder,
-            getReductionName(getRealName(*reductionIntrinsic).ToString(),
-                             firOpBuilder.getKindMap(), redType, isByRef),
-            redId, redType, currentLocation, isByRef);
+        const auto &kindMap = firOpBuilder.getKindMap();
+        if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
+          decl = createDeclareReduction(
+              firOpBuilder,
+              getReductionName(
+                  /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
+                  kindMap, firOpBuilder.getI1Type(), isByRef),
+              redId, redType, currentLocation, isByRef);
+        else
+          decl = createDeclareReduction(
+              firOpBuilder,
+              getReductionName(
+                  /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
+                  kindMap, redType, isByRef),
+              redId, redType, currentLocation, isByRef);
         reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
             firOpBuilder.getContext(), decl.getSymName()));
       }
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90
deleted file mode 100644
index 49c899238d2a3..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90
+++ /dev/null
@@ -1,11 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction of some types is not supported for intrinsics
-subroutine max_array_reduction(l, r)
-  integer :: l(:), r(:)
-
-  !$omp parallel reduction(max:l)
-    l = max(l, r)
-  !$omp end parallel
-end subroutine
diff --git a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
new file mode 100644
index 0000000000000..208cda28a3e59
--- /dev/null
+++ b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
@@ -0,0 +1,96 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine max_array_reduction(l, r)
+  integer :: l(:), r(:)
+
+  !$omp parallel reduction(max:l)
+    l = max(l, r)
+  !$omp end parallel
+end subroutine
+
+! CHECK-LABEL:   omp.declare_reduction @max_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant -2147483648 : i32
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]]#1 : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_7:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_5]]#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK:           %[[VAL_8:.*]] = arith.constant true
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_6]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_10:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_10]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_12:.*]] = fir.shape_shift %[[VAL_11]]#0, %[[VAL_11]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_13:.*]] = fir.rebox %[[VAL_9]]#0(%[[VAL_12]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?xi32>>
+! CHECK:           hlfir.assign %[[VAL_1]] to %[[VAL_13]] : i32, !fir.box<!fir.array<?xi32>>
+! CHECK:           fir.store %[[VAL_13]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.maxsi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
+! CHECK-LABEL:   }  cleanup {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:           %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xi32>>) -> i64
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i64
+! CHECK:           %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
+! CHECK:           fir.if %[[VAL_5]] {
+! CHECK:             %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:             fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:           }
+! CHECK:           omp.yield
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QPmax_array_reduction(
+! CHECK-SAME:                                      %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "l"},
+! CHECK-SAME:                                      %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "r"}) {
+! CHECK:           %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFmax_array_reductionEl"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFmax_array_reductionEr"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK:           %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+! CHECK:           fir.store %[[VAL_3]]#1 to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:           omp.parallel reduction(byref @max_byref_box_Uxi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
+! CHECK:             %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmax_array_reductionEl"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
+! CHECK:             %[[VAL_8:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:             %[[VAL_9:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_8]], %[[VAL_9]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1>
+! CHECK:             %[[VAL_12:.*]] = hlfir.elemental %[[VAL_11]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK:             ^bb0(%[[VAL_13:.*]]: index):
+! CHECK:               %[[VAL_14:.*]] = arith.constant 0 : index
+! CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_8]], %[[VAL_14]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK:               %[[VAL_16:.*]] = arith.constant 1 : index
+! CHECK:               %[[VAL_17:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_16]] : index
+! CHECK:               %[[VAL_18:.*]] = arith.addi %[[VAL_13]], %[[VAL_17]] : index
+! CHECK:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
+! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<i32>
+! CHECK:               %[[VAL_23:.*]] = arith.cmpi sgt, %[[VAL_20]], %[[VAL_22]] : i32
+! CHECK:               %[[VAL_24:.*]] = arith.select %[[VAL_23]], %[[VAL_20]], %[[VAL_22]] : i32
+! CHECK:               hlfir.yield_element %[[VAL_24]] : i32
+! CHECK:             }
+! CHECK:             %[[VAL_25:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK:             hlfir.assign %[[VAL_12]] to %[[VAL_25]] : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK:             hlfir.destroy %[[VAL_12]] : !hlfir.expr<?xi32>
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           return
+! CHECK:         }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
new file mode 100644
index 0000000000000..bc22c8b05b967
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
@@ -0,0 +1,299 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+program reduce15
+  integer, parameter :: SIZE = 10
+  integer, dimension(:), allocatable :: arr,maxes,mins
+  integer :: i
+
+  allocate(arr(10))
+  allocate(maxes(10))
+  allocate(mins(10))
+
+  maxes = 5
+  mins = 5
+  do i = 1,SIZE
+    arr(i) = i
+  end do
+
+  !$omp parallel do reduction(max:maxes)
+  do i = 1,SIZE
+    maxes = max(arr, maxes)
+  end do
+  !$omp end parallel do
+
+
+  !$omp parallel do reduction(min:mins)
+  do i = 1,SIZE
+    mins = min(arr, mins)
+  end do
+  !$omp end parallel do
+
+  print *,"max: ", maxes
+  print *,"min: ", mins
+end program
+
+! CHECK-LABEL:   omp.declare_reduction @min_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant 2147483647 : i32
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:           %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK:           %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK:           fir.if %[[VAL_7]] {
+! CHECK:             %[[VAL_8:.*]] = fir.embox %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:             fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           } else {
+! CHECK:             %[[VAL_9:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_9]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1>
+! CHECK:             %[[VAL_12:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_10]]#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK:             %[[VAL_13:.*]] = arith.constant true
+! CHECK:             %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_11]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+! CHECK:             %[[VAL_15:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_15]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_17:.*]] = fir.shape_shift %[[VAL_16]]#0, %[[VAL_16]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:             %[[VAL_18:.*]] = fir.rebox %[[VAL_14]]#0(%[[VAL_17]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:             hlfir.assign %[[VAL_1]] to %[[VAL_18]] : i32, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:             fir.store %[[VAL_18]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.minsi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL:   }  cleanup {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i64
+! CHECK:           %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
+! CHECK:           fir.if %[[VAL_5]] {
+! CHECK:             fir.freemem %[[VAL_2]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:           }
+! CHECK:           omp.yield
+! CHECK:         }
+
+! CHECK-LABEL:   omp.declare_reduction @max_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK:           %[[VAL_1:.*]] = arith.constant -2147483648 : i32
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:           %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:           %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK:           %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK:           fir.if %[[VAL_7]] {
+! CHECK:             %[[VAL_8:.*]] = fir.embox %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:             fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           } else {
+! CHECK:             %[[VAL_9:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_9]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1>
+! CHECK:             %[[VAL_12:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_10]]#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK:             %[[VAL_13:.*]] = arith.constant true
+! CHECK:             %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_11]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+! CHECK:             %[[VAL_15:.*]] = arith.constant 0 : index
+! CHECK:             %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_15]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:             %[[VAL_17:.*]] = fir.shape_shift %[[VAL_16]]#0, %[[VAL_16]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:             %[[VAL_18:.*]] = fir.rebox %[[VAL_14]]#0(%[[VAL_17]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:             hlfir.assign %[[VAL_1]] to %[[VAL_18]] : i32, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:             fir.store %[[VAL_18]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL:   } combiner {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK:           %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:           %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK:           fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK:             %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK:             %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK:             %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK:             %[[VAL_13:.*]] = arith.maxsi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK:             fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK:           }
+! CHECK:           omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL:   }  cleanup {
+! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK:           %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK:           %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK:           %[[VAL_4:.*]] = arith.constant 0 : i64
+! CHECK:           %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
+! CHECK:           fir.if %[[VAL_5]] {
+! CHECK:             fir.freemem %[[VAL_2]] : !fir.heap<!fir.array<?xi32>>
+! CHECK:           }
+! CHECK:           omp.yield
+! CHECK:         }
+
+! CHECK-LABEL:   func.func @_QQmain() attributes {fir.bindc_name = "reduce15"} {
+! CHECK:           %[[VAL_0:.*]] = fir.address_of(@_QFEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK:           %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+! CHECK:           %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_4:.*]] = fir.address_of(@_QFEmaxes) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmaxes"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK:           %[[VAL_6:.*]] = fir.address_of(@_QFEmins) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmins"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK:           %[[VAL_8:.*]] = fir.address_of(@_QFECsize) : !fir.ref<i32>
+! CHECK:           %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {fortran_attrs = {{.*}}<parameter>, uniq_name = "_QFECsize"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:           %[[VAL_10:.*]] = arith.constant 10 : i32
+! CHECK:           %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index
+! CHECK:           %[[VAL_12:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
+! CHECK:           %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
+! CHECK:           %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_14]] {fir.must_be_heap = true, uniq_name = "_QFEarr.alloc"}
+! CHECK:           %[[VAL_16:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:           fir.store %[[VAL_17]] to %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_18:.*]] = arith.constant 10 : i32
+! CHECK:           %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> index
+! CHECK:           %[[VAL_20:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_21:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_20]] : index
+! CHECK:           %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_19]], %[[VAL_20]] : index
+! CHECK:           %[[VAL_23:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_22]] {fir.must_be_heap = true, uniq_name = "_QFEmaxes.alloc"}
+! CHECK:           %[[VAL_24:.*]] = fir.shape %[[VAL_22]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_25:.*]] = fir.embox %[[VAL_23]](%[[VAL_24]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:           fir.store %[[VAL_25]] to %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_26:.*]] = arith.constant 10 : i32
+! CHECK:           %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> index
+! CHECK:           %[[VAL_28:.*]] = arith.constant 0 : index
+! CHECK:           %[[VAL_29:.*]] = arith.cmpi sgt, %[[VAL_27]], %[[VAL_28]] : index
+! CHECK:           %[[VAL_30:.*]] = arith.select %[[VAL_29]], %[[VAL_27]], %[[VAL_28]] : index
+! CHECK:           %[[VAL_31:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_30]] {fir.must_be_heap = true, uniq_name = "_QFEmins.alloc"}
+! CHECK:           %[[VAL_32:.*]] = fir.shape %[[VAL_30]] : (index) -> !fir.shape<1>
+! CHECK:           %[[VAL_33:.*]] = fir.embox %[[VAL_31]](%[[VAL_32]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:           fir.store %[[VAL_33]] to %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_34:.*]] = arith.constant 5 : i32
+! CHECK:           hlfir.assign %[[VAL_34]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_35:.*]] = arith.constant 5 : i32
+! CHECK:           hlfir.assign %[[VAL_35]] to %[[VAL_7]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:           %[[VAL_36:.*]] = arith.constant 1 : i32
+! CHECK:           %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> index
+! CHECK:           %[[VAL_38:.*]] = arith.constant 10 : i32
+! CHECK:           %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> index
+! CHECK:           %[[VAL_40:.*]] = arith.constant 1 : index
+! CHECK:           %[[VAL_41:.*]] = fir.convert %[[VAL_37]] : (index) -> i32
+! CHECK:           %[[VAL_42:.*]]:2 = fir.do_loop %[[VAL_43:.*]] = %[[VAL_37]] to %[[VAL_39]] step %[[VAL_40]] iter_args(%[[VAL_44:.*]] = %[[VAL_41]]) -> (index, i32) {
+! CHECK:             fir.store %[[VAL_44]] to %[[VAL_3]]#1 : !fir.ref<i32>
+! CHECK:             %[[VAL_45:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_46:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:             %[[VAL_47:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK:             %[[VAL_48:.*]] = fir.convert %[[VAL_47]] : (i32) -> i64
+! CHECK:             %[[VAL_49:.*]] = hlfir.designate %[[VAL_46]] (%[[VAL_48]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, i64) -> !fir.ref<i32>
+! CHECK:             hlfir.assign %[[VAL_45]] to %[[VAL_49]] : i32, !fir.ref<i32>
+! CHECK:             %[[VAL_50:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] : index
+! CHECK:             %[[VAL_51:.*]] = fir.convert %[[VAL_40]] : (index) -> i32
+! CHECK:             %[[VAL_52:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<i32>
+! CHECK:             %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_51]] : i32
+! CHECK:             fir.result %[[VAL_50]], %[[VAL_53]] : index, i32
+! CHECK:           }
+! CHECK:           fir.store %[[VAL_54:.*]]#1 to %[[VAL_3]]#1 : !fir.ref<i32>
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_55:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_56:.*]]:2 = hlfir.declare %[[VAL_55]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_57:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_58:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_59:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop reduction(byref @max_byref_box_heap_Uxi32 %[[VAL_5]]#0 -> %[[VAL_60:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK:               omp.loop_nest (%[[VAL_61:.*]]) : i32 = (%[[VAL_57]]) to (%[[VAL_58]]) inclusive step (%[[VAL_59]]) {
+! CHECK:                 %[[VAL_62:.*]]:2 = hlfir.declare %[[VAL_60]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmaxes"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK:                 fir.store %[[VAL_61]] to %[[VAL_56]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_63:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                 %[[VAL_64:.*]] = arith.constant 0 : index
+! CHECK:                 %[[VAL_65:.*]]:3 = fir.box_dims %[[VAL_63]], %[[VAL_64]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:                 %[[VAL_66:.*]] = fir.shape %[[VAL_65]]#1 : (index) -> !fir.shape<1>
+! CHECK:                 %[[VAL_67:.*]] = fir.load %[[VAL_62]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                 %[[VAL_68:.*]] = hlfir.elemental %[[VAL_66]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK:                 ^bb0(%[[VAL_69:.*]]: index):
+! CHECK:                   %[[VAL_70:.*]] = arith.constant 0 : index
+! CHECK:                   %[[VAL_71:.*]]:3 = fir.box_dims %[[VAL_63]], %[[VAL_70]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:                   %[[VAL_72:.*]] = arith.constant 1 : index
+! CHECK:                   %[[VAL_73:.*]] = arith.subi %[[VAL_71]]#0, %[[VAL_72]] : index
+! CHECK:                   %[[VAL_74:.*]] = arith.addi %[[VAL_69]], %[[VAL_73]] : index
+! CHECK:                   %[[VAL_75:.*]] = hlfir.designate %[[VAL_63]] (%[[VAL_74]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK:                   %[[VAL_76:.*]] = fir.load %[[VAL_75]] : !fir.ref<i32>
+! CHECK:                   %[[VAL_77:.*]] = arith.constant 0 : index
+! CHECK:                   %[[VAL_78:.*]]:3 = fir.box_dims %[[VAL_67]], %[[VAL_77]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:                   %[[VAL_79:.*]] = arith.constant 1 : index
+! CHECK:                   %[[VAL_80:.*]] = arith.subi %[[VAL_78]]#0, %[[VAL_79]] : index
+! CHECK:                   %[[VAL_81:.*]] = arith.addi %[[VAL_69]], %[[VAL_80]] : index
+! CHECK:                   %[[VAL_82:.*]] = hlfir.designate %[[VAL_67]] (%[[VAL_81]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK:                   %[[VAL_83:.*]] = fir.load %[[VAL_82]] : !fir.ref<i32>
+! CHECK:                   %[[VAL_84:.*]] = arith.cmpi sgt, %[[VAL_76]], %[[VAL_83]] : i32
+! CHECK:                   %[[VAL_85:.*]] = arith.select %[[VAL_84]], %[[VAL_76]], %[[VAL_83]] : i32
+! CHECK:                   hlfir.yield_element %[[VAL_85]] : i32
+! CHECK:                 }
+! CHECK:                 %[[VAL_86:.*]] = fir.load %[[VAL_62]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                 hlfir.assign %[[VAL_68]] to %[[VAL_86]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:                 hlfir.destroy %[[VAL_68]] : !hlfir.expr<?xi32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }
+! CHECK:           omp.parallel {
+! CHECK:             %[[VAL_87:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK:             %[[VAL_88:.*]]:2 = hlfir.declare %[[VAL_87]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK:             %[[VAL_89:.*]] = arith.constant 1 : i32
+! CHECK:             %[[VAL_90:.*]] = arith.constant 10 : i32
+! CHECK:             %[[VAL_91:.*]] = arith.constant 1 : i32
+! CHECK:             omp.wsloop reduction(byref @min_byref_box_heap_Uxi32 %[[VAL_7]]#0 -> %[[VAL_92:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK:               omp.loop_nest (%[[VAL_93:.*]]) : i32 = (%[[VAL_89]]) to (%[[VAL_90]]) inclusive step (%[[VAL_91]]) {
+! CHECK:                 %[[VAL_94:.*]]:2 = hlfir.declare %[[VAL_92]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmins"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK:                 fir.store %[[VAL_93]] to %[[VAL_88]]#1 : !fir.ref<i32>
+! CHECK:                 %[[VAL_95:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                 %[[VAL_96:.*]] = arith.constant 0 : index
+! CHECK:                 %[[VAL_97:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_96]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:                 %[[VAL_98:.*]] = fir.shape %[[VAL_97]]#1 : (index) -> !fir.shape<1>
+! CHECK:                 %[[VAL_99:.*]] = fir.load %[[VAL_94]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                 %[[VAL_100:.*]] = hlfir.elemental %[[VAL_98]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK:                 ^bb0(%[[VAL_101:.*]]: index):
+! CHECK:                   %[[VAL_102:.*]] = arith.constant 0 : index
+! CHECK:                   %[[VAL_103:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_102]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:                   %[[VAL_104:.*]] = arith.constant 1 : index
+! CHECK:                   %[[VAL_105:.*]] = arith.subi %[[VAL_103]]#0, %[[VAL_104]] : index
+! CHECK:                   %[[VAL_106:.*]] = arith.addi %[[VAL_101]], %[[VAL_105]] : index
+! CHECK:                   %[[VAL_107:.*]] = hlfir.designate %[[VAL_95]] (%[[VAL_106]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK:                   %[[VAL_108:.*]] = fir.load %[[VAL_107]] : !fir.ref<i32>
+! CHECK:                   %[[VAL_109:.*]] = arith.constant 0 : index
+! CHECK:                   %[[VAL_110:.*]]:3 = fir.box_dims %[[VAL_99]], %[[VAL_109]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK:                   %[[VAL_111:.*]] = arith.constant 1 : index
+! CHECK:                   %[[VAL_112:.*]] = arith.subi %[[VAL_110]]#0, %[[VAL_111]] : index
+! CHECK:                   %[[VAL_113:.*]] = arith.addi %[[VAL_101]], %[[VAL_112]] : index
+! CHECK:                   %[[VAL_114:.*]] = hlfir.designate %[[VAL_99]] (%[[VAL_113]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK:                   %[[VAL_115:.*]] = fir.load %[[VAL_114]] : !fir.ref<i32>
+! CHECK:                   %[[VAL_116:.*]] = arith.cmpi slt, %[[VAL_108]], %[[VAL_115]] : i32
+! CHECK:                   %[[VAL_117:.*]] = arith.select %[[VAL_116]], %[[VAL_108]], %[[VAL_115]] : i32
+! CHECK:                   hlfir.yield_element %[[VAL_117]] : i32
+! CHECK:                 }
+! CHECK:                 %[[VAL_118:.*]] = fir.load %[[VAL_94]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK:                 hlfir.assign %[[VAL_100]] to %[[VAL_118]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK:                 hlfir.destroy %[[VAL_100]] : !hlfir.expr<?xi32>
+! CHECK:                 omp.yield
+! CHECK:               }
+! CHECK:               omp.terminator
+! CHECK:             }
+! CHECK:             omp.terminator
+! CHECK:           }

>From 24d4d377ed84b1ad734b2588b48aafb201d14f34 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 12 Jun 2024 16:44:13 +0000
Subject: [PATCH 2/2] [flang][OpenMP][NFC] refactor reduction logic

---
 flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 110 ++++++++----------
 1 file changed, 47 insertions(+), 63 deletions(-)

diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 5c6a105b9b6ea..c3c1f363033c2 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -763,72 +763,56 @@ void ReductionProcessor::addDeclareReduction(
     reduceVarByRef.push_back(doReductionByRef(symVal));
   }
 
-  if (const auto &redDefinedOp =
-          std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) {
-    const auto &intrinsicOp{
-        std::get<omp::clause::DefinedOperator::IntrinsicOperator>(
-            redDefinedOp->u)};
-    ReductionIdentifier redId = getReductionType(intrinsicOp);
-    switch (redId) {
-    case ReductionIdentifier::ADD:
-    case ReductionIdentifier::MULTIPLY:
-    case ReductionIdentifier::AND:
-    case ReductionIdentifier::EQV:
-    case ReductionIdentifier::OR:
-    case ReductionIdentifier::NEQV:
-      break;
-    default:
-      TODO(currentLocation,
-           "Reduction of some intrinsic operators is not supported");
-      break;
-    }
+  for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
+    auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+    const auto &kindMap = firOpBuilder.getKindMap();
+    std::string reductionName;
+    ReductionIdentifier redId;
+    mlir::Type redNameTy = redType;
+    if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
+      redNameTy = builder.getI1Type();
+
+    if (const auto &redDefinedOp =
+            std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) {
+      const auto &intrinsicOp{
+          std::get<omp::clause::DefinedOperator::IntrinsicOperator>(
+              redDefinedOp->u)};
+      redId = getReductionType(intrinsicOp);
+      switch (redId) {
+      case ReductionIdentifier::ADD:
+      case ReductionIdentifier::MULTIPLY:
+      case ReductionIdentifier::AND:
+      case ReductionIdentifier::EQV:
+      case ReductionIdentifier::OR:
+      case ReductionIdentifier::NEQV:
+        break;
+      default:
+        TODO(currentLocation,
+             "Reduction of some intrinsic operators is not supported");
+        break;
+      }
 
-    for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
-      auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
-      const auto &kindMap = firOpBuilder.getKindMap();
-      if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
-        decl = createDeclareReduction(firOpBuilder,
-                                      getReductionName(intrinsicOp, kindMap,
-                                                       firOpBuilder.getI1Type(),
-                                                       isByRef),
-                                      redId, redType, currentLocation, isByRef);
-      else
-        decl = createDeclareReduction(
-            firOpBuilder,
-            getReductionName(intrinsicOp, kindMap, redType, isByRef), redId,
-            redType, currentLocation, isByRef);
-      reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
-          firOpBuilder.getContext(), decl.getSymName()));
-    }
-  } else if (const auto *reductionIntrinsic =
-                 std::get_if<omp::clause::ProcedureDesignator>(
-                     &redOperator.u)) {
-    if (ReductionProcessor::supportedIntrinsicProcReduction(
-            *reductionIntrinsic)) {
-      ReductionProcessor::ReductionIdentifier redId =
-          ReductionProcessor::getReductionType(*reductionIntrinsic);
-      for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
-        // TODO: share code with above
-        auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
-        const auto &kindMap = firOpBuilder.getKindMap();
-        if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
-          decl = createDeclareReduction(
-              firOpBuilder,
-              getReductionName(
-                  /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
-                  kindMap, firOpBuilder.getI1Type(), isByRef),
-              redId, redType, currentLocation, isByRef);
-        else
-          decl = createDeclareReduction(
-              firOpBuilder,
-              getReductionName(
-                  /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
-                  kindMap, redType, isByRef),
-              redId, redType, currentLocation, isByRef);
-        reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
-            firOpBuilder.getContext(), decl.getSymName()));
+      reductionName =
+          getReductionName(intrinsicOp, kindMap, redNameTy, isByRef);
+    } else if (const auto *reductionIntrinsic =
+                   std::get_if<omp::clause::ProcedureDesignator>(
+                       &redOperator.u)) {
+      if (!ReductionProcessor::supportedIntrinsicProcReduction(
+              *reductionIntrinsic)) {
+        TODO(currentLocation, "Unsupported intrinsic proc reduction");
       }
+      redId = getReductionType(*reductionIntrinsic);
+      reductionName =
+          getReductionName(getRealName(*reductionIntrinsic).ToString(), kindMap,
+                           redNameTy, isByRef);
+    } else {
+      TODO(currentLocation, "Unexpected reduction type");
     }
+
+    decl = createDeclareReduction(firOpBuilder, reductionName, redId, redType,
+                                  currentLocation, isByRef);
+    reductionDeclSymbols.push_back(
+        mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName()));
   }
 }
 



More information about the flang-commits mailing list