[flang-commits] [flang] [flang][OpenMP] support more reduction types for procedure designators (PR #96057)
Tom Eccles via flang-commits
flang-commits at lists.llvm.org
Wed Jun 19 04:05:49 PDT 2024
https://github.com/tblah created https://github.com/llvm/llvm-project/pull/96057
This re-uses reduction declarations from intrinsic operators to add support for reductions of allocatables, pointers, and arrays with procedure designators (e.g. min/max).
I have split this into two commits to make it easier to review. The first one makes the functional change. The second cleans things up now that we can share much more code between intrinsic operators and procedure designators.
>From daef048e7647c30acd7007340a894f3b063bbf56 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 12 Jun 2024 13:59:58 +0000
Subject: [PATCH 1/2] [flang][OpenMP] support more reduction types for
procedure designators
This re-uses reduction declarations from intrinsic operators to add
support for reductions of allocatables, pointers, and arrays with
procedure designators (e.g. min/max).
---
flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 28 +-
.../OpenMP/Todo/reduction-array-intrinsic.f90 | 11 -
.../OpenMP/reduction-array-intrinsic.f90 | 96 ++++++
...oop-reduction-allocatable-array-minmax.f90 | 299 ++++++++++++++++++
4 files changed, 413 insertions(+), 21 deletions(-)
delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90
create mode 100644 flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
create mode 100644 flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 237f9cdf22a1b..5c6a105b9b6ea 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -709,8 +709,8 @@ void ReductionProcessor::addDeclareReduction(
}
}
- // initial pass to collect all reduction vars so we can figure out if this
- // should happen byref
+ // Reduction variable processing common to both intrinsic operators and
+ // procedure designators
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
for (const Object &object : objectList) {
const semantics::Symbol *symbol = object.sym();
@@ -808,15 +808,23 @@ void ReductionProcessor::addDeclareReduction(
ReductionProcessor::ReductionIdentifier redId =
ReductionProcessor::getReductionType(*reductionIntrinsic);
for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
+ // TODO: share code with above
auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
- if (!redType.getEleTy().isIntOrIndexOrFloat())
- TODO(currentLocation,
- "Reduction of some types is not supported for intrinsics");
- decl = createDeclareReduction(
- firOpBuilder,
- getReductionName(getRealName(*reductionIntrinsic).ToString(),
- firOpBuilder.getKindMap(), redType, isByRef),
- redId, redType, currentLocation, isByRef);
+ const auto &kindMap = firOpBuilder.getKindMap();
+ if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
+ decl = createDeclareReduction(
+ firOpBuilder,
+ getReductionName(
+ /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
+ kindMap, firOpBuilder.getI1Type(), isByRef),
+ redId, redType, currentLocation, isByRef);
+ else
+ decl = createDeclareReduction(
+ firOpBuilder,
+ getReductionName(
+ /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
+ kindMap, redType, isByRef),
+ redId, redType, currentLocation, isByRef);
reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
firOpBuilder.getContext(), decl.getSymName()));
}
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90
deleted file mode 100644
index 49c899238d2a3..0000000000000
--- a/flang/test/Lower/OpenMP/Todo/reduction-array-intrinsic.f90
+++ /dev/null
@@ -1,11 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction of some types is not supported for intrinsics
-subroutine max_array_reduction(l, r)
- integer :: l(:), r(:)
-
- !$omp parallel reduction(max:l)
- l = max(l, r)
- !$omp end parallel
-end subroutine
diff --git a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
new file mode 100644
index 0000000000000..208cda28a3e59
--- /dev/null
+++ b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
@@ -0,0 +1,96 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+
+subroutine max_array_reduction(l, r)
+ integer :: l(:), r(:)
+
+ !$omp parallel reduction(max:l)
+ l = max(l, r)
+ !$omp end parallel
+end subroutine
+
+! CHECK-LABEL: omp.declare_reduction @max_byref_box_Uxi32 : !fir.ref<!fir.box<!fir.array<?xi32>>> init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant -2147483648 : i32
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_6:.*]] = fir.shape %[[VAL_5]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_7:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_5]]#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[VAL_8:.*]] = arith.constant true
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_7]](%[[VAL_6]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+! CHECK: %[[VAL_10:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_10]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_12:.*]] = fir.shape_shift %[[VAL_11]]#0, %[[VAL_11]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_13:.*]] = fir.rebox %[[VAL_9]]#0(%[[VAL_12]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?xi32>>
+! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_13]] : i32, !fir.box<!fir.array<?xi32>>
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = arith.maxsi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK: }
+! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>)
+! CHECK-LABEL: } cleanup {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xi32>>>):
+! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xi32>>) -> i64
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
+! CHECK: fir.if %[[VAL_5]] {
+! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[VAL_6]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: }
+! CHECK: omp.yield
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QPmax_array_reduction(
+! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "l"},
+! CHECK-SAME: %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "r"}) {
+! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFmax_array_reductionEl"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {uniq_name = "_QFmax_array_reductionEr"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box<!fir.array<?xi32>>
+! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_5]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: omp.parallel reduction(byref @max_byref_box_Uxi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref<!fir.box<!fir.array<?xi32>>>) {
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {uniq_name = "_QFmax_array_reductionEl"} : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> (!fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.ref<!fir.box<!fir.array<?xi32>>>)
+! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_8]], %[[VAL_9]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_12:.*]] = hlfir.elemental %[[VAL_11]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK: ^bb0(%[[VAL_13:.*]]: index):
+! CHECK: %[[VAL_14:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_8]], %[[VAL_14]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_17:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_16]] : index
+! CHECK: %[[VAL_18:.*]] = arith.addi %[[VAL_13]], %[[VAL_17]] : index
+! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
+! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<i32>
+! CHECK: %[[VAL_23:.*]] = arith.cmpi sgt, %[[VAL_20]], %[[VAL_22]] : i32
+! CHECK: %[[VAL_24:.*]] = arith.select %[[VAL_23]], %[[VAL_20]], %[[VAL_22]] : i32
+! CHECK: hlfir.yield_element %[[VAL_24]] : i32
+! CHECK: }
+! CHECK: %[[VAL_25:.*]] = fir.load %[[VAL_7]]#0 : !fir.ref<!fir.box<!fir.array<?xi32>>>
+! CHECK: hlfir.assign %[[VAL_12]] to %[[VAL_25]] : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK: hlfir.destroy %[[VAL_12]] : !hlfir.expr<?xi32>
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: return
+! CHECK: }
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
new file mode 100644
index 0000000000000..bc22c8b05b967
--- /dev/null
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
@@ -0,0 +1,299 @@
+! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s
+program reduce15
+ integer, parameter :: SIZE = 10
+ integer, dimension(:), allocatable :: arr,maxes,mins
+ integer :: i
+
+ allocate(arr(10))
+ allocate(maxes(10))
+ allocate(mins(10))
+
+ maxes = 5
+ mins = 5
+ do i = 1,SIZE
+ arr(i) = i
+ end do
+
+ !$omp parallel do reduction(max:maxes)
+ do i = 1,SIZE
+ maxes = max(arr, maxes)
+ end do
+ !$omp end parallel do
+
+
+ !$omp parallel do reduction(min:mins)
+ do i = 1,SIZE
+ mins = min(arr, mins)
+ end do
+ !$omp end parallel do
+
+ print *,"max: ", maxes
+ print *,"min: ", mins
+end program
+
+! CHECK-LABEL: omp.declare_reduction @min_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant 2147483647 : i32
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK: fir.if %[[VAL_7]] {
+! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: } else {
+! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_9]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_10]]#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[VAL_13:.*]] = arith.constant true
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_11]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+! CHECK: %[[VAL_15:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_15]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_16]]#0, %[[VAL_16]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_18:.*]] = fir.rebox %[[VAL_14]]#0(%[[VAL_17]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_18]] : i32, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_18]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: }
+! CHECK: omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = arith.minsi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK: }
+! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL: } cleanup {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
+! CHECK: fir.if %[[VAL_5]] {
+! CHECK: fir.freemem %[[VAL_2]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: }
+! CHECK: omp.yield
+! CHECK: }
+
+! CHECK-LABEL: omp.declare_reduction @max_byref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[VAL_1:.*]] = arith.constant -2147483648 : i32
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
+! CHECK: fir.if %[[VAL_7]] {
+! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_4]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: } else {
+! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_9]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_10]]#1 {bindc_name = ".tmp", uniq_name = ""}
+! CHECK: %[[VAL_13:.*]] = arith.constant true
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_11]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+! CHECK: %[[VAL_15:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_15]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_16]]#0, %[[VAL_16]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_18:.*]] = fir.rebox %[[VAL_14]]#0(%[[VAL_17]]) : (!fir.box<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_18]] : i32, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_18]] to %[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: }
+! CHECK: omp.yield(%[[VAL_3]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL: } combiner {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
+! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered {
+! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, !fir.shapeshift<1>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref<i32>
+! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref<i32>
+! CHECK: %[[VAL_13:.*]] = arith.maxsi %[[VAL_11]], %[[VAL_12]] : i32
+! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref<i32>
+! CHECK: }
+! CHECK: omp.yield(%[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK-LABEL: } cleanup {
+! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.heap<!fir.array<?xi32>>) -> i64
+! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64
+! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64
+! CHECK: fir.if %[[VAL_5]] {
+! CHECK: fir.freemem %[[VAL_2]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: }
+! CHECK: omp.yield
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce15"} {
+! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEarr) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEarr"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_4:.*]] = fir.address_of(@_QFEmaxes) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmaxes"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_6:.*]] = fir.address_of(@_QFEmins) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmins"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: %[[VAL_8:.*]] = fir.address_of(@_QFECsize) : !fir.ref<i32>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_8]] {fortran_attrs = {{.*}}<parameter>, uniq_name = "_QFECsize"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_10:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i32) -> index
+! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
+! CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
+! CHECK: %[[VAL_15:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_14]] {fir.must_be_heap = true, uniq_name = "_QFEarr.alloc"}
+! CHECK: %[[VAL_16:.*]] = fir.shape %[[VAL_14]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_17:.*]] = fir.embox %[[VAL_15]](%[[VAL_16]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_17]] to %[[VAL_1]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_18:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_18]] : (i32) -> index
+! CHECK: %[[VAL_20:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_21:.*]] = arith.cmpi sgt, %[[VAL_19]], %[[VAL_20]] : index
+! CHECK: %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_19]], %[[VAL_20]] : index
+! CHECK: %[[VAL_23:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_22]] {fir.must_be_heap = true, uniq_name = "_QFEmaxes.alloc"}
+! CHECK: %[[VAL_24:.*]] = fir.shape %[[VAL_22]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_25:.*]] = fir.embox %[[VAL_23]](%[[VAL_24]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_25]] to %[[VAL_5]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_26:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_27:.*]] = fir.convert %[[VAL_26]] : (i32) -> index
+! CHECK: %[[VAL_28:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_29:.*]] = arith.cmpi sgt, %[[VAL_27]], %[[VAL_28]] : index
+! CHECK: %[[VAL_30:.*]] = arith.select %[[VAL_29]], %[[VAL_27]], %[[VAL_28]] : index
+! CHECK: %[[VAL_31:.*]] = fir.allocmem !fir.array<?xi32>, %[[VAL_30]] {fir.must_be_heap = true, uniq_name = "_QFEmins.alloc"}
+! CHECK: %[[VAL_32:.*]] = fir.shape %[[VAL_30]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_33:.*]] = fir.embox %[[VAL_31]](%[[VAL_32]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: fir.store %[[VAL_33]] to %[[VAL_7]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_34:.*]] = arith.constant 5 : i32
+! CHECK: hlfir.assign %[[VAL_34]] to %[[VAL_5]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_35:.*]] = arith.constant 5 : i32
+! CHECK: hlfir.assign %[[VAL_35]] to %[[VAL_7]]#0 realloc : i32, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_36:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_36]] : (i32) -> index
+! CHECK: %[[VAL_38:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (i32) -> index
+! CHECK: %[[VAL_40:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_41:.*]] = fir.convert %[[VAL_37]] : (index) -> i32
+! CHECK: %[[VAL_42:.*]]:2 = fir.do_loop %[[VAL_43:.*]] = %[[VAL_37]] to %[[VAL_39]] step %[[VAL_40]] iter_args(%[[VAL_44:.*]] = %[[VAL_41]]) -> (index, i32) {
+! CHECK: fir.store %[[VAL_44]] to %[[VAL_3]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_45:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_46:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_47:.*]] = fir.load %[[VAL_3]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_48:.*]] = fir.convert %[[VAL_47]] : (i32) -> i64
+! CHECK: %[[VAL_49:.*]] = hlfir.designate %[[VAL_46]] (%[[VAL_48]]) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, i64) -> !fir.ref<i32>
+! CHECK: hlfir.assign %[[VAL_45]] to %[[VAL_49]] : i32, !fir.ref<i32>
+! CHECK: %[[VAL_50:.*]] = arith.addi %[[VAL_43]], %[[VAL_40]] : index
+! CHECK: %[[VAL_51:.*]] = fir.convert %[[VAL_40]] : (index) -> i32
+! CHECK: %[[VAL_52:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_53:.*]] = arith.addi %[[VAL_52]], %[[VAL_51]] : i32
+! CHECK: fir.result %[[VAL_50]], %[[VAL_53]] : index, i32
+! CHECK: }
+! CHECK: fir.store %[[VAL_54:.*]]#1 to %[[VAL_3]]#1 : !fir.ref<i32>
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_55:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK: %[[VAL_56:.*]]:2 = hlfir.declare %[[VAL_55]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_57:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_58:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_59:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop reduction(byref @max_byref_box_heap_Uxi32 %[[VAL_5]]#0 -> %[[VAL_60:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK: omp.loop_nest (%[[VAL_61:.*]]) : i32 = (%[[VAL_57]]) to (%[[VAL_58]]) inclusive step (%[[VAL_59]]) {
+! CHECK: %[[VAL_62:.*]]:2 = hlfir.declare %[[VAL_60]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmaxes"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: fir.store %[[VAL_61]] to %[[VAL_56]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_63:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_64:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_65:.*]]:3 = fir.box_dims %[[VAL_63]], %[[VAL_64]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_66:.*]] = fir.shape %[[VAL_65]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_67:.*]] = fir.load %[[VAL_62]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_68:.*]] = hlfir.elemental %[[VAL_66]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK: ^bb0(%[[VAL_69:.*]]: index):
+! CHECK: %[[VAL_70:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_71:.*]]:3 = fir.box_dims %[[VAL_63]], %[[VAL_70]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_72:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_73:.*]] = arith.subi %[[VAL_71]]#0, %[[VAL_72]] : index
+! CHECK: %[[VAL_74:.*]] = arith.addi %[[VAL_69]], %[[VAL_73]] : index
+! CHECK: %[[VAL_75:.*]] = hlfir.designate %[[VAL_63]] (%[[VAL_74]]) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_76:.*]] = fir.load %[[VAL_75]] : !fir.ref<i32>
+! CHECK: %[[VAL_77:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_78:.*]]:3 = fir.box_dims %[[VAL_67]], %[[VAL_77]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_79:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_80:.*]] = arith.subi %[[VAL_78]]#0, %[[VAL_79]] : index
+! CHECK: %[[VAL_81:.*]] = arith.addi %[[VAL_69]], %[[VAL_80]] : index
+! CHECK: %[[VAL_82:.*]] = hlfir.designate %[[VAL_67]] (%[[VAL_81]]) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_83:.*]] = fir.load %[[VAL_82]] : !fir.ref<i32>
+! CHECK: %[[VAL_84:.*]] = arith.cmpi sgt, %[[VAL_76]], %[[VAL_83]] : i32
+! CHECK: %[[VAL_85:.*]] = arith.select %[[VAL_84]], %[[VAL_76]], %[[VAL_83]] : i32
+! CHECK: hlfir.yield_element %[[VAL_85]] : i32
+! CHECK: }
+! CHECK: %[[VAL_86:.*]] = fir.load %[[VAL_62]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: hlfir.assign %[[VAL_68]] to %[[VAL_86]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: hlfir.destroy %[[VAL_68]] : !hlfir.expr<?xi32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.parallel {
+! CHECK: %[[VAL_87:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"}
+! CHECK: %[[VAL_88:.*]]:2 = hlfir.declare %[[VAL_87]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_89:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_90:.*]] = arith.constant 10 : i32
+! CHECK: %[[VAL_91:.*]] = arith.constant 1 : i32
+! CHECK: omp.wsloop reduction(byref @min_byref_box_heap_Uxi32 %[[VAL_7]]#0 -> %[[VAL_92:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) {
+! CHECK: omp.loop_nest (%[[VAL_93:.*]]) : i32 = (%[[VAL_89]]) to (%[[VAL_90]]) inclusive step (%[[VAL_91]]) {
+! CHECK: %[[VAL_94:.*]]:2 = hlfir.declare %[[VAL_92]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmins"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
+! CHECK: fir.store %[[VAL_93]] to %[[VAL_88]]#1 : !fir.ref<i32>
+! CHECK: %[[VAL_95:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_96:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_97:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_96]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_98:.*]] = fir.shape %[[VAL_97]]#1 : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_99:.*]] = fir.load %[[VAL_94]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[VAL_100:.*]] = hlfir.elemental %[[VAL_98]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK: ^bb0(%[[VAL_101:.*]]: index):
+! CHECK: %[[VAL_102:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_103:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_102]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_104:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_105:.*]] = arith.subi %[[VAL_103]]#0, %[[VAL_104]] : index
+! CHECK: %[[VAL_106:.*]] = arith.addi %[[VAL_101]], %[[VAL_105]] : index
+! CHECK: %[[VAL_107:.*]] = hlfir.designate %[[VAL_95]] (%[[VAL_106]]) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_108:.*]] = fir.load %[[VAL_107]] : !fir.ref<i32>
+! CHECK: %[[VAL_109:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_110:.*]]:3 = fir.box_dims %[[VAL_99]], %[[VAL_109]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK: %[[VAL_111:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_112:.*]] = arith.subi %[[VAL_110]]#0, %[[VAL_111]] : index
+! CHECK: %[[VAL_113:.*]] = arith.addi %[[VAL_101]], %[[VAL_112]] : index
+! CHECK: %[[VAL_114:.*]] = hlfir.designate %[[VAL_99]] (%[[VAL_113]]) : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK: %[[VAL_115:.*]] = fir.load %[[VAL_114]] : !fir.ref<i32>
+! CHECK: %[[VAL_116:.*]] = arith.cmpi slt, %[[VAL_108]], %[[VAL_115]] : i32
+! CHECK: %[[VAL_117:.*]] = arith.select %[[VAL_116]], %[[VAL_108]], %[[VAL_115]] : i32
+! CHECK: hlfir.yield_element %[[VAL_117]] : i32
+! CHECK: }
+! CHECK: %[[VAL_118:.*]] = fir.load %[[VAL_94]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: hlfir.assign %[[VAL_100]] to %[[VAL_118]] : !hlfir.expr<?xi32>, !fir.box<!fir.heap<!fir.array<?xi32>>>
+! CHECK: hlfir.destroy %[[VAL_100]] : !hlfir.expr<?xi32>
+! CHECK: omp.yield
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
+! CHECK: omp.terminator
+! CHECK: }
>From 24d4d377ed84b1ad734b2588b48aafb201d14f34 Mon Sep 17 00:00:00 2001
From: Tom Eccles <tom.eccles at arm.com>
Date: Wed, 12 Jun 2024 16:44:13 +0000
Subject: [PATCH 2/2] [flang][OpenMP][NFC] refactor reduction logic
---
flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 110 ++++++++----------
1 file changed, 47 insertions(+), 63 deletions(-)
diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
index 5c6a105b9b6ea..c3c1f363033c2 100644
--- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp
@@ -763,72 +763,56 @@ void ReductionProcessor::addDeclareReduction(
reduceVarByRef.push_back(doReductionByRef(symVal));
}
- if (const auto &redDefinedOp =
- std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) {
- const auto &intrinsicOp{
- std::get<omp::clause::DefinedOperator::IntrinsicOperator>(
- redDefinedOp->u)};
- ReductionIdentifier redId = getReductionType(intrinsicOp);
- switch (redId) {
- case ReductionIdentifier::ADD:
- case ReductionIdentifier::MULTIPLY:
- case ReductionIdentifier::AND:
- case ReductionIdentifier::EQV:
- case ReductionIdentifier::OR:
- case ReductionIdentifier::NEQV:
- break;
- default:
- TODO(currentLocation,
- "Reduction of some intrinsic operators is not supported");
- break;
- }
+ for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
+ auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
+ const auto &kindMap = firOpBuilder.getKindMap();
+ std::string reductionName;
+ ReductionIdentifier redId;
+ mlir::Type redNameTy = redType;
+ if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
+ redNameTy = builder.getI1Type();
+
+ if (const auto &redDefinedOp =
+ std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) {
+ const auto &intrinsicOp{
+ std::get<omp::clause::DefinedOperator::IntrinsicOperator>(
+ redDefinedOp->u)};
+ redId = getReductionType(intrinsicOp);
+ switch (redId) {
+ case ReductionIdentifier::ADD:
+ case ReductionIdentifier::MULTIPLY:
+ case ReductionIdentifier::AND:
+ case ReductionIdentifier::EQV:
+ case ReductionIdentifier::OR:
+ case ReductionIdentifier::NEQV:
+ break;
+ default:
+ TODO(currentLocation,
+ "Reduction of some intrinsic operators is not supported");
+ break;
+ }
- for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
- auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
- const auto &kindMap = firOpBuilder.getKindMap();
- if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
- decl = createDeclareReduction(firOpBuilder,
- getReductionName(intrinsicOp, kindMap,
- firOpBuilder.getI1Type(),
- isByRef),
- redId, redType, currentLocation, isByRef);
- else
- decl = createDeclareReduction(
- firOpBuilder,
- getReductionName(intrinsicOp, kindMap, redType, isByRef), redId,
- redType, currentLocation, isByRef);
- reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
- firOpBuilder.getContext(), decl.getSymName()));
- }
- } else if (const auto *reductionIntrinsic =
- std::get_if<omp::clause::ProcedureDesignator>(
- &redOperator.u)) {
- if (ReductionProcessor::supportedIntrinsicProcReduction(
- *reductionIntrinsic)) {
- ReductionProcessor::ReductionIdentifier redId =
- ReductionProcessor::getReductionType(*reductionIntrinsic);
- for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) {
- // TODO: share code with above
- auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
- const auto &kindMap = firOpBuilder.getKindMap();
- if (mlir::isa<fir::LogicalType>(redType.getEleTy()))
- decl = createDeclareReduction(
- firOpBuilder,
- getReductionName(
- /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
- kindMap, firOpBuilder.getI1Type(), isByRef),
- redId, redType, currentLocation, isByRef);
- else
- decl = createDeclareReduction(
- firOpBuilder,
- getReductionName(
- /*DIFFERENCE:*/ getRealName(*reductionIntrinsic).ToString(),
- kindMap, redType, isByRef),
- redId, redType, currentLocation, isByRef);
- reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
- firOpBuilder.getContext(), decl.getSymName()));
+ reductionName =
+ getReductionName(intrinsicOp, kindMap, redNameTy, isByRef);
+ } else if (const auto *reductionIntrinsic =
+ std::get_if<omp::clause::ProcedureDesignator>(
+ &redOperator.u)) {
+ if (!ReductionProcessor::supportedIntrinsicProcReduction(
+ *reductionIntrinsic)) {
+ TODO(currentLocation, "Unsupported intrinsic proc reduction");
}
+ redId = getReductionType(*reductionIntrinsic);
+ reductionName =
+ getReductionName(getRealName(*reductionIntrinsic).ToString(), kindMap,
+ redNameTy, isByRef);
+ } else {
+ TODO(currentLocation, "Unexpected reduction type");
}
+
+ decl = createDeclareReduction(firOpBuilder, reductionName, redId, redType,
+ currentLocation, isByRef);
+ reductionDeclSymbols.push_back(
+ mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName()));
}
}
More information about the flang-commits
mailing list