[flang-commits] [flang] 861a763 - [flang][acc] use new acc.reduction_combine under option (#182066)
via flang-commits
flang-commits at lists.llvm.org
Wed Feb 18 09:56:55 PST 2026
Author: jeanPerier
Date: 2026-02-18T18:56:50+01:00
New Revision: 861a76378a20970df8db46b1e9ccfbd5ef16aead
URL: https://github.com/llvm/llvm-project/commit/861a76378a20970df8db46b1e9ccfbd5ef16aead
DIFF: https://github.com/llvm/llvm-project/commit/861a76378a20970df8db46b1e9ccfbd5ef16aead.diff
LOG: [flang][acc] use new acc.reduction_combine under option (#182066)
Use the new acc.reduction_combine operation that was added in
https://github.com/llvm/llvm-project/pull/181853 when generating
reduction recipes.
To do this, the optional callback of genNoAliasArrayAssignment is
updated to work on addresses and is now also responsible for the
assignment part.
This change is guarded by two cl switches:
- openacc-use-reduction-combine: enable the change for
real/complex/integer reduction except min/max
- openacc-use-reduction-combine-all: enable the change for all types
and operators.
Added:
Modified:
flang/include/flang/Optimizer/Builder/HLFIRTools.h
flang/lib/Optimizer/Builder/HLFIRTools.cpp
flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
flang/test/Lower/OpenACC/acc-reduction.f90
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 37297dd0ad6d1..e75bddc7e1bef 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -456,13 +456,18 @@ mlir::Value inlineElementalOp(
/// over the optimal extents deduced from both shapes. If \p emitWorkshareLoop
/// is true, a workshare loop construct may be emitted when available.
/// Allocatable LHS must be allocated with the right shape and parameters.
+/// An optional scalarCombineAndAssign can be provided to provide logic for more
+/// complex assignment actions like for reductions that may need to happen
+/// atomically. When provided, the callback will be passed scalar addresses for
+/// the LHS and RHS elements and is in charge of generating the combination and
+/// assignment logic.
void genNoAliasArrayAssignment(
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
hlfir::Entity lhs, bool emitWorkshareLoop = false,
bool temporaryLHS = false,
- std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
- hlfir::Entity, hlfir::Entity)> *combiner =
- nullptr,
+ std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+ hlfir::Entity, mlir::ArrayAttr)>
+ *scalarCombineAndAssign = nullptr,
mlir::ArrayAttr accessGroups = {});
/// Generate an assignment from \p rhs to \p lhs when they are known not to
@@ -474,19 +479,19 @@ void genNoAliasAssignment(
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
hlfir::Entity lhs, bool emitWorkshareLoop = false,
bool temporaryLHS = false,
- std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
- hlfir::Entity, hlfir::Entity)> *combiner =
- nullptr,
+ std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+ hlfir::Entity, mlir::ArrayAttr accessGroups)>
+ *scalarCombineAndAssign = nullptr,
mlir::ArrayAttr accessGroups = {});
inline void genNoAliasAssignment(
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
hlfir::Entity lhs, bool emitWorkshareLoop, bool temporaryLHS,
- std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
- hlfir::Entity, hlfir::Entity)>
- combiner,
+ std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+ hlfir::Entity, mlir::ArrayAttr)>
+ scalarCombineAndAssign,
mlir::ArrayAttr accessGroups = {}) {
genNoAliasAssignment(loc, builder, rhs, lhs, emitWorkshareLoop, temporaryLHS,
- &combiner, accessGroups);
+ &scalarCombineAndAssign, accessGroups);
}
/// Create a new temporary with the shape and parameters of the provided
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 3355bf1475e30..e7a286b73bb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -1395,20 +1395,17 @@ bool hlfir::elementalOpMustProduceTemp(hlfir::ElementalOp elemental) {
static void combineAndStoreElement(
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity lhs,
hlfir::Entity rhs, bool temporaryLHS,
- std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
- hlfir::Entity, hlfir::Entity)> *combiner,
+ std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+ hlfir::Entity, mlir::ArrayAttr)> *scalarCombineAndAssign,
mlir::ArrayAttr accessGroups) {
+ if (scalarCombineAndAssign) {
+ (*scalarCombineAndAssign)(loc, builder, lhs, rhs, accessGroups);
+ return;
+ }
hlfir::Entity valueToAssign = hlfir::loadTrivialScalar(loc, builder, rhs);
if (accessGroups)
if (auto load = valueToAssign.getDefiningOp<fir::LoadOp>())
load.setAccessGroupsAttr(accessGroups);
- if (combiner) {
- hlfir::Entity lhsValue = hlfir::loadTrivialScalar(loc, builder, lhs);
- if (accessGroups)
- if (auto load = lhsValue.getDefiningOp<fir::LoadOp>())
- load.setAccessGroupsAttr(accessGroups);
- valueToAssign = (*combiner)(loc, builder, lhsValue, valueToAssign);
- }
auto assign = hlfir::AssignOp::create(builder, loc, valueToAssign, lhs,
/*realloc=*/false,
/*keep_lhs_length_if_realloc=*/false,
@@ -1420,8 +1417,8 @@ static void combineAndStoreElement(
void hlfir::genNoAliasArrayAssignment(
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
hlfir::Entity lhs, bool emitWorkshareLoop, bool temporaryLHS,
- std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
- hlfir::Entity, hlfir::Entity)> *combiner,
+ std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+ hlfir::Entity, mlir::ArrayAttr)> *scalarCombineAndAssign,
mlir::ArrayAttr accessGroups) {
mlir::OpBuilder::InsertionGuard guard(builder);
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
@@ -1441,28 +1438,30 @@ void hlfir::genNoAliasArrayAssignment(
builder.setInsertionPointToStart(loopNest.body);
auto rhsArrayElement =
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
- rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
+ if (!scalarCombineAndAssign)
+ rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
auto lhsArrayElement =
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
combineAndStoreElement(loc, builder, lhsArrayElement, rhsArrayElement,
- temporaryLHS, combiner, accessGroups);
+ temporaryLHS, scalarCombineAndAssign, accessGroups);
}
void hlfir::genNoAliasAssignment(
mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
hlfir::Entity lhs, bool emitWorkshareLoop, bool temporaryLHS,
- std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
- hlfir::Entity, hlfir::Entity)> *combiner,
+ std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+ hlfir::Entity, mlir::ArrayAttr)> *scalarCombineAndAssign,
mlir::ArrayAttr accessGroups) {
if (lhs.isArray()) {
genNoAliasArrayAssignment(loc, builder, rhs, lhs, emitWorkshareLoop,
- temporaryLHS, combiner, accessGroups);
+ temporaryLHS, scalarCombineAndAssign,
+ accessGroups);
return;
}
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
- combineAndStoreElement(loc, builder, lhs, rhs, temporaryLHS, combiner,
- accessGroups);
+ combineAndStoreElement(loc, builder, lhs, rhs, temporaryLHS,
+ scalarCombineAndAssign, accessGroups);
}
std::pair<hlfir::Entity, bool>
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 9ced235f05707..d8ed9ce968e0a 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -31,6 +31,19 @@
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/CommandLine.h"
+
+static llvm::cl::opt<bool> useAccReductionCombine(
+ "openacc-use-reduction-combine",
+ llvm::cl::desc("Whether to generate acc.reduction_combine. Does not "
+ "control reduction for MIN/MAX and logical reductions."),
+ llvm::cl::init(false));
+
+static llvm::cl::opt<bool> useAccReductionCombineAll(
+ "openacc-use-reduction-combine-all",
+ llvm::cl::desc("Whether to generate acc.reduction_combine for all types "
+ "and operators"),
+ llvm::cl::init(false));
namespace fir::acc {
@@ -1045,6 +1058,25 @@ static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder,
TODO(loc, "reduction operator");
}
+static bool useAccReductionCombineOp(mlir::Type elementType,
+ mlir::acc::ReductionOperator op) {
+ if (useAccReductionCombineAll)
+ return true;
+ if (!useAccReductionCombine)
+ return false;
+ // LOGICAL operators do not have mlir operators and requires FIR specific
+ // logic to interpret the TRUE and FALSE values from the storage (implemented
+ // in fir.convert to i1).
+ if (!llvm::isa<mlir::IntegerType, mlir::FloatType, mlir::ComplexType>(
+ elementType))
+ return false;
+ // MIN/MAX for floating point can have different edge-case behaviors (NANs).
+ // Currently the mlir operator does not match the behavior implemented by
+ // flang.
+ return op != mlir::acc::ReductionOperator::AccMax &&
+ op != mlir::acc::ReductionOperator::AccMin;
+}
+
template <typename Ty>
bool OpenACCMappableModel<Ty>::generateCombiner(
mlir::Type type, mlir::OpBuilder &mlirBuilder, mlir::Location loc,
@@ -1069,11 +1101,25 @@ bool OpenACCMappableModel<Ty>::generateCombiner(
}
mlir::Type elementType = fir::getFortranElementType(dest.getType());
- auto genKernel = [&](mlir::Location l, fir::FirOpBuilder &b,
- hlfir::Entity srcElementValue,
- hlfir::Entity destElementValue) -> hlfir::Entity {
- return hlfir::Entity{genScalarCombiner(builder, loc, op, elementType,
- srcElementValue, destElementValue)};
+ auto genKernel =
+ [&](mlir::Location l, fir::FirOpBuilder &b, hlfir::Entity destElementAddr,
+ hlfir::Entity srcElementAddr, mlir::ArrayAttr accessGroups) -> void {
+ assert(!accessGroups && "access groups not expected in acc reductions");
+ if (useAccReductionCombineOp(elementType, op)) {
+ mlir::acc::ReductionCombineOp::create(builder, loc, destElementAddr,
+ srcElementAddr, op);
+ return;
+ }
+ hlfir::Entity srcElementValue =
+ hlfir::loadTrivialScalar(loc, builder, srcElementAddr);
+ hlfir::Entity destElementValue =
+ hlfir::loadTrivialScalar(loc, builder, destElementAddr);
+ hlfir::Entity combined(genScalarCombiner(
+ builder, loc, op, elementType, destElementValue, srcElementValue));
+ hlfir::AssignOp::create(builder, loc, combined, destElementAddr,
+ /*realloc=*/false,
+ /*keep_lhs_length_if_realloc=*/false,
+ /*temporary_lhs=*/false);
};
hlfir::genNoAliasAssignment(loc, builder, srcSection, destSection,
/*emitWorkshareLoop=*/false,
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 339a4e3435c0d..2c79cacada050 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -1,6 +1,7 @@
! This test checks lowering of OpenACC reduction clause.
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenacc -emit-hlfir %s -o - -openacc-use-reduction-combine | FileCheck -check-prefix=ACC_COMBINE %s
! CHECK-LABEL: acc.reduction.recipe @reduction_lor_ref_box_heap_l32 : !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>> reduction_operator <lor> init {
! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>>):
@@ -97,7 +98,6 @@
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_5]]#0, %[[CONSTANT_7]] : index
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_0]], %[[ADDI_1]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[CONSTANT_8:.*]] = arith.constant 0 : index
! CHECK: %[[BOX_DIMS_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_8]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
! CHECK: %[[CONSTANT_9:.*]] = arith.constant 1 : index
@@ -108,6 +108,7 @@
! CHECK: %[[SUBI_3:.*]] = arith.subi %[[BOX_DIMS_7]]#0, %[[CONSTANT_10]] : index
! CHECK: %[[ADDI_3:.*]] = arith.addi %[[VAL_2]], %[[SUBI_3]] : index
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_2]], %[[ADDI_3]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
@@ -170,13 +171,13 @@
! CHECK: %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[LOAD_0]] (%[[ADDI_0]]) : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[CONSTANT_5:.*]] = arith.constant 0 : index
! CHECK: %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[LOAD_1]], %[[CONSTANT_5]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> (index, index, index)
! CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[LOAD_1]] (%[[ADDI_1]]) : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_3:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_3]], %[[LOAD_2]] fastmath<contract> : f32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_3]], %[[LOAD_2]] : f32
@@ -239,13 +240,13 @@
! CHECK: %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[LOAD_0]] (%[[ADDI_0]]) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[CONSTANT_5:.*]] = arith.constant 0 : index
! CHECK: %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[LOAD_1]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
! CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[LOAD_1]] (%[[ADDI_1]]) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_3:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_3]], %[[LOAD_2]] fastmath<contract> : f32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_3]], %[[LOAD_2]] : f32
@@ -321,8 +322,8 @@
! CHECK: %[[CONSTANT_9:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_9]] to %[[CONSTANT_5]] step %[[CONSTANT_9]] unordered {
! CHECK: %[[DESIGNATE_2:.*]] = hlfir.designate %[[DESIGNATE_0]] (%[[VAL_2]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_3:.*]] = hlfir.designate %[[DESIGNATE_1]] (%[[VAL_2]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_3]] : !fir.ref<i32>
! CHECK: %[[ADDI_4:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_4]] to %[[DESIGNATE_3]] : i32, !fir.ref<i32>
@@ -371,13 +372,13 @@
! CHECK: %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_0]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[CONSTANT_5:.*]] = arith.constant 0 : index
! CHECK: %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_5]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
! CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_1]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
@@ -434,13 +435,13 @@
! CHECK: %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_0]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[CONSTANT_5:.*]] = arith.constant 0 : index
! CHECK: %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
! CHECK: %[[CONSTANT_6:.*]] = arith.constant 1 : index
! CHECK: %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
! CHECK: %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_1]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[ADDI_2:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_2]] to %[[DESIGNATE_1]] : i32, !fir.ref<i32>
@@ -524,8 +525,8 @@
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_17]] to %[[CONSTANT_9]] step %[[CONSTANT_17]] unordered {
! CHECK: fir.do_loop %[[VAL_3:.*]] = %[[CONSTANT_17]] to %[[CONSTANT_6]] step %[[CONSTANT_17]] unordered {
! CHECK: %[[DESIGNATE_2:.*]] = hlfir.designate %[[DESIGNATE_0]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<10x20xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_3:.*]] = hlfir.designate %[[DESIGNATE_1]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<10x20xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_3]] : !fir.ref<i32>
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_0]] to %[[DESIGNATE_3]] : i32, !fir.ref<i32>
@@ -584,8 +585,8 @@
! CHECK: %[[CONSTANT_10:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_10]] to %[[CONSTANT_5]] step %[[CONSTANT_10]] unordered {
! CHECK: %[[DESIGNATE_2:.*]] = hlfir.designate %[[DESIGNATE_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_3:.*]] = hlfir.designate %[[DESIGNATE_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<10xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_3]] : !fir.ref<i32>
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_0]] to %[[DESIGNATE_3]] : i32, !fir.ref<i32>
@@ -847,8 +848,8 @@
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_0]] step %[[CONSTANT_2]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
@@ -906,8 +907,8 @@
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_4]] to %[[CONSTANT_1]] step %[[CONSTANT_4]] unordered {
! CHECK: fir.do_loop %[[VAL_3:.*]] = %[[CONSTANT_4]] to %[[CONSTANT_0]] step %[[CONSTANT_4]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[CMPI_0:.*]] = arith.cmpi sgt, %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32
@@ -966,8 +967,8 @@
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_4]] to %[[CONSTANT_1]] step %[[CONSTANT_4]] unordered {
! CHECK: fir.do_loop %[[VAL_3:.*]] = %[[CONSTANT_4]] to %[[CONSTANT_0]] step %[[CONSTANT_4]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[CMPF_0:.*]] = arith.cmpf olt, %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
@@ -1019,8 +1020,8 @@
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_0]] step %[[CONSTANT_2]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[CMPI_0:.*]] = arith.cmpi slt, %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: %[[SELECT_0:.*]] = arith.select %[[CMPI_0]], %[[LOAD_1]], %[[LOAD_0]] : i32
@@ -1071,8 +1072,8 @@
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_0]] step %[[CONSTANT_2]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[MULF_0:.*]] = arith.mulf %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
! CHECK: hlfir.assign %[[MULF_0]] to %[[DESIGNATE_1]] : f32, !fir.ref<f32>
@@ -1121,8 +1122,8 @@
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_0]] step %[[CONSTANT_2]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[MULI_0:.*]] = arith.muli %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[MULI_0]] to %[[DESIGNATE_1]] : i32, !fir.ref<i32>
@@ -1171,8 +1172,8 @@
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_0]] step %[[CONSTANT_2]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xf32>>, index) -> !fir.ref<f32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
! CHECK: %[[ADDF_0:.*]] = arith.addf %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
! CHECK: hlfir.assign %[[ADDF_0]] to %[[DESIGNATE_1]] : f32, !fir.ref<f32>
@@ -1235,8 +1236,8 @@
! CHECK: fir.do_loop %[[VAL_3:.*]] = %[[CONSTANT_6]] to %[[CONSTANT_1]] step %[[CONSTANT_6]] unordered {
! CHECK: fir.do_loop %[[VAL_4:.*]] = %[[CONSTANT_6]] to %[[CONSTANT_0]] step %[[CONSTANT_6]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_4]], %[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_4]], %[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10x2xi32>>, index, index, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_0]] to %[[DESIGNATE_1]] : i32, !fir.ref<i32>
@@ -1278,8 +1279,8 @@
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_4]] to %[[CONSTANT_1]] step %[[CONSTANT_4]] unordered {
! CHECK: fir.do_loop %[[VAL_3:.*]] = %[[CONSTANT_4]] to %[[CONSTANT_0]] step %[[CONSTANT_4]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_3]], %[[VAL_2]]) : (!fir.ref<!fir.array<100x10xi32>>, index, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_0]] to %[[DESIGNATE_1]] : i32, !fir.ref<i32>
@@ -1313,8 +1314,8 @@
! CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : index
! CHECK: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_2]] to %[[CONSTANT_0]] step %[[CONSTANT_2]] unordered {
! CHECK: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
-! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_2]]) : (!fir.ref<!fir.array<100xi32>>, index) -> !fir.ref<i32>
+! CHECK: %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
! CHECK: %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
! CHECK: %[[ADDI_0:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
! CHECK: hlfir.assign %[[ADDI_0]] to %[[DESIGNATE_1]] : i32, !fir.ref<i32>
@@ -1884,3 +1885,111 @@ subroutine acc_reduction_logical_allocatable(l)
! CHECK-LABEL: func.func @_QPacc_reduction_logical_allocatable(
! CHECK: %[[REDUCTION_0:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>>) recipe(@reduction_lor_ref_box_heap_l32) -> !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>> {name = "l"}
! CHECK: acc.parallel reduction(%[[REDUCTION_0]] : !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>>)
+
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_lor_ref_box_heap_l32 : !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>> reduction_operator <lor> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_box_UxUxf32 : !fir.box<!fir.array<?x?xf32>> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_ref_box_ptr_Uxf32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_ref_box_heap_Uxf32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_section_lb1.ub3_box_Uxi32 : !fir.box<!fir.array<?xi32>> reduction_operator <add> init {
+! ACC_COMBINE-LABEL: } combiner {
+! ACC_COMBINE: ^bb0(%[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>>, %[[VAL_1:.*]]: !fir.box<!fir.array<?xi32>>):
+! ACC_COMBINE: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+! ACC_COMBINE: %[[CONSTANT_1:.*]] = arith.constant 1 : index
+! ACC_COMBINE: %[[CONSTANT_2:.*]] = arith.constant 3 : index
+! ACC_COMBINE: %[[CONSTANT_3:.*]] = arith.constant 0 : index
+! ACC_COMBINE: %[[CONSTANT_4:.*]] = arith.constant 2 : index
+! ACC_COMBINE: %[[CONSTANT_5:.*]] = arith.constant 3 : index
+! ACC_COMBINE: %[[CONSTANT_6:.*]] = arith.constant true
+! ACC_COMBINE: %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_5]] : (index) -> !fir.shape<1>
+! ACC_COMBINE: %[[CONSTANT_7:.*]] = arith.constant 0 : index
+! ACC_COMBINE: %[[BOX_DIMS_0:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_7]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! ACC_COMBINE: %[[ADDI_0:.*]] = arith.addi %[[BOX_DIMS_0]]#0, %[[CONSTANT_1]] : index
+! ACC_COMBINE: %[[ADDI_1:.*]] = arith.addi %[[BOX_DIMS_0]]#0, %[[CONSTANT_2]] : index
+! ACC_COMBINE: %[[CONSTANT_8:.*]] = arith.constant 0 : index
+! ACC_COMBINE: %[[BOX_DIMS_1:.*]]:3 = fir.box_dims %[[VAL_1]], %[[CONSTANT_8]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+! ACC_COMBINE: %[[ADDI_2:.*]] = arith.addi %[[BOX_DIMS_1]]#0, %[[CONSTANT_1]] : index
+! ACC_COMBINE: %[[ADDI_3:.*]] = arith.addi %[[BOX_DIMS_1]]#0, %[[CONSTANT_2]] : index
+! ACC_COMBINE: %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_2]]:%[[ADDI_3]]:%[[CONSTANT_0]]) shape %[[SHAPE_0]] : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
+! ACC_COMBINE: %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_0]]:%[[ADDI_1]]:%[[CONSTANT_0]]) shape %[[SHAPE_0]] : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<3xi32>>
+! ACC_COMBINE: %[[CONSTANT_9:.*]] = arith.constant 1 : index
+! ACC_COMBINE: fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_9]] to %[[CONSTANT_5]] step %[[CONSTANT_9]] unordered {
+! ACC_COMBINE: %[[DESIGNATE_2:.*]] = hlfir.designate %[[DESIGNATE_0]] (%[[VAL_2]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! ACC_COMBINE: %[[DESIGNATE_3:.*]] = hlfir.designate %[[DESIGNATE_1]] (%[[VAL_2]]) : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! ACC_COMBINE: acc.reduction_combine %[[DESIGNATE_2]] into %[[DESIGNATE_3]] <add> : !fir.ref<i32>
+! ACC_COMBINE: }
+! ACC_COMBINE: acc.yield %[[VAL_0]] : !fir.box<!fir.array<?xi32>>
+! ACC_COMBINE: }
+
+
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_box_Uxf32 : !fir.box<!fir.array<?xf32>> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_box_Uxi32 : !fir.box<!fir.array<?xi32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_section_lb0.ub9xlb0.ub19_ref_10x20xi32 : !fir.ref<!fir.array<10x20xi32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_section_lb10.ub19_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_box_ptr_i32 : !fir.ref<!fir.box<!fir.ptr<i32>>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ptr<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.heap<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref<complex<f32>> reduction_operator <mul> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <mul> : !fir.ref<complex<f32>>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_z32 : !fir.ref<complex<f32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<complex<f32>>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_neqv_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <neqv> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_eqv_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <eqv> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_lor_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <lor> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_land_ref_l32 : !fir.ref<!fir.logical<4>> reduction_operator <land> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_xor_ref_i32 : !fir.ref<i32> reduction_operator <xor> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <xor> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_ior_ref_i32 : !fir.ref<i32> reduction_operator <ior> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <ior> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_iand_ref_i32 : !fir.ref<i32> reduction_operator <iand> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <iand> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref<f32> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_ref_100x10xi32 : !fir.ref<!fir.array<100x10xi32>> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_max_ref_i32 : !fir.ref<i32> reduction_operator <max> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_min_ref_100x10xf32 : !fir.ref<!fir.array<100x10xf32>> reduction_operator <min> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref<f32> reduction_operator <min> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_min_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <min> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_min_ref_i32 : !fir.ref<i32> reduction_operator <min> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_mul_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <mul> init {
+! ACC_COMBINE-NOT: acc.reduction_combine
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <mul> : !fir.ref<f32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_mul_ref_f32 : !fir.ref<f32> reduction_operator <mul> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <mul> : !fir.ref<f32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_mul_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <mul> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <mul> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_mul_ref_i32 : !fir.ref<i32> reduction_operator <mul> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <mul> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_100xf32 : !fir.ref<!fir.array<100xf32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<f32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_f32 : !fir.ref<f32> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<f32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_100x10x2xi32 : !fir.ref<!fir.array<100x10x2xi32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_100x10xi32 : !fir.ref<!fir.array<100x10xi32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_100xi32 : !fir.ref<!fir.array<100xi32>> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
+! ACC_COMBINE-LABEL: acc.reduction.recipe @reduction_add_ref_i32 : !fir.ref<i32> reduction_operator <add> init {
+! ACC_COMBINE: acc.reduction_combine %{{.*}} into %{{.*}} <add> : !fir.ref<i32>
More information about the flang-commits mailing list