[flang-commits] [flang] [flang][acc] use new acc.reduction_combine under option (PR #182066)

via flang-commits flang-commits at lists.llvm.org
Wed Feb 18 09:05:04 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-openacc

Author: None (jeanPerier)

<details>
<summary>Changes</summary>

Use the new acc.reduction_combine operation that was added in https://github.com/llvm/llvm-project/pull/181853 when generating reduction recipes.

To do this, the optional callback of genNoAliasArrayAssignment is updated to operate on element addresses, and it is now also responsible for generating the assignment.

---

Patch is 41.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/182066.diff


4 Files Affected:

- (modified) flang/include/flang/Optimizer/Builder/HLFIRTools.h (+15-10) 
- (modified) flang/lib/Optimizer/Builder/HLFIRTools.cpp (+17-18) 
- (modified) flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp (+51-5) 
- (modified) flang/test/Lower/OpenACC/acc-reduction.f90 (+127-18) 


``````````diff
diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index 37297dd0ad6d1..e75bddc7e1bef 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -456,13 +456,18 @@ mlir::Value inlineElementalOp(
 /// over the optimal extents deduced from both shapes. If \p emitWorkshareLoop
 /// is true, a workshare loop construct may be emitted when available.
 /// Allocatable LHS must be allocated with the right shape and parameters.
+/// An optional scalarCombineAndAssign can be provided to provide logic for more
+/// complex assignment actions like for reductions that may need to happen
+/// atomically. When provided, the callback will be passed scalar addresses for
+/// the LHS and RHS elements and is in charge of generating the combination and
+/// assignment logic.
 void genNoAliasArrayAssignment(
     mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
     hlfir::Entity lhs, bool emitWorkshareLoop = false,
     bool temporaryLHS = false,
-    std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
-                                hlfir::Entity, hlfir::Entity)> *combiner =
-        nullptr,
+    std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+                       hlfir::Entity, mlir::ArrayAttr)>
+        *scalarCombineAndAssign = nullptr,
     mlir::ArrayAttr accessGroups = {});
 
 /// Generate an assignment from \p rhs to \p lhs when they are known not to
@@ -474,19 +479,19 @@ void genNoAliasAssignment(
     mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
     hlfir::Entity lhs, bool emitWorkshareLoop = false,
     bool temporaryLHS = false,
-    std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
-                                hlfir::Entity, hlfir::Entity)> *combiner =
-        nullptr,
+    std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+                       hlfir::Entity, mlir::ArrayAttr accessGroups)>
+        *scalarCombineAndAssign = nullptr,
     mlir::ArrayAttr accessGroups = {});
 inline void genNoAliasAssignment(
     mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
     hlfir::Entity lhs, bool emitWorkshareLoop, bool temporaryLHS,
-    std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
-                                hlfir::Entity, hlfir::Entity)>
-        combiner,
+    std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+                       hlfir::Entity, mlir::ArrayAttr)>
+        scalarCombineAndAssign,
     mlir::ArrayAttr accessGroups = {}) {
   genNoAliasAssignment(loc, builder, rhs, lhs, emitWorkshareLoop, temporaryLHS,
-                       &combiner, accessGroups);
+                       &scalarCombineAndAssign, accessGroups);
 }
 
 /// Create a new temporary with the shape and parameters of the provided
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 3355bf1475e30..e7a286b73bb4b 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -1395,20 +1395,17 @@ bool hlfir::elementalOpMustProduceTemp(hlfir::ElementalOp elemental) {
 static void combineAndStoreElement(
     mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity lhs,
     hlfir::Entity rhs, bool temporaryLHS,
-    std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
-                                hlfir::Entity, hlfir::Entity)> *combiner,
+    std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+                       hlfir::Entity, mlir::ArrayAttr)> *scalarCombineAndAssign,
     mlir::ArrayAttr accessGroups) {
+  if (scalarCombineAndAssign) {
+    (*scalarCombineAndAssign)(loc, builder, lhs, rhs, accessGroups);
+    return;
+  }
   hlfir::Entity valueToAssign = hlfir::loadTrivialScalar(loc, builder, rhs);
   if (accessGroups)
     if (auto load = valueToAssign.getDefiningOp<fir::LoadOp>())
       load.setAccessGroupsAttr(accessGroups);
-  if (combiner) {
-    hlfir::Entity lhsValue = hlfir::loadTrivialScalar(loc, builder, lhs);
-    if (accessGroups)
-      if (auto load = lhsValue.getDefiningOp<fir::LoadOp>())
-        load.setAccessGroupsAttr(accessGroups);
-    valueToAssign = (*combiner)(loc, builder, lhsValue, valueToAssign);
-  }
   auto assign = hlfir::AssignOp::create(builder, loc, valueToAssign, lhs,
                                         /*realloc=*/false,
                                         /*keep_lhs_length_if_realloc=*/false,
@@ -1420,8 +1417,8 @@ static void combineAndStoreElement(
 void hlfir::genNoAliasArrayAssignment(
     mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
     hlfir::Entity lhs, bool emitWorkshareLoop, bool temporaryLHS,
-    std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
-                                hlfir::Entity, hlfir::Entity)> *combiner,
+    std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+                       hlfir::Entity, mlir::ArrayAttr)> *scalarCombineAndAssign,
     mlir::ArrayAttr accessGroups) {
   mlir::OpBuilder::InsertionGuard guard(builder);
   rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
@@ -1441,28 +1438,30 @@ void hlfir::genNoAliasArrayAssignment(
   builder.setInsertionPointToStart(loopNest.body);
   auto rhsArrayElement =
       hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
-  rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
+  if (!scalarCombineAndAssign)
+    rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
   auto lhsArrayElement =
       hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
   combineAndStoreElement(loc, builder, lhsArrayElement, rhsArrayElement,
-                         temporaryLHS, combiner, accessGroups);
+                         temporaryLHS, scalarCombineAndAssign, accessGroups);
 }
 
 void hlfir::genNoAliasAssignment(
     mlir::Location loc, fir::FirOpBuilder &builder, hlfir::Entity rhs,
     hlfir::Entity lhs, bool emitWorkshareLoop, bool temporaryLHS,
-    std::function<hlfir::Entity(mlir::Location, fir::FirOpBuilder &,
-                                hlfir::Entity, hlfir::Entity)> *combiner,
+    std::function<void(mlir::Location, fir::FirOpBuilder &, hlfir::Entity,
+                       hlfir::Entity, mlir::ArrayAttr)> *scalarCombineAndAssign,
     mlir::ArrayAttr accessGroups) {
   if (lhs.isArray()) {
     genNoAliasArrayAssignment(loc, builder, rhs, lhs, emitWorkshareLoop,
-                              temporaryLHS, combiner, accessGroups);
+                              temporaryLHS, scalarCombineAndAssign,
+                              accessGroups);
     return;
   }
   rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
   lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
-  combineAndStoreElement(loc, builder, lhs, rhs, temporaryLHS, combiner,
-                         accessGroups);
+  combineAndStoreElement(loc, builder, lhs, rhs, temporaryLHS,
+                         scalarCombineAndAssign, accessGroups);
 }
 
 std::pair<hlfir::Entity, bool>
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 9ced235f05707..d8ed9ce968e0a 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -31,6 +31,19 @@
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Support/CommandLine.h"
+
+static llvm::cl::opt<bool> useAccReductionCombine(
+    "openacc-use-reduction-combine",
+    llvm::cl::desc("Whether to generate acc.reduction_combine. Does not "
+                   "control reduction for MIN/MAX and logical reductions."),
+    llvm::cl::init(false));
+
+static llvm::cl::opt<bool> useAccReductionCombineAll(
+    "openacc-use-reduction-combine-all",
+    llvm::cl::desc("Whether to generate acc.reduction_combine for all types "
+                   "and operators"),
+    llvm::cl::init(false));
 
 namespace fir::acc {
 
@@ -1045,6 +1058,25 @@ static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder,
   TODO(loc, "reduction operator");
 }
 
+static bool useAccReductionCombineOp(mlir::Type elementType,
+                                     mlir::acc::ReductionOperator op) {
+  if (useAccReductionCombineAll)
+    return true;
+  if (!useAccReductionCombine)
+    return false;
+  // LOGICAL operators do not have mlir operators and requires FIR specific
+  // logic to interpret the TRUE and FALSE values from the storage (implemented
+  // in fir.convert to i1).
+  if (!llvm::isa<mlir::IntegerType, mlir::FloatType, mlir::ComplexType>(
+          elementType))
+    return false;
+  // MIN/MAX for floating point can have different edge-case behaviors (NANs).
+  // Currently the mlir operator does not match the behavior implemented by
+  // flang.
+  return op != mlir::acc::ReductionOperator::AccMax &&
+         op != mlir::acc::ReductionOperator::AccMin;
+}
+
 template <typename Ty>
 bool OpenACCMappableModel<Ty>::generateCombiner(
     mlir::Type type, mlir::OpBuilder &mlirBuilder, mlir::Location loc,
@@ -1069,11 +1101,25 @@ bool OpenACCMappableModel<Ty>::generateCombiner(
   }
 
   mlir::Type elementType = fir::getFortranElementType(dest.getType());
-  auto genKernel = [&](mlir::Location l, fir::FirOpBuilder &b,
-                       hlfir::Entity srcElementValue,
-                       hlfir::Entity destElementValue) -> hlfir::Entity {
-    return hlfir::Entity{genScalarCombiner(builder, loc, op, elementType,
-                                           srcElementValue, destElementValue)};
+  auto genKernel =
+      [&](mlir::Location l, fir::FirOpBuilder &b, hlfir::Entity destElementAddr,
+          hlfir::Entity srcElementAddr, mlir::ArrayAttr accessGroups) -> void {
+    assert(!accessGroups && "access groups not expected in acc reductions");
+    if (useAccReductionCombineOp(elementType, op)) {
+      mlir::acc::ReductionCombineOp::create(builder, loc, destElementAddr,
+                                            srcElementAddr, op);
+      return;
+    }
+    hlfir::Entity srcElementValue =
+        hlfir::loadTrivialScalar(loc, builder, srcElementAddr);
+    hlfir::Entity destElementValue =
+        hlfir::loadTrivialScalar(loc, builder, destElementAddr);
+    hlfir::Entity combined(genScalarCombiner(
+        builder, loc, op, elementType, destElementValue, srcElementValue));
+    hlfir::AssignOp::create(builder, loc, combined, destElementAddr,
+                            /*realloc=*/false,
+                            /*keep_lhs_length_if_realloc=*/false,
+                            /*temporary_lhs=*/false);
   };
   hlfir::genNoAliasAssignment(loc, builder, srcSection, destSection,
                               /*emitWorkshareLoop=*/false,
diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90
index 339a4e3435c0d..2c79cacada050 100644
--- a/flang/test/Lower/OpenACC/acc-reduction.f90
+++ b/flang/test/Lower/OpenACC/acc-reduction.f90
@@ -1,6 +1,7 @@
 ! This test checks lowering of OpenACC reduction clause.
 
 ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+! RUN: bbc -fopenacc -emit-hlfir %s -o - -openacc-use-reduction-combine | FileCheck -check-prefix=ACC_COMBINE %s
 
 ! CHECK-LABEL:   acc.reduction.recipe @reduction_lor_ref_box_heap_l32 : !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>> reduction_operator <lor> init {
 ! CHECK:         ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.logical<4>>>>):
@@ -97,7 +98,6 @@
 ! CHECK:               %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_5]]#0, %[[CONSTANT_7]] : index
 ! CHECK:               %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
 ! CHECK:               %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_0]], %[[ADDI_1]])  : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
-! CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:               %[[CONSTANT_8:.*]] = arith.constant 0 : index
 ! CHECK:               %[[BOX_DIMS_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_8]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
 ! CHECK:               %[[CONSTANT_9:.*]] = arith.constant 1 : index
@@ -108,6 +108,7 @@
 ! CHECK:               %[[SUBI_3:.*]] = arith.subi %[[BOX_DIMS_7]]#0, %[[CONSTANT_10]] : index
 ! CHECK:               %[[ADDI_3:.*]] = arith.addi %[[VAL_2]], %[[SUBI_3]] : index
 ! CHECK:               %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_2]], %[[ADDI_3]])  : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+! CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:               %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
 ! CHECK:               %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
 ! CHECK:               %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
@@ -170,13 +171,13 @@
 ! CHECK:             %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
 ! CHECK:             %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
 ! CHECK:             %[[DESIGNATE_0:.*]] = hlfir.designate %[[LOAD_0]] (%[[ADDI_0]])  : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
-! CHECK:             %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:             %[[CONSTANT_5:.*]] = arith.constant 0 : index
 ! CHECK:             %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[LOAD_1]], %[[CONSTANT_5]] : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> (index, index, index)
 ! CHECK:             %[[CONSTANT_6:.*]] = arith.constant 1 : index
 ! CHECK:             %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
 ! CHECK:             %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
 ! CHECK:             %[[DESIGNATE_1:.*]] = hlfir.designate %[[LOAD_1]] (%[[ADDI_1]])  : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
+! CHECK:             %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:             %[[LOAD_3:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
 ! CHECK:             %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_3]], %[[LOAD_2]] fastmath<contract> : f32
 ! CHECK:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_3]], %[[LOAD_2]] : f32
@@ -239,13 +240,13 @@
 ! CHECK:             %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
 ! CHECK:             %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
 ! CHECK:             %[[DESIGNATE_0:.*]] = hlfir.designate %[[LOAD_0]] (%[[ADDI_0]])  : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
-! CHECK:             %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:             %[[CONSTANT_5:.*]] = arith.constant 0 : index
 ! CHECK:             %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[LOAD_1]], %[[CONSTANT_5]] : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
 ! CHECK:             %[[CONSTANT_6:.*]] = arith.constant 1 : index
 ! CHECK:             %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
 ! CHECK:             %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
 ! CHECK:             %[[DESIGNATE_1:.*]] = hlfir.designate %[[LOAD_1]] (%[[ADDI_1]])  : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
+! CHECK:             %[[LOAD_2:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:             %[[LOAD_3:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
 ! CHECK:             %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_3]], %[[LOAD_2]] fastmath<contract> : f32
 ! CHECK:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_3]], %[[LOAD_2]] : f32
@@ -321,8 +322,8 @@
 ! CHECK:           %[[CONSTANT_9:.*]] = arith.constant 1 : index
 ! CHECK:           fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_9]] to %[[CONSTANT_5]] step %[[CONSTANT_9]] unordered {
 ! CHECK:             %[[DESIGNATE_2:.*]] = hlfir.designate %[[DESIGNATE_0]] (%[[VAL_2]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
-! CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
 ! CHECK:             %[[DESIGNATE_3:.*]] = hlfir.designate %[[DESIGNATE_1]] (%[[VAL_2]])  : (!fir.box<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+! CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
 ! CHECK:             %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_3]] : !fir.ref<i32>
 ! CHECK:             %[[ADDI_4:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
 ! CHECK:             hlfir.assign %[[ADDI_4]] to %[[DESIGNATE_3]] : i32, !fir.ref<i32>
@@ -371,13 +372,13 @@
 ! CHECK:             %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
 ! CHECK:             %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
 ! CHECK:             %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_0]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-! CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:             %[[CONSTANT_5:.*]] = arith.constant 0 : index
 ! CHECK:             %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_5]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 ! CHECK:             %[[CONSTANT_6:.*]] = arith.constant 1 : index
 ! CHECK:             %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
 ! CHECK:             %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
 ! CHECK:             %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_1]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+! CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<f32>
 ! CHECK:             %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<f32>
 ! CHECK:             %[[CMPF_0:.*]] = arith.cmpf ogt, %[[LOAD_1]], %[[LOAD_0]] fastmath<contract> : f32
 ! CHECK:             %[[SELECT_0:.*]] = arith.select %[[CMPF_0]], %[[LOAD_1]], %[[LOAD_0]] : f32
@@ -434,13 +435,13 @@
 ! CHECK:             %[[SUBI_0:.*]] = arith.subi %[[BOX_DIMS_2]]#0, %[[CONSTANT_4]] : index
 ! CHECK:             %[[ADDI_0:.*]] = arith.addi %[[VAL_2]], %[[SUBI_0]] : index
 ! CHECK:             %[[DESIGNATE_0:.*]] = hlfir.designate %[[VAL_1]] (%[[ADDI_0]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-! CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
 ! CHECK:             %[[CONSTANT_5:.*]] = arith.constant 0 : index
 ! CHECK:             %[[BOX_DIMS_3:.*]]:3 = fir.box_dims %[[VAL_0]], %[[CONSTANT_5]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 ! CHECK:             %[[CONSTANT_6:.*]] = arith.constant 1 : index
 ! CHECK:             %[[SUBI_1:.*]] = arith.subi %[[BOX_DIMS_3]]#0, %[[CONSTANT_6]] : index
 ! CHECK:             %[[ADDI_1:.*]] = arith.addi %[[VAL_2]], %[[SUBI_1]] : index
 ! CHECK:             %[[DESIGNATE_1:.*]] = hlfir.designate %[[VAL_0]] (%[[ADDI_1]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK:             %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_0]] : !fir.ref<i32>
 ! CHECK:             %[[LOAD_1:.*]] = fir.load %[[DESIGNATE_1]] : !fir.ref<i32>
 ! CHECK:             %[[ADDI_2:.*]] = arith.addi %[[LOAD_1]], %[[LOAD_0]] : i32
 ! CHECK:             hlfir.assign %[[ADDI_2]] to %[[DESIGNATE_1]] : i32, !fir.ref<i32>
@@ -524,8 +525,8 @@
 ! CHECK:           fir.do_loop %[[VAL_2:.*]] = %[[CONSTANT_17]] to %[[CONSTANT_9]] step %[[CONSTANT_17]] unordered {
 ! CHECK:             fir.do_loop %[[VAL_3:.*]] = %[[CONSTANT_17]] to %[[CONSTANT_6]] step %[[CONSTANT_17]] unordered {
 ! CHECK:               %[[DESIGNATE_2:.*]] = hlfir.designate %[[DESIGNATE_0]] (%[[VAL_3]], %[[VAL_2]])  : (!fir.ref<!fir.array<10x20xi32>>, index, index) -> !fir.ref<i32>
-! CHECK:               %[[LOAD_0:.*]] = fir.load %[[DESIGNATE_2]] : !fir.ref<i32>
 ! CHECK:               %[[DESIGNATE_3:.*]] = hlfir.designate %[[DESIGNATE_1]] (%[[VAL_3]], %[[VAL_2]])  : (!fir.ref<!fir.array<10x20xi32>>, index, inde...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/182066


More information about the flang-commits mailing list