[flang-commits] [flang] [flang][fold] fix bug with folding min/max (PR #144162)

Fri Jun 13 14:27:17 PDT 2025

https://github.com/akuhlens created https://github.com/llvm/llvm-project/pull/144162

Fixes #133


>From 58219f2654e1d93dcc2845a21a7d65e872d9e6c3 Mon Sep 17 00:00:00 2001
From: Andre Kuhlenschmidt <akuhlenschmi at nvidia.com>
Date: Fri, 13 Jun 2025 11:34:08 -0700
Subject: [PATCH] initial commit

---
 flang/lib/Evaluate/fold-implementation.h      | 94 +++++++++++++------
 flang/test/Lower/HLFIR/custom-intrinsic.f90   | 16 ++--
 .../OpenMP/reduction-array-intrinsic.f90      |  8 +-
 ...oop-reduction-allocatable-array-minmax.f90 | 58 ++++++------
 .../OpenMP/wsloop-reduction-max-byref.f90     | 10 +-
 .../Lower/OpenMP/wsloop-reduction-max.f90     | 10 +-
 .../OpenMP/wsloop-reduction-min-byref.f90     | 10 +-
 .../Lower/OpenMP/wsloop-reduction-min.f90     | 10 +-
 8 files changed, 125 insertions(+), 91 deletions(-)

diff --git a/flang/lib/Evaluate/fold-implementation.h b/flang/lib/Evaluate/fold-implementation.h
index b0f39e63d0941..0b870089dd28b 100644
--- a/flang/lib/Evaluate/fold-implementation.h
+++ b/flang/lib/Evaluate/fold-implementation.h
@@ -1109,42 +1109,76 @@ Expr<T> FoldMINorMAX(
       T::category == TypeCategory::Unsigned ||
       T::category == TypeCategory::Real ||
       T::category == TypeCategory::Character);
+
+  // Lots of constraints:
+  // - We want Extremum<T> generated by semantics to compare equal to
+  //   Extremum<T> written out to source files as max or min calls.
+  // - Users can also write min/max calls that must also compare equal
+  //   to min/max calls that wind up being written to module files.
+  // - Extremum<T> is binary and can't currently handle optional
+  //   arguments that may show up as the 3rd and later arguments.
+  // - The code below only accepts more than 2 arguments if all the
+  //   arguments are constant (and hence known to be present).
+  // - ConvertExprToHLFIR can't currently handle Extremum<Character>
+  // - Semantics doesn't currently generate Extremum<Character>
+  // The original code did the folding of arguments and the overall extremum
+  // operation in a single pass. This was shorter code-wise, but took me
+  // a while to tease out all the logic and was doing redundant work.
+  // So I split it into two passes:
+  // 1) fold the arguments and check if they are constant,
+  // 2) Decide if we:
+  //    - can constant-fold the min/max operation, or
+  //    - need to generate an extremum anyway,
+  //    and do it if so.
+  //    Otherwise, return the original call.
   auto &args{funcRef.arguments()};
-  bool ok{true};
-  std::optional<Expr<T>> result;
-  Folder<T> folder{context};
-  for (std::optional<ActualArgument> &arg : args) {
-    // Call Folding on all arguments to make operand promotion explicit.
-    if (!folder.Folding(arg)) {
-      // TODO: Lowering can't handle having every FunctionRef for max and min
-      // being converted into Extremum<T>.  That needs fixing.  Until that
-      // is corrected, however, it is important that max and min references
-      // in module files be converted into Extremum<T> even when not constant;
-      // the Extremum<SubscriptInteger> operations created to normalize the
-      // values of array bounds are formatted as max operations in the
-      // declarations in modules, and need to be read back in as such in
-      // order for expression comparison to not produce false inequalities
-      // when checking function results for procedure interface compatibility.
-      if (!context.moduleFileName()) {
-        ok = false;
+  size_t nargs{args.size()};
+  bool allArgsConstant{true};
+  bool extremumAnyway{nargs == 2 && T::category != TypeCategory::Character};
+  // 1a) Fold the first two arguments.
+  {
+    Folder<T> folder{context, false};
+    if (!folder.Folding(args[0])) {
+      allArgsConstant = false;
+    }
+    if (!folder.Folding(args[1])) {
+      allArgsConstant = false;
+    }
+  }
+  // 1b) Fold any optional arguments.
+  if (nargs > 2) {
+    Folder<T> folder{context, true};
+    for (size_t i{2}; i < nargs; ++i) {
+      if (args[i]) {
+        if (!folder.Folding(args[i])) {
+          allArgsConstant = false;
+        }
       }
     }
-    Expr<SomeType> *argExpr{arg ? arg->UnwrapExpr() : nullptr};
-    if (argExpr) {
-      *argExpr = Fold(context, std::move(*argExpr));
-    }
-    if (Expr<T> * tExpr{UnwrapExpr<Expr<T>>(argExpr)}) {
-      if (result) {
-        result = FoldOperation(
-            context, Extremum<T>{order, std::move(*result), Expr<T>{*tExpr}});
-      } else {
-        result = Expr<T>{*tExpr};
+  }
+  // 2) If we can fold the result, or the call to min/max may compare equal to
+  // an extremum generated by semantics, go ahead and convert to an extremum
+  // and try to fold the result.
+  if (allArgsConstant || extremumAnyway) {
+    // Folding updates the argument expressions in place, no need to call
+    // Fold() on each argument again.
+    if (auto *resultp{UnwrapExpr<Expr<T>>(args[0])}) {
+      Expr<T> result{*resultp};
+      for (size_t i{1}; i < nargs; ++i) {
+        if (auto *tExpr{UnwrapExpr<Expr<T>>(args[i])}) {
+          result = FoldOperation(
+              context, Extremum<T>{order, std::move(result), *tExpr});
+        } else {
+          // This should never happen, but here is a value to return.
+          return Expr<T>{std::move(funcRef)};
+        }
       }
-    } else {
-      ok = false;
+      return result;
     }
   }
-  return ok && result ? std::move(*result) : Expr<T>{std::move(funcRef)};
+  // If we decided to not generate an extremum just return the original call,
+  // with the arguments folded.
+  return Expr<T>{std::move(funcRef)};
 }
 
 // For AMAX0, AMIN0, AMAX1, AMIN1, DMAX1, DMIN1, MAX0, MIN0, MAX1, and MIN1
diff --git a/flang/test/Lower/HLFIR/custom-intrinsic.f90 b/flang/test/Lower/HLFIR/custom-intrinsic.f90
index 161a2ab75b7c8..5ec6e0a17e9ac 100644
--- a/flang/test/Lower/HLFIR/custom-intrinsic.f90
+++ b/flang/test/Lower/HLFIR/custom-intrinsic.f90
@@ -115,10 +115,10 @@ function max_array(a, b)
 ! CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_10]]) {uniq_name = "_QFmax_arrayEmax_array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
 ! CHECK:           %[[VAL_12:.*]] = hlfir.elemental %[[VAL_3]] unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
 ! CHECK:           ^bb0(%[[VAL_13:.*]]: index):
-! CHECK:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
-! CHECK:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<i32>
-! CHECK:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
-! CHECK:             %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
 ! CHECK:             %[[VAL_18:.*]] = arith.cmpi sgt, %[[VAL_15]], %[[VAL_17]] : i32
 ! CHECK:             %[[VAL_19:.*]] = arith.select %[[VAL_18]], %[[VAL_15]], %[[VAL_17]] : i32
 ! CHECK:             hlfir.yield_element %[[VAL_19]] : i32
@@ -288,10 +288,10 @@ function min_array(a, b)
 ! CHECK:           %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_10]]) {uniq_name = "_QFmin_arrayEmin_array"} : (!fir.ref<!fir.array<42xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<42xi32>>, !fir.ref<!fir.array<42xi32>>)
 ! CHECK:           %[[VAL_12:.*]] = hlfir.elemental %[[VAL_3]] unordered : (!fir.shape<1>) -> !hlfir.expr<42xi32> {
 ! CHECK:           ^bb0(%[[VAL_13:.*]]: index):
-! CHECK:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
-! CHECK:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<i32>
-! CHECK:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
-! CHECK:             %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_14:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_16:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_13]])  : (!fir.ref<!fir.array<42xi32>>, index) -> !fir.ref<i32>
+! CHECK-DAG:             %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<i32>
 ! CHECK:             %[[VAL_18:.*]] = arith.cmpi slt, %[[VAL_15]], %[[VAL_17]] : i32
 ! CHECK:             %[[VAL_19:.*]] = arith.select %[[VAL_18]], %[[VAL_15]], %[[VAL_17]] : i32
 ! CHECK:             hlfir.yield_element %[[VAL_19]] : i32
diff --git a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90 b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
index 8b4f37278185e..0cf88cf889868 100644
--- a/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
+++ b/flang/test/Lower/OpenMP/reduction-array-intrinsic.f90
@@ -82,10 +82,10 @@ subroutine max_array_reduction(l, r)
 ! CHECK:               %[[VAL_16:.*]] = arith.constant 1 : index
 ! CHECK:               %[[VAL_17:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_16]] : index
 ! CHECK:               %[[VAL_18:.*]] = arith.addi %[[VAL_13]], %[[VAL_17]] : index
-! CHECK:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-! CHECK:               %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
-! CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-! CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_19:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_18]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_4]]#0 (%[[VAL_13]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<i32>
 ! CHECK:               %[[VAL_23:.*]] = arith.cmpi sgt, %[[VAL_20]], %[[VAL_22]] : i32
 ! CHECK:               %[[VAL_24:.*]] = arith.select %[[VAL_23]], %[[VAL_20]], %[[VAL_22]] : i32
 ! CHECK:               hlfir.yield_element %[[VAL_24]] : i32
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
index 5b4c5e65ffccc..58b68e5ec4cfd 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-allocatable-array-minmax.f90
@@ -228,11 +228,11 @@ program reduce15
 ! CHECK:                 %[[VAL_56:.*]]:2 = hlfir.declare %[[VAL_55]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_62:.*]]:2 = hlfir.declare %[[VAL_60]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmaxes"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK:                 hlfir.assign %[[VAL_61]] to %[[VAL_56]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_63:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK:                 %[[VAL_64:.*]] = arith.constant 0 : index
-! CHECK:                 %[[VAL_65:.*]]:3 = fir.box_dims %[[VAL_63]], %[[VAL_64]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-! CHECK:                 %[[VAL_66:.*]] = fir.shape %[[VAL_65]]#1 : (index) -> !fir.shape<1>
-! CHECK:                 %[[VAL_67:.*]] = fir.load %[[VAL_62]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK-DAG:             %[[VAL_63:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK-DAG:             %[[VAL_64:.*]] = arith.constant 0 : index
+! CHECK-DAG:             %[[VAL_65:.*]]:3 = fir.box_dims %[[VAL_63]], %[[VAL_64]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK-DAG:             %[[VAL_66:.*]] = fir.shape %[[VAL_65]]#1 : (index) -> !fir.shape<1>
+! CHECK-DAG:             %[[VAL_67:.*]] = fir.load %[[VAL_62]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
 ! CHECK:                 %[[VAL_68:.*]] = hlfir.elemental %[[VAL_66]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
 ! CHECK:                 ^bb0(%[[VAL_69:.*]]: index):
 ! CHECK:                   %[[VAL_70:.*]] = arith.constant 0 : index
@@ -240,15 +240,15 @@ program reduce15
 ! CHECK:                   %[[VAL_72:.*]] = arith.constant 1 : index
 ! CHECK:                   %[[VAL_73:.*]] = arith.subi %[[VAL_71]]#0, %[[VAL_72]] : index
 ! CHECK:                   %[[VAL_74:.*]] = arith.addi %[[VAL_69]], %[[VAL_73]] : index
-! CHECK:                   %[[VAL_75:.*]] = hlfir.designate %[[VAL_63]] (%[[VAL_74]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
-! CHECK:                   %[[VAL_76:.*]] = fir.load %[[VAL_75]] : !fir.ref<i32>
-! CHECK:                   %[[VAL_77:.*]] = arith.constant 0 : index
-! CHECK:                   %[[VAL_78:.*]]:3 = fir.box_dims %[[VAL_67]], %[[VAL_77]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-! CHECK:                   %[[VAL_79:.*]] = arith.constant 1 : index
-! CHECK:                   %[[VAL_80:.*]] = arith.subi %[[VAL_78]]#0, %[[VAL_79]] : index
-! CHECK:                   %[[VAL_81:.*]] = arith.addi %[[VAL_69]], %[[VAL_80]] : index
-! CHECK:                   %[[VAL_82:.*]] = hlfir.designate %[[VAL_67]] (%[[VAL_81]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
-! CHECK:                   %[[VAL_83:.*]] = fir.load %[[VAL_82]] : !fir.ref<i32>
+! CHECK-DAG:                   %[[VAL_75:.*]] = hlfir.designate %[[VAL_63]] (%[[VAL_74]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK-DAG:                   %[[VAL_76:.*]] = fir.load %[[VAL_75]] : !fir.ref<i32>
+! CHECK-DAG:                   %[[VAL_77:.*]] = arith.constant 0 : index
+! CHECK-DAG:                   %[[VAL_78:.*]]:3 = fir.box_dims %[[VAL_67]], %[[VAL_77]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK-DAG:                   %[[VAL_79:.*]] = arith.constant 1 : index
+! CHECK-DAG:                   %[[VAL_80:.*]] = arith.subi %[[VAL_78]]#0, %[[VAL_79]] : index
+! CHECK-DAG:                   %[[VAL_81:.*]] = arith.addi %[[VAL_69]], %[[VAL_80]] : index
+! CHECK-DAG:                   %[[VAL_82:.*]] = hlfir.designate %[[VAL_67]] (%[[VAL_81]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK-DAG:                   %[[VAL_83:.*]] = fir.load %[[VAL_82]] : !fir.ref<i32>
 ! CHECK:                   %[[VAL_84:.*]] = arith.cmpi sgt, %[[VAL_76]], %[[VAL_83]] : i32
 ! CHECK:                   %[[VAL_85:.*]] = arith.select %[[VAL_84]], %[[VAL_76]], %[[VAL_83]] : i32
 ! CHECK:                   hlfir.yield_element %[[VAL_85]] : i32
@@ -269,27 +269,27 @@ program reduce15
 ! CHECK:                 %[[VAL_88:.*]]:2 = hlfir.declare %[[VAL_87]] {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_94:.*]]:2 = hlfir.declare %[[VAL_92]] {fortran_attrs = {{.*}}<allocatable>, uniq_name = "_QFEmins"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
 ! CHECK:                 hlfir.assign %[[VAL_93]] to %[[VAL_88]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_95:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK:                 %[[VAL_96:.*]] = arith.constant 0 : index
-! CHECK:                 %[[VAL_97:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_96]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-! CHECK:                 %[[VAL_98:.*]] = fir.shape %[[VAL_97]]#1 : (index) -> !fir.shape<1>
-! CHECK:                 %[[VAL_99:.*]] = fir.load %[[VAL_94]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK:                 %[[VAL_100:.*]] = hlfir.elemental %[[VAL_98]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+! CHECK-DAG:                 %[[VAL_95:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK-DAG:                 %[[VAL_96:.*]] = arith.constant 0 : index
+! CHECK-DAG:                 %[[VAL_97:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_96]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK-DAG:                 %[[VAL_98:.*]] = fir.shape %[[VAL_97]]#1 : (index) -> !fir.shape<1>
+! CHECK-DAG:                 %[[VAL_99:.*]] = fir.load %[[VAL_94]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK-DAG:                 %[[VAL_100:.*]] = hlfir.elemental %[[VAL_98]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
 ! CHECK:                 ^bb0(%[[VAL_101:.*]]: index):
 ! CHECK:                   %[[VAL_102:.*]] = arith.constant 0 : index
 ! CHECK:                   %[[VAL_103:.*]]:3 = fir.box_dims %[[VAL_95]], %[[VAL_102]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
 ! CHECK:                   %[[VAL_104:.*]] = arith.constant 1 : index
 ! CHECK:                   %[[VAL_105:.*]] = arith.subi %[[VAL_103]]#0, %[[VAL_104]] : index
 ! CHECK:                   %[[VAL_106:.*]] = arith.addi %[[VAL_101]], %[[VAL_105]] : index
-! CHECK:                   %[[VAL_107:.*]] = hlfir.designate %[[VAL_95]] (%[[VAL_106]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
-! CHECK:                   %[[VAL_108:.*]] = fir.load %[[VAL_107]] : !fir.ref<i32>
-! CHECK:                   %[[VAL_109:.*]] = arith.constant 0 : index
-! CHECK:                   %[[VAL_110:.*]]:3 = fir.box_dims %[[VAL_99]], %[[VAL_109]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-! CHECK:                   %[[VAL_111:.*]] = arith.constant 1 : index
-! CHECK:                   %[[VAL_112:.*]] = arith.subi %[[VAL_110]]#0, %[[VAL_111]] : index
-! CHECK:                   %[[VAL_113:.*]] = arith.addi %[[VAL_101]], %[[VAL_112]] : index
-! CHECK:                   %[[VAL_114:.*]] = hlfir.designate %[[VAL_99]] (%[[VAL_113]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
-! CHECK:                   %[[VAL_115:.*]] = fir.load %[[VAL_114]] : !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_107:.*]] = hlfir.designate %[[VAL_95]] (%[[VAL_106]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_108:.*]] = fir.load %[[VAL_107]] : !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_109:.*]] = arith.constant 0 : index
+! CHECK-DAG:               %[[VAL_110:.*]]:3 = fir.box_dims %[[VAL_99]], %[[VAL_109]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+! CHECK-DAG:               %[[VAL_111:.*]] = arith.constant 1 : index
+! CHECK-DAG:               %[[VAL_112:.*]] = arith.subi %[[VAL_110]]#0, %[[VAL_111]] : index
+! CHECK-DAG:               %[[VAL_113:.*]] = arith.addi %[[VAL_101]], %[[VAL_112]] : index
+! CHECK-DAG:               %[[VAL_114:.*]] = hlfir.designate %[[VAL_99]] (%[[VAL_113]])  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
+! CHECK-DAG:               %[[VAL_115:.*]] = fir.load %[[VAL_114]] : !fir.ref<i32>
 ! CHECK:                   %[[VAL_116:.*]] = arith.cmpi slt, %[[VAL_108]], %[[VAL_115]] : i32
 ! CHECK:                   %[[VAL_117:.*]] = arith.select %[[VAL_116]], %[[VAL_108]], %[[VAL_115]] : i32
 ! CHECK:                   hlfir.yield_element %[[VAL_117]] : i32
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
index d27804fb5606e..69219331ab3ab 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max-byref.f90
@@ -53,11 +53,11 @@
 ! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 hlfir.assign %[[VAL_13]] to %[[VAL_8]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK-DAG:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
 ! CHECK:                 %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
index 5eddd8655d9a8..83582d279fd3d 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-max.f90
@@ -43,11 +43,11 @@
 ! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_max_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_max_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 hlfir.assign %[[VAL_13]] to %[[VAL_8]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK-DAG:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
 ! CHECK:                 %[[VAL_20:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
index e1b5c3d6eaae3..f691d57e276df 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min-byref.f90
@@ -53,11 +53,11 @@
 ! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 hlfir.assign %[[VAL_13]] to %[[VAL_8]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK-DAG:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
 ! CHECK:                 %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
index 326447d6fd5c6..3ee2ecc50e19a 100644
--- a/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-min.f90
@@ -43,11 +43,11 @@
 ! CHECK:                 %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFreduction_min_intEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFreduction_min_intEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
 ! CHECK:                 hlfir.assign %[[VAL_13]] to %[[VAL_8]]#0 : i32, !fir.ref<i32>
-! CHECK:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
-! CHECK:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
-! CHECK:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-! CHECK:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_15:.*]] = fir.load %[[VAL_8]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (i32) -> i64
+! CHECK-DAG:                 %[[VAL_17:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_16]])  : (!fir.box<!fir.array<?xi32>>, i64) -> !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_18:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
+! CHECK-DAG:                 %[[VAL_19:.*]] = fir.load %[[VAL_17]] : !fir.ref<i32>
 ! CHECK:                 %[[VAL_20:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 %[[VAL_21:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_19]] : i32
 ! CHECK:                 hlfir.assign %[[VAL_21]] to %[[VAL_14]]#0 : i32, !fir.ref<i32>