[flang-commits] [flang] [flang] Fetch the initial reduction value from the input array. (PR #136790)

Tue Apr 22 17:14:18 PDT 2025

https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/136790

Instead of using a loop-carried IsFirst predicate, we can fetch
the initial reduction values for MIN/MAX LOC/VAL reductions
from the array itself. This results in slightly cleaner
loop nests, especially those generated for total reductions.
Otherwise, LLVM is able to peel the first iteration of the innermost
loop, but the surroundings of the peeled code are executed
multiple times within the outer loop(s).

This patch does the manual peeling, which only works for
non-masked reductions where the input array is not empty.


>From 144fd509737b9d38d4e501df0f6cb817eee143cf Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 22 Apr 2025 08:20:01 -0700
Subject: [PATCH] [flang] Fetch the initial reduction value from the input
 array.

Instead of using a loop-carried IsFirst predicate, we can fetch
the initial reduction values for MIN/MAX LOC/VAL reductions
from the array itself. This results in slightly cleaner
loop nests, especially those generated for total reductions.
Otherwise, LLVM is able to peel the first iteration of the innermost
loop, but the surroundings of the peeled code are executed
multiple times within the outer loop(s).

This patch does the manual peeling, which only works for
non-masked reductions where the input array is not empty.
---
 .../Transforms/SimplifyHLFIRIntrinsics.cpp    | 259 +++++++++++++++---
 .../simplify-hlfir-intrinsics-maxloc.fir      | 123 +++++++++
 .../simplify-hlfir-intrinsics-maxval.fir      |  84 ++++++
 .../simplify-hlfir-intrinsics-minloc.fir      | 123 +++++++++
 .../simplify-hlfir-intrinsics-minval.fir      |  84 ++++++
 5 files changed, 633 insertions(+), 40 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index e9d820adbd22b..54746a45b1aec 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -232,7 +232,17 @@ class ReductionAsElementalConverter {
   /// by the reduction loop. In general, there is a single
   /// loop-carried reduction value (e.g. for SUM), but, for example,
   /// MAXLOC/MINLOC implementation uses multiple reductions.
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() = 0;
+  /// \p oneBasedIndices contains any array indices predefined
+  /// before the reduction loop, i.e. it is empty for total
+  /// reductions, and contains the one-based indices of the wrapping
+  /// hlfir.elemental.
+  /// \p extents are the pre-computed extents of the input array.
+  /// For total reductions, \p extents holds extents of all dimensions.
+  /// For partial reductions, \p extents holds a single extent
+  /// of the DIM dimension.
+  virtual llvm::SmallVector<mlir::Value>
+  genReductionInitValues(mlir::ValueRange oneBasedIndices,
+                         const llvm::SmallVectorImpl<mlir::Value> &extents) = 0;
 
   /// Perform reduction(s) update given a single input array's element
   /// identified by \p array and \p oneBasedIndices coordinates.
@@ -396,6 +406,54 @@ genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder,
   llvm_unreachable("unsupported type");
 }
 
+// Emit an i1 value that is true iff every extent in \p extents
+// is non-zero, i.e. the array with these extents is not empty.
+static mlir::Value
+genIsNotEmptyArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder,
+                          const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  mlir::Value nonEmpty = builder.createBool(loc, true);
+  for (mlir::Value dimExtent : extents) {
+    mlir::Value zeroExtent =
+        fir::factory::createZeroValue(builder, loc, dimExtent.getType());
+    mlir::Value nonZero = builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::ne, dimExtent, zeroExtent);
+    nonEmpty = builder.create<mlir::arith::AndIOp>(loc, nonEmpty, nonZero);
+  }
+  return nonEmpty;
+}
+
+// Helper for MIN/MAX LOC/VAL reductions: compute the one-based
+// indices of the first element of the input array (total reduction)
+// or of the array section being reduced (partial reduction).
+//
+// In case of total reduction \p oneBasedIndices must be empty and
+// the result is \p rank constant-one indices. Otherwise,
+// \p oneBasedIndices are the RANK-1 one-based indices of the wrapping
+// hlfir.elemental, and a constant-one index is inserted into them
+// at the (one-based) position \p dim, which must be valid.
+// All constant-one indices have the index type.
+static llvm::SmallVector<mlir::Value> genFirstElementIndicesForReduction(
+    mlir::Location loc, fir::FirOpBuilder &builder, bool isTotalReduction,
+    mlir::FailureOr<int64_t> dim, unsigned rank,
+    mlir::ValueRange oneBasedIndices) {
+  llvm::SmallVector<mlir::Value> firstIndices{oneBasedIndices};
+  mlir::Value oneIdx =
+      builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+  if (!isTotalReduction) {
+    assert(oneBasedIndices.size() == rank - 1 &&
+           "there must be RANK-1 indices for partial reduction");
+    assert(mlir::succeeded(dim) && "partial reduction with invalid DIM");
+    // The reduced dimension starts at one; splice it in at DIM.
+    firstIndices.insert(firstIndices.begin() + *dim - 1, oneIdx);
+    return firstIndices;
+  }
+  assert(oneBasedIndices.empty() &&
+         "wrong number of indices for total reduction");
+  // No enclosing indices: address the element (1, ..., 1).
+  firstIndices.append(rank, oneIdx);
+  return firstIndices;
+}
+
 /// Implementation of ReductionAsElementalConverter interface
 /// for MAXLOC/MINLOC.
 template <typename T>
@@ -410,6 +468,9 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   //   * 1 reduction value holding the current MIN/MAX.
   //   * 1 boolean indicating whether it is the first time
   //     the mask is true.
+  //
+  // If precomputeFirst() returns true, then the boolean loop-carried
+  // value is not used.
   static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2;
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxlocOp>;
   using Base = ReductionAsElementalConverter;
@@ -444,7 +505,9 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
     return getResultRank() == 0 || !getDim();
   }
 
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final;
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      mlir::ValueRange oneBasedIndices,
+      const llvm::SmallVectorImpl<mlir::Value> &extents) final;
   virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
                    hlfir::Entity array, mlir::ValueRange oneBasedIndices) final;
@@ -460,8 +523,12 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
 
   void
   checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
-    assert(reductions.size() == getNumCoors() + 2 &&
-           "invalid number of reductions for MINLOC/MAXLOC");
+    if (precomputeFirst())
+      assert(reductions.size() == getNumCoors() + 1 &&
+             "invalid number of reductions for MINLOC/MAXLOC");
+    else
+      assert(reductions.size() == getNumCoors() + 2 &&
+             "invalid number of reductions for MINLOC/MAXLOC");
   }
 
   mlir::Value
@@ -473,13 +540,52 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   mlir::Value
   getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
     checkReductions(reductions);
+    assert(!precomputeFirst() && "IsFirst predicate must not be used");
     return reductions[getNumCoors() + 1];
   }
+
+  // Return true iff the reductions can be initialized
+  // by reading the first element of the array (or its section).
+  // If it returns false, then we use an auxiliary boolean
+  // to identify the very first reduction update.
+  bool precomputeFirst() const { return !getMask(); }
 };
 
 template <typename T>
 llvm::SmallVector<mlir::Value>
-MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
+MinMaxlocAsElementalConverter<T>::genReductionInitValues(
+    mlir::ValueRange oneBasedIndices,
+    const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  fir::IfOp ifOp;
+  if (precomputeFirst()) {
+    // Check if we can load the value of the first element in the array
+    // or its section (for partial reduction).
+    assert(extents.size() == getNumCoors() &&
+           "wrong number of extents for MINLOC/MAXLOC reduction");
+    mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents);
+
+    llvm::SmallVector<mlir::Value> indices = genFirstElementIndicesForReduction(
+        loc, builder, isTotalReduction(), getConstDim(), getSourceRank(),
+        oneBasedIndices);
+
+    llvm::SmallVector<mlir::Type> ifTypes(getNumCoors(),
+                                          getResultElementType());
+    ifTypes.push_back(getSourceElementType());
+    ifOp = builder.create<fir::IfOp>(loc, ifTypes, isNotEmpty,
+                                     /*withElseRegion=*/true);
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    mlir::Value one =
+        builder.createIntegerConstant(loc, getResultElementType(), 1);
+    llvm::SmallVector<mlir::Value> results(getNumCoors(), one);
+    mlir::Value minMaxFirst =
+        hlfir::loadElementAt(loc, builder, hlfir::Entity{getSource()}, indices);
+    results.push_back(minMaxFirst);
+    builder.create<fir::ResultOp>(loc, results);
+
+    // In the 'else' block use default init values.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+  }
+
   // Initial value for the coordinate(s) is zero.
   mlir::Value zeroCoor =
       fir::factory::createZeroValue(builder, loc, getResultElementType());
@@ -490,11 +596,17 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
       genMinMaxInitValue<isMax>(loc, builder, getSourceElementType());
   result.push_back(minMaxInit);
 
-  // Initial value for isFirst predicate. It is switched to false,
-  // when the reduction update dynamically happens inside the reduction
-  // loop.
-  mlir::Value trueVal = builder.createBool(loc, true);
-  result.push_back(trueVal);
+  if (ifOp) {
+    builder.create<fir::ResultOp>(loc, result);
+    builder.setInsertionPointAfter(ifOp);
+    result = ifOp.getResults();
+  } else {
+    // Initial value for isFirst predicate. It is switched to false,
+    // when the reduction update dynamically happens inside the reduction
+    // loop.
+    mlir::Value trueVal = builder.createBool(loc, true);
+    result.push_back(trueVal);
+  }
 
   return result;
 }
@@ -509,9 +621,12 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
       hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
   mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue,
                                                getCurrentMinMax(currentValue));
-  // If isFirst is true, then do the reduction update regardless
-  // of the FP comparison.
-  cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+  if (!precomputeFirst()) {
+    // If isFirst is true, then do the reduction update regardless
+    // of the FP comparison.
+    cmp =
+        builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+  }
 
   llvm::SmallVector<mlir::Value> newIndices;
   int64_t dim = 1;
@@ -537,8 +652,10 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
       loc, cmp, elementValue, getCurrentMinMax(currentValue));
   newIndices.push_back(newMinMax);
 
-  mlir::Value newIsFirst = builder.createBool(loc, false);
-  newIndices.push_back(newIsFirst);
+  if (!precomputeFirst()) {
+    mlir::Value newIsFirst = builder.createBool(loc, false);
+    newIndices.push_back(newIsFirst);
+  }
 
   assert(currentValue.size() == newIndices.size() &&
          "invalid number of updated reductions");
@@ -629,7 +746,8 @@ class MinMaxvalAsElementalConverter
   //
   // The boolean flag is used to replace the initial value
   // with the first input element even if it is NaN.
-  static constexpr unsigned numReductions = 2;
+  // If precomputeFirst() returns true, then the boolean loop-carried
+  // value is not used.
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxvalOp>;
   using Base = NumericReductionAsElementalConverterBase<T>;
 
@@ -646,19 +764,9 @@ class MinMaxvalAsElementalConverter
     return mlir::success();
   }
 
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
-    llvm::SmallVector<mlir::Value> result;
-    fir::FirOpBuilder &builder = this->builder;
-    mlir::Location loc = this->loc;
-    mlir::Value init =
-        genMinMaxInitValue<isMax>(loc, builder, this->getResultElementType());
-    result.push_back(init);
-    // Initial value for isFirst predicate. It is switched to false,
-    // when the reduction update dynamically happens inside the reduction
-    // loop.
-    result.push_back(builder.createBool(loc, true));
-    return result;
-  }
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      mlir::ValueRange oneBasedIndices,
+      const llvm::SmallVectorImpl<mlir::Value> &extents) final;
 
   virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
@@ -673,12 +781,14 @@ class MinMaxvalAsElementalConverter
     mlir::Value currentMinMax = getCurrentMinMax(currentValue);
     mlir::Value cmp =
         genMinMaxComparison<isMax>(loc, builder, elementValue, currentMinMax);
-    cmp =
-        builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+    if (!precomputeFirst())
+      cmp = builder.create<mlir::arith::OrIOp>(loc, cmp,
+                                               getIsFirst(currentValue));
     mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>(
         loc, cmp, elementValue, currentMinMax);
     result.push_back(newMinMax);
-    result.push_back(builder.createBool(loc, false));
+    if (!precomputeFirst())
+      result.push_back(builder.createBool(loc, false));
     return result;
   }
 
@@ -690,7 +800,7 @@ class MinMaxvalAsElementalConverter
 
   void
   checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
-    assert(reductions.size() == numReductions &&
+    assert(reductions.size() == getNumReductions() &&
            "invalid number of reductions for MINVAL/MAXVAL");
   }
 
@@ -703,10 +813,70 @@ class MinMaxvalAsElementalConverter
   mlir::Value
   getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
     this->checkReductions(reductions);
+    assert(!precomputeFirst() && "IsFirst predicate must not be used");
     return reductions[1];
   }
+
+  // Return true iff the reductions can be initialized
+  // by reading the first element of the array (or its section).
+  // If it returns false, then we use an auxiliary boolean
+  // to identify the very first reduction update.
+  bool precomputeFirst() const { return !this->getMask(); }
+
+  std::size_t getNumReductions() const { return precomputeFirst() ? 1 : 2; }
 };
 
+// Generate the initial loop-carried value(s) for MINVAL/MAXVAL: without
+// a mask, the first array (section) element if all extents are non-zero,
+// else the default init value; with a mask, {default init, isFirst=true}.
+template <typename T>
+llvm::SmallVector<mlir::Value>
+MinMaxvalAsElementalConverter<T>::genReductionInitValues(
+    mlir::ValueRange oneBasedIndices,
+    const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  llvm::SmallVector<mlir::Value> result;
+  fir::FirOpBuilder &builder = this->builder;
+  mlir::Location loc = this->loc;
+  fir::IfOp ifOp;
+  if (precomputeFirst()) {
+    // Check if we can load the value of the first element in the array
+    // or its section (for partial reduction).
+    // The '?:' must be parenthesized: '==' binds tighter than '?:'.
+    assert(extents.size() ==
+               (this->isTotalReduction() ? this->getSourceRank() : 1u) &&
+           "wrong number of extents for MINVAL/MAXVAL reduction");
+    mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents);
+    llvm::SmallVector<mlir::Value> indices = genFirstElementIndicesForReduction(
+        loc, builder, this->isTotalReduction(), this->getConstDim(),
+        this->getSourceRank(), oneBasedIndices);
+    ifOp =
+        builder.create<fir::IfOp>(loc, this->getResultElementType(), isNotEmpty,
+                                  /*withElseRegion=*/true);
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    mlir::Value minMaxFirst = hlfir::loadElementAt(
+        loc, builder, hlfir::Entity{this->getSource()}, indices);
+    builder.create<fir::ResultOp>(loc, minMaxFirst);
+    // In the 'else' block use default init values.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+  }
+
+  mlir::Value init =
+      genMinMaxInitValue<isMax>(loc, builder, this->getResultElementType());
+  result.push_back(init);
+  if (ifOp) {
+    builder.create<fir::ResultOp>(loc, result);
+    builder.setInsertionPointAfter(ifOp);
+    result = ifOp.getResults();
+  } else {
+    // Initial value for isFirst predicate. It is switched to false,
+    // when the reduction update dynamically happens inside the reduction
+    // loop.
+    result.push_back(builder.createBool(loc, true));
+  }
+
+  return result;
+}
+
 /// Reduction converter for SUM.
 class SumAsElementalConverter
     : public NumericReductionAsElementalConverterBase<hlfir::SumOp> {
@@ -717,7 +887,10 @@ class SumAsElementalConverter
       : Base{op, rewriter} {}
 
 private:
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
     return {
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
@@ -781,7 +954,10 @@ class AllAnyAsElementalConverter
       : Base{op, rewriter} {}
 
 private:
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
     return {this->builder.createBool(this->loc, isAll ? true : false)};
   }
   virtual llvm::SmallVector<mlir::Value>
@@ -819,7 +995,10 @@ class CountAsElementalConverter
       : Base{op, rewriter} {}
 
 private:
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
     return {
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
@@ -881,10 +1060,6 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() {
     // Loop over all indices in the DIM dimension, and reduce all values.
     // If DIM is not present, do total reduction.
 
-    // Initial value for the reduction.
-    llvm::SmallVector<mlir::Value, 1> reductionInitValues =
-        genReductionInitValues();
-
     llvm::SmallVector<mlir::Value> extents;
     if (isTotalReduce)
       extents = arrayExtents;
@@ -892,6 +1067,10 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() {
       extents.push_back(
           builder.createConvert(loc, builder.getIndexType(), dimExtent));
 
+    // Initial value for the reduction.
+    llvm::SmallVector<mlir::Value, 1> reductionInitValues =
+        genReductionInitValues(inputIndices, extents);
+
     auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
                        mlir::ValueRange oneBasedIndices,
                        mlir::ValueRange reductionArgs)
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
index 4e9f5d0ebb08a..f917f99c2b9ab 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
@@ -294,6 +294,129 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir
 // CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
 // CHECK:         }
 
+func.func @test_total_expr_nomask(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.maxloc %input {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr_nomask(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_18:.*]] = arith.andi %[[VAL_16]], %[[VAL_17]] : i1
+// CHECK:           %[[VAL_19:.*]]:4 = fir.if %[[VAL_18]] -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_20:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_7]], %[[VAL_7]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:             fir.result %[[VAL_6]], %[[VAL_6]], %[[VAL_6]], %[[VAL_20]] : i32, i32, i32, f32
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_5]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]] : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_21:.*]]:4 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_7]] to %[[VAL_13]] step %[[VAL_7]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_24:.*]]#0, %[[VAL_25:.*]] = %[[VAL_24]]#1, %[[VAL_26:.*]] = %[[VAL_24]]#2, %[[VAL_27:.*]] = %[[VAL_24]]#3) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_28:.*]]:4 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_7]] to %[[VAL_12]] step %[[VAL_7]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_25]], %[[VAL_32:.*]] = %[[VAL_26]], %[[VAL_33:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_34:.*]]:4 = fir.do_loop %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_11]] step %[[VAL_7]] unordered iter_args(%[[VAL_36:.*]] = %[[VAL_30]], %[[VAL_37:.*]] = %[[VAL_31]], %[[VAL_38:.*]] = %[[VAL_32]], %[[VAL_39:.*]] = %[[VAL_33]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_40:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_35]], %[[VAL_29]], %[[VAL_22]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_41:.*]] = arith.cmpf ogt, %[[VAL_40]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_39]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:                 %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:                 %[[VAL_46:.*]] = fir.convert %[[VAL_35]] : (index) -> i32
+// CHECK:                 %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_36]] : i32
+// CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                 %[[VAL_49:.*]] = arith.select %[[VAL_45]], %[[VAL_48]], %[[VAL_37]] : i32
+// CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                 %[[VAL_51:.*]] = arith.select %[[VAL_45]], %[[VAL_50]], %[[VAL_38]] : i32
+// CHECK:                 %[[VAL_52:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_39]] : f32
+// CHECK:                 fir.result %[[VAL_47]], %[[VAL_49]], %[[VAL_51]], %[[VAL_52]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_55:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_7]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56:.*]]#0 to %[[VAL_55]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_57:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56]]#1 to %[[VAL_57]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56]]#2 to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_59:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_59]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+func.func @test_partial_var_nomask(%input: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.maxloc %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32) -> !hlfir.expr<?x?xi32>
+  return %0 : !hlfir.expr<?x?xi32>
+}
+// CHECK-LABEL:   func.func @test_partial_var_nomask(
+// CHECK-SAME:                                       %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_7]]#1, %[[VAL_9]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_11:.*]] = hlfir.elemental %[[VAL_10]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index):
+// CHECK:             %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_8]]#1, %[[VAL_6]] : index
+// CHECK:             %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (i32, f32) {
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_12]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_13]], %[[VAL_21]] : index
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_17]]#0, %[[VAL_22]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
+// CHECK:               fir.result %[[VAL_3]], %[[VAL_24]] : i32, f32
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_2]], %[[VAL_1]] : i32, f32
+// CHECK:             }
+// CHECK:             %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %[[VAL_5]] to %[[VAL_8]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_27:.*]] = %[[VAL_28:.*]]#0, %[[VAL_29:.*]] = %[[VAL_28]]#1) -> (i32, f32) {
+// CHECK:               %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index
+// CHECK:               %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_36:.*]] = arith.addi %[[VAL_26]], %[[VAL_35]] : index
+// CHECK:               %[[VAL_37:.*]] = arith.subi %[[VAL_32]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_38:.*]] = arith.addi %[[VAL_13]], %[[VAL_37]] : index
+// CHECK:               %[[VAL_39:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]], %[[VAL_38]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref<f32>
+// CHECK:               %[[VAL_41:.*]] = arith.cmpf ogt, %[[VAL_40]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:               %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:               %[[VAL_46:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+// CHECK:               %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_27]] : i32
+// CHECK:               %[[VAL_48:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_29]] : f32
+// CHECK:               fir.result %[[VAL_47]], %[[VAL_48]] : i32, f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_49:.*]]#0 : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_11]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
 // Character comparisons are not supported yet.
 func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
   %0 = hlfir.maxloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
index 8f414e5c4b563..9f4b4f4516d04 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
@@ -184,3 +184,87 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.b
 // CHECK:           }
 // CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
 // CHECK:         }
+
+func.func @test_partial_expr_nomask(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> { // Partial MAXVAL of a rank-2 hlfir.expr input with no MASK operand.
+  %dim = arith.constant 1 : i32 // DIM=1: the reduction runs over the first dimension.
+  %0 = hlfir.maxval %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64> // Expected IR below seeds the loop with element 1 of the reduced dimension when its extent is non-zero, else with -1.7976931348623157E+308.
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nomask(
+// CHECK-SAME:                                        %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_3]] : index
+// CHECK:             %[[VAL_11:.*]] = fir.if %[[VAL_10]] -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_2]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               fir.result %[[VAL_12]] : f64
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_1]] : f64
+// CHECK:             }
+// CHECK:             %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (f64) {
+// CHECK:               %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_14]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_15]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:               %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_15]] : f64
+// CHECK:               fir.result %[[VAL_22]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_13]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var_nomask(%input: !fir.box<!fir.array<?x?xf16>>) -> f16 { // Total MAXVAL of a boxed rank-2 f16 array with no DIM and no MASK operand.
+  %0 = hlfir.maxval %input {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>) -> f16 // Expected IR below loads the element at the first fir.box_dims result of each dimension when both extents are non-zero, else uses -6.550400e+04.
+  return %0 : f16
+}
+// CHECK-LABEL:   func.func @test_total_var_nomask(
+// CHECK-SAME:                                     %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>) -> f16 {
+// CHECK:           %[[VAL_1:.*]] = arith.constant -6.550400e+04 : f16
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_8:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
+// CHECK:           %[[VAL_9:.*]] = fir.if %[[VAL_8]] -> (f16) {
+// CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_12:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_10]]#0, %[[VAL_11]]#0)  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:             %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.ref<f16>
+// CHECK:             fir.result %[[VAL_13]] : f16
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_1]] : f16
+// CHECK:           }
+// CHECK:           %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (f16) {
+// CHECK:             %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f16) {
+// CHECK:               %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_15]], %[[VAL_24]] : index
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_23]], %[[VAL_25]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f16>
+// CHECK:               %[[VAL_28:.*]] = arith.cmpf ogt, %[[VAL_27]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_29:.*]] = arith.cmpf une, %[[VAL_19]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_30:.*]] = arith.cmpf oeq, %[[VAL_27]], %[[VAL_27]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
+// CHECK:               %[[VAL_32:.*]] = arith.ori %[[VAL_28]], %[[VAL_31]] : i1
+// CHECK:               %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_27]], %[[VAL_19]] : f16
+// CHECK:               fir.result %[[VAL_33]] : f16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_17]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : f16
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
index 0c17fd6fea92c..fb1a41c8b1d8b 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
@@ -294,6 +294,129 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir
 // CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
 // CHECK:         }
 
+func.func @test_total_expr_nomask(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> { // Total MINLOC of a rank-3 hlfir.expr input with no DIM and no MASK operand.
+  %0 = hlfir.minloc %input {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> // Expected IR: a fir.if picks the initial (1,1,1)/first-element state, or (0,0,0)/3.40282347E+38 when any extent is zero.
+  return %0 : !hlfir.expr<3xi32> // The directives below bind each multi-result use to the op that defines it (fir.if -> loop init, inner loop -> outer yield, outer loop -> stores).
+}
+// CHECK-LABEL:   func.func @test_total_expr_nomask(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_18:.*]] = arith.andi %[[VAL_16]], %[[VAL_17]] : i1
+// CHECK:           %[[VAL_19:.*]]:4 = fir.if %[[VAL_18]] -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_20:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_7]], %[[VAL_7]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:             fir.result %[[VAL_6]], %[[VAL_6]], %[[VAL_6]], %[[VAL_20]] : i32, i32, i32, f32
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_5]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]] : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_21:.*]]:4 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_7]] to %[[VAL_13]] step %[[VAL_7]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_19]]#0, %[[VAL_25:.*]] = %[[VAL_19]]#1, %[[VAL_26:.*]] = %[[VAL_19]]#2, %[[VAL_27:.*]] = %[[VAL_19]]#3) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_28:.*]]:4 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_7]] to %[[VAL_12]] step %[[VAL_7]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_25]], %[[VAL_32:.*]] = %[[VAL_26]], %[[VAL_33:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_34:.*]]:4 = fir.do_loop %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_11]] step %[[VAL_7]] unordered iter_args(%[[VAL_36:.*]] = %[[VAL_30]], %[[VAL_37:.*]] = %[[VAL_31]], %[[VAL_38:.*]] = %[[VAL_32]], %[[VAL_39:.*]] = %[[VAL_33]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_40:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_35]], %[[VAL_29]], %[[VAL_22]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_41:.*]] = arith.cmpf olt, %[[VAL_40]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_39]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:                 %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:                 %[[VAL_46:.*]] = fir.convert %[[VAL_35]] : (index) -> i32
+// CHECK:                 %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_36]] : i32
+// CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                 %[[VAL_49:.*]] = arith.select %[[VAL_45]], %[[VAL_48]], %[[VAL_37]] : i32
+// CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                 %[[VAL_51:.*]] = arith.select %[[VAL_45]], %[[VAL_50]], %[[VAL_38]] : i32
+// CHECK:                 %[[VAL_52:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_39]] : f32
+// CHECK:                 fir.result %[[VAL_47]], %[[VAL_49]], %[[VAL_51]], %[[VAL_52]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_34]]#0, %[[VAL_34]]#1, %[[VAL_34]]#2, %[[VAL_34]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_28]]#0, %[[VAL_28]]#1, %[[VAL_28]]#2, %[[VAL_28]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_55:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_7]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_21]]#0 to %[[VAL_55]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_57:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_21]]#1 to %[[VAL_57]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_21]]#2 to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_59:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_59]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+func.func @test_partial_var_nomask(%input: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> { // Partial MINLOC of a boxed rank-3 f32 array with no MASK operand.
+  %dim = arith.constant 2 : i32 // DIM=2: the reduction runs over the second dimension.
+  %0 = hlfir.minloc %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32) -> !hlfir.expr<?x?xi32> // Expected IR: a fir.if yields (1, first element) when the reduced extent is non-zero, else (0, 3.40282347E+38).
+  return %0 : !hlfir.expr<?x?xi32> // The directives below bind the loop init to the fir.if results and the yielded element to the loop result.
+}
+// CHECK-LABEL:   func.func @test_partial_var_nomask(
+// CHECK-SAME:                                       %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_7]]#1, %[[VAL_9]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_11:.*]] = hlfir.elemental %[[VAL_10]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index):
+// CHECK:             %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_8]]#1, %[[VAL_6]] : index
+// CHECK:             %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (i32, f32) {
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_12]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_13]], %[[VAL_21]] : index
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_17]]#0, %[[VAL_22]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
+// CHECK:               fir.result %[[VAL_3]], %[[VAL_24]] : i32, f32
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_2]], %[[VAL_1]] : i32, f32
+// CHECK:             }
+// CHECK:             %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %[[VAL_5]] to %[[VAL_8]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_27:.*]] = %[[VAL_15]]#0, %[[VAL_29:.*]] = %[[VAL_15]]#1) -> (i32, f32) {
+// CHECK:               %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index
+// CHECK:               %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_36:.*]] = arith.addi %[[VAL_26]], %[[VAL_35]] : index
+// CHECK:               %[[VAL_37:.*]] = arith.subi %[[VAL_32]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_38:.*]] = arith.addi %[[VAL_13]], %[[VAL_37]] : index
+// CHECK:               %[[VAL_39:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]], %[[VAL_38]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref<f32>
+// CHECK:               %[[VAL_41:.*]] = arith.cmpf olt, %[[VAL_40]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:               %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:               %[[VAL_46:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+// CHECK:               %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_27]] : i32
+// CHECK:               %[[VAL_48:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_29]] : f32
+// CHECK:               fir.result %[[VAL_47]], %[[VAL_48]] : i32, f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_25]]#0 : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_11]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
 // Character comparisons are not supported yet.
 func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
   %0 = hlfir.minloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
index 98e4c692fb72f..2c360f884a066 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
@@ -184,3 +184,87 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.b
 // CHECK:           }
 // CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
 // CHECK:         }
+
+func.func @test_partial_expr_nomask(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> { // Partial MINVAL of a rank-2 hlfir.expr input with no MASK operand.
+  %dim = arith.constant 1 : i32 // DIM=1: the reduction runs over the first dimension.
+  %0 = hlfir.minval %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64> // Expected IR below seeds the loop with element 1 of the reduced dimension when its extent is non-zero, else with 1.7976931348623157E+308.
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nomask(
+// CHECK-SAME:                                        %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_3]] : index
+// CHECK:             %[[VAL_11:.*]] = fir.if %[[VAL_10]] -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_2]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               fir.result %[[VAL_12]] : f64
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_1]] : f64
+// CHECK:             }
+// CHECK:             %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (f64) {
+// CHECK:               %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_14]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_15]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:               %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_15]] : f64
+// CHECK:               fir.result %[[VAL_22]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_13]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var_nomask(%input: !fir.box<!fir.array<?x?xf16>>) -> f16 { // Total MINVAL of a boxed rank-2 f16 array with no DIM and no MASK operand.
+  %0 = hlfir.minval %input {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>) -> f16 // Expected IR below loads the element at the first fir.box_dims result of each dimension when both extents are non-zero, else uses 6.550400e+04.
+  return %0 : f16
+}
+// CHECK-LABEL:   func.func @test_total_var_nomask(
+// CHECK-SAME:                                     %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>) -> f16 {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 6.550400e+04 : f16
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_8:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
+// CHECK:           %[[VAL_9:.*]] = fir.if %[[VAL_8]] -> (f16) {
+// CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_12:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_10]]#0, %[[VAL_11]]#0)  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:             %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.ref<f16>
+// CHECK:             fir.result %[[VAL_13]] : f16
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_1]] : f16
+// CHECK:           }
+// CHECK:           %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (f16) {
+// CHECK:             %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f16) {
+// CHECK:               %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_15]], %[[VAL_24]] : index
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_23]], %[[VAL_25]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f16>
+// CHECK:               %[[VAL_28:.*]] = arith.cmpf olt, %[[VAL_27]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_29:.*]] = arith.cmpf une, %[[VAL_19]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_30:.*]] = arith.cmpf oeq, %[[VAL_27]], %[[VAL_27]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
+// CHECK:               %[[VAL_32:.*]] = arith.ori %[[VAL_28]], %[[VAL_31]] : i1
+// CHECK:               %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_27]], %[[VAL_19]] : f16
+// CHECK:               fir.result %[[VAL_33]] : f16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_17]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : f16
+// CHECK:         }