[flang-commits] [flang] 7dad8b9 - [flang] Fetch the initial reduction value from the input array. (#136790)

Wed Apr 30 13:53:29 PDT 2025

Author: Slava Zakharin
Date: 2025-04-30T13:53:26-07:00
New Revision: 7dad8b91bc94741034052a4eb06ef45e7cb47c06

URL: https://github.com/llvm/llvm-project/commit/7dad8b91bc94741034052a4eb06ef45e7cb47c06
DIFF: https://github.com/llvm/llvm-project/commit/7dad8b91bc94741034052a4eb06ef45e7cb47c06.diff

LOG: [flang] Fetch the initial reduction value from the input array. (#136790)

Instead of using a loop-carried IsFirst predicate, we can fetch
the initial reduction values for MIN/MAX LOC/VAL reductions
from the array itself. This results in somewhat cleaner
loop nests, especially those generated for total reductions.
Otherwise, LLVM is able to peel the first iteration of the innermost
loop, but the surroundings of the peeled code are executed
multiple times within the outer loop(s).

This patch does the manual peeling, which only works for
non-masked reductions where the input array is not empty.

Added: 
    

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
    flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
    flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
    flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
    flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 1dea7d89e180d..79582390d1294 100644

--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -232,7 +232,17 @@ class ReductionAsElementalConverter {
   /// by the reduction loop. In general, there is a single
   /// loop-carried reduction value (e.g. for SUM), but, for example,
   /// MAXLOC/MINLOC implementation uses multiple reductions.
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() = 0;
+  /// \p oneBasedIndices contains any array indices predefined
+  /// before the reduction loop, i.e. it is empty for total
+  /// reductions, and contains the one-based indices of the wrapping
+  /// hlfir.elemental.
+  /// \p extents are the pre-computed extents of the input array.
+  /// For total reductions, \p extents holds extents of all dimensions.
+  /// For partial reductions, \p extents holds a single extent
+  /// of the DIM dimension.
+  virtual llvm::SmallVector<mlir::Value>
+  genReductionInitValues(mlir::ValueRange oneBasedIndices,
+                         const llvm::SmallVectorImpl<mlir::Value> &extents) = 0;
 
   /// Perform reduction(s) update given a single input array's element
   /// identified by \p array and \p oneBasedIndices coordinates.
@@ -396,6 +406,54 @@ genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder,
   llvm_unreachable("unsupported type");
 }
 
+// Generate a predicate value indicating that an array with the given
+// extents is not empty.
+static mlir::Value
+genIsNotEmptyArrayExtents(mlir::Location loc, fir::FirOpBuilder &builder,
+                          const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  mlir::Value isNotEmpty = builder.createBool(loc, true);
+  for (auto extent : extents) {
+    mlir::Value zero =
+        fir::factory::createZeroValue(builder, loc, extent.getType());
+    mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::ne, extent, zero);
+    isNotEmpty = builder.create<mlir::arith::AndIOp>(loc, isNotEmpty, cmp);
+  }
+  return isNotEmpty;
+}
+
+// Helper method for MIN/MAX LOC/VAL reductions.
+// It returns a vector of indices such that they address
+// the first element of an array (in case of total reduction)
+// or its section (in case of partial reduction).
+//
+// In case of total reduction oneBasedIndices must be empty,
+// otherwise, they contain the one based indices of the wrapping
+// hlfir.elemental.
+// Basically, the method adds the necessary number of constant-one
+// indices into oneBasedIndices.
+static llvm::SmallVector<mlir::Value> genFirstElementIndicesForReduction(
+    mlir::Location loc, fir::FirOpBuilder &builder, bool isTotalReduction,
+    mlir::FailureOr<int64_t> dim, unsigned rank,
+    mlir::ValueRange oneBasedIndices) {
+  llvm::SmallVector<mlir::Value> indices{oneBasedIndices};
+  mlir::Value one =
+      builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+  if (isTotalReduction) {
+    assert(oneBasedIndices.size() == 0 &&
+           "wrong number of indices for total reduction");
+    // Set indices to all-ones.
+    indices.append(rank, one);
+  } else {
+    assert(oneBasedIndices.size() == rank - 1 &&
+           "there must be RANK-1 indices for partial reduction");
+    assert(mlir::succeeded(dim) && "partial reduction with invalid DIM");
+    // Insert constant-one index at DIM dimension.
+    indices.insert(indices.begin() + *dim - 1, one);
+  }
+  return indices;
+}
+
 /// Implementation of ReductionAsElementalConverter interface
 /// for MAXLOC/MINLOC.
 template <typename T>
@@ -410,6 +468,9 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   //   * 1 reduction value holding the current MIN/MAX.
   //   * 1 boolean indicating whether it is the first time
   //     the mask is true.
+  //
+  // If useIsFirst() returns false, then the boolean loop-carried
+  // value is not used.
   static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2;
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxlocOp>;
   using Base = ReductionAsElementalConverter;
@@ -444,7 +505,9 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
     return getResultRank() == 0 || !getDim();
   }
 
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final;
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      mlir::ValueRange oneBasedIndices,
+      const llvm::SmallVectorImpl<mlir::Value> &extents) final;
   virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
                    hlfir::Entity array, mlir::ValueRange oneBasedIndices) final;
@@ -460,8 +523,12 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
 
   void
   checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
-    assert(reductions.size() == getNumCoors() + 2 &&
-           "invalid number of reductions for MINLOC/MAXLOC");
+    if (!useIsFirst())
+      assert(reductions.size() == getNumCoors() + 1 &&
+             "invalid number of reductions for MINLOC/MAXLOC");
+    else
+      assert(reductions.size() == getNumCoors() + 2 &&
+             "invalid number of reductions for MINLOC/MAXLOC");
   }
 
   mlir::Value
@@ -473,13 +540,62 @@ class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   mlir::Value
   getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
     checkReductions(reductions);
+    assert(useIsFirst() && "IsFirst predicate must not be used");
     return reductions[getNumCoors() + 1];
   }
+
+  // Return true iff the input can contain NaNs, and they should be
+  // honored, such that all-NaNs input must produce the location
+  // of the first unmasked NaN.
+  bool honorNans() const {
+    return !static_cast<bool>(getFastMath() & mlir::arith::FastMathFlags::nnan);
+  }
+
+  // Return true iff we have to use the loop-carried IsFirst predicate.
+  // If there is no mask, we can initialize the reductions using
+  // the first elements of the input.
+  // If NaNs are not honored, we can initialize the starting MIN/MAX
+  // value to +/-LARGEST; the coordinates are guaranteed to be updated
+  // properly for non-empty input without NaNs.
+  bool useIsFirst() const { return getMask() && honorNans(); }
 };
 
 template <typename T>
 llvm::SmallVector<mlir::Value>
-MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
+MinMaxlocAsElementalConverter<T>::genReductionInitValues(
+    mlir::ValueRange oneBasedIndices,
+    const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  fir::IfOp ifOp;
+  if (!useIsFirst() && honorNans()) {
+    // Check if we can load the value of the first element in the array
+    // or its section (for partial reduction).
+    assert(!getMask() && "cannot fetch first element when mask is present");
+    assert(extents.size() == getNumCoors() &&
+           "wrong number of extents for MINLOC/MAXLOC reduction");
+    mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents);
+
+    llvm::SmallVector<mlir::Value> indices = genFirstElementIndicesForReduction(
+        loc, builder, isTotalReduction(), getConstDim(), getSourceRank(),
+        oneBasedIndices);
+
+    llvm::SmallVector<mlir::Type> ifTypes(getNumCoors(),
+                                          getResultElementType());
+    ifTypes.push_back(getSourceElementType());
+    ifOp = builder.create<fir::IfOp>(loc, ifTypes, isNotEmpty,
+                                     /*withElseRegion=*/true);
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    mlir::Value one =
+        builder.createIntegerConstant(loc, getResultElementType(), 1);
+    llvm::SmallVector<mlir::Value> results(getNumCoors(), one);
+    mlir::Value minMaxFirst =
+        hlfir::loadElementAt(loc, builder, hlfir::Entity{getSource()}, indices);
+    results.push_back(minMaxFirst);
+    builder.create<fir::ResultOp>(loc, results);
+
+    // In the 'else' block use default init values.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+  }
+
   // Initial value for the coordinate(s) is zero.
   mlir::Value zeroCoor =
       fir::factory::createZeroValue(builder, loc, getResultElementType());
@@ -490,11 +606,17 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
       genMinMaxInitValue<isMax>(loc, builder, getSourceElementType());
   result.push_back(minMaxInit);
 
-  // Initial value for isFirst predicate. It is switched to false,
-  // when the reduction update dynamically happens inside the reduction
-  // loop.
-  mlir::Value trueVal = builder.createBool(loc, true);
-  result.push_back(trueVal);
+  if (ifOp) {
+    builder.create<fir::ResultOp>(loc, result);
+    builder.setInsertionPointAfter(ifOp);
+    result = ifOp.getResults();
+  } else if (useIsFirst()) {
+    // Initial value for isFirst predicate. It is switched to false,
+    // when the reduction update dynamically happens inside the reduction
+    // loop.
+    mlir::Value trueVal = builder.createBool(loc, true);
+    result.push_back(trueVal);
+  }
 
   return result;
 }
@@ -509,9 +631,12 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
       hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
   mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue,
                                                getCurrentMinMax(currentValue));
-  // If isFirst is true, then do the reduction update regardless
-  // of the FP comparison.
-  cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+  if (useIsFirst()) {
+    // If isFirst is true, then do the reduction update regardless
+    // of the FP comparison.
+    cmp =
+        builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+  }
 
   llvm::SmallVector<mlir::Value> newIndices;
   int64_t dim = 1;
@@ -537,8 +662,10 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
       loc, cmp, elementValue, getCurrentMinMax(currentValue));
   newIndices.push_back(newMinMax);
 
-  mlir::Value newIsFirst = builder.createBool(loc, false);
-  newIndices.push_back(newIsFirst);
+  if (useIsFirst()) {
+    mlir::Value newIsFirst = builder.createBool(loc, false);
+    newIndices.push_back(newIsFirst);
+  }
 
   assert(currentValue.size() == newIndices.size() &&
          "invalid number of updated reductions");
@@ -629,7 +756,8 @@ class MinMaxvalAsElementalConverter
   //
   // The boolean flag is used to replace the initial value
   // with the first input element even if it is NaN.
-  static constexpr unsigned numReductions = 2;
+  // If useIsFirst() returns false, then the boolean loop-carried
+  // value is not used.
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxvalOp>;
   using Base = NumericReductionAsElementalConverterBase<T>;
 
@@ -646,19 +774,9 @@ class MinMaxvalAsElementalConverter
     return mlir::success();
   }
 
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
-    llvm::SmallVector<mlir::Value> result;
-    fir::FirOpBuilder &builder = this->builder;
-    mlir::Location loc = this->loc;
-    mlir::Value init =
-        genMinMaxInitValue<isMax>(loc, builder, this->getResultElementType());
-    result.push_back(init);
-    // Initial value for isFirst predicate. It is switched to false,
-    // when the reduction update dynamically happens inside the reduction
-    // loop.
-    result.push_back(builder.createBool(loc, true));
-    return result;
-  }
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      mlir::ValueRange oneBasedIndices,
+      const llvm::SmallVectorImpl<mlir::Value> &extents) final;
 
   virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
@@ -673,12 +791,14 @@ class MinMaxvalAsElementalConverter
     mlir::Value currentMinMax = getCurrentMinMax(currentValue);
     mlir::Value cmp =
         genMinMaxComparison<isMax>(loc, builder, elementValue, currentMinMax);
-    cmp =
-        builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+    if (useIsFirst())
+      cmp = builder.create<mlir::arith::OrIOp>(loc, cmp,
+                                               getIsFirst(currentValue));
     mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>(
         loc, cmp, elementValue, currentMinMax);
     result.push_back(newMinMax);
-    result.push_back(builder.createBool(loc, false));
+    if (useIsFirst())
+      result.push_back(builder.createBool(loc, false));
     return result;
   }
 
@@ -690,7 +810,7 @@ class MinMaxvalAsElementalConverter
 
   void
   checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
-    assert(reductions.size() == numReductions &&
+    assert(reductions.size() == getNumReductions() &&
            "invalid number of reductions for MINVAL/MAXVAL");
   }
 
@@ -703,10 +823,80 @@ class MinMaxvalAsElementalConverter
   mlir::Value
   getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
     this->checkReductions(reductions);
+    assert(useIsFirst() && "IsFirst predicate must not be used");
     return reductions[1];
   }
+
+  // Return true iff the input can contain NaNs, and they should be
+  // honored, such that all-NaNs input must produce NaN result.
+  bool honorNans() const {
+    return !static_cast<bool>(this->getFastMath() &
+                              mlir::arith::FastMathFlags::nnan);
+  }
+
+  // Return true iff we have to use the loop-carried IsFirst predicate.
+  // If there is no mask, we can initialize the reductions using
+  // the first elements of the input.
+  // If NaNs are not honored, we can initialize the starting MIN/MAX
+  // value to +/-LARGEST.
+  bool useIsFirst() const { return this->getMask() && honorNans(); }
+
+  std::size_t getNumReductions() const { return useIsFirst() ? 2 : 1; }
 };
 
+template <typename T>
+llvm::SmallVector<mlir::Value>
+MinMaxvalAsElementalConverter<T>::genReductionInitValues(
+    mlir::ValueRange oneBasedIndices,
+    const llvm::SmallVectorImpl<mlir::Value> &extents) {
+  llvm::SmallVector<mlir::Value> result;
+  fir::FirOpBuilder &builder = this->builder;
+  mlir::Location loc = this->loc;
+
+  fir::IfOp ifOp;
+  if (!useIsFirst() && honorNans()) {
+    // Check if we can load the value of the first element in the array
+    // or its section (for partial reduction).
+    assert(!this->getMask() &&
+           "cannot fetch first element when mask is present");
+    assert(extents.size() ==
+               (this->isTotalReduction() ? this->getSourceRank() : 1u) &&
+           "wrong number of extents for MINVAL/MAXVAL reduction");
+    mlir::Value isNotEmpty = genIsNotEmptyArrayExtents(loc, builder, extents);
+    llvm::SmallVector<mlir::Value> indices = genFirstElementIndicesForReduction(
+        loc, builder, this->isTotalReduction(), this->getConstDim(),
+        this->getSourceRank(), oneBasedIndices);
+
+    ifOp =
+        builder.create<fir::IfOp>(loc, this->getResultElementType(), isNotEmpty,
+                                  /*withElseRegion=*/true);
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    mlir::Value minMaxFirst = hlfir::loadElementAt(
+        loc, builder, hlfir::Entity{this->getSource()}, indices);
+    builder.create<fir::ResultOp>(loc, minMaxFirst);
+
+    // In the 'else' block use default init values.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+  }
+
+  mlir::Value init =
+      genMinMaxInitValue<isMax>(loc, builder, this->getResultElementType());
+  result.push_back(init);
+
+  if (ifOp) {
+    builder.create<fir::ResultOp>(loc, result);
+    builder.setInsertionPointAfter(ifOp);
+    result = ifOp.getResults();
+  } else if (useIsFirst()) {
+    // Initial value for isFirst predicate. It is switched to false,
+    // when the reduction update dynamically happens inside the reduction
+    // loop.
+    result.push_back(builder.createBool(loc, true));
+  }
+
+  return result;
+}
+
 /// Reduction converter for SUM.
 class SumAsElementalConverter
     : public NumericReductionAsElementalConverterBase<hlfir::SumOp> {
@@ -717,7 +907,10 @@ class SumAsElementalConverter
       : Base{op, rewriter} {}
 
 private:
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
     return {
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
@@ -781,7 +974,10 @@ class AllAnyAsElementalConverter
       : Base{op, rewriter} {}
 
 private:
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
     return {this->builder.createBool(this->loc, isAll ? true : false)};
   }
   virtual llvm::SmallVector<mlir::Value>
@@ -819,7 +1015,10 @@ class CountAsElementalConverter
       : Base{op, rewriter} {}
 
 private:
-  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues(
+      [[maybe_unused]] mlir::ValueRange oneBasedIndices,
+      [[maybe_unused]] const llvm::SmallVectorImpl<mlir::Value> &extents)
+      final {
     return {
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
@@ -881,10 +1080,6 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() {
     // Loop over all indices in the DIM dimension, and reduce all values.
     // If DIM is not present, do total reduction.
 
-    // Initial value for the reduction.
-    llvm::SmallVector<mlir::Value, 1> reductionInitValues =
-        genReductionInitValues();
-
     llvm::SmallVector<mlir::Value> extents;
     if (isTotalReduce)
       extents = arrayExtents;
@@ -892,6 +1087,10 @@ mlir::LogicalResult ReductionAsElementalConverter::convert() {
       extents.push_back(
           builder.createConvert(loc, builder.getIndexType(), dimExtent));
 
+    // Initial value for the reduction.
+    llvm::SmallVector<mlir::Value, 1> reductionInitValues =
+        genReductionInitValues(inputIndices, extents);
+
     auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
                        mlir::ValueRange oneBasedIndices,
                        mlir::ValueRange reductionArgs)

diff  --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
index 4e9f5d0ebb08a..b285945027afb 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
@@ -294,6 +294,179 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir
 // CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
 // CHECK:         }
 
+func.func @test_total_expr_nomask(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.maxloc %input {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr_nomask(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_18:.*]] = arith.andi %[[VAL_16]], %[[VAL_17]] : i1
+// CHECK:           %[[VAL_19:.*]]:4 = fir.if %[[VAL_18]] -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_20:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_7]], %[[VAL_7]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:             fir.result %[[VAL_6]], %[[VAL_6]], %[[VAL_6]], %[[VAL_20]] : i32, i32, i32, f32
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_5]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]] : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_21:.*]]:4 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_7]] to %[[VAL_13]] step %[[VAL_7]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_24:.*]]#0, %[[VAL_25:.*]] = %[[VAL_24]]#1, %[[VAL_26:.*]] = %[[VAL_24]]#2, %[[VAL_27:.*]] = %[[VAL_24]]#3) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_28:.*]]:4 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_7]] to %[[VAL_12]] step %[[VAL_7]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_25]], %[[VAL_32:.*]] = %[[VAL_26]], %[[VAL_33:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_34:.*]]:4 = fir.do_loop %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_11]] step %[[VAL_7]] unordered iter_args(%[[VAL_36:.*]] = %[[VAL_30]], %[[VAL_37:.*]] = %[[VAL_31]], %[[VAL_38:.*]] = %[[VAL_32]], %[[VAL_39:.*]] = %[[VAL_33]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_40:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_35]], %[[VAL_29]], %[[VAL_22]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_41:.*]] = arith.cmpf ogt, %[[VAL_40]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_39]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:                 %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:                 %[[VAL_46:.*]] = fir.convert %[[VAL_35]] : (index) -> i32
+// CHECK:                 %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_36]] : i32
+// CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                 %[[VAL_49:.*]] = arith.select %[[VAL_45]], %[[VAL_48]], %[[VAL_37]] : i32
+// CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                 %[[VAL_51:.*]] = arith.select %[[VAL_45]], %[[VAL_50]], %[[VAL_38]] : i32
+// CHECK:                 %[[VAL_52:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_39]] : f32
+// CHECK:                 fir.result %[[VAL_47]], %[[VAL_49]], %[[VAL_51]], %[[VAL_52]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_55:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_7]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56:.*]]#0 to %[[VAL_55]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_57:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56]]#1 to %[[VAL_57]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56]]#2 to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_59:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_59]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+func.func @test_partial_var_nomask(%input: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.maxloc %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32) -> !hlfir.expr<?x?xi32>
+  return %0 : !hlfir.expr<?x?xi32>
+}
+// CHECK-LABEL:   func.func @test_partial_var_nomask(
+// CHECK-SAME:                                       %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_7]]#1, %[[VAL_9]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_11:.*]] = hlfir.elemental %[[VAL_10]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index):
+// CHECK:             %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_8]]#1, %[[VAL_6]] : index
+// CHECK:             %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (i32, f32) {
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_12]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_13]], %[[VAL_21]] : index
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_17]]#0, %[[VAL_22]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
+// CHECK:               fir.result %[[VAL_3]], %[[VAL_24]] : i32, f32
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_2]], %[[VAL_1]] : i32, f32
+// CHECK:             }
+// CHECK:             %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %[[VAL_5]] to %[[VAL_8]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_27:.*]] = %[[VAL_28:.*]]#0, %[[VAL_29:.*]] = %[[VAL_28]]#1) -> (i32, f32) {
+// CHECK:               %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index
+// CHECK:               %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_36:.*]] = arith.addi %[[VAL_26]], %[[VAL_35]] : index
+// CHECK:               %[[VAL_37:.*]] = arith.subi %[[VAL_32]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_38:.*]] = arith.addi %[[VAL_13]], %[[VAL_37]] : index
+// CHECK:               %[[VAL_39:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]], %[[VAL_38]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref<f32>
+// CHECK:               %[[VAL_41:.*]] = arith.cmpf ogt, %[[VAL_40]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:               %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:               %[[VAL_46:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+// CHECK:               %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_27]] : i32
+// CHECK:               %[[VAL_48:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_29]] : f32
+// CHECK:               fir.result %[[VAL_47]], %[[VAL_48]] : i32, f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_49:.*]]#0 : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_11]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
+// Test that 'nnan' allows using -LARGEST value as the reduction init.
+func.func @test_total_expr_nnan(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.maxloc %input {fastmath = #arith.fastmath<nnan>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr_nnan(
+// CHECK-SAME:                                    %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]]:4 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_11]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_6]], %[[VAL_15:.*]] = %[[VAL_6]], %[[VAL_16:.*]] = %[[VAL_6]], %[[VAL_17:.*]] = %[[VAL_5]]) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_18:.*]]:4 = fir.do_loop %[[VAL_19:.*]] = %[[VAL_4]] to %[[VAL_10]] step %[[VAL_4]] iter_args(%[[VAL_20:.*]] = %[[VAL_14]], %[[VAL_21:.*]] = %[[VAL_15]], %[[VAL_22:.*]] = %[[VAL_16]], %[[VAL_23:.*]] = %[[VAL_17]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_24:.*]]:4 = fir.do_loop %[[VAL_25:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_4]] iter_args(%[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_21]], %[[VAL_28:.*]] = %[[VAL_22]], %[[VAL_29:.*]] = %[[VAL_23]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_30:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_25]], %[[VAL_19]], %[[VAL_13]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_31:.*]] = arith.cmpf ogt, %[[VAL_30]], %[[VAL_29]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_32:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_33:.*]] = arith.cmpf oeq, %[[VAL_30]], %[[VAL_30]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1
+// CHECK:                 %[[VAL_35:.*]] = arith.ori %[[VAL_31]], %[[VAL_34]] : i1
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK:                 %[[VAL_37:.*]] = arith.select %[[VAL_35]], %[[VAL_36]], %[[VAL_26]] : i32
+// CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_19]] : (index) -> i32
+// CHECK:                 %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_38]], %[[VAL_27]] : i32
+// CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_13]] : (index) -> i32
+// CHECK:                 %[[VAL_41:.*]] = arith.select %[[VAL_35]], %[[VAL_40]], %[[VAL_28]] : i32
+// CHECK:                 %[[VAL_42:.*]] = arith.select %[[VAL_35]], %[[VAL_30]], %[[VAL_29]] : f32
+// CHECK:                 fir.result %[[VAL_37]], %[[VAL_39]], %[[VAL_41]], %[[VAL_42]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_43:.*]]#0, %[[VAL_43]]#1, %[[VAL_43]]#2, %[[VAL_43]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_44:.*]]#0, %[[VAL_44]]#1, %[[VAL_44]]#2, %[[VAL_44]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_45:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_4]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46:.*]]#0 to %[[VAL_45]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_47:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46]]#1 to %[[VAL_47]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_48:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46]]#2 to %[[VAL_48]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_49:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_49]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
 // Character comparisons are not supported yet.
 func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
   %0 = hlfir.maxloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>

diff  --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
index 8f414e5c4b563..87ed365f9de26 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
@@ -184,3 +184,118 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.b
 // CHECK:           }
 // CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
 // CHECK:         }
+
+func.func @test_partial_expr_nomask(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.maxval %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64>
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nomask(
+// CHECK-SAME:                                        %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_3]] : index
+// CHECK:             %[[VAL_11:.*]] = fir.if %[[VAL_10]] -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_2]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               fir.result %[[VAL_12]] : f64
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_1]] : f64
+// CHECK:             }
+// CHECK:             %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (f64) {
+// CHECK:               %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_14]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_15]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:               %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_15]] : f64
+// CHECK:               fir.result %[[VAL_22]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_13]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var_nomask(%input: !fir.box<!fir.array<?x?xf16>>) -> f16 {
+  %0 = hlfir.maxval %input {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>) -> f16
+  return %0 : f16
+}
+// CHECK-LABEL:   func.func @test_total_var_nomask(
+// CHECK-SAME:                                     %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>) -> f16 {
+// CHECK:           %[[VAL_1:.*]] = arith.constant -6.550400e+04 : f16
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_8:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
+// CHECK:           %[[VAL_9:.*]] = fir.if %[[VAL_8]] -> (f16) {
+// CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_12:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_10]]#0, %[[VAL_11]]#0)  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:             %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.ref<f16>
+// CHECK:             fir.result %[[VAL_13]] : f16
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_1]] : f16
+// CHECK:           }
+// CHECK:           %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (f16) {
+// CHECK:             %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f16) {
+// CHECK:               %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_15]], %[[VAL_24]] : index
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_23]], %[[VAL_25]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f16>
+// CHECK:               %[[VAL_28:.*]] = arith.cmpf ogt, %[[VAL_27]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_29:.*]] = arith.cmpf une, %[[VAL_19]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_30:.*]] = arith.cmpf oeq, %[[VAL_27]], %[[VAL_27]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
+// CHECK:               %[[VAL_32:.*]] = arith.ori %[[VAL_28]], %[[VAL_31]] : i1
+// CHECK:               %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_27]], %[[VAL_19]] : f16
+// CHECK:               fir.result %[[VAL_33]] : f16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_17]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : f16
+// CHECK:         }
+
+// Test that 'nnan' allows using -LARGEST value as the reduction init.
+func.func @test_partial_expr_nnan(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.maxval %input dim %dim {fastmath = #arith.fastmath<nnan>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64>
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nnan(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_8:.*]]: index):
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] iter_args(%[[VAL_11:.*]] = %[[VAL_2]]) -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_8]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_13:.*]] = arith.cmpf ogt, %[[VAL_12]], %[[VAL_11]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_14:.*]] = arith.cmpf une, %[[VAL_11]], %[[VAL_11]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_15:.*]] = arith.cmpf oeq, %[[VAL_12]], %[[VAL_12]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:               %[[VAL_17:.*]] = arith.ori %[[VAL_13]], %[[VAL_16]] : i1
+// CHECK:               %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_11]] : f64
+// CHECK:               fir.result %[[VAL_18]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_9]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : !hlfir.expr<?xf64>
+// CHECK:         }

diff  --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
index 0c17fd6fea92c..b9a7195b5f139 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
@@ -294,6 +294,179 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir
 // CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
 // CHECK:         }
 
+func.func @test_total_expr_nomask(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.minloc %input {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr_nomask(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_11]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_15:.*]] = arith.cmpi ne, %[[VAL_12]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:           %[[VAL_17:.*]] = arith.cmpi ne, %[[VAL_13]], %[[VAL_8]] : index
+// CHECK:           %[[VAL_18:.*]] = arith.andi %[[VAL_16]], %[[VAL_17]] : i1
+// CHECK:           %[[VAL_19:.*]]:4 = fir.if %[[VAL_18]] -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_20:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_7]], %[[VAL_7]], %[[VAL_7]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:             fir.result %[[VAL_6]], %[[VAL_6]], %[[VAL_6]], %[[VAL_20]] : i32, i32, i32, f32
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_5]], %[[VAL_5]], %[[VAL_5]], %[[VAL_4]] : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_21:.*]]:4 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_7]] to %[[VAL_13]] step %[[VAL_7]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_24:.*]]#0, %[[VAL_25:.*]] = %[[VAL_24]]#1, %[[VAL_26:.*]] = %[[VAL_24]]#2, %[[VAL_27:.*]] = %[[VAL_24]]#3) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_28:.*]]:4 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_7]] to %[[VAL_12]] step %[[VAL_7]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_25]], %[[VAL_32:.*]] = %[[VAL_26]], %[[VAL_33:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_34:.*]]:4 = fir.do_loop %[[VAL_35:.*]] = %[[VAL_7]] to %[[VAL_11]] step %[[VAL_7]] unordered iter_args(%[[VAL_36:.*]] = %[[VAL_30]], %[[VAL_37:.*]] = %[[VAL_31]], %[[VAL_38:.*]] = %[[VAL_32]], %[[VAL_39:.*]] = %[[VAL_33]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_40:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_35]], %[[VAL_29]], %[[VAL_22]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_41:.*]] = arith.cmpf olt, %[[VAL_40]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_39]], %[[VAL_39]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:                 %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:                 %[[VAL_46:.*]] = fir.convert %[[VAL_35]] : (index) -> i32
+// CHECK:                 %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_36]] : i32
+// CHECK:                 %[[VAL_48:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                 %[[VAL_49:.*]] = arith.select %[[VAL_45]], %[[VAL_48]], %[[VAL_37]] : i32
+// CHECK:                 %[[VAL_50:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                 %[[VAL_51:.*]] = arith.select %[[VAL_45]], %[[VAL_50]], %[[VAL_38]] : i32
+// CHECK:                 %[[VAL_52:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_39]] : f32
+// CHECK:                 fir.result %[[VAL_47]], %[[VAL_49]], %[[VAL_51]], %[[VAL_52]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_55:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_7]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56:.*]]#0 to %[[VAL_55]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_57:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56]]#1 to %[[VAL_57]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_56]]#2 to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_59:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_59]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+func.func @test_partial_var_nomask(%input: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.minloc %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32) -> !hlfir.expr<?x?xi32>
+  return %0 : !hlfir.expr<?x?xi32>
+}
+// CHECK-LABEL:   func.func @test_partial_var_nomask(
+// CHECK-SAME:                                       %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_2:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : i32
+// CHECK:           %[[VAL_4:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]] = fir.shape %[[VAL_7]]#1, %[[VAL_9]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_11:.*]] = hlfir.elemental %[[VAL_10]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_12:.*]]: index, %[[VAL_13:.*]]: index):
+// CHECK:             %[[VAL_14:.*]] = arith.cmpi ne, %[[VAL_8]]#1, %[[VAL_6]] : index
+// CHECK:             %[[VAL_15:.*]]:2 = fir.if %[[VAL_14]] -> (i32, f32) {
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_12]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.addi %[[VAL_13]], %[[VAL_21]] : index
+// CHECK:               %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_17]]#0, %[[VAL_22]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f32>
+// CHECK:               fir.result %[[VAL_3]], %[[VAL_24]] : i32, f32
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_2]], %[[VAL_1]] : i32, f32
+// CHECK:             }
+// CHECK:             %[[VAL_25:.*]]:2 = fir.do_loop %[[VAL_26:.*]] = %[[VAL_5]] to %[[VAL_8]]#1 step %[[VAL_5]] unordered iter_args(%[[VAL_27:.*]] = %[[VAL_28:.*]]#0, %[[VAL_29:.*]] = %[[VAL_28]]#1) -> (i32, f32) {
+// CHECK:               %[[VAL_30:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_32:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_33:.*]] = arith.subi %[[VAL_30]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_34:.*]] = arith.addi %[[VAL_12]], %[[VAL_33]] : index
+// CHECK:               %[[VAL_35:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_36:.*]] = arith.addi %[[VAL_26]], %[[VAL_35]] : index
+// CHECK:               %[[VAL_37:.*]] = arith.subi %[[VAL_32]]#0, %[[VAL_5]] : index
+// CHECK:               %[[VAL_38:.*]] = arith.addi %[[VAL_13]], %[[VAL_37]] : index
+// CHECK:               %[[VAL_39:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_34]], %[[VAL_36]], %[[VAL_38]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_40:.*]] = fir.load %[[VAL_39]] : !fir.ref<f32>
+// CHECK:               %[[VAL_41:.*]] = arith.cmpf olt, %[[VAL_40]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_42:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_43:.*]] = arith.cmpf oeq, %[[VAL_40]], %[[VAL_40]] fastmath<reassoc> : f32
+// CHECK:               %[[VAL_44:.*]] = arith.andi %[[VAL_42]], %[[VAL_43]] : i1
+// CHECK:               %[[VAL_45:.*]] = arith.ori %[[VAL_41]], %[[VAL_44]] : i1
+// CHECK:               %[[VAL_46:.*]] = fir.convert %[[VAL_26]] : (index) -> i32
+// CHECK:               %[[VAL_47:.*]] = arith.select %[[VAL_45]], %[[VAL_46]], %[[VAL_27]] : i32
+// CHECK:               %[[VAL_48:.*]] = arith.select %[[VAL_45]], %[[VAL_40]], %[[VAL_29]] : f32
+// CHECK:               fir.result %[[VAL_47]], %[[VAL_48]] : i32, f32
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_49:.*]]#0 : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_11]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
+// Test that 'nnan' allows using LARGEST value as the reduction init.
+func.func @test_total_expr_nnan(%input: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.minloc %input {fastmath = #arith.fastmath<nnan>} : (!hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr_nnan(
+// CHECK-SAME:                                    %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_10:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]]:4 = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_11]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_6]], %[[VAL_15:.*]] = %[[VAL_6]], %[[VAL_16:.*]] = %[[VAL_6]], %[[VAL_17:.*]] = %[[VAL_5]]) -> (i32, i32, i32, f32) {
+// CHECK:             %[[VAL_18:.*]]:4 = fir.do_loop %[[VAL_19:.*]] = %[[VAL_4]] to %[[VAL_10]] step %[[VAL_4]] iter_args(%[[VAL_20:.*]] = %[[VAL_14]], %[[VAL_21:.*]] = %[[VAL_15]], %[[VAL_22:.*]] = %[[VAL_16]], %[[VAL_23:.*]] = %[[VAL_17]]) -> (i32, i32, i32, f32) {
+// CHECK:               %[[VAL_24:.*]]:4 = fir.do_loop %[[VAL_25:.*]] = %[[VAL_4]] to %[[VAL_9]] step %[[VAL_4]] iter_args(%[[VAL_26:.*]] = %[[VAL_20]], %[[VAL_27:.*]] = %[[VAL_21]], %[[VAL_28:.*]] = %[[VAL_22]], %[[VAL_29:.*]] = %[[VAL_23]]) -> (i32, i32, i32, f32) {
+// CHECK:                 %[[VAL_30:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_25]], %[[VAL_19]], %[[VAL_13]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                 %[[VAL_31:.*]] = arith.cmpf olt, %[[VAL_30]], %[[VAL_29]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_32:.*]] = arith.cmpf une, %[[VAL_29]], %[[VAL_29]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_33:.*]] = arith.cmpf oeq, %[[VAL_30]], %[[VAL_30]] fastmath<nnan> : f32
+// CHECK:                 %[[VAL_34:.*]] = arith.andi %[[VAL_32]], %[[VAL_33]] : i1
+// CHECK:                 %[[VAL_35:.*]] = arith.ori %[[VAL_31]], %[[VAL_34]] : i1
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK:                 %[[VAL_37:.*]] = arith.select %[[VAL_35]], %[[VAL_36]], %[[VAL_26]] : i32
+// CHECK:                 %[[VAL_38:.*]] = fir.convert %[[VAL_19]] : (index) -> i32
+// CHECK:                 %[[VAL_39:.*]] = arith.select %[[VAL_35]], %[[VAL_38]], %[[VAL_27]] : i32
+// CHECK:                 %[[VAL_40:.*]] = fir.convert %[[VAL_13]] : (index) -> i32
+// CHECK:                 %[[VAL_41:.*]] = arith.select %[[VAL_35]], %[[VAL_40]], %[[VAL_28]] : i32
+// CHECK:                 %[[VAL_42:.*]] = arith.select %[[VAL_35]], %[[VAL_30]], %[[VAL_29]] : f32
+// CHECK:                 fir.result %[[VAL_37]], %[[VAL_39]], %[[VAL_41]], %[[VAL_42]] : i32, i32, i32, f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_43:.*]]#0, %[[VAL_43]]#1, %[[VAL_43]]#2, %[[VAL_43]]#3 : i32, i32, i32, f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_44:.*]]#0, %[[VAL_44]]#1, %[[VAL_44]]#2, %[[VAL_44]]#3 : i32, i32, i32, f32
+// CHECK:           }
+// CHECK:           %[[VAL_45:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_4]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46:.*]]#0 to %[[VAL_45]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_47:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46]]#1 to %[[VAL_47]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_48:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_46]]#2 to %[[VAL_48]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_49:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_1]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_49]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
 // Character comparisons are not supported yet.
 func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
   %0 = hlfir.minloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>

diff  --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
index 98e4c692fb72f..9ab419893c6f3 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
@@ -184,3 +184,118 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.b
 // CHECK:           }
 // CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
 // CHECK:         }
+
+func.func @test_partial_expr_nomask(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {  // partial MINVAL of a 2-D expression, no mask operand
+  %dim = arith.constant 1 : i32  // DIM=1: the expected reduction loop runs over extent 0 of %input
+  %0 = hlfir.minval %input dim %dim {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64>  // 'reassoc' only, no 'nnan'
+  return %0 : !hlfir.expr<?xf64>  // expected IR below: the init is read from %input when extent 0 is non-zero, else the 1.7976931348623157E+308 constant
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nomask(
+// CHECK-SAME:                                        %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]] = arith.cmpi ne, %[[VAL_5]], %[[VAL_3]] : index
+// CHECK:             %[[VAL_11:.*]] = fir.if %[[VAL_10]] -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_2]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               fir.result %[[VAL_12]] : f64
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_1]] : f64
+// CHECK:             }
+// CHECK:             %[[VAL_13:.*]] = fir.do_loop %[[VAL_14:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_15:.*]] = %[[VAL_11]]) -> (f64) {
+// CHECK:               %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_14]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_15]], %[[VAL_15]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:               %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:               %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_15]] : f64
+// CHECK:               fir.result %[[VAL_22]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_13]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var_nomask(%input: !fir.box<!fir.array<?x?xf16>>) -> f16 {  // total MINVAL (no DIM, no mask) of a boxed 2-D array
+  %0 = hlfir.minval %input {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>) -> f16  // 'reassoc' only, no 'nnan'
+  return %0 : f16  // expected IR below: the init is loaded from %input when both extents are non-zero, else the 6.550400e+04 constant
+}
+// CHECK-LABEL:   func.func @test_total_var_nomask(
+// CHECK-SAME:                                     %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>) -> f16 {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 6.550400e+04 : f16
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = arith.cmpi ne, %[[VAL_4]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_7:.*]] = arith.cmpi ne, %[[VAL_5]]#1, %[[VAL_3]] : index
+// CHECK:           %[[VAL_8:.*]] = arith.andi %[[VAL_6]], %[[VAL_7]] : i1
+// CHECK:           %[[VAL_9:.*]] = fir.if %[[VAL_8]] -> (f16) {
+// CHECK:             %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_12:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_10]]#0, %[[VAL_11]]#0)  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:             %[[VAL_13:.*]] = fir.load %[[VAL_12]] : !fir.ref<f16>
+// CHECK:             fir.result %[[VAL_13]] : f16
+// CHECK:           } else {
+// CHECK:             fir.result %[[VAL_1]] : f16
+// CHECK:           }
+// CHECK:           %[[VAL_14:.*]] = fir.do_loop %[[VAL_15:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_9]]) -> (f16) {
+// CHECK:             %[[VAL_17:.*]] = fir.do_loop %[[VAL_18:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_16]]) -> (f16) {
+// CHECK:               %[[VAL_20:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_20]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_18]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = arith.subi %[[VAL_21]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_25:.*]] = arith.addi %[[VAL_15]], %[[VAL_24]] : index
+// CHECK:               %[[VAL_26:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_23]], %[[VAL_25]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_26]] : !fir.ref<f16>
+// CHECK:               %[[VAL_28:.*]] = arith.cmpf olt, %[[VAL_27]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_29:.*]] = arith.cmpf une, %[[VAL_19]], %[[VAL_19]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_30:.*]] = arith.cmpf oeq, %[[VAL_27]], %[[VAL_27]] fastmath<reassoc> : f16
+// CHECK:               %[[VAL_31:.*]] = arith.andi %[[VAL_29]], %[[VAL_30]] : i1
+// CHECK:               %[[VAL_32:.*]] = arith.ori %[[VAL_28]], %[[VAL_31]] : i1
+// CHECK:               %[[VAL_33:.*]] = arith.select %[[VAL_32]], %[[VAL_27]], %[[VAL_19]] : f16
+// CHECK:               fir.result %[[VAL_33]] : f16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_17]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : f16
+// CHECK:         }
+
+// Test that 'nnan' allows using the LARGEST value as the reduction init (the expected IR has no initial-value fetch from %input).
+func.func @test_partial_expr_nnan(%input: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {  // partial MINVAL of a 2-D expression, no mask operand
+  %dim = arith.constant 1 : i32  // DIM=1: the expected reduction loop runs over extent 0 of %input
+  %0 = hlfir.minval %input dim %dim {fastmath = #arith.fastmath<nnan>} : (!hlfir.expr<?x?xf64>, i32) -> !hlfir.expr<?xf64>  // 'nnan' without 'reassoc': the expected loop is not marked 'unordered'
+  return %0 : !hlfir.expr<?xf64>  // expected IR below: the loop's iter_args start from the 1.7976931348623157E+308 constant
+}
+// CHECK-LABEL:   func.func @test_partial_expr_nnan(
+// CHECK-SAME:                                      %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.shape %[[VAL_5]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_7:.*]] = hlfir.elemental %[[VAL_6]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_8:.*]]: index):
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] iter_args(%[[VAL_11:.*]] = %[[VAL_2]]) -> (f64) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_8]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:               %[[VAL_13:.*]] = arith.cmpf olt, %[[VAL_12]], %[[VAL_11]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_14:.*]] = arith.cmpf une, %[[VAL_11]], %[[VAL_11]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_15:.*]] = arith.cmpf oeq, %[[VAL_12]], %[[VAL_12]] fastmath<nnan> : f64
+// CHECK:               %[[VAL_16:.*]] = arith.andi %[[VAL_14]], %[[VAL_15]] : i1
+// CHECK:               %[[VAL_17:.*]] = arith.ori %[[VAL_13]], %[[VAL_16]] : i1
+// CHECK:               %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_12]], %[[VAL_11]] : f64
+// CHECK:               fir.result %[[VAL_18]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_9]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : !hlfir.expr<?xf64>
+// CHECK:         }