[flang-commits] [flang] 7242896 - [Flang] Attempt to fix Nan handling in Minloc/Maxloc intrinsic simplification (#82313)

Wed Feb 21 01:31:33 PST 2024

Author: David Green
Date: 2024-02-21T09:31:29Z
New Revision: 7242896233635e553694507e6584decb43ee4a16

URL: https://github.com/llvm/llvm-project/commit/7242896233635e553694507e6584decb43ee4a16
DIFF: https://github.com/llvm/llvm-project/commit/7242896233635e553694507e6584decb43ee4a16.diff

LOG: [Flang] Attempt to fix Nan handling in Minloc/Maxloc intrinsic simplification (#82313)

In certain cases, "extreme" values like NaN, Inf and 0xffffffff could lead
to the inline-generated intrinsics behaving differently from the versions
in the runtimes (and from other compilers like gfortran). Some of the
examples I was using for testing are in
https://godbolt.org/z/x4EfqEss5.

This changes the generation for the intrinsics to be more like the
runtimes, using a condition that is similar to:
  isFirst || (prev != prev && elem == elem) || elem < prev
The middle part is only used for floating point operations, and checks
whether the previous value is NaN while the new element is not. This
should hopefully make the logic closer to: return the first element with
the lowest value (highest for maxloc), with NaNs ignored unless there are
only NaNs. The initial limit value for floats is also changed from the
largest float to Inf, to make sure infinities are handled correctly.

The integer reductions are also changed to use a similar scheme to make
sure they work with masked values. This means that the fix-up code that
used to follow the loop can be removed.

Added: 
    

Modified: 
    flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
    flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
    flang/test/HLFIR/maxloc-elemental.fir
    flang/test/HLFIR/minloc-elemental.fir
    flang/test/Transforms/simplifyintrinsics.fir

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c2512c7df32f46..685c73d6762570 100644

--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -852,9 +852,8 @@ class MinMaxlocElementalConversion : public mlir::OpRewritePattern<Op> {
                         mlir::Type elementType) {
       if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
         const llvm::fltSemantics &sem = ty.getFloatSemantics();
-        return builder.createRealConstant(
-            loc, elementType,
-            llvm::APFloat::getLargest(sem, /*Negative=*/isMax));
+        llvm::APFloat limit = llvm::APFloat::getInf(sem, /*Negative=*/isMax);
+        return builder.createRealConstant(loc, elementType, limit);
       }
       unsigned bits = elementType.getIntOrFloatBitWidth();
       int64_t limitInt =
@@ -895,7 +894,7 @@ class MinMaxlocElementalConversion : public mlir::OpRewritePattern<Op> {
       // Set flag that mask was true at some point
       mlir::Value flagSet = builder.createIntegerConstant(
           loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
-      builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+      mlir::Value isFirst = builder.create<fir::LoadOp>(loc, flagRef);
       mlir::Value addr = hlfir::getElementAt(loc, builder, hlfir::Entity{array},
                                              oneBasedIndices);
       mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
@@ -903,11 +902,22 @@ class MinMaxlocElementalConversion : public mlir::OpRewritePattern<Op> {
       // Compare with the max reduction value
       mlir::Value cmp;
       if (elementType.isa<mlir::FloatType>()) {
+        // For FP reductions we want the first smallest value to be used, that
+        // is not NaN. A OGL/OLT condition will usually work for this unless all
+        // the values are Nan or Inf. This follows the same logic as
+        // NumericCompare for Minloc/Maxlox in extrema.cpp.
         cmp = builder.create<mlir::arith::CmpFOp>(
             loc,
             isMax ? mlir::arith::CmpFPredicate::OGT
                   : mlir::arith::CmpFPredicate::OLT,
             elem, reduction);
+
+        mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>(
+            loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction);
+        mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>(
+            loc, mlir::arith::CmpFPredicate::OEQ, elem, elem);
+        cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2);
+        cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan);
       } else if (elementType.isa<mlir::IntegerType>()) {
         cmp = builder.create<mlir::arith::CmpIOp>(
             loc,
@@ -918,11 +928,18 @@ class MinMaxlocElementalConversion : public mlir::OpRewritePattern<Op> {
         llvm_unreachable("unsupported type");
       }
 
+      // The condition used for the loop is isFirst || <the condition above>.
+      isFirst = builder.create<fir::ConvertOp>(loc, cmp.getType(), isFirst);
+      isFirst = builder.create<mlir::arith::XOrIOp>(
+          loc, isFirst, builder.createIntegerConstant(loc, cmp.getType(), 1));
+      cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, isFirst);
+
       // Set the new coordinate to the result
       fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
                                                  /*withElseRegion*/ true);
 
       builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+      builder.create<fir::StoreOp>(loc, flagSet, flagRef);
       mlir::Type resultElemTy =
           hlfir::getFortranElementType(resultArr.getType());
       mlir::Type returnRefTy = builder.getRefType(resultElemTy);

diff  --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index 86343e23c6e5db..f483651a68dc17 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -649,42 +649,6 @@ void fir::genMinMaxlocReductionLoop(
       reductionVal = ifOp.getResult(0);
     }
   }
-
-  // Check for case where array was full of max values.
-  // flag will be 0 if mask was never true, 1 if mask was true as some point,
-  // this is needed to avoid catching cases where we didn't access any elements
-  // e.g. mask=.FALSE.
-  mlir::Value flagValue =
-      builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
-  mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
-      loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
-  fir::IfOp ifMaskTrueOp =
-      builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
-  builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());
-
-  mlir::Value testInit = initVal(builder, loc, elementType);
-  fir::IfOp ifMinSetOp;
-  if (elementType.isa<mlir::FloatType>()) {
-    mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
-        loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
-    ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
-                                           /*withElseRegion*/ false);
-  } else {
-    mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
-        loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
-    ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
-                                           /*withElseRegion*/ false);
-  }
-  builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());
-
-  // Load output array with 1s instead of 0s
-  for (unsigned int i = 0; i < rank; ++i) {
-    mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
-    mlir::Value resultElemAddr =
-        getAddrFn(builder, loc, resultElemType, resultArr, index);
-    builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
-  }
-  builder.setInsertionPointAfter(ifMaskTrueOp);
 }
 
 static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
@@ -697,8 +661,8 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
                       mlir::Type elementType) {
     if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
       const llvm::fltSemantics &sem = ty.getFloatSemantics();
-      return builder.createRealConstant(
-          loc, elementType, llvm::APFloat::getLargest(sem, /*Negative=*/isMax));
+      llvm::APFloat limit = llvm::APFloat::getInf(sem, /*Negative=*/isMax);
+      return builder.createRealConstant(loc, elementType, limit);
     }
     unsigned bits = elementType.getIntOrFloatBitWidth();
     int64_t initValue = (isMax ? llvm::APInt::getSignedMinValue(bits)
@@ -770,7 +734,7 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
     // Set flag that mask was true at some point
     mlir::Value flagSet = builder.createIntegerConstant(
         loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
-    builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+    mlir::Value isFirst = builder.create<fir::LoadOp>(loc, flagRef);
     mlir::Type eleRefTy = builder.getRefType(elementType);
     mlir::Value addr =
         builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
@@ -778,11 +742,22 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
 
     mlir::Value cmp;
     if (elementType.isa<mlir::FloatType>()) {
+      // For FP reductions we want the first smallest value to be used, that
+      // is not NaN. A OGL/OLT condition will usually work for this unless all
+      // the values are Nan or Inf. This follows the same logic as
+      // NumericCompare for Minloc/Maxlox in extrema.cpp.
       cmp = builder.create<mlir::arith::CmpFOp>(
           loc,
           isMax ? mlir::arith::CmpFPredicate::OGT
                 : mlir::arith::CmpFPredicate::OLT,
           elem, reduction);
+
+      mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>(
+          loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction);
+      mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>(
+          loc, mlir::arith::CmpFPredicate::OEQ, elem, elem);
+      cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2);
+      cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan);
     } else if (elementType.isa<mlir::IntegerType>()) {
       cmp = builder.create<mlir::arith::CmpIOp>(
           loc,
@@ -793,10 +768,16 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
       llvm_unreachable("unsupported type");
     }
 
+    // The condition used for the loop is isFirst || <the condition above>.
+    isFirst = builder.create<fir::ConvertOp>(loc, cmp.getType(), isFirst);
+    isFirst = builder.create<mlir::arith::XOrIOp>(
+        loc, isFirst, builder.createIntegerConstant(loc, cmp.getType(), 1));
+    cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, isFirst);
     fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
                                                /*withElseRegion*/ true);
 
     builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    builder.create<fir::StoreOp>(loc, flagSet, flagRef);
     mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
     mlir::Type returnRefTy = builder.getRefType(resultElemTy);
     mlir::IndexType idxTy = builder.getIndexType();

diff  --git a/flang/test/HLFIR/maxloc-elemental.fir b/flang/test/HLFIR/maxloc-elemental.fir
index b4a3ca0d86068f..c97117dd10de13 100644
--- a/flang/test/HLFIR/maxloc-elemental.fir
+++ b/flang/test/HLFIR/maxloc-elemental.fir
@@ -23,6 +23,7 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
   return
 }
 // CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %true = arith.constant true
 // CHECK-NEXT:    %c-2147483648_i32 = arith.constant -2147483648 : i32
 // CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
 // CHECK-NEXT:    %c0 = arith.constant 0 : index
@@ -45,14 +46,18 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
 // CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
 // CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
 // CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i32>
 // CHECK-NEXT:        %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 // CHECK-NEXT:        %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
 // CHECK-NEXT:        %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
 // CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
 // CHECK-NEXT:        %[[V21:.*]] = arith.cmpi sgt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
+// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK-NEXT:          fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
 // CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
 // CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
@@ -66,15 +71,6 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
 // CHECK-NEXT:      }
 // CHECK-NEXT:      fir.result %[[V18]] : i32
 // CHECK-NEXT:    }
-// CHECK-NEXT:    %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:    %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i32 : i32
-// CHECK-NEXT:    fir.if %[[V13]] {
-// CHECK-NEXT:      %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c-2147483648_i32 : i32
-// CHECK-NEXT:      fir.if %[[V14]] {
-// CHECK-NEXT:        %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        fir.store %c1_i32 to %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      }
-// CHECK-NEXT:    }
 // CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 // CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
 // CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
@@ -110,21 +106,29 @@ func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a
   return
 }
 // CHECK-LABEL: _QPtest_float
-// CHECK:        %cst = arith.constant -3.40282347E+38 : f32
+// CHECK:        %cst = arith.constant 0xFF800000 : f32
 // CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
 // CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
 // CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
 // CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
 // CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
-// CHECK-NEXT:       fir.store %c1_i32 to %[[V0:.*]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0:.*]] : !fir.ref<i32>
 // CHECK-NEXT:       %[[DIMS:.*]]:3 = fir.box_dims %2#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 // CHECK-NEXT:       %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
 // CHECK-NEXT:       %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
 // CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
-// CHECK-NEXT:       %[[V21:.*]] = arith.cmpf ogt, %[[V20]], %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[V21]] -> (f32) {
+// CHECK-NEXT:       %[[NEW_MIN:.*]] = arith.cmpf ogt, %[[V20]], %arg4 fastmath<contract> : f32
+// CHECK-NEXT:       %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f32
+// CHECK-NEXT:       %[[CONDELEM:.*]] = arith.cmpf oeq, %[[V20]], %[[V20]] fastmath<contract> : f32
+// CHECK-NEXT:       %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
+// CHECK-NEXT:       %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
+// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
+// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[ORCOND]] -> (f32) {
+// CHECK-NEXT:         fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
 // CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
 // CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>

diff  --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
index 5cc608b65be8bc..58cfe3ea012793 100644
--- a/flang/test/HLFIR/minloc-elemental.fir
+++ b/flang/test/HLFIR/minloc-elemental.fir
@@ -23,6 +23,7 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
   return
 }
 // CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %true = arith.constant true
 // CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
 // CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
 // CHECK-NEXT:    %c0 = arith.constant 0 : index
@@ -45,14 +46,18 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
 // CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
 // CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
 // CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i32>
 // CHECK-NEXT:        %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 // CHECK-NEXT:        %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
 // CHECK-NEXT:        %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
 // CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
 // CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
+// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK-NEXT:          fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
 // CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
 // CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
@@ -66,15 +71,6 @@ func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
 // CHECK-NEXT:      }
 // CHECK-NEXT:      fir.result %[[V18]] : i32
 // CHECK-NEXT:    }
-// CHECK-NEXT:    %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:    %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i32 : i32
-// CHECK-NEXT:    fir.if %[[V13]] {
-// CHECK-NEXT:      %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32
-// CHECK-NEXT:      fir.if %[[V14]] {
-// CHECK-NEXT:        %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        fir.store %c1_i32 to %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      }
-// CHECK-NEXT:    }
 // CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 // CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
 // CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
@@ -109,6 +105,7 @@ func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a
   return
 }
 // CHECK-LABEL:  func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %true = arith.constant true
 // CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
 // CHECK-NEXT:    %c1_i16 = arith.constant 1 : i16
 // CHECK-NEXT:    %c0 = arith.constant 0 : index
@@ -131,14 +128,18 @@ func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a
 // CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
 // CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
 // CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i16>
 // CHECK-NEXT:        %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 // CHECK-NEXT:        %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
 // CHECK-NEXT:        %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
 // CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
 // CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1
+// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
+// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK-NEXT:          fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
 // CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
 // CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i16
 // CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i16>
@@ -152,15 +153,6 @@ func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a
 // CHECK-NEXT:      }
 // CHECK-NEXT:      fir.result %[[V18]] : i32
 // CHECK-NEXT:    }
-// CHECK-NEXT:    %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:    %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i16 : i16
-// CHECK-NEXT:    fir.if %[[V13]] {
-// CHECK-NEXT:      %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32
-// CHECK-NEXT:      fir.if %[[V14]] {
-// CHECK-NEXT:        %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:        fir.store %c1_i16 to %[[V15]] : !fir.ref<i16>
-// CHECK-NEXT:      }
-// CHECK-NEXT:    }
 // CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> (index, index, index)
 // CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
 // CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
@@ -206,6 +198,7 @@ func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_n
 // CHECK-LABEL: _QPtest_kind2_convert
 // CHECK-SAME:     (%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
 // CHECK-NEXT:   %false = arith.constant false
+// CHECK-NEXT:   %true = arith.constant true
 // CHECK-NEXT:   %c2147483647_i32 = arith.constant 2147483647 : i32
 // CHECK-NEXT:   %c1_i16 = arith.constant 1 : i16
 // CHECK-NEXT:   %c0 = arith.constant 0 : index
@@ -228,14 +221,18 @@ func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_n
 // CHECK-NEXT:     %[[V17:.*]] = fir.load %[[V16]] : !fir.ref<i32>
 // CHECK-NEXT:     %[[V18:.*]] = arith.cmpi sge, %[[V17]], %[[V5]] : i32
 // CHECK-NEXT:     %[[V19:.*]] = fir.if %[[V18]] -> (i32) {
-// CHECK-NEXT:       fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i16>
 // CHECK-NEXT:       %[[V20:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
 // CHECK-NEXT:       %[[V21:.*]] = arith.subi %[[V20]]#0, %c1 : index
 // CHECK-NEXT:       %[[V22:.*]] = arith.addi %[[V15]], %[[V21]] : index
 // CHECK-NEXT:       %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%[[V22]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:       %[[V24:.*]] = fir.load %[[V23]] : !fir.ref<i32>
 // CHECK-NEXT:       %[[V25:.*]] = arith.cmpi slt, %[[V24]], %arg4 : i32
-// CHECK-NEXT:       %[[V26:.*]] = fir.if %[[V25]] -> (i32) {
+// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1
+// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[V25]], %[[ISFIRSTNOT]] : i1
+// CHECK-NEXT:       %[[V26:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK-NEXT:         fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
 // CHECK-NEXT:         %[[V27:.*]] = hlfir.designate %[[V1]] (%c1)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
 // CHECK-NEXT:         %[[V28:.*]] = fir.convert %[[V15]] : (index) -> i16
 // CHECK-NEXT:         fir.store %[[V28]] to %[[V27]] : !fir.ref<i16>
@@ -249,15 +246,6 @@ func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_n
 // CHECK-NEXT:     }
 // CHECK-NEXT:     fir.result %[[V19]] : i32
 // CHECK-NEXT:   }
-// CHECK-NEXT:   %[[V10:.*]] = fir.load %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:   %[[V11:.*]] = arith.cmpi eq, %[[V10]], %c1_i16 : i16
-// CHECK-NEXT:   fir.if %[[V11]] {
-// CHECK-NEXT:     %[[V15]] = arith.cmpi eq, %[[V9]], %c2147483647_i32 : i32
-// CHECK-NEXT:     fir.if %[[V15]] {
-// CHECK-NEXT:       %[[V16]] = hlfir.designate %[[V1]] (%c1)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:       fir.store %c1_i16 to %[[V16]] : !fir.ref<i16>
-// CHECK-NEXT:     }
-// CHECK-NEXT:   }
 // CHECK-NEXT:   %[[V12:.*]] = hlfir.as_expr %[[V1]] move %false : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
 // CHECK-NEXT:   %[[V13:.*]] = fir.shape %c1 : (index) -> !fir.shape<1>
 // CHECK-NEXT:   %[[V14:.*]] = hlfir.elemental %[[V13]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
@@ -295,21 +283,29 @@ func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "a
   return
 }
 // CHECK-LABEL: _QPtest_float
-// CHECK:        %cst = arith.constant 3.40282347E+38 : f32
+// CHECK:        %cst = arith.constant 0x7F800000 : f32
 // CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
 // CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
 // CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
 // CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
 // CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
-// CHECK-NEXT:       fir.store %c1_i32 to %[[V0:.*]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0:.*]] : !fir.ref<i32>
 // CHECK-NEXT:       %[[DIMS:.*]]:3 = fir.box_dims %2#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
 // CHECK-NEXT:       %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
 // CHECK-NEXT:       %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
 // CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
 // CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
-// CHECK-NEXT:       %[[V21:.*]] = arith.cmpf olt, %[[V20]], %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[V21]] -> (f32) {
+// CHECK-NEXT:       %[[NEW_MIN:.*]] = arith.cmpf olt, %[[V20]], %arg4 fastmath<contract> : f32
+// CHECK-NEXT:       %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f32
+// CHECK-NEXT:       %[[CONDELEM:.*]] = arith.cmpf oeq, %[[V20]], %[[V20]] fastmath<contract> : f32
+// CHECK-NEXT:       %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
+// CHECK-NEXT:       %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
+// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
+// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[ORCOND]] -> (f32) {
+// CHECK-NEXT:         fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
 // CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
 // CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
 // CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>

diff  --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index cd059cc797a3f4..ce9f2dbd3e0fbb 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -1780,11 +1780,15 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
 // CHECK:             %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
 // CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
-// CHECK:               fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:               %[[ISFIRST:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
 // CHECK:               %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN]] : i32
-// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) {
+// CHECK:               %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK:               %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK:               %[[ORCOND:.*]] = arith.ori %[[NEW_MIN]], %[[ISFIRSTNOT]] : i1
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK:                 fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:                 %[[ONE:.*]] = arith.constant 1 : i32
 // CHECK:                 %[[OUTARR_IDX:.*]] = arith.constant 0 : index
 // CHECK:                 %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
@@ -1801,17 +1805,6 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             }
 // CHECK:             fir.result %[[IF_MASK:.*]] : i32
 // CHECK:           }
-// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i32
-// CHECK:           fir.if %[[FLAG_WAS_SET]] {
-// CHECK:             %[[TEST_MAX:.*]] = arith.constant 2147483647 : i32
-// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32
-// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
-// CHECK:               %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
-// CHECK:               %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
-// CHECK:             }
-// CHECK:           }
 // CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           return
@@ -1903,10 +1896,16 @@ func.func @_QPtestminloc_works2d_nomask(%arg0: !fir.ref<!fir.array<10x10xi32>> {
 // CHECK:           %[[EXTENT1:.*]] = arith.subi %[[DIMS1]]#1, %[[C_INDEX1]] : index
 // CHECK:           %[[DOLOOP0:.*]] = fir.do_loop %[[ITER0:.*]] = %[[C_INDEX0]] to %[[EXTENT1]] step %[[C_INDEX1]] iter_args(%[[MIN0:.*]] = %[[MAX]]) -> (i32) {
 // CHECK:             %[[DOLOOP1:.*]] = fir.do_loop %[[ITER1:.*]] = %[[C_INDEX0]] to %[[EXTENT0]] step %[[C_INDEX1]] iter_args(%[[MIN1:.*]] = %[[MIN0]]) -> (i32) {
+// CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i64
+// CHECK:               %[[ISFIRST:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i64>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER1]], %[[ITER0]] : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
 // CHECK:               %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN1]] : i32
-// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) {
+// CHECK:               %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i64) -> i1
+// CHECK:               %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK:               %[[ORCOND:.*]] = arith.ori %[[NEW_MIN]], %[[ISFIRSTNOT]] : i1
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK:                 fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i64>
 // CHECK:                 %[[ONE:.*]] = arith.constant 1 : i64
 // CHECK:                 %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
 // CHECK:                 %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
@@ -1926,20 +1925,6 @@ func.func @_QPtestminloc_works2d_nomask(%arg0: !fir.ref<!fir.array<10x10xi32>> {
 // CHECK:             }
 // CHECK:             fir.result %[[DOLOOP1:.*]] : i32
 // CHECK:           }
-// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i64>
-// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i64
-// CHECK:           fir.if %[[FLAG_WAS_SET]] {
-// CHECK:             %[[TEST_MAX:.*]] = arith.constant 2147483647 : i32
-// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32
-// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
-// CHECK:               %[[FLAG_OUTARR_IDX0:.*]] = arith.constant 0 : index
-// CHECK:               %[[FLAG_OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM0]] : !fir.ref<i64>
-// CHECK:               %[[FLAG_OUTARR_IDX1:.*]] = arith.constant 1 : index
-// CHECK:               %[[FLAG_OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX1]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM1]] : !fir.ref<i64
-// CHECK:             }
-// CHECK:           }
 // CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<2xi64>>>>
 // CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<2xi64>>>>
 // CHECK:           return
@@ -2021,16 +2006,25 @@ func.func @_QPtestminloc_works1d_scalarmask_f64(%arg0: !fir.ref<!fir.array<10xf6
 // CHECK:             %[[FLAG_SET:.*]] = arith.constant 1 : i32
 // CHECK:             %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
 // CHECK:             fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:             %[[MAX:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:             %[[MAX:.*]] = arith.constant 0x7FF0000000000000 : f64
 // CHECK:             %[[C_INDEX1:.*]] = arith.constant 1 : index
 // CHECK:             %[[DIM_INDEX:.*]] = arith.constant 0 : index
 // CHECK:             %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
 // CHECK:             %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
 // CHECK:             %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (f64) {
+// CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK:               %[[ISFIRST:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<f64>
-// CHECK:               %[[NEW_MIN:.*]] = arith.cmpf olt, %[[INARR_ITEMVAL]], %[[MIN]] fastmath<{{.*}}> : f64
-// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (f64) {
+// CHECK:               %[[NEW_MIN:.*]] = arith.cmpf olt, %[[INARR_ITEMVAL]], %arg4 fastmath<contract> : f64
+// CHECK:               %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f64
+// CHECK:               %[[CONDELEM:.*]] = arith.cmpf oeq, %[[INARR_ITEMVAL]], %[[INARR_ITEMVAL]] fastmath<contract> : f64
+// CHECK:               %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
+// CHECK:               %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
+// CHECK:               %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK:               %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK:               %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[ORCOND]] -> (f64) {
 // CHECK:                 %[[ONE:.*]] = arith.constant 1 : i32
 // CHECK:                 %[[OUTARR_IDX:.*]] = arith.constant 0 : index
 // CHECK:                 %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
@@ -2044,18 +2038,6 @@ func.func @_QPtestminloc_works1d_scalarmask_f64(%arg0: !fir.ref<!fir.array<10xf6
 // CHECK:               fir.result %[[IF_NEW_MIN:.*]] : f64
 // CHECK:             }
 // CHECK:           }
-// CHECK:           %[[FLAG_CHECK:.*]] = arith.constant 1 : i32
-// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_CHECK]] : i32
-// CHECK:           fir.if %[[FLAG_WAS_SET]] {
-// CHECK:             %[[TEST_MAX:.*]] = arith.constant 1.7976931348623157E+308 : f64
-// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpf oeq, %[[TEST_MAX]], %[[INIT_RES:.*]] fastmath<{{.*}}> : f64
-// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
-// CHECK:               %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
-// CHECK:               %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
-// CHECK:               fir.store %[[FLAG_CHECK]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
-// CHECK:             }
-// CHECK:           }
 // CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           return
@@ -2444,11 +2426,15 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
 // CHECK:             %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
 // CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
-// CHECK:               fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:               %[[ISFIRST:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
 // CHECK:               %[[NEW_MIN:.*]] = arith.cmpi sgt, %[[INARR_ITEMVAL]], %[[MIN]] : i32
-// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) {
+// CHECK:               %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK:               %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK:               %[[ORCOND:.*]] = arith.ori %[[NEW_MIN]], %[[ISFIRSTNOT]] : i1
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[ORCOND]] -> (i32) {
+// CHECK:                 fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:                 %[[ONE:.*]] = arith.constant 1 : i32
 // CHECK:                 %[[OUTARR_IDX:.*]] = arith.constant 0 : index
 // CHECK:                 %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
@@ -2465,17 +2451,6 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             }
 // CHECK:             fir.result %[[IF_MASK:.*]] : i32
 // CHECK:           }
-// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i32
-// CHECK:           fir.if %[[FLAG_WAS_SET]] {
-// CHECK:             %[[TEST_MAX:.*]] = arith.constant -2147483648 : i32
-// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32
-// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
-// CHECK:               %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
-// CHECK:               %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
-// CHECK:             }
-// CHECK:           }
 // CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           return
@@ -2557,16 +2532,25 @@ func.func @_QPtestmaxloc_works1d_scalarmask_f64(%arg0: !fir.ref<!fir.array<10xf6
 // CHECK:             %[[FLAG_SET:.*]] = arith.constant 1 : i32
 // CHECK:             %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
 // CHECK:             fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:             %[[MAX:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:             %[[MAX:.*]] = arith.constant 0xFFF0000000000000 : f64
 // CHECK:             %[[C_INDEX1:.*]] = arith.constant 1 : index
 // CHECK:             %[[DIM_INDEX:.*]] = arith.constant 0 : index
 // CHECK:             %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
 // CHECK:             %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
 // CHECK:             %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (f64) {
+// CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK:               %[[ISFIRST:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<f64>
-// CHECK:               %[[NEW_MIN:.*]] = arith.cmpf ogt, %[[INARR_ITEMVAL]], %[[MIN]] fastmath<{{.*}}> : f64
-// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (f64) {
+// CHECK:               %[[NEW_MIN:.*]] = arith.cmpf ogt, %[[INARR_ITEMVAL]], %arg4 fastmath<contract> : f64
+// CHECK:               %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f64
+// CHECK:               %[[CONDELEM:.*]] = arith.cmpf oeq, %[[INARR_ITEMVAL]], %[[INARR_ITEMVAL]] fastmath<contract> : f64
+// CHECK:               %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
+// CHECK:               %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
+// CHECK:               %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
+// CHECK:               %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
+// CHECK:               %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[ORCOND]] -> (f64) {
 // CHECK:                 %[[ONE:.*]] = arith.constant 1 : i32
 // CHECK:                 %[[OUTARR_IDX:.*]] = arith.constant 0 : index
 // CHECK:                 %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
@@ -2580,18 +2564,6 @@ func.func @_QPtestmaxloc_works1d_scalarmask_f64(%arg0: !fir.ref<!fir.array<10xf6
 // CHECK:               fir.result %[[IF_NEW_MIN:.*]] : f64
 // CHECK:             }
 // CHECK:           }
-// CHECK:           %[[FLAG_CHECK:.*]] = arith.constant 1 : i32
-// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_CHECK]] : i32
-// CHECK:           fir.if %[[FLAG_WAS_SET]] {
-// CHECK:             %[[TEST_MAX:.*]] = arith.constant -1.7976931348623157E+308 : f64
-// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpf oeq, %[[TEST_MAX]], %[[INIT_RES:.*]] fastmath<{{.*}}> : f64
-// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
-// CHECK:               %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
-// CHECK:               %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
-// CHECK:               fir.store %[[FLAG_CHECK]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
-// CHECK:             }
-// CHECK:           }
 // CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[VAL_0]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
 // CHECK:           return