[flang-commits] [flang] [Flang] Minloc elemental intrinsic lowering (PR #74828)

David Green via flang-commits flang-commits at lists.llvm.org
Fri Jan 12 07:01:25 PST 2024


https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/74828

>From 15d65d5656773f7ffd6176eff865abc0c5273890 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 12 Jan 2024 14:58:09 +0000
Subject: [PATCH] [Flang] Minloc elemental intrinsic lowering

Currently the lowering of a minloc intrinsic with a mask will look something
like:
  %e = hlfir.elemental %shape ({
    ...
  })
  %m = hlfir.minloc %array mask %e
  hlfir.assign %m to %result
  hlfir.destroy %m
The elemental will be expanded into a temporary+loop, the minloc into a
FortranAMinloc call (which hopefully gets simplified to a specialized call that
can be inlined at the call site), and the assign might get expanded to a
FortranAAssign. It would be better to generate the entire construct as a single
loop if we can - one that performs the minloc calculation with the mask
elemental computed inline.

This patch attempts to do that, adding an hlfir version of the expansion code
from SimplifyIntrinsics that turns a minloc+elemental into a single combined
loop nest. It attempts to reuse the methods in genMinMaxlocReductionLoop for
constructing the loop with a modified loop body. The declaration for the
function is currently in Optimizer/Support/Utils.h, but there might be a better
place for it.

It is currently added as part of the OptimizedBufferizationPass, like the
similar count/any/all conversions that have been added recently.
---
 flang/include/flang/Optimizer/Support/Utils.h |  19 +
 .../Transforms/OptimizedBufferization.cpp     | 199 ++++++++
 .../Transforms/SimplifyIntrinsics.cpp         | 200 ++++----
 flang/test/HLFIR/minloc-elemental.fir         | 426 ++++++++++++++++++
 flang/test/Transforms/simplifyintrinsics.fir  |  10 +-
 5 files changed, 751 insertions(+), 103 deletions(-)
 create mode 100644 flang/test/HLFIR/minloc-elemental.fir

diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 34c8e79173bcd4..e31121260acdae 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -133,6 +133,25 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
            fir::numericMlirTypeToFortran(builder, type, loc, intrinsicName) +
            " in " + intrinsicName);
 }
+
+using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
+    fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+    mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
+using InitValGeneratorTy = llvm::function_ref<mlir::Value(
+    fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
+using AddrGeneratorTy = llvm::function_ref<mlir::Value(
+    fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+    mlir::Value)>;
+
+// Produces a loop nest for a Minloc intrinsic.
+void genMinMaxlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
+                               InitValGeneratorTy initVal,
+                               MinlocBodyOpGeneratorTy genBody,
+                               fir::AddrGeneratorTy getAddrFn, unsigned rank,
+                               mlir::Type elementType, mlir::Location loc,
+                               mlir::Type maskElemType, mlir::Value resultArr,
+                               bool maskMayBeLogicalScalar);
+
 } // namespace fir
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index a7bf2502153846..73afdaf9e117c5 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
@@ -807,6 +808,203 @@ class ReductionElementalConversion : public mlir::OpRewritePattern<Op> {
   }
 };
 
+// Match a minloc whose mask is produced by an hlfir.elemental and emit one
+// loop nest with the elemental body inlined as the mask computation:
+//  %e = hlfir.elemental %shape ({ ... })
+//  %m = hlfir.minloc %array mask %e
+class MinMaxlocElementalConversion
+    : public mlir::OpRewritePattern<hlfir::MinlocOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::MinlocOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(hlfir::MinlocOp minloc,
+                  mlir::PatternRewriter &rewriter) const override {
+    if (!minloc.getMask() || minloc.getDim() || minloc.getBack())
+      return rewriter.notifyMatchFailure(minloc, "Did not find valid minloc");
+
+    auto elemental = minloc.getMask().getDefiningOp<hlfir::ElementalOp>();
+    if (!elemental || hlfir::elementalOpMustProduceTemp(elemental))
+      return rewriter.notifyMatchFailure(minloc, "Did not find elemental");
+
+    mlir::Value array = minloc.getArray();
+
+    unsigned rank = mlir::cast<hlfir::ExprType>(minloc.getType()).getShape()[0];
+    mlir::Type arrayType = array.getType();
+    if (!arrayType.isa<fir::BoxType>())
+      return rewriter.notifyMatchFailure(
+          minloc, "Currently requires a boxed type input");
+    mlir::Type elementType = hlfir::getFortranElementType(arrayType);
+    if (!fir::isa_trivial(elementType))
+      return rewriter.notifyMatchFailure(
+          minloc, "Character arrays are currently not handled");
+
+    mlir::Location loc = minloc.getLoc();
+    fir::FirOpBuilder builder{rewriter, minloc.getOperation()};
+    mlir::Value resultArr = builder.createTemporary(
+        loc, fir::SequenceType::get(
+                 rank, hlfir::getFortranElementType(minloc.getType())));
+
+    auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
+                   mlir::Type elementType) {
+      if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
+        const llvm::fltSemantics &sem = ty.getFloatSemantics();
+        return builder.createRealConstant(
+            loc, elementType,
+            llvm::APFloat::getLargest(sem, /*Negative=*/false));
+      }
+      unsigned bits = elementType.getIntOrFloatBitWidth();
+      int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+      return builder.createIntegerConstant(loc, elementType, maxInt);
+    };
+
+    auto genBodyOp =
+        [&rank, &resultArr, &elemental](
+            fir::FirOpBuilder builder, mlir::Location loc,
+            mlir::Type elementType, mlir::Value array, mlir::Value flagRef,
+            mlir::Value reduction,
+            const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value {
+      // Innermost loop: inline the elemental body using one-based indices
+      mlir::Value oneIdx =
+          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+      llvm::SmallVector<mlir::Value> oneBasedIndices;
+      llvm::transform(
+          indices, std::back_inserter(oneBasedIndices), [&](mlir::Value V) {
+            return builder.create<mlir::arith::AddIOp>(loc, V, oneIdx);
+          });
+      hlfir::YieldElementOp yield =
+          hlfir::inlineElementalOp(loc, builder, elemental, oneBasedIndices);
+      mlir::Value maskElem = yield.getElementValue();
+      yield->erase();
+
+      mlir::Type ifCompatType = builder.getI1Type();
+      mlir::Value ifCompatElem =
+          builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
+
+      llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
+      fir::IfOp maskIfOp =
+          builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+                                    /*withElseRegion=*/true);
+      builder.setInsertionPointToStart(&maskIfOp.getThenRegion().front());
+
+      // Record in flagRef that the mask was true for at least one element
+      mlir::Value flagSet = builder.createIntegerConstant(
+          loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
+      builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+      mlir::Value addr = hlfir::getElementAt(loc, builder, hlfir::Entity{array},
+                                             oneBasedIndices);
+      mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
+      // Compare against the current reduction value (ordered/signed less-than)
+      mlir::Value cmp;
+      if (elementType.isa<mlir::FloatType>()) {
+        cmp = builder.create<mlir::arith::CmpFOp>(
+            loc, mlir::arith::CmpFPredicate::OLT, elem, reduction);
+      } else if (elementType.isa<mlir::IntegerType>()) {
+        cmp = builder.create<mlir::arith::CmpIOp>(
+            loc, mlir::arith::CmpIPredicate::slt, elem, reduction);
+      } else {
+        llvm_unreachable("unsupported type");
+      }
+
+      // On a new minimum, store the one-based indices into resultArr
+      fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
+                                                 /*withElseRegion*/ true);
+
+      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+      mlir::Type resultElemTy =
+          hlfir::getFortranElementType(resultArr.getType());
+      mlir::Type returnRefTy = builder.getRefType(resultElemTy);
+      mlir::IndexType idxTy = builder.getIndexType();
+
+      for (unsigned int i = 0; i < rank; ++i) {
+        mlir::Value index = builder.createIntegerConstant(loc, idxTy, i + 1);
+        mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
+            loc, returnRefTy, resultArr, index);
+        mlir::Value fortranIndex = builder.create<fir::ConvertOp>(
+            loc, resultElemTy, oneBasedIndices[i]);
+        builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
+      }
+      builder.create<fir::ResultOp>(loc, elem);
+      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+      builder.create<fir::ResultOp>(loc, reduction);
+      builder.setInsertionPointAfter(ifOp);
+
+      // Close the mask if: a false mask keeps the incoming reduction value
+      builder.create<fir::ResultOp>(loc, ifOp.getResult(0));
+      builder.setInsertionPointToStart(&maskIfOp.getElseRegion().front());
+      builder.create<fir::ResultOp>(loc, reduction);
+      builder.setInsertionPointAfter(maskIfOp);
+
+      return maskIfOp.getResult(0);
+    };
+    auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc,
+                         const mlir::Type &resultElemType,
+                         mlir::Value resultArr, mlir::Value index) {
+      mlir::Type resultRefTy = builder.getRefType(resultElemType);
+      mlir::Value oneIdx =
+          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+      index = builder.create<mlir::arith::AddIOp>(loc, index, oneIdx);
+      return builder.create<hlfir::DesignateOp>(loc, resultRefTy, resultArr,
+                                                index);
+    };
+
+    // Initialize every element of the result to zero
+    mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
+    mlir::Type resultRefTy = builder.getRefType(resultElemTy);
+    mlir::Value returnValue =
+        builder.createIntegerConstant(loc, resultElemTy, 0);
+    for (unsigned int i = 0; i < rank; ++i) {
+      mlir::Value index =
+          builder.createIntegerConstant(loc, builder.getIndexType(), i + 1);
+      mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
+          loc, resultRefTy, resultArr, index);
+      builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
+    }
+
+    fir::genMinMaxlocReductionLoop(builder, array, init, genBodyOp, getAddrFn,
+                                   rank, elementType, loc, builder.getI1Type(),
+                                   resultArr, false);
+
+    mlir::Value asExpr = builder.create<hlfir::AsExprOp>(
+        loc, resultArr, builder.createBool(loc, false));
+
+    // Check all the users - the destroy is no longer required, and any assign
+    // can use resultArr directly so that VariableAssignBufferization in this
+    // pass can optimize the results. Other operations are replaced with an
+    // AsExpr for the temporary resultArr.
+    llvm::SmallVector<hlfir::DestroyOp> destroys;
+    llvm::SmallVector<hlfir::AssignOp> assigns;
+    for (auto user : minloc->getUsers()) {
+      if (auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(user))
+        destroys.push_back(destroy);
+      else if (auto assign = mlir::dyn_cast<hlfir::AssignOp>(user))
+        assigns.push_back(assign);
+    }
+
+    // Check if the minloc was the only user of the elemental (apart from a
+    // destroy), and remove it if so.
+    mlir::Operation::user_range elemUsers = elemental->getUsers();
+    hlfir::DestroyOp elemDestroy;
+    if (std::distance(elemUsers.begin(), elemUsers.end()) == 2) {
+      elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*elemUsers.begin());
+      if (!elemDestroy)
+        elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++elemUsers.begin());
+    }
+
+    for (auto d : destroys)
+      rewriter.eraseOp(d);
+    for (auto a : assigns)
+      a.setOperand(0, resultArr);
+    rewriter.replaceOp(minloc, asExpr);
+    if (elemDestroy) {
+      rewriter.eraseOp(elemDestroy);
+      rewriter.eraseOp(elemental);
+    }
+    return mlir::success();
+  }
+};
+
 class OptimizedBufferizationPass
     : public hlfir::impl::OptimizedBufferizationBase<
           OptimizedBufferizationPass> {
@@ -832,6 +1030,7 @@ class OptimizedBufferizationPass
     patterns.insert<ReductionElementalConversion<hlfir::CountOp>>(context);
     patterns.insert<ReductionElementalConversion<hlfir::AnyOp>>(context);
     patterns.insert<ReductionElementalConversion<hlfir::AllOp>>(context);
+    patterns.insert<MinMaxlocElementalConversion>(context);
 
     if (mlir::failed(mlir::applyPatternsAndFoldGreedily(
             func, std::move(patterns), config))) {
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index c89ee6d5e20391..2301e7146f1410 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -31,6 +31,7 @@
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "flang/Optimizer/Transforms/Passes.h"
 #include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
@@ -243,8 +244,6 @@ static std::optional<mlir::Type> getArgElementType(mlir::Value val) {
 using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(
     fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
     mlir::Value)>;
-using InitValGeneratorTy = llvm::function_ref<mlir::Value(
-    fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
 using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
     fir::FirOpBuilder &, mlir::Location, mlir::Value)>;
 
@@ -266,7 +265,7 @@ using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
 template <typename OP, typename T, int resultIndex>
 static void
 genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
-                 InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
+                 fir::InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
                  T unorderedOrInitialLoopCond, BodyOpGeneratorTy genBody,
                  unsigned rank, mlir::Type elementType, mlir::Location loc) {
 
@@ -353,28 +352,22 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   // Return the reduction value from the function.
   builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
 }
-using MinMaxlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
-    fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
-    mlir::Value, llvm::SmallVector<mlir::Value, Fortran::common::maxRank> &)>;
-
-static void genMinMaxlocReductionLoop(
-    fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
-    InitValGeneratorTy initVal, MinMaxlocBodyOpGeneratorTy genBody,
-    unsigned rank, mlir::Type elementType, mlir::Location loc, bool hasMask,
-    mlir::Type maskElemType, mlir::Value resultArr) {
 
+void fir::genMinMaxlocReductionLoop(
+    fir::FirOpBuilder &builder, mlir::Value array,
+    fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
+    fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
+    mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
+    bool maskMayBeLogicalScalar) {
   mlir::IndexType idxTy = builder.getIndexType();
 
-  mlir::Block::BlockArgListType args = funcOp.front().getArguments();
-  mlir::Value arg = args[1];
-
   mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
 
   fir::SequenceType::Shape flatShape(rank,
                                      fir::SequenceType::getUnknownExtent());
   mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
   mlir::Type boxArrTy = fir::BoxType::get(arrTy);
-  mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);
+  array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
 
   mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
   mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
@@ -382,13 +375,6 @@ static void genMinMaxlocReductionLoop(
   mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
   builder.create<fir::StoreOp>(loc, zero, flagRef);
 
-  mlir::Value mask;
-  if (hasMask) {
-    mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
-    mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
-    mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, args[2]);
-  }
-
   mlir::Value init = initVal(builder, loc, elementType);
   llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
 
@@ -431,44 +417,8 @@ static void genMinMaxlocReductionLoop(
   // Reverse the indices such that they are ordered as:
   //   <dim-0-idx, dim-1-idx, ...>
   std::reverse(indices.begin(), indices.end());
-  // We are in the innermost loop: generate the reduction body.
-  if (hasMask) {
-    mlir::Type logicalRef = builder.getRefType(maskElemType);
-    mlir::Value maskAddr =
-        builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
-    mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
-
-    // fir::IfOp requires argument to be I1 - won't accept logical or any other
-    // Integer.
-    mlir::Type ifCompatType = builder.getI1Type();
-    mlir::Value ifCompatElem =
-        builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
-
-    llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
-    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
-                                               /*withElseRegion=*/true);
-    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-  }
-
-  // Set flag that mask was true at some point
-  builder.create<fir::StoreOp>(loc, flagSet, flagRef);
-  mlir::Type eleRefTy = builder.getRefType(elementType);
-  mlir::Value addr =
-      builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
-  mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
-
   mlir::Value reductionVal =
-      genBody(builder, loc, elementType, elem, init, indices);
-
-  if (hasMask) {
-    fir::IfOp ifOp =
-        mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
-    builder.create<fir::ResultOp>(loc, reductionVal);
-    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-    builder.create<fir::ResultOp>(loc, init);
-    reductionVal = ifOp.getResult(0);
-    builder.setInsertionPointAfter(ifOp);
-  }
+      genBody(builder, loc, elementType, array, flagRef, init, indices);
 
   // Unwind the loop nest and insert ResultOp on each level
   // to return the updated value of the reduction to the enclosing
@@ -483,13 +433,15 @@ static void genMinMaxlocReductionLoop(
     builder.setInsertionPointAfter(loop.getOperation());
   }
   // End of loop nest. The insertion point is after the outermost loop.
-  if (fir::IfOp ifOp =
-          mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
-    builder.create<fir::ResultOp>(loc, reductionVal);
-    builder.setInsertionPointAfter(ifOp);
-    // Redefine flagSet to escape scope of ifOp
-    flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
-    reductionVal = ifOp.getResult(0);
+  if (maskMayBeLogicalScalar) {
+    if (fir::IfOp ifOp =
+            mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
+      builder.create<fir::ResultOp>(loc, reductionVal);
+      builder.setInsertionPointAfter(ifOp);
+      // Redefine flagSet to escape scope of ifOp
+      flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+      reductionVal = ifOp.getResult(0);
+    }
   }
 
   // Check for case where array was full of max values.
@@ -521,28 +473,12 @@ static void genMinMaxlocReductionLoop(
 
   // Load output array with 1s instead of 0s
   for (unsigned int i = 0; i < rank; ++i) {
-    mlir::Type resultRefTy = builder.getRefType(resultElemType);
-    // mlir::Value one = builder.createIntegerConstant(loc, resultElemType, 1);
     mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
     mlir::Value resultElemAddr =
-        builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
+        getAddrFn(builder, loc, resultElemType, resultArr, index);
     builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
   }
   builder.setInsertionPointAfter(ifMaskTrueOp);
-  // Store newly created output array to the reference passed in
-  fir::SequenceType::Shape resultShape(1, rank);
-  mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemType);
-  mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
-  mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
-  mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
-
-  mlir::Value outputArrNone = args[0];
-  mlir::Value outputArr =
-      builder.create<fir::ConvertOp>(loc, outputRefTy, outputArrNone);
-
-  // Store nearly created array to output array
-  builder.create<fir::StoreOp>(loc, resultArr, outputArr);
-  builder.create<mlir::func::ReturnOp>(loc);
 }
 
 static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
@@ -791,6 +727,14 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
 
   mlir::Type resultRefTy = builder.getRefType(resultElemTy);
 
+  if (maskRank > 0) {
+    fir::SequenceType::Shape flatShape(rank,
+                                       fir::SequenceType::getUnknownExtent());
+    mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
+    mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
+    mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, mask);
+  }
+
   for (unsigned int i = 0; i < rank; ++i) {
     mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
     mlir::Value resultElemAddr =
@@ -799,24 +743,51 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
   }
 
   auto genBodyOp =
-      [&rank, &resultArr,
-       isMax](fir::FirOpBuilder builder, mlir::Location loc,
-              mlir::Type elementType, mlir::Value elem1, mlir::Value elem2,
-              llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices)
-      -> mlir::Value {
+      [&rank, &resultArr, isMax, &mask, &maskElemType, &maskRank](
+          fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType,
+          mlir::Value array, mlir::Value flagRef, mlir::Value reduction,
+          const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value {
+    // We are in the innermost loop: generate the reduction body.
+    if (maskRank > 0) {
+      mlir::Type logicalRef = builder.getRefType(maskElemType);
+      mlir::Value maskAddr =
+          builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
+      mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
+
+      // fir::IfOp requires argument to be I1 - won't accept logical or any
+      // other Integer.
+      mlir::Type ifCompatType = builder.getI1Type();
+      mlir::Value ifCompatElem =
+          builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
+
+      llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
+      fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+                                                 /*withElseRegion=*/true);
+      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    }
+
+    // Set flag that mask was true at some point
+    mlir::Value flagSet = builder.createIntegerConstant(
+        loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
+    builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+    mlir::Type eleRefTy = builder.getRefType(elementType);
+    mlir::Value addr =
+        builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+    mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
     mlir::Value cmp;
     if (elementType.isa<mlir::FloatType>()) {
       cmp = builder.create<mlir::arith::CmpFOp>(
           loc,
           isMax ? mlir::arith::CmpFPredicate::OGT
                 : mlir::arith::CmpFPredicate::OLT,
-          elem1, elem2);
+          elem, reduction);
     } else if (elementType.isa<mlir::IntegerType>()) {
       cmp = builder.create<mlir::arith::CmpIOp>(
           loc,
           isMax ? mlir::arith::CmpIPredicate::sgt
                 : mlir::arith::CmpIPredicate::slt,
-          elem1, elem2);
+          elem, reduction);
     } else {
       llvm_unreachable("unsupported type");
     }
@@ -841,11 +812,24 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
           builder.create<mlir::arith::AddIOp>(loc, convert, one);
       builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
     }
-    builder.create<fir::ResultOp>(loc, elem1);
+    builder.create<fir::ResultOp>(loc, elem);
     builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-    builder.create<fir::ResultOp>(loc, elem2);
+    builder.create<fir::ResultOp>(loc, reduction);
     builder.setInsertionPointAfter(ifOp);
-    return ifOp.getResult(0);
+    mlir::Value reductionVal = ifOp.getResult(0);
+
+    // Close the mask if needed
+    if (maskRank > 0) {
+      fir::IfOp ifOp =
+          mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
+      builder.create<fir::ResultOp>(loc, reductionVal);
+      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+      builder.create<fir::ResultOp>(loc, reduction);
+      reductionVal = ifOp.getResult(0);
+      builder.setInsertionPointAfter(ifOp);
+    }
+
+    return reductionVal;
   };
 
   // if mask is a logical scalar, we can check its value before the main loop
@@ -879,12 +863,30 @@ static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
 
     builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
   }
+  auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc,
+                      const mlir::Type &resultElemType, mlir::Value resultArr,
+                      mlir::Value index) {
+    mlir::Type resultRefTy = builder.getRefType(resultElemType);
+    return builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr,
+                                             index);
+  };
+
+  genMinMaxlocReductionLoop(builder, funcOp.front().getArgument(1), init,
+                            genBodyOp, getAddrFn, rank, elementType, loc,
+                            maskElemType, resultArr, maskRank == 0);
+
+  // Store newly created output array to the reference passed in
+  fir::SequenceType::Shape resultShape(1, rank);
+  mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemTy);
+  mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
+  mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
+  mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
+  mlir::Value outputArr = builder.create<fir::ConvertOp>(
+      loc, outputRefTy, funcOp.front().getArgument(0));
 
-  // bit of a hack - maskRank is set to -1 for absent mask arg, so don't
-  // generate high level mask or element by element mask.
-  bool hasMask = maskRank > 0;
-  genMinMaxlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType,
-                            loc, hasMask, maskElemType, resultArr);
+  // Store nearly created array to output array
+  builder.create<fir::StoreOp>(loc, resultArr, outputArr);
+  builder.create<mlir::func::ReturnOp>(loc);
 }
 
 /// Generate function type for the simplified version of RTNAME(DotProduct)
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
new file mode 100644
index 00000000000000..cb483d51e05d13
--- /dev/null
+++ b/flang/test/HLFIR/minloc-elemental.fir
@@ -0,0 +1,426 @@
+// RUN: fir-opt %s -opt-bufferization | FileCheck %s
+
+func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.load %2#0 : !fir.ref<i32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %9 = fir.load %8 : !fir.ref<i32>
+    %10 = arith.cmpi sge, %9, %3 : i32
+    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %11 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %7 : !hlfir.expr<1xi32>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %c1 = arith.constant 1 : index
+// CHECK-NEXT:    %c0_i32 = arith.constant 0 : i32
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i32
+// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi32>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
+// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:    fir.store %c0_i32 to %[[V8]] : !fir.ref<i32>
+// CHECK-NEXT:    fir.store %c0_i32 to %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
+// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
+// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
+// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
+// CHECK-NEXT:        fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT:        %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:        %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
+// CHECK-NEXT:        %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
+// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
+// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
+// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
+// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
+// CHECK-NEXT:          fir.result %[[V20]] : i32
+// CHECK-NEXT:        } else {
+// CHECK-NEXT:          fir.result %arg4 : i32
+// CHECK-NEXT:        }
+// CHECK-NEXT:        fir.result %[[V22]] : i32
+// CHECK-NEXT:      } else {
+// CHECK-NEXT:        fir.result %arg4 : i32
+// CHECK-NEXT:      }
+// CHECK-NEXT:      fir.result %[[V18]] : i32
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i32>
+// CHECK-NEXT:    %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i32 : i32
+// CHECK-NEXT:    fir.if %[[V13]] {
+// CHECK-NEXT:      %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32
+// CHECK-NEXT:      fir.if %[[V14]] {
+// CHECK-NEXT:        %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:        fir.store %c1_i32 to %[[V15]] : !fir.ref<i32>
+// CHECK-NEXT:      }
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
+// CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
+// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      hlfir.assign %[[V14]] to %[[V15]] : i32, !fir.ref<i32>
+// CHECK-NEXT:    }
+// CHECK-NEXT:    return
+// CHECK-NEXT:  }
+
+
+func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.load %2#0 : !fir.ref<i32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %9 = fir.load %8 : !fir.ref<i32>
+    %10 = arith.cmpi sge, %9, %3 : i32
+    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %11 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi16>, !fir.box<!fir.array<?xi16>>
+  hlfir.destroy %7 : !hlfir.expr<1xi16>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL:  func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT:    %c1_i16 = arith.constant 1 : i16
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %c1 = arith.constant 1 : index
+// CHECK-NEXT:    %c0_i16 = arith.constant 0 : i16
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i16
+// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi16>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
+// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:    fir.store %c0_i16 to %[[V8]] : !fir.ref<i16>
+// CHECK-NEXT:    fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
+// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
+// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
+// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
+// CHECK-NEXT:        fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:        %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:        %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
+// CHECK-NEXT:        %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
+// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
+// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
+// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[V21]] -> (i32) {
+// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i16
+// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i16>
+// CHECK-NEXT:          fir.result %[[V20]] : i32
+// CHECK-NEXT:        } else {
+// CHECK-NEXT:          fir.result %arg4 : i32
+// CHECK-NEXT:        }
+// CHECK-NEXT:        fir.result %[[V22]] : i32
+// CHECK-NEXT:      } else {
+// CHECK-NEXT:        fir.result %arg4 : i32
+// CHECK-NEXT:      }
+// CHECK-NEXT:      fir.result %[[V18]] : i32
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %[[V12:.*]] = fir.load %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:    %[[V13:.*]] = arith.cmpi eq, %[[V12]], %c1_i16 : i16
+// CHECK-NEXT:    fir.if %[[V13]] {
+// CHECK-NEXT:      %[[V14:.*]] = arith.cmpi eq, %[[V11]], %c2147483647_i32 : i32
+// CHECK-NEXT:      fir.if %[[V14]] {
+// CHECK-NEXT:        %[[V15:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:        fir.store %c1_i16 to %[[V15]] : !fir.ref<i16>
+// CHECK-NEXT:      }
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %[[BD:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> (index, index, index)
+// CHECK-NEXT:    fir.do_loop %arg3 = %c1 to %[[BD]]#1 step %c1 unordered {
+// CHECK-NEXT:      %[[V13:.*]] = hlfir.designate %[[RES]] (%arg3)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:      %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i16>
+// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V2]]#0 (%arg3)  : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:      hlfir.assign %[[V14]] to %[[V15]] : i16, !fir.ref<i16>
+// CHECK-NEXT:    }
+// CHECK-NEXT:    return
+
+
+func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.load %2#0 : !fir.ref<i32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %10 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %11 = fir.load %10 : !fir.ref<i32>
+    %12 = arith.cmpi sge, %11, %3 : i32
+    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %13 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %9 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+  ^bb0(%arg3: index):
+    %10 = hlfir.apply %7, %arg3 : (!hlfir.expr<1xi16>, index) -> i16
+    %11 = fir.convert %10 : (i16) -> i32
+    hlfir.yield_element %11 : i32
+  }
+  hlfir.assign %9 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %9 : !hlfir.expr<?xi32>
+  hlfir.destroy %7 : !hlfir.expr<1xi16>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// The minloc has other uses, not an assign that gets optimized out.
+// CHECK-LABEL: _QPtest_kind2_convert
+// CHECK-SAME:     (%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:   %false = arith.constant false
+// CHECK-NEXT:   %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT:   %c1_i16 = arith.constant 1 : i16
+// CHECK-NEXT:   %c0 = arith.constant 0 : index
+// CHECK-NEXT:   %c0_i16 = arith.constant 0 : i16
+// CHECK-NEXT:   %c1 = arith.constant 1 : index
+// CHECK-NEXT:   %[[V0:.*]] = fir.alloca i16
+// CHECK-NEXT:   %[[V1:.*]] = fir.alloca !fir.array<1xi16>
+// CHECK-NEXT:   %[[V2:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:   %[[V3:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:   %[[V4:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:   %[[V5:.*]] = fir.load %[[V4]]#0 : !fir.ref<i32>
+// CHECK-NEXT:   %[[V6:.*]] = hlfir.designate %[[V1]] (%c1)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:   fir.store %c0_i16 to %[[V6]] : !fir.ref<i16>
+// CHECK-NEXT:   fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:   %[[V7:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:   %[[V8:.*]] = arith.subi %[[V7]]#1, %c1 : index
+// CHECK-NEXT:   %[[V9:.*]] = fir.do_loop %arg3 = %c0 to %[[V8]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT:     %[[V15:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:     %[[V16:.*]] = hlfir.designate %[[V2]]#0 (%[[V15]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:     %[[V17:.*]] = fir.load %[[V16]] : !fir.ref<i32>
+// CHECK-NEXT:     %[[V18:.*]] = arith.cmpi sge, %[[V17]], %[[V5]] : i32
+// CHECK-NEXT:     %[[V19:.*]] = fir.if %[[V18]] -> (i32) {
+// CHECK-NEXT:       fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:       %[[V20:.*]]:3 = fir.box_dims %[[V2]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:       %[[V21:.*]] = arith.subi %[[V20]]#0, %c1 : index
+// CHECK-NEXT:       %[[V22:.*]] = arith.addi %[[V15]], %[[V21]] : index
+// CHECK-NEXT:       %[[V23:.*]] = hlfir.designate %[[V2]]#0 (%[[V22]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:       %[[V24:.*]] = fir.load %[[V23]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[V25:.*]] = arith.cmpi slt, %[[V24]], %arg4 : i32
+// CHECK-NEXT:       %[[V26:.*]] = fir.if %[[V25]] -> (i32) {
+// CHECK-NEXT:         %[[V27:.*]] = hlfir.designate %[[V1]] (%c1)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:         %[[V28:.*]] = fir.convert %[[V15]] : (index) -> i16
+// CHECK-NEXT:         fir.store %[[V28]] to %[[V27]] : !fir.ref<i16>
+// CHECK-NEXT:         fir.result %[[V24]] : i32
+// CHECK-NEXT:       } else {
+// CHECK-NEXT:         fir.result %arg4 : i32
+// CHECK-NEXT:       }
+// CHECK-NEXT:       fir.result %[[V26]] : i32
+// CHECK-NEXT:     } else {
+// CHECK-NEXT:       fir.result %arg4 : i32
+// CHECK-NEXT:     }
+// CHECK-NEXT:     fir.result %[[V19]] : i32
+// CHECK-NEXT:   }
+// CHECK-NEXT:   %[[V10:.*]] = fir.load %[[V0]] : !fir.ref<i16>
+// CHECK-NEXT:   %[[V11:.*]] = arith.cmpi eq, %[[V10]], %c1_i16 : i16
+// CHECK-NEXT:   fir.if %[[V11]] {
+// CHECK-NEXT:     %[[V15]] = arith.cmpi eq, %[[V9]], %c2147483647_i32 : i32
+// CHECK-NEXT:     fir.if %[[V15]] {
+// CHECK-NEXT:       %[[V16]] = hlfir.designate %[[V1]] (%c1)  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:       fir.store %c1_i16 to %[[V16]] : !fir.ref<i16>
+// CHECK-NEXT:     }
+// CHECK-NEXT:   }
+// CHECK-NEXT:   %[[V12:.*]] = hlfir.as_expr %[[V1]] move %false : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
+// CHECK-NEXT:   %[[V13:.*]] = fir.shape %c1 : (index) -> !fir.shape<1>
+// CHECK-NEXT:   %[[V14:.*]] = hlfir.elemental %[[V13]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+// CHECK-NEXT:   ^bb0(%arg3: index):
+// CHECK-NEXT:     %[[V15:.*]] = hlfir.apply %[[V12]], %arg3 : (!hlfir.expr<1xi16>, index) -> i16
+// CHECK-NEXT:     %[[V16:.*]] = fir.convert %[[V15]] : (i16) -> i32
+// CHECK-NEXT:     hlfir.yield_element %[[V16]] : i32
+// CHECK-NEXT:   }
+// CHECK-NEXT:   hlfir.assign %[[V14]] to %[[V3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:   hlfir.destroy %[[V14]] : !hlfir.expr<?xi32>
+// CHECK-NEXT:   return
+
+
+
+func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %3 = fir.load %2#0 : !fir.ref<f32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+    %9 = fir.load %8 : !fir.ref<f32>
+    %10 = arith.cmpf oge, %9, %3 : f32
+    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %11 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %7 : !hlfir.expr<1xi32>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL: _QPtest_float
+// CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
+// CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
+// CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
+// CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
+// CHECK-NEXT:       fir.store %c1_i32 to %[[V0:.*]] : !fir.ref<i32>
+// CHECK-NEXT:       %[[DIMS:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK-NEXT:       %[[SUB:.*]] = arith.subi %[[DIMS]]#0, %c1 : index
+// CHECK-NEXT:       %[[ADD:.*]] = arith.addi %[[V14]], %[[SUB]] : index
+// CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[ADD]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
+// CHECK-NEXT:       %[[V21:.*]] = arith.cmpf olt, %[[V20]], %arg4 fastmath<contract> : f32
+// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[V21]] -> (f32) {
+// CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
+// CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
+// CHECK-NEXT:         fir.result %[[V20]] : f32
+// CHECK-NEXT:       } else {
+// CHECK-NEXT:         fir.result %arg4 : f32
+// CHECK-NEXT:       }
+// CHECK-NEXT:       fir.result %[[V22]] : f32
+// CHECK-NEXT:     } else {
+// CHECK-NEXT:       fir.result %arg4 : f32
+// CHECK-NEXT:     }
+// CHECK-NEXT:     fir.result %[[V18]] : f32
+// CHECK-NEXT:   }
+
+
+func.func @_QPtest_assignshape(%arg0: !fir.ref<!fir.array<3x3xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.ref<!fir.array<3xi32>> {fir.bindc_name = "m"}) {
+  %c2 = arith.constant 2 : index
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %0 = fir.shape %c3, %c3 : (index, index) -> !fir.shape<2>
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFtestEarray"} : (!fir.ref<!fir.array<3x3xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x3xf32>>, !fir.ref<!fir.array<3x3xf32>>)
+  %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+  %3:2 = hlfir.declare %arg2(%2) {uniq_name = "_QFtestEm"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+  %4:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %5 = fir.load %4#0 : !fir.ref<f32>
+  %6 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<3x3x!fir.logical<4>> {
+  ^bb0(%arg3: index, %arg4: index):
+    %10 = hlfir.designate %1#0 (%arg3, %arg4)  : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
+    %11 = fir.load %10 : !fir.ref<f32>
+    %12 = arith.cmpf oge, %11, %5 : f32
+    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %13 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %1#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<3x3xf32>>, !hlfir.expr<3x3x!fir.logical<4>>) -> !hlfir.expr<2xi32>
+  %8 = fir.shape %c2 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %3#0 (%c1:%c2:%c1)  shape %8 : (!fir.ref<!fir.array<3xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>>
+  hlfir.assign %7 to %9 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+  hlfir.destroy %7 : !hlfir.expr<2xi32>
+  hlfir.destroy %6 : !hlfir.expr<3x3x!fir.logical<4>>
+  return
+}
+// Not supported as the input is not a box
+// CHECK-LABEL: _QPtest_assignshape
+// CHECK: hlfir.minloc
+
+
+func.func @_QFPtest_character(%arg0: !fir.box<!fir.array<?x!fir.char<1>>> {fir.bindc_name = "b"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "c"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %0:2 = hlfir.declare %arg0 typeparams %c1 {uniq_name = "_QFFtestEb"} : (!fir.box<!fir.array<?x!fir.char<1>>>, index) -> (!fir.box<!fir.array<?x!fir.char<1>>>, !fir.box<!fir.array<?x!fir.char<1>>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEc"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2 = fir.alloca !fir.array<1xi32> {bindc_name = "m", uniq_name = "_QFFtestEm"}
+  %3 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %4:2 = hlfir.declare %2(%3) {uniq_name = "_QFFtestEm"} : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
+  %5 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
+  %6:2 = hlfir.declare %5 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %7:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %8 = fir.load %7#0 : !fir.ref<i32>
+  %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %10 = fir.shape %9#1 : (index) -> !fir.shape<1>
+  %11 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %16 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %17 = fir.load %16 : !fir.ref<i32>
+    %18 = arith.cmpi eq, %17, %8 : i32
+    %19 = fir.convert %18 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %19 : !fir.logical<4>
+  }
+  %12 = hlfir.minloc %0#0 mask %11 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1>>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  hlfir.assign %12 to %4#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
+  hlfir.destroy %12 : !hlfir.expr<1xi32>
+  hlfir.destroy %11 : !hlfir.expr<?x!fir.logical<4>>
+  %13 = hlfir.designate %4#0 (%c1)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+  %14 = fir.load %13 : !fir.ref<i32>
+  hlfir.assign %14 to %6#0 : i32, !fir.ref<i32>
+  %15 = fir.load %6#1 : !fir.ref<i32>
+  return %15 : i32
+}
+// Characters are not supported at the moment
+// CHECK-LABEL: _QFPtest_character
+// CHECK: hlfir.minloc
+
+
+func.func @_QPtest_parts(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "mask"}) -> f32 {
+  %c1 = arith.constant 1 : index
+  %c5 = arith.constant 5 : index
+  %c0 = arith.constant 0 : index
+  %c5_i32 = arith.constant 5 : i32
+  %0:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEmask"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1 = fir.alloca f32 {bindc_name = "test", uniq_name = "_QFtestEtest"}
+  %2:2 = hlfir.declare %1 {uniq_name = "_QFtestEtest"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %3:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEx"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg2: index):
+    %11 = hlfir.designate %0#0 (%arg2)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %12 = fir.load %11 : !fir.ref<i32>
+    %13 = arith.cmpi sge, %12, %c5_i32 : i32
+    %14 = fir.convert %13 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %14 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %3#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %3#0 (%c5:%c5:%c1)  shape %8 : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
+  hlfir.assign %7 to %9 : !hlfir.expr<1xi32>, !fir.box<!fir.array<1xi32>>
+  hlfir.destroy %7 : !hlfir.expr<1xi32>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  %10 = fir.load %2#1 : !fir.ref<f32>
+  return %10 : f32
+}
+// The result is assigned to a section of the input array; the minloc is still expanded into a loop
+// CHECK-LABEL: _QPtest_parts
+// CHECK: fir.do_loop %{{.*}} = %c0 to %{{.*}} step %c1 iter_args(%{{.*}} = %c2147483647_i32) -> (i32) {
+
diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 0bd6ac7c436ff2..cd059cc797a3f4 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -1760,6 +1760,7 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:           %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
 // CHECK:           %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
 // CHECK:           %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
 // CHECK:           %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
 // CHECK:           %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
 // CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
@@ -1768,7 +1769,6 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:           %[[FLAG_SET:.*]] = arith.constant 1 : i32
 // CHECK:           %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
 // CHECK:           fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
 // CHECK:           %[[MAX:.*]] = arith.constant 2147483647 : i32
 // CHECK:           %[[CINDEX_1:.*]] = arith.constant 1 : index
 // CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
@@ -1779,7 +1779,8 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<!fir.logical<4>>
 // CHECK:             %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
 // CHECK:             %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK:               fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
 // CHECK:               %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN]] : i32
@@ -2423,6 +2424,7 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:           %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
 // CHECK:           %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
 // CHECK:           %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
 // CHECK:           %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
 // CHECK:           %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
 // CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
@@ -2431,7 +2433,6 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:           %[[FLAG_SET:.*]] = arith.constant 1 : i32
 // CHECK:           %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
 // CHECK:           fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
 // CHECK:           %[[MAX:.*]] = arith.constant -2147483648 : i32
 // CHECK:           %[[CINDEX_1:.*]] = arith.constant 1 : index
 // CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
@@ -2442,7 +2443,8 @@ func.func @_QPtestmaxloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<!fir.logical<4>>
 // CHECK:             %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
 // CHECK:             %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK:               fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
 // CHECK:               %[[NEW_MIN:.*]] = arith.cmpi sgt, %[[INARR_ITEMVAL]], %[[MIN]] : i32



More information about the flang-commits mailing list