[flang-commits] [flang] 614cd72 - [Flang] Add Minloc to simplify intrinsics pass

Sacha Ballantyne via flang-commits flang-commits at lists.llvm.org
Mon Feb 27 03:37:01 PST 2023


Author: Sacha Ballantyne
Date: 2023-02-27T11:36:55Z
New Revision: 614cd721e1c379ab6926687fd81167bd8f9b0179

URL: https://github.com/llvm/llvm-project/commit/614cd721e1c379ab6926687fd81167bd8f9b0179
DIFF: https://github.com/llvm/llvm-project/commit/614cd721e1c379ab6926687fd81167bd8f9b0179.diff

LOG: [Flang] Add Minloc to simplify intrinsics pass

This patch adds minloc to the simplify intrinsics pass, supporting calls with KIND or MASK arguments while calls which have BACK, DIM or have a CHARACTER input array are rejected. This patch is targeting exchange2, and in benchmarks provides a ~11% improvement in performance.

Also included are some minor style changes / cleanup in simplifyIntrinsics.cpp.

Reviewed By: vzakhari

Differential Revision: https://reviews.llvm.org/D144103

Added: 
    

Modified: 
    flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
    flang/test/Transforms/simplifyintrinsics.fir

Removed: 
    


################################################################################
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index 8815e1cef6aef..53ab094ca02fb 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -22,8 +22,10 @@
 /// and small in size.
 //===----------------------------------------------------------------------===//
 
+#include "flang/Common/Fortran.h"
 #include "flang/Optimizer/Builder/BoxValue.h"
 #include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
 #include "flang/Optimizer/Builder/Todo.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
@@ -33,13 +35,17 @@
 #include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Matchers.h"
-#include "mlir/IR/TypeUtilities.h"
+#include "mlir/IR/Operation.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include <llvm/CodeGen/SelectionDAGNodes.h>
+#include <llvm/Support/ErrorHandling.h>
+#include <mlir/Dialect/Arith/IR/Arith.h>
+#include <mlir/IR/BuiltinTypes.h>
 #include <mlir/IR/Location.h>
 #include <mlir/IR/MLIRContext.h>
 #include <mlir/IR/Value.h>
@@ -94,6 +100,8 @@ class SimplifyIntrinsicsPass
   void simplifyLogicalDim1Reduction(fir::CallOp call,
                                     const fir::KindMapping &kindMap,
                                     GenReductionBodyTy genBodyFunc);
+  void simplifyMinlocReduction(fir::CallOp call,
+                               const fir::KindMapping &kindMap);
   void simplifyReductionBody(fir::CallOp call, const fir::KindMapping &kindMap,
                              GenReductionBodyTy genBodyFunc,
                              fir::FirOpBuilder &builder,
@@ -141,6 +149,112 @@ static mlir::FunctionType genNoneBoxType(fir::FirOpBuilder &builder,
                                  {elementType});
 }
 
+template <typename Op>
+Op expectOp(mlir::Value val) {
+  if (Op op = mlir::dyn_cast_or_null<Op>(val.getDefiningOp()))
+    return op;
+  LLVM_DEBUG(llvm::dbgs() << "Didn't find expected " << Op::getOperationName()
+                          << '\n');
+  return nullptr;
+}
+
+template <typename Op>
+static mlir::Value findDefSingle(fir::ConvertOp op) {
+  if (auto defOp = expectOp<Op>(op->getOperand(0))) {
+    return defOp.getResult();
+  }
+  return {};
+}
+
+template <typename... Ops>
+static mlir::Value findDef(fir::ConvertOp op) {
+  mlir::Value defOp;
+  // Loop over the operation types given to see if any match, exiting once
+  // a match is found. Cast to void is needed to avoid compiler complaining
+  // that the result of expression is unused
+  (void)((defOp = findDefSingle<Ops>(op), (defOp)) || ...);
+  return defOp;
+}
+
+static bool isOperandAbsent(mlir::Value val) {
+  if (auto op = expectOp<fir::ConvertOp>(val)) {
+    assert(op->getOperands().size() != 0);
+    return mlir::isa_and_nonnull<fir::AbsentOp>(
+        op->getOperand(0).getDefiningOp());
+  }
+  return false;
+}
+
+static bool isTrueOrNotConstant(mlir::Value val) {
+  if (auto op = expectOp<mlir::arith::ConstantOp>(val)) {
+    return !mlir::matchPattern(val, mlir::m_Zero());
+  }
+  return true;
+}
+
+static bool isZero(mlir::Value val) {
+  if (auto op = expectOp<fir::ConvertOp>(val)) {
+    assert(op->getOperands().size() != 0);
+    if (mlir::Operation *defOp = op->getOperand(0).getDefiningOp())
+      return mlir::matchPattern(defOp, mlir::m_Zero());
+  }
+  return false;
+}
+
+static mlir::Value findBoxDef(mlir::Value val) {
+  if (auto op = expectOp<fir::ConvertOp>(val)) {
+    assert(op->getOperands().size() != 0);
+    return findDef<fir::EmboxOp, fir::ReboxOp>(op);
+  }
+  return {};
+}
+
+static mlir::Value findMaskDef(mlir::Value val) {
+  if (auto op = expectOp<fir::ConvertOp>(val)) {
+    assert(op->getOperands().size() != 0);
+    return findDef<fir::EmboxOp, fir::ReboxOp, fir::AbsentOp>(op);
+  }
+  return {};
+}
+
+static unsigned getDimCount(mlir::Value val) {
+  // In order to find the dimensions count, we look for EmboxOp/ReboxOp
+  // and take the count from its *result* type. Note that in case
+  // of sliced emboxing the operand and the result of EmboxOp/ReboxOp
+  // have different types.
+  // Actually, we can take the box type from the operand of
+  // the first ConvertOp that has non-opaque box type that we meet
+  // going through the ConvertOp chain.
+  if (mlir::Value emboxVal = findBoxDef(val))
+    if (auto boxTy = emboxVal.getType().dyn_cast<fir::BoxType>())
+      if (auto seqTy = boxTy.getEleTy().dyn_cast<fir::SequenceType>())
+        return seqTy.getDimension();
+  return 0;
+}
+
+/// Given the call operation's box argument \p val, discover
+/// the element type of the underlying array object.
+/// \returns the element type or std::nullopt if the type cannot
+/// be reliably found.
+/// We expect that the argument is a result of fir.convert
+/// with the destination type of !fir.box<none>.
+static std::optional<mlir::Type> getArgElementType(mlir::Value val) {
+  mlir::Operation *defOp;
+  do {
+    defOp = val.getDefiningOp();
+    // Analyze only sequences of convert operations.
+    if (!mlir::isa<fir::ConvertOp>(defOp))
+      return std::nullopt;
+    val = defOp->getOperand(0);
+    // The convert operation is expected to convert from one
+    // box type to another box type.
+    auto boxType = val.getType().cast<fir::BoxType>();
+    auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType);
+    if (!elementType.isa<mlir::NoneType>())
+      return elementType;
+  } while (true);
+}
+
 using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(
     fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
     mlir::Value)>;
@@ -186,7 +300,7 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   mlir::Type resultType = funcOp.getResultTypes()[0];
   mlir::Value init = initVal(builder, loc, resultType);
 
-  llvm::SmallVector<mlir::Value, 15> bounds;
+  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
 
   assert(rank > 0 && "rank cannot be zero");
   mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
@@ -210,7 +324,7 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   // array's element.
   // The loops are generated such that the innermost loop processes
   // the 0 dimension.
-  llvm::SmallVector<mlir::Value, 15> indices;
+  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
   for (unsigned i = rank; 0 < i; --i) {
     mlir::Value step = one;
     mlir::Value loopCount = bounds[i - 1];
@@ -254,9 +368,201 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   // Return the reduction value from the function.
   builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
 }
+using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
+    fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+    mlir::Value, llvm::SmallVector<mlir::Value, Fortran::common::maxRank> &)>;
+
+static void
+genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
+                       InitValGeneratorTy initVal,
+                       MinlocBodyOpGeneratorTy genBody, unsigned rank,
+                       mlir::Type elementType, mlir::Location loc, bool hasMask,
+                       mlir::Type maskElemType, mlir::Value resultArr) {
+
+  mlir::IndexType idxTy = builder.getIndexType();
+
+  mlir::Block::BlockArgListType args = funcOp.front().getArguments();
+  mlir::Value arg = args[1];
+
+  mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
+
+  fir::SequenceType::Shape flatShape(rank,
+                                     fir::SequenceType::getUnknownExtent());
+  mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
+  mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+  mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);
+
+  mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
+  mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+  mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
+  mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
+  builder.create<fir::StoreOp>(loc, zero, flagRef);
+
+  mlir::Value mask;
+  if (hasMask) {
+    mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
+    mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
+    mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, args[2]);
+  }
+
+  mlir::Value init = initVal(builder, loc, elementType);
+  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
+
+  assert(rank > 0 && "rank cannot be zero");
+  mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+
+  // Compute all the upper bounds before the loop nest.
+  // It is not strictly necessary for performance, since the loop nest
+  // does not have any store operations and any LICM optimization
+  // should be able to optimize the redundancy.
+  for (unsigned i = 0; i < rank; ++i) {
+    mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
+    auto dims =
+        builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
+    mlir::Value len = dims.getResult(1);
+    // We use C indexing here, so len-1 as loopcount
+    mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
+    bounds.push_back(loopCount);
+  }
+  // Create a loop nest consisting of OP operations.
+  // Collect the loops' induction variables into indices array,
+  // which will be used in the innermost loop to load the input
+  // array's element.
+  // The loops are generated such that the innermost loop processes
+  // the 0 dimension.
+  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
+  for (unsigned i = rank; 0 < i; --i) {
+    mlir::Value step = one;
+    mlir::Value loopCount = bounds[i - 1];
+    auto loop =
+        builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
+                                      /*finalCountValue=*/false, init);
+    init = loop.getRegionIterArgs()[0];
+    indices.push_back(loop.getInductionVar());
+    // Set insertion point to the loop body so that the next loop
+    // is inserted inside the current one.
+    builder.setInsertionPointToStart(loop.getBody());
+  }
+
+  // Reverse the indices such that they are ordered as:
+  //   <dim-0-idx, dim-1-idx, ...>
+  std::reverse(indices.begin(), indices.end());
+  // We are in the innermost loop: generate the reduction body.
+  if (hasMask) {
+    mlir::Type logicalRef = builder.getRefType(maskElemType);
+    mlir::Value maskAddr =
+        builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
+    mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
+
+    // fir::IfOp requires argument to be I1 - won't accept logical or any other
+    // Integer.
+    mlir::Type ifCompatType = builder.getI1Type();
+    mlir::Value ifCompatElem =
+        builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
+
+    llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
+    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+                                               /*withElseRegion=*/true);
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  }
+
+  // Set flag that mask was true at some point
+  builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+  mlir::Type eleRefTy = builder.getRefType(elementType);
+  mlir::Value addr =
+      builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+  mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
+  mlir::Value reductionVal =
+      genBody(builder, loc, elementType, elem, init, indices);
+
+  if (hasMask) {
+    fir::IfOp ifOp =
+        mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
+    builder.create<fir::ResultOp>(loc, reductionVal);
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    builder.create<fir::ResultOp>(loc, init);
+    reductionVal = ifOp.getResult(0);
+    builder.setInsertionPointAfter(ifOp);
+  }
+
+  // Unwind the loop nest and insert ResultOp on each level
+  // to return the updated value of the reduction to the enclosing
+  // loops.
+  for (unsigned i = 0; i < rank; ++i) {
+    auto result = builder.create<fir::ResultOp>(loc, reductionVal);
+    // Proceed to the outer loop.
+    auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
+    reductionVal = loop.getResult(0);
+    // Set insertion point after the loop operation that we have
+    // just processed.
+    builder.setInsertionPointAfter(loop.getOperation());
+  }
+  // End of loop nest. The insertion point is after the outermost loop.
+  if (fir::IfOp ifOp =
+          mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
+    builder.create<fir::ResultOp>(loc, reductionVal);
+    builder.setInsertionPointAfter(ifOp);
+    // Redefine flagSet to escape scope of ifOp
+    flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+    reductionVal = ifOp.getResult(0);
+  }
+
+  // Check for case where array was full of max values.
+  // flag will be 0 if mask was never true, 1 if mask was true as some point,
+  // this is needed to avoid catching cases where we didn't access any elements
+  // e.g. mask=.FALSE.
+  mlir::Value flagValue =
+      builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
+  mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
+      loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
+  fir::IfOp ifMaskTrueOp =
+      builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
+  builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());
+
+  mlir::Value testInit = initVal(builder, loc, elementType);
+  fir::IfOp ifMinSetOp;
+  if (elementType.isa<mlir::FloatType>()) {
+    mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
+        loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
+    ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
+                                           /*withElseRegion*/ false);
+  } else {
+    mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
+        loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
+    ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
+                                           /*withElseRegion*/ false);
+  }
+  builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());
+
+  // Load output array with 1s instead of 0s
+  for (unsigned int i = 0; i < rank; ++i) {
+    mlir::Type resultRefTy = builder.getRefType(resultElemType);
+    // mlir::Value one = builder.createIntegerConstant(loc, resultElemType, 1);
+    mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
+    mlir::Value resultElemAddr =
+        builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
+    builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
+  }
+  builder.setInsertionPointAfter(ifMaskTrueOp);
+  // Store newly created output array to the reference passed in
+  fir::SequenceType::Shape resultShape(1, rank);
+  mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemType);
+  mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
+  mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
+  mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
+
+  mlir::Value outputArrNone = args[0];
+  mlir::Value outputArr =
+      builder.create<fir::ConvertOp>(loc, outputRefTy, outputArrNone);
+
+  // Store nearly created array to output array
+  builder.create<fir::StoreOp>(loc, resultArr, outputArr);
+  builder.create<mlir::func::ReturnOp>(loc);
+}
 
 static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
-                                                  mlir::Location,
+                                                  mlir::Location loc,
                                                   mlir::Value reductionVal) {
   return {reductionVal};
 }
@@ -441,6 +747,144 @@ static void genRuntimeAllBody(fir::FirOpBuilder &builder,
       loc);
 }
 
+static mlir::FunctionType genRuntimeMinlocType(fir::FirOpBuilder &builder,
+                                               unsigned int rank) {
+  mlir::Type boxType = fir::BoxType::get(builder.getNoneType());
+  mlir::Type boxRefType = builder.getRefType(boxType);
+
+  return mlir::FunctionType::get(builder.getContext(),
+                                 {boxRefType, boxType, boxType}, {});
+}
+
+static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
+                                 mlir::func::FuncOp &funcOp, unsigned rank,
+                                 int maskRank, mlir::Type elementType,
+                                 mlir::Type maskElemType,
+                                 mlir::Type resultElemTy) {
+  auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
+                 mlir::Type elementType) {
+    if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
+      const llvm::fltSemantics &sem = ty.getFloatSemantics();
+      return builder.createRealConstant(
+          loc, elementType, llvm::APFloat::getLargest(sem, /*Negative=*/false));
+    }
+    unsigned bits = elementType.getIntOrFloatBitWidth();
+    int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+    return builder.createIntegerConstant(loc, elementType, maxInt);
+  };
+
+  mlir::Location loc = mlir::UnknownLoc::get(builder.getContext());
+  builder.setInsertionPointToEnd(funcOp.addEntryBlock());
+
+  mlir::Value mask = funcOp.front().getArgument(2);
+
+  // Set up result array in case of early exit / 0 length array
+  mlir::IndexType idxTy = builder.getIndexType();
+  mlir::Type resultTy = fir::SequenceType::get(rank, resultElemTy);
+  mlir::Type resultHeapTy = fir::HeapType::get(resultTy);
+  mlir::Type resultBoxTy = fir::BoxType::get(resultHeapTy);
+
+  mlir::Value returnValue = builder.createIntegerConstant(loc, resultElemTy, 0);
+  mlir::Value resultArrSize = builder.createIntegerConstant(loc, idxTy, rank);
+
+  mlir::Value resultArrInit = builder.create<fir::AllocMemOp>(loc, resultTy);
+  mlir::Value resultArrShape = builder.create<fir::ShapeOp>(loc, resultArrSize);
+  mlir::Value resultArr = builder.create<fir::EmboxOp>(
+      loc, resultBoxTy, resultArrInit, resultArrShape);
+
+  mlir::Type resultRefTy = builder.getRefType(resultElemTy);
+
+  for (unsigned int i = 0; i < rank; ++i) {
+    mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
+    mlir::Value resultElemAddr =
+        builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
+    builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
+  }
+
+  auto genBodyOp =
+      [&rank, &resultArr](
+          fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType,
+          mlir::Value elem1, mlir::Value elem2,
+          llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices)
+      -> mlir::Value {
+    mlir::Value cmp;
+    if (elementType.isa<mlir::FloatType>()) {
+      cmp = builder.create<mlir::arith::CmpFOp>(
+          loc, mlir::arith::CmpFPredicate::OLT, elem1, elem2);
+    } else if (elementType.isa<mlir::IntegerType>()) {
+      cmp = builder.create<mlir::arith::CmpIOp>(
+          loc, mlir::arith::CmpIPredicate::slt, elem1, elem2);
+    } else {
+      llvm_unreachable("unsupported type");
+    }
+
+    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
+                                               /*withElseRegion*/ true);
+
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
+    mlir::Type returnRefTy = builder.getRefType(resultElemTy);
+    mlir::IndexType idxTy = builder.getIndexType();
+
+    mlir::Value one = builder.createIntegerConstant(loc, resultElemTy, 1);
+
+    for (unsigned int i = 0; i < rank; ++i) {
+      mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
+      mlir::Value resultElemAddr =
+          builder.create<fir::CoordinateOp>(loc, returnRefTy, resultArr, index);
+      mlir::Value convert =
+          builder.create<fir::ConvertOp>(loc, resultElemTy, indices[i]);
+      mlir::Value fortranIndex =
+          builder.create<mlir::arith::AddIOp>(loc, convert, one);
+      builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
+    }
+    builder.create<fir::ResultOp>(loc, elem1);
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    builder.create<fir::ResultOp>(loc, elem2);
+    builder.setInsertionPointAfter(ifOp);
+    return ifOp.getResult(0);
+  };
+
+  // if mask is a logical scalar, we can check its value before the main loop
+  // and either ignore the fact it is there or exit early.
+  if (maskRank == 0) {
+    mlir::Type logical = builder.getI1Type();
+    mlir::IndexType idxTy = builder.getIndexType();
+
+    fir::SequenceType::Shape singleElement(1, 1);
+    mlir::Type arrTy = fir::SequenceType::get(singleElement, logical);
+    mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+    mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, mask);
+
+    mlir::Value indx = builder.createIntegerConstant(loc, idxTy, 0);
+    mlir::Type logicalRefTy = builder.getRefType(logical);
+    mlir::Value condAddr =
+        builder.create<fir::CoordinateOp>(loc, logicalRefTy, array, indx);
+    mlir::Value cond = builder.create<fir::LoadOp>(loc, condAddr);
+
+    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cond,
+                                               /*withElseRegion=*/true);
+
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    mlir::Value basicValue;
+    if (elementType.isa<mlir::IntegerType>()) {
+      basicValue = builder.createIntegerConstant(loc, elementType, 0);
+    } else {
+      basicValue = builder.createRealConstant(loc, elementType, 0);
+    }
+    builder.create<fir::ResultOp>(loc, basicValue);
+
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  }
+
+  // bit of a hack - maskRank is set to -1 for absent mask arg, so don't
+  // generate high level mask or element by element mask.
+  bool hasMask = maskRank > 0;
+
+  genMinlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType,
+                         loc, hasMask, maskElemType, resultArr);
+}
+
 /// Generate function type for the simplified version of RTNAME(DotProduct)
 /// operating on the given \p elementType.
 static mlir::FunctionType genRuntimeDotType(fir::FirOpBuilder &builder,
@@ -594,83 +1038,6 @@ mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction(
   return newFunc;
 }
 
-fir::ConvertOp expectConvertOp(mlir::Value val) {
-  if (fir::ConvertOp op =
-          mlir::dyn_cast_or_null<fir::ConvertOp>(val.getDefiningOp()))
-    return op;
-  LLVM_DEBUG(llvm::dbgs() << "Didn't find expected fir::ConvertOp\n");
-  return nullptr;
-}
-
-static bool isOperandAbsent(mlir::Value val) {
-  if (auto op = expectConvertOp(val)) {
-    assert(op->getOperands().size() != 0);
-    return mlir::isa_and_nonnull<fir::AbsentOp>(
-        op->getOperand(0).getDefiningOp());
-  }
-  return false;
-}
-
-static bool isZero(mlir::Value val) {
-  if (auto op = expectConvertOp(val)) {
-    assert(op->getOperands().size() != 0);
-    if (mlir::Operation *defOp = op->getOperand(0).getDefiningOp())
-      return mlir::matchPattern(defOp, mlir::m_Zero());
-  }
-  return false;
-}
-
-static mlir::Value findBoxDef(mlir::Value val) {
-  if (auto op = expectConvertOp(val)) {
-    assert(op->getOperands().size() != 0);
-    if (auto box = mlir::dyn_cast_or_null<fir::EmboxOp>(
-            op->getOperand(0).getDefiningOp()))
-      return box.getResult();
-    if (auto box = mlir::dyn_cast_or_null<fir::ReboxOp>(
-            op->getOperand(0).getDefiningOp()))
-      return box.getResult();
-  }
-  return {};
-}
-
-static unsigned getDimCount(mlir::Value val) {
-  // In order to find the dimensions count, we look for EmboxOp/ReboxOp
-  // and take the count from its *result* type. Note that in case
-  // of sliced emboxing the operand and the result of EmboxOp/ReboxOp
-  // have different types.
-  // Actually, we can take the box type from the operand of
-  // the first ConvertOp that has non-opaque box type that we meet
-  // going through the ConvertOp chain.
-  if (mlir::Value emboxVal = findBoxDef(val))
-    if (auto boxTy = emboxVal.getType().dyn_cast<fir::BoxType>())
-      if (auto seqTy = boxTy.getEleTy().dyn_cast<fir::SequenceType>())
-        return seqTy.getDimension();
-  return 0;
-}
-
-/// Given the call operation's box argument \p val, discover
-/// the element type of the underlying array object.
-/// \returns the element type or std::nullopt if the type cannot
-/// be reliably found.
-/// We expect that the argument is a result of fir.convert
-/// with the destination type of !fir.box<none>.
-static std::optional<mlir::Type> getArgElementType(mlir::Value val) {
-  mlir::Operation *defOp;
-  do {
-    defOp = val.getDefiningOp();
-    // Analyze only sequences of convert operations.
-    if (!mlir::isa<fir::ConvertOp>(defOp))
-      return std::nullopt;
-    val = defOp->getOperand(0);
-    // The convert operation is expected to convert from one
-    // box type to another box type.
-    auto boxType = val.getType().cast<fir::BoxType>();
-    auto elementType = fir::unwrapSeqOrBoxedSeqType(boxType);
-    if (!elementType.isa<mlir::NoneType>())
-      return elementType;
-  } while (true);
-}
-
 void SimplifyIntrinsicsPass::simplifyIntOrFloatReduction(
     fir::CallOp call, const fir::KindMapping &kindMap,
     GenReductionBodyTy genBodyFunc) {
@@ -741,8 +1108,7 @@ void SimplifyIntrinsicsPass::simplifyLogicalDim0Reduction(
   // Treating logicals as integers makes things a lot easier
   fir::LogicalType logicalType = {elementType.dyn_cast<fir::LogicalType>()};
   fir::KindTy kind = logicalType.getFKind();
-  mlir::Type intElementType =
-      mlir::IntegerType::get(builder.getContext(), kind * 8);
+  mlir::Type intElementType = builder.getIntegerType(kind * 8);
 
   // Mangle kind into function name as it is not done by default
   std::string funcName =
@@ -777,8 +1143,7 @@ void SimplifyIntrinsicsPass::simplifyLogicalDim1Reduction(
   // Treating logicals as integers makes things a lot easier
   fir::LogicalType logicalType = {elementType.dyn_cast<fir::LogicalType>()};
   fir::KindTy kind = logicalType.getFKind();
-  mlir::Type intElementType =
-      mlir::IntegerType::get(builder.getContext(), kind * 8);
+  mlir::Type intElementType = builder.getIntegerType(kind * 8);
 
   // Mangle kind into function name as it is not done by default
   std::string funcName =
@@ -790,6 +1155,87 @@ void SimplifyIntrinsicsPass::simplifyLogicalDim1Reduction(
                         intElementType);
 }
 
+void SimplifyIntrinsicsPass::simplifyMinlocReduction(
+    fir::CallOp call, const fir::KindMapping &kindMap) {
+
+  mlir::Operation::operand_range args = call.getArgs();
+
+  mlir::Value back = args[6];
+  if (isTrueOrNotConstant(back))
+    return;
+
+  mlir::Value mask = args[5];
+  mlir::Value maskDef = findMaskDef(mask);
+
+  // maskDef is set to NULL when the defining op is not one we accept.
+  // This tends to be because it is a selectOp, in which case let the
+  // runtime deal with it.
+  if (maskDef == NULL)
+    return;
+
+  mlir::SymbolRefAttr callee = call.getCalleeAttr();
+  mlir::StringRef funcNameBase = callee.getLeafReference().getValue();
+  unsigned rank = getDimCount(args[1]);
+  if (funcNameBase.ends_with("Dim") || !(rank > 0))
+    return;
+
+  fir::FirOpBuilder builder{getSimplificationBuilder(call, kindMap)};
+  mlir::Location loc = call.getLoc();
+  auto inputBox = findBoxDef(args[1]);
+  mlir::Type inputType = hlfir::getFortranElementType(inputBox.getType());
+
+  if (inputType.isa<fir::CharacterType>())
+    return;
+
+  int maskRank;
+  fir::KindTy kind = 0;
+  mlir::Type logicalConvertType = builder.getI1Type();
+  if (isOperandAbsent(mask)) {
+    maskRank = -1;
+  } else {
+    maskRank = getDimCount(mask);
+    mlir::Type maskElemTy = hlfir::getFortranElementType(maskDef.getType());
+    fir::LogicalType maskLogiTy = {maskElemTy.dyn_cast<fir::LogicalType>()};
+    kind = maskLogiTy.getFKind();
+    logicalConvertType = builder.getIntegerType(kind * 8);
+  }
+
+  mlir::Operation *outputDef = args[0].getDefiningOp();
+  mlir::Value outputAlloc = outputDef->getOperand(0);
+  mlir::Type outType = hlfir::getFortranElementType(outputAlloc.getType());
+
+  std::string fmfString{getFastMathFlagsString(builder)};
+  std::string funcName =
+      (mlir::Twine{callee.getLeafReference().getValue(), "x"} +
+       mlir::Twine{rank} +
+       (maskRank >= 0
+            ? "_Logical" + mlir::Twine{kind} + "x" + mlir::Twine{maskRank}
+            : "") +
+       "_")
+          .str();
+
+  llvm::raw_string_ostream nameOS(funcName);
+  outType.print(nameOS);
+  nameOS << '_' << fmfString;
+
+  auto typeGenerator = [rank](fir::FirOpBuilder &builder) {
+    return genRuntimeMinlocType(builder, rank);
+  };
+  auto bodyGenerator = [rank, maskRank, inputType, logicalConvertType,
+                        outType](fir::FirOpBuilder &builder,
+                                 mlir::func::FuncOp &funcOp) {
+    genRuntimeMinlocBody(builder, funcOp, rank, maskRank, inputType,
+                         logicalConvertType, outType);
+  };
+
+  mlir::func::FuncOp newFunc =
+      getOrCreateFunction(builder, funcName, typeGenerator, bodyGenerator);
+  builder.create<fir::CallOp>(loc, newFunc,
+                              mlir::ValueRange{args[0], args[1], args[5]});
+  call->dropAllReferences();
+  call->erase();
+}
+
 void SimplifyIntrinsicsPass::simplifyReductionBody(
     fir::CallOp call, const fir::KindMapping &kindMap,
     GenReductionBodyTy genBodyFunc, fir::FirOpBuilder &builder,
@@ -924,6 +1370,10 @@ void SimplifyIntrinsicsPass::runOnOperation() {
           simplifyLogicalDim1Reduction(call, kindMap, genRuntimeAllBody);
           return;
         }
+        if (funcName.startswith(RTNAME_STRING(Minloc))) {
+          simplifyMinlocReduction(call, kindMap);
+          return;
+        }
       }
     }
   });

diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 22f55f3490b3c..806eeb2bd06ae 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -1636,3 +1636,666 @@ func.func private @_FortranAAllDim(!fir.ref<!fir.box<none>>, !fir.box<none>, i32
 // CHECK-NOT        fir.call @_FortranAAllDimLogical4x1_simplified({{.*}})
 // CHECK:           fir.call @_FortranAAllDim({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32) -> none
 // CHECK-NOT        fir.call @_FortranAAllDimLogical4x1_simplified({{.*}})
+
+// -----
+// Check Minloc simplifies correctly for 1D case with 1D mask, I32 input: the @_FortranAMinlocInteger4 runtime call below should be replaced by a call to a generated "_simplified" function that takes only the result, array and mask boxes.
+func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}, %arg1: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> // box that receives the MINLOC result; converted and passed as the first call argument
+  %c10 = arith.constant 10 : index
+  %c10_0 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
+  %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_works1d", uniq_name = "_QFtestminloc_works1dEtestminloc_works1d"}
+  %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>
+  %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+  %6 = fir.shape %c10_0 : (index) -> !fir.shape<1>
+  %7 = fir.embox %arg1(%6) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+  %c4 = arith.constant 4 : index
+  %false = arith.constant false // passed as the last call argument (BACK)
+  %8 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %c0 = arith.constant 0 : index
+  %9 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %10 = fir.embox %8(%9) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %10 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %11 = fir.address_of(@_QQcl.ea5bcf7f706678e1796661f8916f3379) : !fir.ref<!fir.char<1,55>>
+  %c5_i32 = arith.constant 5 : i32
+  %12 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+  %13 = fir.convert %5 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+  %14 = fir.convert %c4 : (index) -> i32
+  %15 = fir.convert %11 : (!fir.ref<!fir.char<1,55>>) -> !fir.ref<i8>
+  %16 = fir.convert %7 : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+  %17 = fir.call @_FortranAMinlocInteger4(%12, %13, %14, %15, %c5_i32, %16, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none // runtime call the pass is expected to rewrite
+  %18 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %c0_1 = arith.constant 0 : index
+  %19:3 = fir.box_dims %18, %c0_1 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %20 = fir.box_addr %18 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+  %21 = fir.shape_shift %19#0, %19#1 : (index, index) -> !fir.shapeshift<1>
+  %22 = fir.array_load %20(%21) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+  %c1_2 = arith.constant 1 : index
+  %c0_3 = arith.constant 0 : index
+  %23 = arith.subi %c1, %c1_2 : index
+  %24 = fir.do_loop %arg2 = %c0_3 to %23 step %c1_2 unordered iter_args(%arg3 = %3) -> (!fir.array<1xi32>) {
+    %26 = fir.array_fetch %22, %arg2 : (!fir.array<?xi32>, index) -> i32
+    %27 = fir.array_update %arg3, %26, %arg2 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %27 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %3, %24 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>
+  fir.freemem %20 : !fir.heap<!fir.array<?xi32>>
+  %25 = fir.load %1 : !fir.ref<!fir.array<1xi32>>
+  return %25 : !fir.array<1xi32>
+}
+
+// CHECK-LABEL:   func.func @_QPtestminloc_works1d(
+// CHECK-SAME:                                     %[[INARR:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"},
+// CHECK-SAME:                                     %[[MASK:.*]]: !fir.ref<!fir.array<10x!fir.logical<4>>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+// CHECK:           %[[OUTARR:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK:           %[[SIZE10_0:.*]] = arith.constant 10 : index
+// CHECK:           %[[SIZE10_1:.*]] = arith.constant 10 : index
+// CHECK:           %[[INARR_SHAPE:.*]] = fir.shape %[[SIZE10_0]] : (index) -> !fir.shape<1>
+// CHECK:           %[[BOX_INARR:.*]] = fir.embox %[[INARR]](%[[INARR_SHAPE]]) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+// CHECK:           %[[MASK_SHAPE:.*]] = fir.shape %[[SIZE10_1]] : (index) -> !fir.shape<1>
+// CHECK:           %[[BOX_MASK:.*]] = fir.embox %[[MASK]](%[[MASK_SHAPE]]) : (!fir.ref<!fir.array<10x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.logical<4>>>
+// CHECK:           %[[REF_BOX_OUTARR_NONE:.*]] = fir.convert %[[OUTARR]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK:           %[[BOX_INARR_NONE:.*]] = fir.convert %[[BOX_INARR]] : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+// CHECK:           %[[BOX_MASK_NONE:.*]] = fir.convert %[[BOX_MASK]] : (!fir.box<!fir.array<10x!fir.logical<4>>>) -> !fir.box<none>
+// CHECK:           fir.call @_FortranAMinlocInteger4x1_Logical4x1_i32_contract_simplified(%[[REF_BOX_OUTARR_NONE]], %[[BOX_INARR_NONE]], %[[BOX_MASK_NONE]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// CHECK-LABEL:   func.func private @_FortranAMinlocInteger4x1_Logical4x1_i32_contract_simplified(
+// CHECK-SAME:             %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref<!fir.box<none>>,
+// CHECK-SAME:             %[[BOX_INARR_NONE:.*]]: !fir.box<none>,
+// CHECK-SAME:             %[[BOX_MASK_NONE:.*]]: !fir.box<none>) attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[FLAG_ALLOC:.*]] = fir.alloca i32
+// CHECK:           %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i32
+// CHECK:           %[[OUTARR_SIZE:.*]] = arith.constant 1 : index
+// CHECK:           %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
+// CHECK:           %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK:           %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
+// CHECK:           %[[CINDEX_0:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_INARR:.*]] = fir.convert %[[BOX_INARR_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[FLAG_SET:.*]] = arith.constant 1 : i32
+// CHECK:           %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
+// CHECK:           fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?xi32>>
+// CHECK:           %[[MAX:.*]] = arith.constant 2147483647 : i32
+// CHECK:           %[[CINDEX_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX0]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[CINDEX_1]] : index
+// CHECK:           %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[CINDEX_0]] to %[[EXTENT]] step %[[CINDEX_1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (i32) {
+// CHECK:             %[[MASK_ITEM:.*]] = fir.coordinate_of %[[BOX_MASK]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:             %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<i32>
+// CHECK:             %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (i32) -> i1
+// CHECK:             %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
+// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
+// CHECK:               %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN]] : i32
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) {
+// CHECK:                 %[[ONE:.*]] = arith.constant 1 : i32
+// CHECK:                 %[[OUTARR_IDX:.*]] = arith.constant 0 : index
+// CHECK:                 %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[ITER_I32:.*]] = fir.convert %[[ITER]] : (index) -> i32
+// CHECK:                 %[[FORTRAN_IDX:.*]] = arith.addi %[[ITER_I32]], %[[ONE]] : i32
+// CHECK:                 fir.store %[[FORTRAN_IDX]] to %[[OUTARR_ITEM]] : !fir.ref<i32>
+// CHECK:                 fir.result %[[INARR_ITEMVAL]] : i32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[MIN]] : i32
+// CHECK:               }
+// CHECK:               fir.result %[[IF_NEW_MIN:.*]] : i32
+// CHECK:             } else {
+// CHECK:               fir.result %[[MIN]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[IF_MASK:.*]] : i32
+// CHECK:           }
+// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i32
+// CHECK:           fir.if %[[FLAG_WAS_SET]] {
+// CHECK:             %[[TEST_MAX:.*]] = arith.constant 2147483647 : i32
+// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32
+// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
+// CHECK:               %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
+// CHECK:               %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
+// CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+// Check Minloc simplifies correctly for 2D case with no mask and I64 Int as result: the third call argument is 8 and the result box holds i64 elements; the absent mask box is still forwarded to the generated "_simplified" function.
+
+func.func @_QPtestminloc_works2d_nomask(%arg0: !fir.ref<!fir.array<10x10xi32>> {fir.bindc_name = "a"}) -> !fir.array<2xi32> {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi64>>> // box that receives the MINLOC result; converted and passed as the first call argument
+  %c10 = arith.constant 10 : index
+  %c10_0 = arith.constant 10 : index
+  %c2 = arith.constant 2 : index
+  %1 = fir.alloca !fir.array<2xi32> {bindc_name = "testminloc_works2d_nomask", uniq_name = "_QFtestminloc_works2d_nomaskEtestminloc_works2d_nomask"}
+  %2 = fir.shape %c2 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> !fir.array<2xi32>
+  %4 = fir.shape %c10, %c10_0 : (index, index) -> !fir.shape<2>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10x10xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<10x10xi32>>
+  %c8_i32 = arith.constant 8 : i32
+  %6 = fir.absent !fir.box<i1> // no MASK is supplied in this test
+  %false = arith.constant false // passed as the last call argument (BACK)
+  %7 = fir.zero_bits !fir.heap<!fir.array<?xi64>>
+  %c0 = arith.constant 0 : index
+  %8 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %9 = fir.embox %7(%8) : (!fir.heap<!fir.array<?xi64>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi64>>>
+  fir.store %9 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi64>>>>
+  %10 = fir.address_of(@_QQcl.cba8b79c45ccae77d79d66a39ac99823) : !fir.ref<!fir.char<1,62>>
+  %c4_i32 = arith.constant 4 : i32
+  %11 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi64>>>>) -> !fir.ref<!fir.box<none>>
+  %12 = fir.convert %5 : (!fir.box<!fir.array<10x10xi32>>) -> !fir.box<none>
+  %13 = fir.convert %10 : (!fir.ref<!fir.char<1,62>>) -> !fir.ref<i8>
+  %14 = fir.convert %6 : (!fir.box<i1>) -> !fir.box<none>
+  %15 = fir.call @_FortranAMinlocInteger4(%11, %12, %c8_i32, %13, %c4_i32, %14, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none // runtime call the pass is expected to rewrite
+  %16 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi64>>>>
+  %c0_1 = arith.constant 0 : index
+  %17:3 = fir.box_dims %16, %c0_1 : (!fir.box<!fir.heap<!fir.array<?xi64>>>, index) -> (index, index, index)
+  %18 = fir.box_addr %16 : (!fir.box<!fir.heap<!fir.array<?xi64>>>) -> !fir.heap<!fir.array<?xi64>>
+  %19 = fir.shape_shift %17#0, %17#1 : (index, index) -> !fir.shapeshift<1>
+  %20 = fir.array_load %18(%19) : (!fir.heap<!fir.array<?xi64>>, !fir.shapeshift<1>) -> !fir.array<?xi64>
+  %c1 = arith.constant 1 : index
+  %c0_2 = arith.constant 0 : index
+  %21 = arith.subi %c2, %c1 : index
+  %22 = fir.do_loop %arg1 = %c0_2 to %21 step %c1 unordered iter_args(%arg2 = %3) -> (!fir.array<2xi32>) {
+    %24 = fir.array_fetch %20, %arg1 : (!fir.array<?xi64>, index) -> i64
+    %25 = fir.convert %24 : (i64) -> i32
+    %26 = fir.array_update %arg2, %25, %arg1 : (!fir.array<2xi32>, i32, index) -> !fir.array<2xi32>
+    fir.result %26 : !fir.array<2xi32>
+  }
+  fir.array_merge_store %3, %22 to %1 : !fir.array<2xi32>, !fir.array<2xi32>, !fir.ref<!fir.array<2xi32>>
+  fir.freemem %18 : !fir.heap<!fir.array<?xi64>>
+  %23 = fir.load %1 : !fir.ref<!fir.array<2xi32>>
+  return %23 : !fir.array<2xi32>
+}
+
+// CHECK-LABEL:   func.func @_QPtestminloc_works2d_nomask(
+// CHECK-SAME:                                     %[[INARR:.*]]: !fir.ref<!fir.array<10x10xi32>> {fir.bindc_name = "a"}) -> !fir.array<2xi32> {
+// CHECK:           %[[ABSENT_MASK:.*]] = fir.absent !fir.box<i1>
+// CHECK:           %[[ABSENT_MASK_NONE:.*]] = fir.convert %[[ABSENT_MASK]] : (!fir.box<i1>) -> !fir.box<none>
+// CHECK:           fir.call @_FortranAMinlocInteger4x2_i64_contract_simplified(%{{.*}}, %{{.*}}, %[[ABSENT_MASK_NONE]]) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// CHECK-LABEL:   func.func private @_FortranAMinlocInteger4x2_i64_contract_simplified(
+// CHECK-SAME:              %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref<!fir.box<none>>,
+// CHECK-SAME:              %[[BOX_INARR_NONE:.*]]: !fir.box<none>,
+// CHECK-SAME:              %[[BOX_MASK_NONE:.*]]: !fir.box<none>) attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[FLAG_ALLOC:.*]] = fir.alloca i64
+// CHECK:           %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i64
+// CHECK:           %[[OUTARR_SIZE:.*]] = arith.constant 2 : index
+// CHECK:           %[[OUTARR:.*]] = fir.allocmem !fir.array<2xi64>
+// CHECK:           %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<2xi64>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<2xi64>>>
+// CHECK:           %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
+// CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i64>
+// CHECK:           %[[OUTARR_IDX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX1]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
+// CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM1]] : !fir.ref<i64>
+// CHECK:           %[[C_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[BOX_INARR:.*]] = fir.convert %[[BOX_INARR_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x?xi32>>
+// CHECK:           %[[FLAG_SET:.*]] = arith.constant 1 : i64
+// CHECK:           %[[FLAG_EMPTY:.*]] = arith.constant 0 : i64
+// CHECK:           fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i64>
+// CHECK:           %[[MAX:.*]] = arith.constant 2147483647 : i32
+// CHECK:           %[[C_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS0:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX0]] : (!fir.box<!fir.array<?x?xi32>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT0:.*]] = arith.subi %[[DIMS0]]#1, %[[C_INDEX1]] : index
+// CHECK:           %[[DIM_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:           %[[DIMS1:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX1]] : (!fir.box<!fir.array<?x?xi32>>, index) -> (index, index, index)
+// CHECK:           %[[EXTENT1:.*]] = arith.subi %[[DIMS1]]#1, %[[C_INDEX1]] : index
+// CHECK:           %[[DOLOOP0:.*]] = fir.do_loop %[[ITER0:.*]] = %[[C_INDEX0]] to %[[EXTENT1]] step %[[C_INDEX1]] iter_args(%[[MIN0:.*]] = %[[MAX]]) -> (i32) {
+// CHECK:             %[[DOLOOP1:.*]] = fir.do_loop %[[ITER1:.*]] = %[[C_INDEX0]] to %[[EXTENT0]] step %[[C_INDEX1]] iter_args(%[[MIN1:.*]] = %[[MIN0]]) -> (i32) {
+// CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER1]], %[[ITER0]] : (!fir.box<!fir.array<?x?xi32>>, index, index) -> !fir.ref<i32>
+// CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
+// CHECK:               %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN1]] : i32
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (i32) {
+// CHECK:                 %[[ONE:.*]] = arith.constant 1 : i64
+// CHECK:                 %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
+// CHECK:                 %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
+// CHECK:                 %[[ITER1_I64:.*]] = fir.convert %[[ITER1]] : (index) -> i64
+// CHECK:                 %[[FORTRAN_IDX1:.*]] = arith.addi %[[ITER1_I64]], %[[ONE]] : i64
+// CHECK:                 fir.store %[[FORTRAN_IDX1]] to %[[OUTARR_ITEM0]] : !fir.ref<i64>
+// CHECK:                 %[[OUTARR_IDX1:.*]] = arith.constant 1 : index
+// CHECK:                 %[[OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX1]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
+// CHECK:                 %[[ITER0_I64:.*]] = fir.convert %[[ITER0]] : (index) -> i64
+// CHECK:                 %[[FORTRAN_IDX0:.*]] = arith.addi %[[ITER0_I64]], %[[ONE]] : i64
+// CHECK:                 fir.store %[[FORTRAN_IDX0]] to %[[OUTARR_ITEM1]] : !fir.ref<i64>
+// CHECK:                 fir.result %[[INARR_ITEMVAL]] : i32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[MIN1]] : i32
+// CHECK:               }
+// CHECK:               fir.result %[[IF_NEW_MIN:.*]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[DOLOOP1:.*]] : i32
+// CHECK:           }
+// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i64>
+// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_SET]] : i64
+// CHECK:           fir.if %[[FLAG_WAS_SET]] {
+// CHECK:             %[[TEST_MAX:.*]] = arith.constant 2147483647 : i32
+// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpi eq, %[[TEST_MAX]], %[[DO_LOOP:.*]] : i32
+// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
+// CHECK:               %[[FLAG_OUTARR_IDX0:.*]] = arith.constant 0 : index
+// CHECK:               %[[FLAG_OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
+// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM0]] : !fir.ref<i64>
+// CHECK:               %[[FLAG_OUTARR_IDX1:.*]] = arith.constant 1 : index
+// CHECK:               %[[FLAG_OUTARR_ITEM1:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX1]] : (!fir.box<!fir.heap<!fir.array<2xi64>>>, index) -> !fir.ref<i64>
+// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_OUTARR_ITEM1]] : !fir.ref<i64>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<2xi64>>>>
+// CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<2xi64>>>>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+// Check Minloc simplifies correctly for 1D case with scalar mask and f64 input: the mask is a boxed scalar !fir.logical<4>, which appears as "Logical4x0" in the name of the generated "_simplified" function.
+
+func.func @_QPtestminloc_works1d_scalarmask_f64(%arg0: !fir.ref<!fir.array<10xf64>> {fir.bindc_name = "a"}, %arg1: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> // box that receives the MINLOC result; converted and passed as the first call argument
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
+  %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_works1d_scalarmask_f64", uniq_name = "_QFtestminloc_works1d_scalarmask_f64Etestminloc_works1d_scalarmask_f64"}
+  %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>
+  %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10xf64>>, !fir.shape<1>) -> !fir.box<!fir.array<10xf64>>
+  %6 = fir.embox %arg1 : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>> // scalar mask boxed without a shape operand
+  %c4 = arith.constant 4 : index
+  %false = arith.constant false // passed as the last call argument (BACK)
+  %7 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %c0 = arith.constant 0 : index
+  %8 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %9 = fir.embox %7(%8) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %9 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %10 = fir.address_of(@_QQcl.66951c28c5b8bab5cdb25c1ac762b978) : !fir.ref<!fir.char<1,65>>
+  %c6_i32 = arith.constant 6 : i32
+  %11 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+  %12 = fir.convert %5 : (!fir.box<!fir.array<10xf64>>) -> !fir.box<none>
+  %13 = fir.convert %c4 : (index) -> i32
+  %14 = fir.convert %10 : (!fir.ref<!fir.char<1,65>>) -> !fir.ref<i8>
+  %15 = fir.convert %6 : (!fir.box<!fir.logical<4>>) -> !fir.box<none>
+  %16 = fir.call @_FortranAMinlocReal8(%11, %12, %13, %14, %c6_i32, %15, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none // runtime call the pass is expected to rewrite
+  %17 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %c0_0 = arith.constant 0 : index
+  %18:3 = fir.box_dims %17, %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %19 = fir.box_addr %17 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+  %20 = fir.shape_shift %18#0, %18#1 : (index, index) -> !fir.shapeshift<1>
+  %21 = fir.array_load %19(%20) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+  %c1_1 = arith.constant 1 : index
+  %c0_2 = arith.constant 0 : index
+  %22 = arith.subi %c1, %c1_1 : index
+  %23 = fir.do_loop %arg2 = %c0_2 to %22 step %c1_1 unordered iter_args(%arg3 = %3) -> (!fir.array<1xi32>) {
+    %25 = fir.array_fetch %21, %arg2 : (!fir.array<?xi32>, index) -> i32
+    %26 = fir.array_update %arg3, %25, %arg2 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %26 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %3, %23 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>
+  fir.freemem %19 : !fir.heap<!fir.array<?xi32>>
+  %24 = fir.load %1 : !fir.ref<!fir.array<1xi32>>
+  return %24 : !fir.array<1xi32>
+}
+
+// CHECK-LABEL:   func.func @_QPtestminloc_works1d_scalarmask_f64(
+// CHECK-SAME:                                     %[[INARR:.*]]: !fir.ref<!fir.array<10xf64>> {fir.bindc_name = "a"},
+// CHECK-SAME:                                     %[[MASK:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}) -> !fir.array<1xi32> {
+// CHECK:           fir.call @_FortranAMinlocReal8x1_Logical4x0_i32_contract_simplified({{.*}}, {{.*}}, {{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// CHECK-LABEL:   func.func private @_FortranAMinlocReal8x1_Logical4x0_i32_contract_simplified(
+// CHECK-SAME:             %[[REF_BOX_OUTARR_NONE:.*]]: !fir.ref<!fir.box<none>>,
+// CHECK-SAME:             %[[BOX_INARR_NONE:.*]]: !fir.box<none>,
+// CHECK-SAME:             %[[BOX_MASK_NONE:.*]]: !fir.box<none>) attributes {llvm.linkage = #llvm.linkage<linkonce_odr>} {
+// CHECK:           %[[FLAG_ALLOC:.*]] = fir.alloca i32
+// CHECK:           %[[INIT_OUT_IDX:.*]] = arith.constant 0 : i32
+// CHECK:           %[[OUTARR_SIZE:.*]] = arith.constant 1 : index
+// CHECK:           %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
+// CHECK:           %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
+// CHECK:           %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK:           %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
+// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<1xi1>>
+// CHECK:           %[[MASK_IDX0:.*]] = arith.constant 0 : index
+// CHECK:           %[[MASK_ITEM:.*]] = fir.coordinate_of %[[BOX_MASK]], %[[MASK_IDX0]] : (!fir.box<!fir.array<1xi1>>, index) -> !fir.ref<i1>
+// CHECK:           %[[MASK:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<i1>
+// CHECK:           %[[INIT_RES:.*]] = fir.if %[[MASK]] -> (f64) {
+// CHECK:             %[[C_INDEX0:.*]] = arith.constant 0 : index
+// CHECK:             %[[BOX_INARR:.*]] = fir.convert %[[BOX_INARR_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?xf64>>
+// CHECK:             %[[FLAG_SET:.*]] = arith.constant 1 : i32
+// CHECK:             %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
+// CHECK:             fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:             %[[MAX:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:             %[[C_INDEX1:.*]] = arith.constant 1 : index
+// CHECK:             %[[DIM_INDEX:.*]] = arith.constant 0 : index
+// CHECK:             %[[DIMS:.*]]:3 = fir.box_dims %[[BOX_INARR]], %[[DIM_INDEX]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:             %[[EXTENT:.*]] = arith.subi %[[DIMS]]#1, %[[C_INDEX1]] : index
+// CHECK:             %[[DOLOOP:.*]] = fir.do_loop %[[ITER:.*]] = %[[C_INDEX0]] to %[[EXTENT]] step %[[C_INDEX1]] iter_args(%[[MIN:.*]] = %[[MAX]]) -> (f64) {
+// CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<f64>
+// CHECK:               %[[NEW_MIN:.*]] = arith.cmpf olt, %[[INARR_ITEMVAL]], %[[MIN]] : f64
+// CHECK:               %[[IF_NEW_MIN:.*]] = fir.if %[[NEW_MIN]] -> (f64) {
+// CHECK:                 %[[ONE:.*]] = arith.constant 1 : i32
+// CHECK:                 %[[OUTARR_IDX:.*]] = arith.constant 0 : index
+// CHECK:                 %[[OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK:                 %[[ITER_I32:.*]] = fir.convert %[[ITER]] : (index) -> i32
+// CHECK:                 %[[FORTRAN_IDX:.*]] = arith.addi %[[ITER_I32]], %[[ONE]] : i32
+// CHECK:                 fir.store %[[FORTRAN_IDX]] to %[[OUTARR_ITEM]] : !fir.ref<i32>
+// CHECK:                 fir.result %[[INARR_ITEMVAL]] : f64
+// CHECK:               } else {
+// CHECK:                 fir.result %[[MIN]] : f64
+// CHECK:               }
+// CHECK:               fir.result %[[IF_NEW_MIN:.*]] : f64
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[FLAG_CHECK:.*]] = arith.constant 1 : i32
+// CHECK:           %[[FLAG_VAL:.*]] = fir.load %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:           %[[FLAG_WAS_SET:.*]] = arith.cmpi eq, %[[FLAG_VAL]], %[[FLAG_CHECK]] : i32
+// CHECK:           fir.if %[[FLAG_WAS_SET]] {
+// CHECK:             %[[TEST_MAX:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:             %[[INIT_NOT_CHANGED:.*]] = arith.cmpf oeq, %[[TEST_MAX]], %[[INIT_RES:.*]] : f64
+// CHECK:             fir.if %[[INIT_NOT_CHANGED]] {
+// CHECK:               %[[FLAG_OUTARR_IDX:.*]] = arith.constant 0 : index
+// CHECK:               %[[FLAG_OUTARR_ITEM:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[FLAG_OUTARR_IDX]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
+// CHECK:               fir.store %[[FLAG_CHECK]] to %[[FLAG_OUTARR_ITEM]] : !fir.ref<i32>
+// CHECK:             }
+// CHECK:           }
+// CHECK:           %[[REF_BOX_OUTARR:.*]] = fir.convert %[[REF_BOX_OUTARR_NONE]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
+// CHECK:           fir.store %[[BOX_OUTARR]] to %[[REF_BOX_OUTARR]] : !fir.ref<!fir.box<!fir.heap<!fir.array<1xi32>>>>
+// CHECK:           return
+// CHECK:         }
+
+// -----
+// Check Minloc is not simplified when BACK arg is set: %true is passed as the last call argument, so the original @_FortranAMinlocInteger4 runtime call must remain and no "_simplified" call may appear.
+
+func.func @_QPtestminloc_doesntwork1d_back(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>> // box that receives the MINLOC result; converted and passed as the first call argument
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
+  %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_back", uniq_name = "_QFtestminloc_doesntwork1d_backEtestminloc_doesntwork1d_back"}
+  %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>
+  %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>
+  %true = arith.constant true // BACK is set: passed as the last call argument
+  %6 = fir.absent !fir.box<i1> // no MASK is supplied in this test
+  %c4 = arith.constant 4 : index
+  %7 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %c0 = arith.constant 0 : index
+  %8 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %9 = fir.embox %7(%8) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %9 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %10 = fir.address_of(@_QQcl.3791f01d699716ba5914ae524c6a8dee) : !fir.ref<!fir.char<1,62>>
+  %c4_i32 = arith.constant 4 : i32
+  %11 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+  %12 = fir.convert %5 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+  %13 = fir.convert %c4 : (index) -> i32
+  %14 = fir.convert %10 : (!fir.ref<!fir.char<1,62>>) -> !fir.ref<i8>
+  %15 = fir.convert %6 : (!fir.box<i1>) -> !fir.box<none>
+  %16 = fir.call @_FortranAMinlocInteger4(%11, %12, %13, %14, %c4_i32, %15, %true) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none // runtime call that must be left in place by the pass
+  %17 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+  %c0_0 = arith.constant 0 : index
+  %18:3 = fir.box_dims %17, %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %19 = fir.box_addr %17 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+  %20 = fir.shape_shift %18#0, %18#1 : (index, index) -> !fir.shapeshift<1>
+  %21 = fir.array_load %19(%20) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+  %c1_1 = arith.constant 1 : index
+  %c0_2 = arith.constant 0 : index
+  %22 = arith.subi %c1, %c1_1 : index
+  %23 = fir.do_loop %arg1 = %c0_2 to %22 step %c1_1 unordered iter_args(%arg2 = %3) -> (!fir.array<1xi32>) {
+    %25 = fir.array_fetch %21, %arg1 : (!fir.array<?xi32>, index) -> i32
+    %26 = fir.array_update %arg2, %25, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %26 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %3, %23 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>
+  fir.freemem %19 : !fir.heap<!fir.array<?xi32>>
+  %24 = fir.load %1 : !fir.ref<!fir.array<1xi32>>
+  return %24 : !fir.array<1xi32>
+}
+
+// CHECK-LABEL:   func.func @_QPtestminloc_doesntwork1d_back(
+// CHECK-SAME:                                               %[[ARR:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {
+// CHECK-NOT:         fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+// CHECK:             fir.call @_FortranAMinlocInteger4({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NOT:         fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// -----
+// Check Minloc is not simplified when DIM arg is set
+
+func.func @_QPtestminloc_doesntwork1d_dim(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {  // Test input: the reduction is lowered to @_FortranAMinlocDim, which takes an extra i32 operand (%c1_i32).
+  %0 = fir.alloca !fir.box<!fir.heap<i32>>  // Storage for the result descriptor; here the boxed result is a scalar i32.
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
+  %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_dim", uniq_name = "_QFtestminloc_doesntwork1d_dimEtestminloc_doesntwork1d_dim"}  // Storage for the 1-element function result.
+  %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>  // Array value of %1; initial iter_arg of the loop below.
+  %4 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %5 = fir.embox %arg0(%4) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>  // Box the 10-element input array; second call operand (via %11).
+  %c1_i32 = arith.constant 1 : i32  // Fourth call operand; this is the DIM argument according to the test description.
+  %6 = fir.absent !fir.box<i1>  // Absent box, seventh call operand (via %14) -- presumably MASK; confirm against the runtime signature.
+  %c4 = arith.constant 4 : index  // Converted to i32 as the third call operand -- presumably the result KIND; confirm.
+  %false = arith.constant false  // Final call operand.
+  %7 = fir.zero_bits !fir.heap<i32>
+  %8 = fir.embox %7 : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+  fir.store %8 to %0 : !fir.ref<!fir.box<!fir.heap<i32>>>  // Result descriptor starts with a zero base address.
+  %9 = fir.address_of(@_QQcl.cfcf4329f25d06a4b02a0c8f532ee9df) : !fir.ref<!fir.char<1,61>>  // Character global passed as !fir.ref<i8> -- presumably the source file name for runtime diagnostics; confirm.
+  %c4_i32 = arith.constant 4 : i32
+  %10 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
+  %11 = fir.convert %5 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+  %12 = fir.convert %c4 : (index) -> i32
+  %13 = fir.convert %9 : (!fir.ref<!fir.char<1,61>>) -> !fir.ref<i8>
+  %14 = fir.convert %6 : (!fir.box<i1>) -> !fir.box<none>
+  %15 = fir.call @_FortranAMinlocDim(%10, %11, %12, %c1_i32, %13, %c4_i32, %14, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none  // The runtime call that the CHECK lines for this function expect to remain in the output.
+  %16 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<i32>>>  // Reload the result descriptor after the call.
+  %17 = fir.box_addr %16 : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+  %18 = fir.load %17 : !fir.heap<i32>  // Read the scalar result before its heap storage is released on the next line.
+  fir.freemem %17 : !fir.heap<i32>
+  %c1_0 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %19 = arith.subi %c1, %c1_0 : index  // Loop upper bound: 1 - 1 = 0.
+  %20 = fir.do_loop %arg1 = %c0 to %19 step %c1_0 unordered iter_args(%arg2 = %3) -> (!fir.array<1xi32>) {  // Store the scalar %18 into the function-result array value.
+    %22 = fir.array_update %arg2, %18, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %22 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %3, %20 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>  // Write the updated array value back to %1.
+  %21 = fir.load %1 : !fir.ref<!fir.array<1xi32>>
+  return %21 : !fir.array<1xi32>
+}
+// CHECK-LABEL:   func.func @_QPtestminloc_doesntwork1d_dim(
+// CHECK-SAME:                                             %[[ARR:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {
+// CHECK-NOT:         fir.call @_FortranAMinlocDimx1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+// CHECK:             fir.call @_FortranAMinlocDim({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NOT:         fir.call @_FortranAMinlocDimx1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// -----
+// Check Minloc is not simplified when dimension of inputArr is unknown
+
+func.func @_QPtestminloc_doesntwork1d_unknownsize(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {  // Test input: the array arrives already boxed as a function argument with a dynamic extent (?xi32); this function contains no fir.embox of the input.
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>  // Storage for the result descriptor; its address is the first call operand (via %9).
+  %c1 = arith.constant 1 : index
+  %1 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_unknownsize", uniq_name = "_QFtestminloc_doesntwork1d_unknownsizeEtestminloc_doesntwork1d_unknownsize"}  // Storage for the 1-element function result.
+  %2 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %3 = fir.array_load %1(%2) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>  // Array value of %1; initial iter_arg of the copy loop below.
+  %4 = fir.absent !fir.box<i1>  // Absent box, sixth call operand (via %13) -- presumably MASK; confirm against the runtime signature.
+  %c4 = arith.constant 4 : index  // Converted to i32 as the third call operand -- presumably the result KIND; confirm.
+  %false = arith.constant false  // Final call operand.
+  %5 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %c0 = arith.constant 0 : index
+  %6 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %7 = fir.embox %5(%6) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %7 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>  // Result descriptor starts with a zero base address and extent 0.
+  %8 = fir.address_of(@_QQcl.2064f5e9298c2127417d52b69eac898e) : !fir.ref<!fir.char<1,69>>  // Character global passed as !fir.ref<i8> -- presumably the source file name for runtime diagnostics; confirm.
+  %c4_i32 = arith.constant 4 : i32
+  %9 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+  %10 = fir.convert %arg0 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>  // The boxed argument is converted directly to the opaque box type; second call operand.
+  %11 = fir.convert %c4 : (index) -> i32
+  %12 = fir.convert %8 : (!fir.ref<!fir.char<1,69>>) -> !fir.ref<i8>
+  %13 = fir.convert %4 : (!fir.box<i1>) -> !fir.box<none>
+  %14 = fir.call @_FortranAMinlocInteger4(%9, %10, %11, %12, %c4_i32, %13, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none  // The runtime call that the CHECK lines for this function expect to remain in the output.
+  %15 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>  // Reload the result descriptor after the call.
+  %c0_0 = arith.constant 0 : index
+  %16:3 = fir.box_dims %15, %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %17 = fir.box_addr %15 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+  %18 = fir.shape_shift %16#0, %16#1 : (index, index) -> !fir.shapeshift<1>
+  %19 = fir.array_load %17(%18) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+  %c1_1 = arith.constant 1 : index
+  %c0_2 = arith.constant 0 : index
+  %20 = arith.subi %c1, %c1_1 : index  // Loop upper bound: 1 - 1 = 0.
+  %21 = fir.do_loop %arg1 = %c0_2 to %20 step %c1_1 unordered iter_args(%arg2 = %3) -> (!fir.array<1xi32>) {  // Copy from the call's result array into the function-result array value.
+    %23 = fir.array_fetch %19, %arg1 : (!fir.array<?xi32>, index) -> i32
+    %24 = fir.array_update %arg2, %23, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %24 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %3, %21 to %1 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>  // Write the updated array value back to %1.
+  fir.freemem %17 : !fir.heap<!fir.array<?xi32>>  // Release the heap buffer obtained from the result descriptor.
+  %22 = fir.load %1 : !fir.ref<!fir.array<1xi32>>
+  return %22 : !fir.array<1xi32>
+}
+// CHECK-LABEL:   func.func @_QPtestminloc_doesntwork1d_unknownsize(
+// CHECK-SAME:                                                      %[[ARR:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {
+// CHECK-NOT:         fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+// CHECK:             fir.call @_FortranAMinlocInteger4({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NOT:         fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// -----
+// Check Minloc is not simplified when inputArr is of CHARACTER type
+
+func.func @_QPtestminloc_doesntwork1d_chars(%arg0: !fir.boxchar<1> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {  // Test input: the array elements are !fir.char<1> and the reduction is lowered to @_FortranAMinlocCharacter.
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>  // Storage for the result descriptor; its address is the first call operand (via %13).
+  %1:2 = fir.unboxchar %arg0 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)  // Split the boxchar into a data reference (%1#0) and an index value (%1#1, unused below).
+  %2 = fir.convert %1#0 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<!fir.array<10x!fir.char<1>>>  // Reinterpret the character data as an array of 10 single-character elements.
+  %c10 = arith.constant 10 : index
+  %c1 = arith.constant 1 : index
+  %3 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_chars", uniq_name = "_QFtestminloc_doesntwork1d_charsEtestminloc_doesntwork1d_chars"}  // Storage for the 1-element function result.
+  %4 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %5 = fir.array_load %3(%4) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>  // Array value of %3; initial iter_arg of the copy loop below.
+  %6 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %7 = fir.embox %2(%6) : (!fir.ref<!fir.array<10x!fir.char<1>>>, !fir.shape<1>) -> !fir.box<!fir.array<10x!fir.char<1>>>  // Box the character array; second call operand (via %14).
+  %8 = fir.absent !fir.box<i1>  // Absent box, sixth call operand (via %17) -- presumably MASK; confirm against the runtime signature.
+  %c4 = arith.constant 4 : index  // Converted to i32 as the third call operand -- presumably the result KIND; confirm.
+  %false = arith.constant false  // Final call operand.
+  %9 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %c0 = arith.constant 0 : index
+  %10 = fir.shape %c0 : (index) -> !fir.shape<1>
+  %11 = fir.embox %9(%10) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %11 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>  // Result descriptor starts with a zero base address and extent 0.
+  %12 = fir.address_of(@_QQcl.74460ff3ef22ea53671c22344e1556b9) : !fir.ref<!fir.char<1,41>>  // Character global passed as !fir.ref<i8> -- presumably the source file name for runtime diagnostics; confirm.
+  %c4_i32 = arith.constant 4 : i32
+  %13 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+  %14 = fir.convert %7 : (!fir.box<!fir.array<10x!fir.char<1>>>) -> !fir.box<none>
+  %15 = fir.convert %c4 : (index) -> i32
+  %16 = fir.convert %12 : (!fir.ref<!fir.char<1,41>>) -> !fir.ref<i8>
+  %17 = fir.convert %8 : (!fir.box<i1>) -> !fir.box<none>
+  %18 = fir.call @_FortranAMinlocCharacter(%13, %14, %15, %16, %c4_i32, %17, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none  // The runtime call that the CHECK lines for this function expect to remain in the output.
+  %19 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>  // Reload the result descriptor after the call.
+  %c0_0 = arith.constant 0 : index
+  %20:3 = fir.box_dims %19, %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %21 = fir.box_addr %19 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+  %22 = fir.shape_shift %20#0, %20#1 : (index, index) -> !fir.shapeshift<1>
+  %23 = fir.array_load %21(%22) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+  %c1_1 = arith.constant 1 : index
+  %c0_2 = arith.constant 0 : index
+  %24 = arith.subi %c1, %c1_1 : index  // Loop upper bound: 1 - 1 = 0.
+  %25 = fir.do_loop %arg1 = %c0_2 to %24 step %c1_1 unordered iter_args(%arg2 = %5) -> (!fir.array<1xi32>) {  // Copy from the call's result array into the function-result array value.
+    %27 = fir.array_fetch %23, %arg1 : (!fir.array<?xi32>, index) -> i32
+    %28 = fir.array_update %arg2, %27, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %28 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %5, %25 to %3 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>  // Write the updated array value back to %3.
+  fir.freemem %21 : !fir.heap<!fir.array<?xi32>>  // Release the heap buffer obtained from the result descriptor.
+  %26 = fir.load %3 : !fir.ref<!fir.array<1xi32>>
+  return %26 : !fir.array<1xi32>
+}
+
+// CHECK-LABEL:   func.func @_QPtestminloc_doesntwork1d_chars(
+// CHECK-SAME:                                                %[[ARR:.*]]: !fir.boxchar<1> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {
+// CHECK-NOT:         fir.call @_FortranAMinlocCharacterx1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+// CHECK:             fir.call @_FortranAMinlocCharacter({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NOT:         fir.call @_FortranAMinlocCharacterx1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+
+// -----
+// Check Minloc is not simplified when mask is unknown rank
+
+func.func @_QPtestminloc_doesntwork1d_unknownmask(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {  // Test input: the mask operand of the call is the result of an arith.select between a boxed heap array and an absent box.
+  %0 = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>  // Storage for the result descriptor; its address is the first call operand (via %31).
+  %c10 = arith.constant 10 : index
+  %1 = fir.alloca i32 {bindc_name = "b", uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEb"}  // Local "b"; loaded below to size the mask allocation, never stored to in this function.
+  %2 = fir.alloca !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>> {bindc_name = "mask", uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask"}  // Not referenced again in this function.
+  %3 = fir.alloca !fir.heap<!fir.array<?x!fir.logical<4>>> {uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.addr"}  // Holds the mask's heap address.
+  %4 = fir.alloca index {uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.lb0"}  // Holds the mask's lower bound.
+  %5 = fir.alloca index {uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.ext0"}  // Holds the mask's extent.
+  %6 = fir.zero_bits !fir.heap<!fir.array<?x!fir.logical<4>>>
+  fir.store %6 to %3 : !fir.ref<!fir.heap<!fir.array<?x!fir.logical<4>>>>  // The mask address starts out as zero bits.
+  %c1 = arith.constant 1 : index
+  %7 = fir.alloca !fir.array<1xi32> {bindc_name = "testminloc_doesntwork1d_unknownmask", uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEtestminloc_doesntwork1d_unknownmask"}  // Storage for the 1-element function result.
+  %8 = fir.load %1 : !fir.ref<i32>
+  %9 = fir.convert %8 : (i32) -> index
+  %c0 = arith.constant 0 : index
+  %10 = arith.cmpi sgt, %9, %c0 : index
+  %11 = arith.select %10, %9, %c0 : index  // Requested extent clamped at zero: b if b > 0, else 0.
+  %12 = fir.allocmem !fir.array<?x!fir.logical<4>>, %11 {fir.must_be_heap = true, uniq_name = "_QFtestminloc_doesntwork1d_unknownmaskEmask.alloc"}  // Heap-allocate the mask with that extent.
+  fir.store %12 to %3 : !fir.ref<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  fir.store %11 to %5 : !fir.ref<index>
+  %c1_0 = arith.constant 1 : index
+  fir.store %c1_0 to %4 : !fir.ref<index>  // Lower bound of the mask is 1.
+  %13 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %14 = fir.array_load %7(%13) : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.array<1xi32>  // Array value of %7; initial iter_arg of the copy loop below.
+  %15 = fir.shape %c10 : (index) -> !fir.shape<1>
+  %16 = fir.embox %arg0(%15) : (!fir.ref<!fir.array<10xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<10xi32>>  // Box the 10-element input array; second call operand (via %32).
+  %17 = fir.load %3 : !fir.ref<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  %18 = fir.convert %17 : (!fir.heap<!fir.array<?x!fir.logical<4>>>) -> i64
+  %c0_i64 = arith.constant 0 : i64
+  %19 = arith.cmpi ne, %18, %c0_i64 : i64  // True when the stored mask address is non-zero.
+  %20 = fir.load %4 : !fir.ref<index>
+  %21 = fir.load %5 : !fir.ref<index>
+  %22 = fir.load %3 : !fir.ref<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+  %23 = fir.shape_shift %20, %21 : (index, index) -> !fir.shapeshift<1>
+  %24 = fir.embox %22(%23) : (!fir.heap<!fir.array<?x!fir.logical<4>>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<?x!fir.logical<4>>>  // Box the mask using its stored lower bound and extent.
+  %25 = fir.absent !fir.box<!fir.array<?x!fir.logical<4>>>
+  %26 = arith.select %19, %24, %25 : !fir.box<!fir.array<?x!fir.logical<4>>>  // Boxed mask when the address is non-zero, otherwise the absent box; sixth call operand (via %35).
+  %c4 = arith.constant 4 : index  // Converted to i32 as the third call operand -- presumably the result KIND; confirm.
+  %false = arith.constant false  // Final call operand.
+  %27 = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+  %c0_1 = arith.constant 0 : index
+  %28 = fir.shape %c0_1 : (index) -> !fir.shape<1>
+  %29 = fir.embox %27(%28) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+  fir.store %29 to %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>  // Result descriptor starts with a zero base address and extent 0.
+  %30 = fir.address_of(@_QQcl.74460ff3ef22ea53671c22344e1556b9) : !fir.ref<!fir.char<1,41>>  // Character global passed as !fir.ref<i8> -- presumably the source file name for runtime diagnostics; confirm.
+  %c7_i32 = arith.constant 7 : i32
+  %31 = fir.convert %0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+  %32 = fir.convert %16 : (!fir.box<!fir.array<10xi32>>) -> !fir.box<none>
+  %33 = fir.convert %c4 : (index) -> i32
+  %34 = fir.convert %30 : (!fir.ref<!fir.char<1,41>>) -> !fir.ref<i8>
+  %35 = fir.convert %26 : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
+  %36 = fir.call @_FortranAMinlocInteger4(%31, %32, %33, %34, %c7_i32, %35, %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none  // The runtime call that the CHECK lines for this function expect to remain in the output.
+  %37 = fir.load %0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>  // Reload the result descriptor after the call.
+  %c0_2 = arith.constant 0 : index
+  %38:3 = fir.box_dims %37, %c0_2 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+  %39 = fir.box_addr %37 : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+  %40 = fir.shape_shift %38#0, %38#1 : (index, index) -> !fir.shapeshift<1>
+  %41 = fir.array_load %39(%40) : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> !fir.array<?xi32>
+  %c1_3 = arith.constant 1 : index
+  %c0_4 = arith.constant 0 : index
+  %42 = arith.subi %c1, %c1_3 : index  // Loop upper bound: 1 - 1 = 0.
+  %43 = fir.do_loop %arg1 = %c0_4 to %42 step %c1_3 unordered iter_args(%arg2 = %14) -> (!fir.array<1xi32>) {  // Copy from the call's result array into the function-result array value.
+    %45 = fir.array_fetch %41, %arg1 : (!fir.array<?xi32>, index) -> i32
+    %46 = fir.array_update %arg2, %45, %arg1 : (!fir.array<1xi32>, i32, index) -> !fir.array<1xi32>
+    fir.result %46 : !fir.array<1xi32>
+  }
+  fir.array_merge_store %14, %43 to %7 : !fir.array<1xi32>, !fir.array<1xi32>, !fir.ref<!fir.array<1xi32>>  // Write the updated array value back to %7.
+  fir.freemem %39 : !fir.heap<!fir.array<?xi32>>  // Release the heap buffer obtained from the result descriptor.
+  %44 = fir.load %7 : !fir.ref<!fir.array<1xi32>>
+  return %44 : !fir.array<1xi32>
+}
+
+// CHECK-LABEL:   func.func @_QPtestminloc_doesntwork1d_unknownmask(
+// CHECK-SAME:                                                      %[[ARR:.*]]: !fir.ref<!fir.array<10xi32>> {fir.bindc_name = "a"}) -> !fir.array<1xi32> {
+// CHECK-NOT:         fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()
+// CHECK:             fir.call @_FortranAMinlocInteger4({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NOT:         fir.call @_FortranAMinlocInteger4x1_i32_contract_simplified({{.*}}) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, !fir.box<none>) -> ()


        


More information about the flang-commits mailing list