[flang-commits] [flang] [Flang] Minloc elemental intrinsic lowering (PR #74828)

David Green via flang-commits flang-commits at lists.llvm.org
Fri Dec 8 03:25:45 PST 2023


https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/74828

Currently the lowering of a minloc intrinsic with a mask will look something like
```
  %e = hlfir.elemental %shape ({
    ...
  })
  %m = hlfir.minloc %array mask %e
  hlfir.assign %m to %result
  hlfir.destroy %m
```
The elemental will be expanded into a temporary+loop, the minloc into a
FortranAMinloc call (which hopefully gets simplified to a specialized call that
can be inlined at the call site), and the assign might get expanded to a
FortranAAssign. The assign we could inline too, but it would be better to
generate the entire construct as a single loop if we can - one that performs the
minloc calculation with the mask elemental computed inline and assigns directly
to the output array.

This patch attempts to do that, adding a hlfir version of the expansion code
from SimplifyIntrinsics that turns an assign+minloc+elemental into a single
combined loop nest. It attempts to reuse the methods in genMinlocReductionLoop
for constructing the loop with a modified loop body. The declaration for the
function is currently in Optimizer/Support/Utils.h, but there might be a better
place for it.

It is currently added as part of the OptimizedBufferizationPass. I originally
had it as part of the SimplifyHLFIRIntrinsics pass, but there were already some
methods doing similar things in OptimizedBufferization. It just needs to happen
before the elementals are expanded. I think I would like to do a similar thing
for maxloc and any/all/count too if this looks OK. I will rebase over #74436
once that goes in.

>From a21f2e45bcda1579761ee796d38df2c2ee913616 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 8 Dec 2023 10:47:21 +0000
Subject: [PATCH 1/2] [Flang] Add a HLFIR Minloc intrinsic

This adds a hlfir minloc intrinsic, similar to the minval intrinsic already
added, to help in the lowering of minloc. The idea is to later add maxloc too,
and from there add a simplification for producing minloc with inlined elemental
and hopefully fewer temporaries.
---
 .../include/flang/Optimizer/HLFIR/HLFIROps.td |  26 ++
 flang/lib/Lower/HlfirIntrinsics.cpp           |  65 +++
 flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp     |  75 ++++
 .../HLFIR/Transforms/LowerHLFIRIntrinsics.cpp |  38 +-
 flang/test/HLFIR/invalid.fir                  |  68 ++++
 flang/test/HLFIR/memory-effects.fir           |  15 +
 flang/test/HLFIR/minloc-lowering.fir          | 329 ++++++++++++++++
 flang/test/HLFIR/minloc.fir                   | 272 +++++++++++++
 flang/test/Lower/HLFIR/minloc.f90             | 370 ++++++++++++++++++
 flang/test/Lower/HLFIR/transformational.f90   |  28 +-
 10 files changed, 1260 insertions(+), 26 deletions(-)
 create mode 100644 flang/test/HLFIR/minloc-lowering.fir
 create mode 100644 flang/test/HLFIR/minloc.fir
 create mode 100644 flang/test/Lower/HLFIR/minloc.f90

diff --git a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
index f4933785a8caa6..1f5bc42c43e65c 100644
--- a/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
+++ b/flang/include/flang/Optimizer/HLFIR/HLFIROps.td
@@ -458,6 +458,32 @@ def hlfir_MinvalOp : hlfir_Op<"minval", [AttrSizedOperandSegments,
   let hasVerifier = 1;
 }
 
+def hlfir_MinlocOp : hlfir_Op<"minloc", [AttrSizedOperandSegments,
+    DeclareOpInterfaceMethods<ArithFastMathInterface>,
+    DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
+  let summary = "MINLOC transformational intrinsic";
+  let description = [{
+    Minlocs of an array.
+  }];
+
+  let arguments = (ins
+    AnyFortranArrayObject:$array,
+    Optional<AnyIntegerType>:$dim,
+    Optional<AnyFortranLogicalOrI1ArrayObject>:$mask,
+    Optional<Type<AnyLogicalLike.predicate>>:$back,
+    DefaultValuedAttr<Arith_FastMathAttr,
+                      "::mlir::arith::FastMathFlags::none">:$fastmath
+  );
+
+  let results = (outs AnyFortranValue);
+
+  let assemblyFormat = [{
+    $array (`dim` $dim^)? (`mask` $mask^)? (`back` $back^)?  attr-dict `:` functional-type(operands, results)
+  }];
+
+  let hasVerifier = 1;
+}
+
 def hlfir_ProductOp : hlfir_Op<"product", [AttrSizedOperandSegments,
     DeclareOpInterfaceMethods<ArithFastMathInterface>,
     DeclareOpInterfaceMethods<MemoryEffectsOpInterface>]> {
diff --git a/flang/lib/Lower/HlfirIntrinsics.cpp b/flang/lib/Lower/HlfirIntrinsics.cpp
index 9f764b61425226..6e5ba92bee86a7 100644
--- a/flang/lib/Lower/HlfirIntrinsics.cpp
+++ b/flang/lib/Lower/HlfirIntrinsics.cpp
@@ -93,6 +93,19 @@ using HlfirMinvalLowering = HlfirReductionIntrinsic<hlfir::MinvalOp, true>;
 using HlfirAnyLowering = HlfirReductionIntrinsic<hlfir::AnyOp, false>;
 using HlfirAllLowering = HlfirReductionIntrinsic<hlfir::AllOp, false>;
 
+template <typename OP>
+class HlfirMinMaxLocIntrinsic : public HlfirTransformationalIntrinsic {
+public:
+  using HlfirTransformationalIntrinsic::HlfirTransformationalIntrinsic;
+
+protected:
+  mlir::Value
+  lowerImpl(const Fortran::lower::PreparedActualArguments &loweredActuals,
+            const fir::IntrinsicArgumentLoweringRules *argLowering,
+            mlir::Type stmtResultType) override;
+};
+using HlfirMinlocLowering = HlfirMinMaxLocIntrinsic<hlfir::MinlocOp>;
+
 template <typename OP>
 class HlfirProductIntrinsic : public HlfirTransformationalIntrinsic {
 public:
@@ -180,6 +193,31 @@ mlir::Value HlfirTransformationalIntrinsic::loadBoxAddress(
   return boxOrAbsent;
 }
 
+static mlir::Value loadOptionalValue(
+    mlir::Location loc, fir::FirOpBuilder &builder,
+    const std::optional<Fortran::lower::PreparedActualArgument> &arg,
+    hlfir::Entity actual) {
+  if (!arg->handleDynamicOptional())
+    return hlfir::loadTrivialScalar(loc, builder, actual);
+
+  mlir::Value isPresent = arg->getIsPresent();
+  mlir::Type eleType = hlfir::getFortranElementType(actual.getType());
+  return builder
+      .genIfOp(loc, {eleType}, isPresent,
+               /*withElseRegion=*/true)
+      .genThen([&]() {
+        assert(actual.isScalar() && fir::isa_trivial(eleType) &&
+               "must be a numerical or logical scalar");
+        hlfir::Entity val = hlfir::loadTrivialScalar(loc, builder, actual);
+        builder.create<fir::ResultOp>(loc, val);
+      })
+      .genElse([&]() {
+        mlir::Value zero = fir::factory::createZeroValue(builder, loc, eleType);
+        builder.create<fir::ResultOp>(loc, zero);
+      })
+      .getResults()[0];
+}
+
 llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector(
     const Fortran::lower::PreparedActualArguments &loweredActuals,
     const fir::IntrinsicArgumentLoweringRules *argLowering) {
@@ -206,6 +244,9 @@ llvm::SmallVector<mlir::Value> HlfirTransformationalIntrinsic::getOperandVector(
       else if (!argRules.handleDynamicOptional &&
                argRules.lowerAs != fir::LowerIntrinsicArgAs::Inquired)
         valArg = hlfir::derefPointersAndAllocatables(loc, builder, actual);
+      else if (argRules.handleDynamicOptional &&
+               argRules.lowerAs == fir::LowerIntrinsicArgAs::Value)
+        valArg = loadOptionalValue(loc, builder, arg, actual);
       else if (argRules.handleDynamicOptional)
         TODO(loc, "hlfir transformational intrinsic dynamically optional "
                   "argument without box lowering");
@@ -260,6 +301,27 @@ mlir::Value HlfirReductionIntrinsic<OP, HAS_MASK>::lowerImpl(
   return op;
 }
 
+template <typename OP>
+mlir::Value HlfirMinMaxLocIntrinsic<OP>::lowerImpl(
+    const Fortran::lower::PreparedActualArguments &loweredActuals,
+    const fir::IntrinsicArgumentLoweringRules *argLowering,
+    mlir::Type stmtResultType) {
+  auto operands = getOperandVector(loweredActuals, argLowering);
+  mlir::Value array = operands[0];
+  mlir::Value dim = operands[1];
+  mlir::Value mask = operands[2];
+  mlir::Value back = operands[4];
+  // dim, mask and back can be NULL if these arguments are not given.
+  if (dim)
+    dim = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{dim});
+  if (back)
+    back = hlfir::loadTrivialScalar(loc, builder, hlfir::Entity{back});
+
+  mlir::Type resultTy = computeResultType(array, stmtResultType);
+
+  return createOp<OP>(resultTy, array, dim, mask, back);
+}
+
 template <typename OP>
 mlir::Value HlfirProductIntrinsic<OP>::lowerImpl(
     const Fortran::lower::PreparedActualArguments &loweredActuals,
@@ -364,6 +426,9 @@ std::optional<hlfir::EntityWithAttributes> Fortran::lower::lowerHlfirIntrinsic(
   if (name == "minval")
     return HlfirMinvalLowering{builder, loc}.lower(loweredActuals, argLowering,
                                                    stmtResultType);
+  if (name == "minloc")
+    return HlfirMinlocLowering{builder, loc}.lower(loweredActuals, argLowering,
+                                                   stmtResultType);
   if (mlir::isa<fir::CharacterType>(stmtResultType)) {
     if (name == "min")
       return HlfirCharExtremumLowering{builder, loc,
diff --git a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
index a276e5fc65dd59..94a2213306bfd5 100644
--- a/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
+++ b/flang/lib/Optimizer/HLFIR/IR/HLFIROps.cpp
@@ -870,6 +870,81 @@ void hlfir::MinvalOp::getEffects(
   getIntrinsicEffects(getOperation(), effects);
 }
 
+//===----------------------------------------------------------------------===//
+// MinlocOp
+//===----------------------------------------------------------------------===//
+
+mlir::LogicalResult hlfir::MinlocOp::verify() {
+  mlir::Operation *op = getOperation();
+
+  auto results = op->getResultTypes();
+  assert(results.size() == 1);
+  mlir::Value array = getArray();
+  mlir::Value dim = getDim();
+  mlir::Value mask = getMask();
+
+  fir::SequenceType arrayTy =
+      hlfir::getFortranElementOrSequenceType(array.getType())
+          .cast<fir::SequenceType>();
+  llvm::ArrayRef<int64_t> arrayShape = arrayTy.getShape();
+
+  if (mask) {
+    fir::SequenceType maskSeq =
+        hlfir::getFortranElementOrSequenceType(mask.getType())
+            .dyn_cast<fir::SequenceType>();
+    llvm::ArrayRef<int64_t> maskShape;
+
+    if (maskSeq)
+      maskShape = maskSeq.getShape();
+
+    if (!maskShape.empty()) {
+      if (maskShape.size() != arrayShape.size())
+        return emitWarning("MASK must be conformable to ARRAY");
+      static_assert(fir::SequenceType::getUnknownExtent() ==
+                    hlfir::ExprType::getUnknownExtent());
+      constexpr int64_t unknownExtent = fir::SequenceType::getUnknownExtent();
+      for (std::size_t i = 0; i < arrayShape.size(); ++i) {
+        int64_t arrayExtent = arrayShape[i];
+        int64_t maskExtent = maskShape[i];
+        if ((arrayExtent != maskExtent) && (arrayExtent != unknownExtent) &&
+            (maskExtent != unknownExtent))
+          return emitWarning("MASK must be conformable to ARRAY");
+      }
+    }
+  }
+
+  mlir::Type resultType = results[0];
+  if (dim && arrayShape.size() == 1) {
+    if (!fir::isa_integer(resultType))
+      return emitOpError("result must be scalar integer");
+  } else if (auto resultExpr =
+                 mlir::dyn_cast_or_null<hlfir::ExprType>(resultType)) {
+    if (!resultExpr.isArray())
+      return emitOpError("result must be an array");
+
+    if (!fir::isa_integer(resultExpr.getEleTy()))
+      return emitOpError("result must have integer elements");
+
+    llvm::ArrayRef<int64_t> resultShape = resultExpr.getShape();
+    // With dim the result has rank n-1
+    if (dim && resultShape.size() != (arrayShape.size() - 1))
+      return emitOpError("result rank must be one less than ARRAY");
+    // Without dim the result has rank 1
+    if (!dim && resultShape.size() != 1)
+      return emitOpError("result rank must be 1");
+  } else {
+    return emitOpError("result must be of numerical expr type");
+  }
+  return mlir::success();
+}
+
+void hlfir::MinlocOp::getEffects(
+    llvm::SmallVectorImpl<
+        mlir::SideEffects::EffectInstance<mlir::MemoryEffects::Effect>>
+        &effects) {
+  getIntrinsicEffects(getOperation(), effects);
+}
+
 //===----------------------------------------------------------------------===//
 // SetLengthOp
 //===----------------------------------------------------------------------===//
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
index f2628fcb970bc4..bfebe26fe1d532 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/LowerHLFIRIntrinsics.cpp
@@ -201,6 +201,23 @@ class HlfirReductionIntrinsicConversion : public HlfirIntrinsicConversion<OP> {
     return lowerArguments(operation, inArgs, rewriter, argLowering);
   };
 
+  auto buildMinMaxLocArgs(OP operation, mlir::Type i32, mlir::Type logicalType,
+                          mlir::PatternRewriter &rewriter, std::string opName,
+                          fir::FirOpBuilder builder) const {
+    llvm::SmallVector<IntrinsicArgument, 3> inArgs;
+    inArgs.push_back({operation.getArray(), operation.getArray().getType()});
+    inArgs.push_back({operation.getDim(), i32});
+    inArgs.push_back({operation.getMask(), logicalType});
+    mlir::Type T = hlfir::getFortranElementType(operation.getType());
+    unsigned width = T.cast<mlir::IntegerType>().getWidth();
+    mlir::Value kind =
+        builder.createIntegerConstant(operation->getLoc(), i32, width / 8);
+    inArgs.push_back({kind, i32});
+    inArgs.push_back({operation.getBack(), i32});
+    auto *argLowering = fir::getIntrinsicArgumentLowering(opName);
+    return lowerArguments(operation, inArgs, rewriter, argLowering);
+  };
+
   auto buildLogicalArgs(OP operation, mlir::Type i32, mlir::Type logicalType,
                         mlir::PatternRewriter &rewriter,
                         std::string opName) const {
@@ -224,6 +241,8 @@ class HlfirReductionIntrinsicConversion : public HlfirIntrinsicConversion<OP> {
       opName = "maxval";
     } else if constexpr (std::is_same_v<OP, hlfir::MinvalOp>) {
       opName = "minval";
+    } else if constexpr (std::is_same_v<OP, hlfir::MinlocOp>) {
+      opName = "minloc";
     } else if constexpr (std::is_same_v<OP, hlfir::AnyOp>) {
       opName = "any";
     } else if constexpr (std::is_same_v<OP, hlfir::AllOp>) {
@@ -246,6 +265,9 @@ class HlfirReductionIntrinsicConversion : public HlfirIntrinsicConversion<OP> {
                   std::is_same_v<OP, hlfir::MaxvalOp> ||
                   std::is_same_v<OP, hlfir::MinvalOp>) {
       args = buildNumericalArgs(operation, i32, logicalType, rewriter, opName);
+    } else if constexpr (std::is_same_v<OP, hlfir::MinlocOp>) {
+      args = buildMinMaxLocArgs(operation, i32, logicalType, rewriter, opName,
+                                builder);
     } else {
       args = buildLogicalArgs(operation, i32, logicalType, rewriter, opName);
     }
@@ -269,6 +291,8 @@ using MaxvalOpConversion = HlfirReductionIntrinsicConversion<hlfir::MaxvalOp>;
 
 using MinvalOpConversion = HlfirReductionIntrinsicConversion<hlfir::MinvalOp>;
 
+using MinlocOpConversion = HlfirReductionIntrinsicConversion<hlfir::MinlocOp>;
+
 using AnyOpConversion = HlfirReductionIntrinsicConversion<hlfir::AnyOp>;
 
 using AllOpConversion = HlfirReductionIntrinsicConversion<hlfir::AllOp>;
@@ -441,12 +465,12 @@ class LowerHLFIRIntrinsics
     mlir::ModuleOp module = this->getOperation();
     mlir::MLIRContext *context = &getContext();
     mlir::RewritePatternSet patterns(context);
-    patterns
-        .insert<MatmulOpConversion, MatmulTransposeOpConversion,
-                AllOpConversion, AnyOpConversion, SumOpConversion,
-                ProductOpConversion, TransposeOpConversion, CountOpConversion,
-                DotProductOpConversion, MaxvalOpConversion, MinvalOpConversion>(
-            context);
+    patterns.insert<MatmulOpConversion, MatmulTransposeOpConversion,
+                    AllOpConversion, AnyOpConversion, SumOpConversion,
+                    ProductOpConversion, TransposeOpConversion,
+                    CountOpConversion, DotProductOpConversion,
+                    MaxvalOpConversion, MinvalOpConversion, MinlocOpConversion>(
+        context);
     mlir::ConversionTarget target(*context);
     target.addLegalDialect<mlir::BuiltinDialect, mlir::arith::ArithDialect,
                            mlir::func::FuncDialect, fir::FIROpsDialect,
@@ -454,7 +478,7 @@ class LowerHLFIRIntrinsics
     target.addIllegalOp<hlfir::MatmulOp, hlfir::MatmulTransposeOp, hlfir::SumOp,
                         hlfir::ProductOp, hlfir::TransposeOp, hlfir::AnyOp,
                         hlfir::AllOp, hlfir::DotProductOp, hlfir::CountOp,
-                        hlfir::MaxvalOp, hlfir::MinvalOp>();
+                        hlfir::MaxvalOp, hlfir::MinvalOp, hlfir::MinlocOp>();
     target.markUnknownOpDynamicallyLegal(
         [](mlir::Operation *) { return true; });
     if (mlir::failed(
diff --git a/flang/test/HLFIR/invalid.fir b/flang/test/HLFIR/invalid.fir
index 09165f09766b91..ce0d728749b960 100644
--- a/flang/test/HLFIR/invalid.fir
+++ b/flang/test/HLFIR/invalid.fir
@@ -548,6 +548,74 @@ func.func @bad_minval13(%arg0: !hlfir.expr<?x?x!fir.char<1,?>>, %arg1: i32){
   %0 = hlfir.minval %arg0 dim %arg1 : (!hlfir.expr<?x?x!fir.char<1,?>>, i32) -> !hlfir.expr<!fir.char<1,?>>
 }
 
+// -----
+func.func @bad_minloc1(%arg0: !hlfir.expr<?xi32>, %arg1: i32, %arg2: !fir.box<!fir.logical<4>>) {
+  // expected-error@+1 {{'hlfir.minloc' op result must be scalar integer}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?xi32>, i32, !fir.box<!fir.logical<4>>) -> f32
+}
+
+// -----
+func.func @bad_minloc2(%arg0: !hlfir.expr<?xi32>, %arg1: i32, %arg2: !fir.box<!fir.array<?x?x?x?x?x!fir.logical<4>>>) {
+  // expected-warning@+1 {{MASK must be conformable to ARRAY}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?xi32>, i32, !fir.box<!fir.array<?x?x?x?x?x!fir.logical<4>>>) -> !hlfir.expr<i32>
+}
+
+// -----
+func.func @bad_minloc3(%arg0: !hlfir.expr<?x5x?xi32>, %arg1: i32, %arg2: !fir.box<!fir.array<2x6x?x!fir.logical<4>>>) {
+  // expected-warning@+1 {{MASK must be conformable to ARRAY}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?x5x?xi32>, i32, !fir.box<!fir.array<2x6x?x!fir.logical<4>>>) -> !hlfir.expr<i32>
+}
+
+// -----
+func.func @bad_minloc4(%arg0: !hlfir.expr<?x?xi32>, %arg1: i32, %arg2: !fir.box<!fir.logical<4>>) {
+  // expected-error@+1 {{'hlfir.minloc' op result rank must be one less than ARRAY}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?x?xi32>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<?x?xi32>
+}
+
+// -----
+func.func @bad_minloc5(%arg0: !hlfir.expr<?xi32>, %arg1: i32, %arg2: !fir.box<!fir.logical<4>>) {
+  // expected-error@+1 {{'hlfir.minloc' op result must be scalar integer}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?xi32>, i32, !fir.box<!fir.logical<4>>) -> !fir.logical<4>
+}
+
+// -----
+func.func @bad_minloc6(%arg0: !hlfir.expr<?x?xi32>, %arg1: i32){
+  // expected-error@+1 {{'hlfir.minloc' op result must be an array}}
+  %0 = hlfir.minloc %arg0 dim %arg1 : (!hlfir.expr<?x?xi32>, i32) -> !hlfir.expr<i32>
+}
+
+// -----
+func.func @bad_minloc7(%arg0: !hlfir.expr<?xi32>){
+  // expected-error@+1 {{'hlfir.minloc' op result must be of numerical expr type}}
+  %0 = hlfir.minloc %arg0 : (!hlfir.expr<?xi32>) -> i32
+}
+
+// -----
+func.func @bad_minloc8(%arg0: !hlfir.expr<?xi32>){
+  // expected-error@+1 {{'hlfir.minloc' op result must have integer elements}}
+  %0 = hlfir.minloc %arg0 : (!hlfir.expr<?xi32>) -> !hlfir.expr<?xf32>
+}
+
+// -----
+func.func @bad_minloc9(%arg0: !hlfir.expr<?x!fir.char<1,?>>, %arg1: i32, %arg2: !fir.box<!fir.array<?x?x?x?x?x!fir.logical<4>>>) {
+  // expected-warning@+1 {{MASK must be conformable to ARRAY}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?x!fir.char<1,?>>, i32, !fir.box<!fir.array<?x?x?x?x?x!fir.logical<4>>>) -> !hlfir.expr<!fir.char<1,?>>
+}
+
+// -----
+func.func @bad_minloc10(%arg0: !hlfir.expr<?x5x?x!fir.char<1,?>>, %arg1: i32, %arg2: !fir.box<!fir.array<2x6x?x!fir.logical<4>>>) {
+  // expected-warning@+1 {{MASK must be conformable to ARRAY}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?x5x?x!fir.char<1,?>>, i32, !fir.box<!fir.array<2x6x?x!fir.logical<4>>>) -> !hlfir.expr<!fir.char<1,?>>
+}
+
+// -----
+func.func @bad_minloc11(%arg0: !hlfir.expr<?x?x!fir.char<1,?>>, %arg1: i32, %arg2: !fir.box<!fir.logical<4>>) {
+  // expected-error@+1 {{'hlfir.minloc' op result rank must be one less than ARRAY}}
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %arg2 : (!hlfir.expr<?x?x!fir.char<1,?>>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<?x?xi32>
+}
+
+
+
 // -----
 func.func @bad_product1(%arg0: !hlfir.expr<?xi32>, %arg1: i32, %arg2: !fir.box<!fir.logical<4>>) {
   // expected-error@+1 {{'hlfir.product' op result must have the same element type as ARRAY argument}}
diff --git a/flang/test/HLFIR/memory-effects.fir b/flang/test/HLFIR/memory-effects.fir
index 4b2a0d575db1ac..c68c71f43a17dd 100644
--- a/flang/test/HLFIR/memory-effects.fir
+++ b/flang/test/HLFIR/memory-effects.fir
@@ -122,6 +122,21 @@ func.func @minval_effects(%arg0: !fir.ref<!fir.array<2x2xf32>>, %arg1: i32) {
   return
 }
 
+func.func @minloc_effects_simple(%arg0: !hlfir.expr<?xf32>) {
+// expected-remark@+1 {{found an instance of 'allocate' on a value, on resource '<Default>'}}
+  %minloc = hlfir.minloc %arg0 : (!hlfir.expr<?xf32>) -> !hlfir.expr<?xi32>
+// expected-remark@+1 {{operation has no memory effects}}
+  return
+}
+
+func.func @minloc_effects(%arg0: !fir.ref<!fir.array<2x2xf32>>, %arg1: i32) {
+// expected-remark@+2 {{found an instance of 'allocate' on a value, on resource '<Default>'}}
+// expected-remark@+1 {{found an instance of 'read' on a value, on resource '<Default>'}}
+  %minloc = hlfir.minloc %arg0 dim %arg1 : (!fir.ref<!fir.array<2x2xf32>>, i32) -> !hlfir.expr<2xi32>
+// expected-remark@+1 {{operation has no memory effects}}
+  return
+}
+
 func.func @dot_product_no_effects(%arg0: !hlfir.expr<?xf32>, %arg1: !hlfir.expr<?xf32>) {
 // expected-remark@+1 {{operation has no memory effects}}
   %0 = hlfir.dot_product %arg0 %arg1 : (!hlfir.expr<?xf32>, !hlfir.expr<?xf32>) -> f32
diff --git a/flang/test/HLFIR/minloc-lowering.fir b/flang/test/HLFIR/minloc-lowering.fir
new file mode 100644
index 00000000000000..fede0a1950121b
--- /dev/null
+++ b/flang/test/HLFIR/minloc-lowering.fir
@@ -0,0 +1,329 @@
+// Test hlfir.minloc operation lowering to fir runtime call
+// RUN: fir-opt %s -lower-hlfir-intrinsics | FileCheck %s
+
+// simple one argument minloc
+func.func @_QPminloc1(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFminloc1Ea"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "_QFminloc1Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2 = hlfir.minloc %0#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>) -> !hlfir.expr<?xi32>
+  hlfir.assign %2 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %2 : !hlfir.expr<?xi32>
+  return
+}
+// CHECK-LABEL: func.func @_QPminloc1(
+// CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}
+// CHECK:           %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFminloc1Ea"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFminloc1Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %[[V3:.*]] = fir.absent !fir.box<i1>
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V4:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V5:.*]] = fir.shape %c0 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V6:.*]] = fir.embox %[[V4]](%[[V5]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    fir.store %[[V6]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:         %[[V8:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V9:.*]] = fir.convert %[[V1]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:         %[[V12:.*]] = fir.convert %[[V3]] : (!fir.box<i1>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V13:.*]] = fir.call @_FortranAMinlocInteger4(%[[V8]], %[[V9]], %c4_i32, {{.*}}, {{.*}}, %[[V12]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V14:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-NEXT:    %c0_0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V15:.*]]:3 = fir.box_dims %[[V14]], %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V16:.*]] = fir.box_addr %[[V14]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %[[V17:.*]] = fir.shape_shift %[[V15]]#0, %[[V15]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK-NEXT:    %[[V18:.*]]:2 = hlfir.declare %[[V16]](%[[V17]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK-NEXT:    %true = arith.constant true
+// CHECK-NEXT:    %[[V19:.*]] = hlfir.as_expr %[[V18]]#0 move %true : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:    hlfir.assign %[[V19]] to %[[V2]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:    hlfir.destroy %[[V19]] : !hlfir.expr<?xi32>
+
+
+// minloc with a dim
+func.func @_QPminloc2(%arg0: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %arg2: !fir.ref<index> {fir.bindc_name = "d"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFminloc2Ea"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFminloc2Ed"} : (!fir.ref<index>) -> (!fir.ref<index>, !fir.ref<index>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFminloc2Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %3 = fir.load %1#0 : !fir.ref<index>
+  %4 = hlfir.minloc %0#0 dim %3#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x?xi32>>, index) -> !hlfir.expr<?xi32>
+  hlfir.assign %4 to %2#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %4 : !hlfir.expr<?xi32>
+  return
+}
+// CHECK-LABEL: func.func @_QPminloc2(
+// CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"}
+// CHECK:           %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}
+// CHECK:           %[[ARG2:.*]]: !fir.ref<index>
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFminloc2Ea"} : (!fir.box<!fir.array<?x?xi32>>) -> (!fir.box<!fir.array<?x?xi32>>, !fir.box<!fir.array<?x?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFminloc2Ed"} : (!fir.ref<index>) -> (!fir.ref<index>, !fir.ref<index>)
+// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFminloc2Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V2]]#0 : !fir.ref<index>
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %[[V5:.*]] = fir.convert %[[V4]] : (index) -> i32
+// CHECK-NEXT:    %[[V6:.*]] = fir.absent !fir.box<i1>
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V7:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V8:.*]] = fir.shape %c0 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V9:.*]] = fir.embox %[[V7]](%[[V8]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    fir.store %[[V9]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:         %[[V11:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V12:.*]] = fir.convert %[[V1]]#1 : (!fir.box<!fir.array<?x?xi32>>) -> !fir.box<none>
+// CHECK:         %[[V15:.*]] = fir.convert %[[V6]] : (!fir.box<i1>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V16:.*]] = fir.call @_FortranAMinlocDim(%[[V11]], %[[V12]], %c4_i32, %[[V5]], {{.*}}, {{.*}}, %[[V15]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V17:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-NEXT:    %c0_0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V18:.*]]:3 = fir.box_dims %[[V17]], %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V19:.*]] = fir.box_addr %[[V17]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %[[V20:.*]] = fir.shape_shift %[[V18]]#0, %[[V18]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK-NEXT:    %[[V21:.*]]:2 = hlfir.declare %[[V19]](%[[V20]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK-NEXT:    %true = arith.constant true
+// CHECK-NEXT:    %[[V22:.*]] = hlfir.as_expr %[[V21]]#0 move %true : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:    hlfir.assign %[[V22]] to %[[V3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:    hlfir.destroy %[[V22]] : !hlfir.expr<?xi32>
+// CHECK-NEXT:    return
+
+
+// minloc with scalar mask
+func.func @_QPminloc3(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %arg2: !fir.ref<!fir.logical<4>> {fir.bindc_name = "m"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFminloc3Ea"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFminloc3Em"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFminloc3Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %3 = hlfir.minloc %0#0 mask %1#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.logical<4>>) -> !hlfir.expr<?xi32>
+  hlfir.assign %3 to %2#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %3 : !hlfir.expr<?xi32>
+  return
+}
+// CHECK-LABEL: func.func @_QPminloc3(
+// CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}
+// CHECK:           %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}
+// CHECK:           %[[ARG2:.*]]: !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFminloc3Ea"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFminloc3Em"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFminloc3Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %[[V4:.*]] = fir.embox %[[V2]]#1 : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V5:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V6:.*]] = fir.shape %c0 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V7:.*]] = fir.embox %[[V5]](%[[V6]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    fir.store %[[V7]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:         %[[V9:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V10:.*]] = fir.convert %[[V1]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:         %[[V13:.*]] = fir.convert %[[V4]] : (!fir.box<!fir.logical<4>>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V14:.*]] = fir.call @_FortranAMinlocInteger4(%[[V9]], %[[V10]], %c4_i32, {{.*}}, {{.*}}, %[[V13]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V15:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-NEXT:    %c0_0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V16:.*]]:3 = fir.box_dims %[[V15]], %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V17:.*]] = fir.box_addr %[[V15]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %[[V18:.*]] = fir.shape_shift %[[V16]]#0, %[[V16]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK-NEXT:    %[[V19:.*]]:2 = hlfir.declare %[[V17]](%[[V18]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK-NEXT:    %true = arith.constant true
+// CHECK-NEXT:    %[[V20:.*]] = hlfir.as_expr %[[V19]]#0 move %true : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:    hlfir.assign %[[V20]] to %[[V3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:    hlfir.destroy %[[V20]] : !hlfir.expr<?xi32>
+// CHECK-NEXT:    return
+
+
+// minloc with array mask
+func.func @_QPminloc4(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %arg2: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "m"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFminloc4Ea"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFminloc4Em"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFminloc4Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %3 = hlfir.minloc %0#0 mask %1#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?x!fir.logical<4>>>) -> !hlfir.expr<?xi32>
+  hlfir.assign %3 to %2#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %3 : !hlfir.expr<?xi32>
+  return
+}
+// CHECK-LABEL: func.func @_QPminloc4(
+// CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}
+// CHECK:           %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}
+// CHECK:           %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFminloc4Ea"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFminloc4Em"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFminloc4Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V4:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V5:.*]] = fir.shape %c0 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V6:.*]] = fir.embox %[[V4]](%[[V5]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    fir.store %[[V6]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:         %[[V8:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V9:.*]] = fir.convert %[[V1]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:         %[[V12:.*]] = fir.convert %[[V2]]#1 : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V13:.*]] = fir.call @_FortranAMinlocInteger4(%[[V8]], %[[V9]], %c4_i32, {{.*}}, {{.*}}, %[[V12]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V14:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-NEXT:    %c0_0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V15:.*]]:3 = fir.box_dims %[[V14]], %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V16:.*]] = fir.box_addr %[[V14]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %[[V17:.*]] = fir.shape_shift %[[V15]]#0, %[[V15]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK-NEXT:    %[[V18:.*]]:2 = hlfir.declare %[[V16]](%[[V17]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK-NEXT:    %true = arith.constant true
+// CHECK-NEXT:    %[[V19:.*]] = hlfir.as_expr %[[V18]]#0 move %true : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:    hlfir.assign %[[V19]] to %[[V3]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:    hlfir.destroy %[[V19]] : !hlfir.expr<?xi32>
+// CHECK-NEXT:    return
+
+
+fir.global internal @_QFminloc5Ea : !fir.array<2x2xi32> {
+  %0 = fir.undefined !fir.array<2x2xi32>
+  %c1_i32 = arith.constant 1 : i32
+  %1 = fir.insert_value %0, %c1_i32, [0 : index, 0 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32>
+  %c2_i32 = arith.constant 2 : i32
+  %2 = fir.insert_value %1, %c2_i32, [1 : index, 0 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32>
+  %c3_i32 = arith.constant 3 : i32
+  %3 = fir.insert_value %2, %c3_i32, [0 : index, 1 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32>
+  %c4_i32 = arith.constant 4 : i32
+  %4 = fir.insert_value %3, %c4_i32, [1 : index, 1 : index] : (!fir.array<2x2xi32>, i32) -> !fir.array<2x2xi32>
+  %c2 = arith.constant 2 : index
+  %c2_0 = arith.constant 2 : index
+  fir.has_value %4 : !fir.array<2x2xi32>
+}
+
+// 3 argument minloc, using local variables
+func.func @_QPminloc5(%arg0: !fir.ref<!fir.array<2xi32>> {fir.bindc_name = "s"}) {
+  %0 = fir.address_of(@_QFminloc5Ea) : !fir.ref<!fir.array<2x2xi32>>
+  %c2 = arith.constant 2 : index
+  %c2_0 = arith.constant 2 : index
+  %1 = fir.shape %c2, %c2_0 : (index, index) -> !fir.shape<2>
+  %2:2 = hlfir.declare %0(%1) {uniq_name = "_QFminloc5Ea"} : (!fir.ref<!fir.array<2x2xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<2x2xi32>>, !fir.ref<!fir.array<2x2xi32>>)
+  %c2_1 = arith.constant 2 : index
+  %3 = fir.shape %c2_1 : (index) -> !fir.shape<1>
+  %4:2 = hlfir.declare %arg0(%3) {uniq_name = "_QFminloc5Es"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+  %c1_i32 = arith.constant 1 : i32
+  %true = arith.constant true
+  %5 = hlfir.minloc %2#0 dim %c1_i32 mask %true {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<2x2xi32>>, i32, i1) -> !hlfir.expr<2xi32>
+  hlfir.assign %5 to %4#0 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+  hlfir.destroy %5 : !hlfir.expr<2xi32>
+  return
+}
+// CHECK-LABEL: func.func @_QPminloc5(
+// CHECK:           %[[ARG0:.*]]: !fir.ref<!fir.array<2xi32>> {fir.bindc_name = "s"}
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    %[[V1:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:    %[[V2:.*]] = fir.address_of(@_QFminloc5Ea) : !fir.ref<!fir.array<2x2xi32>>
+// CHECK-NEXT:    %c2 = arith.constant 2 : index
+// CHECK-NEXT:    %c2_0 = arith.constant 2 : index
+// CHECK-NEXT:    %[[V3:.*]] = fir.shape %c2, %c2_0 : (index, index) -> !fir.shape<2>
+// CHECK-NEXT:    %[[V4:.*]]:2 = hlfir.declare %[[V2]](%[[V3]]) {uniq_name = "_QFminloc5Ea"} : (!fir.ref<!fir.array<2x2xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<2x2xi32>>, !fir.ref<!fir.array<2x2xi32>>)
+// CHECK-NEXT:    %c2_1 = arith.constant 2 : index
+// CHECK-NEXT:    %[[V5:.*]] = fir.shape %c2_1 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V6:.*]]:2 = hlfir.declare %[[ARG0]](%[[V5]]) {uniq_name = "_QFminloc5Es"} : (!fir.ref<!fir.array<2xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<2xi32>>, !fir.ref<!fir.array<2xi32>>)
+// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
+// CHECK-NEXT:    %true = arith.constant true
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %[[V7:.*]] = fir.shape %c2, %c2_0 : (index, index) -> !fir.shape<2>
+// CHECK-NEXT:    %[[V8:.*]] = fir.embox %[[V4]]#1(%[[V7]]) : (!fir.ref<!fir.array<2x2xi32>>, !fir.shape<2>) -> !fir.box<!fir.array<2x2xi32>>
+// CHECK-NEXT:    %[[V9:.*]] = fir.convert %true : (i1) -> !fir.logical<4>
+// CHECK-NEXT:    fir.store %[[V9]] to %[[V1]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:    %[[V10:.*]] = fir.embox %[[V1]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V11:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V12:.*]] = fir.shape %c0 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V13:.*]] = fir.embox %[[V11]](%[[V12]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    fir.store %[[V13]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:         %[[V15:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V16:.*]] = fir.convert %[[V8]] : (!fir.box<!fir.array<2x2xi32>>) -> !fir.box<none>
+// CHECK:         %[[V19:.*]] = fir.convert %[[V10]] : (!fir.box<!fir.logical<4>>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V20:.*]] = fir.call @_FortranAMinlocDim(%[[V15]], %[[V16]], %c4_i32, %c1_i32, {{.*}}, {{.*}}, %[[V19]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V21:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-NEXT:    %c0_2 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V22:.*]]:3 = fir.box_dims %[[V21]], %c0_2 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V23:.*]] = fir.box_addr %[[V21]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %[[V24:.*]] = fir.shape_shift %[[V22]]#0, %[[V22]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK-NEXT:    %[[V25:.*]]:2 = hlfir.declare %[[V23]](%[[V24]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK-NEXT:    %true_3 = arith.constant true
+// CHECK-NEXT:    %[[V26:.*]] = hlfir.as_expr %[[V25]]#0 move %true_3 : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:    hlfir.assign %[[V26]] to %[[V6]]#0 : !hlfir.expr<?xi32>, !fir.ref<!fir.array<2xi32>>
+// CHECK-NEXT:    hlfir.destroy %[[V26]] : !hlfir.expr<?xi32>
+// CHECK-NEXT:    return
+
+
+// simple one argument minloc for character
+func.func @_QPminloc6(%arg0: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}) {
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFminloc6Ea"} : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.box<!fir.array<?x!fir.char<1,?>>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "_QFminloc6Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2 = hlfir.minloc %0#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !hlfir.expr<?xi32>
+  hlfir.assign %2 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %2 : !hlfir.expr<?xi32>
+  return
+}
+// CHECK-LABEL: func.func @_QPminloc6(
+// CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "a"}
+// CHECK:           %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFminloc6Ea"} : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.box<!fir.array<?x!fir.char<1,?>>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFminloc6Es"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %[[V3:.*]] = fir.absent !fir.box<i1>
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V4:.*]] = fir.zero_bits !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V5:.*]] = fir.shape %c0 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %[[V6:.*]] = fir.embox %[[V4]](%[[V5]]) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
+// CHECK-NEXT:    fir.store %[[V6]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK:         %[[V8:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V9:.*]] = fir.convert %[[V1]]#1 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.box<none>
+// CHECK:         %[[V12:.*]] = fir.convert %[[V3]] : (!fir.box<i1>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V13:.*]] = fir.call @_FortranAMinlocCharacter(%[[V8]], %[[V9]], %c4_i32, {{.*}}, {{.*}}, %[[V12]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V14:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+// CHECK-NEXT:    %c0_0 = arith.constant 0 : index
+// CHECK-NEXT:    %[[V15:.*]]:3 = fir.box_dims %[[V14]], %c0_0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
+// CHECK-NEXT:    %[[V16:.*]] = fir.box_addr %[[V14]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+// CHECK-NEXT:    %[[V17:.*]] = fir.shape_shift %[[V15]]#0, %[[V15]]#1 : (index, index) -> !fir.shapeshift<1>
+// CHECK-NEXT:    %[[V18:.*]]:2 = hlfir.declare %[[V16]](%[[V17]]) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
+// CHECK-NEXT:    %true = arith.constant true
+// CHECK-NEXT:    %[[V19:.*]] = hlfir.as_expr %[[V18]]#0 move %true : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:    hlfir.assign %[[V19]] to %[[V2]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:    hlfir.destroy %[[V19]] : !hlfir.expr<?xi32>
+// CHECK-NEXT:    return
+
+
+// minloc with dim and array mask, scalar result (no BACK operand; "b" is declared but unused)
+func.func @_QPminloc7(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %arg1: !fir.ref<i32> {fir.bindc_name = "d"}, %arg2: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "m"}, %arg3: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}, %arg4: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}) {
+    %0:2 = hlfir.declare %arg0 {uniq_name = "_QFFtestEa"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+    %1:2 = hlfir.declare %arg3 {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+    %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEd"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+    %3:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEm"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+    %4:2 = hlfir.declare %arg4 {uniq_name = "_QFFtestEs"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+    %5 = fir.load %2#0 : !fir.ref<i32>
+    %6 = hlfir.minloc %0#0 dim %5 mask %3#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, i32, !fir.box<!fir.array<?x!fir.logical<4>>>) -> i32
+    hlfir.assign %6 to %4#0 : i32, !fir.box<!fir.array<?xi32>>
+    return
+}
+// CHECK-LABEL: func.func @_QPminloc7(
+// CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}
+// CHECK:           %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "d"}
+// CHECK:           %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "m"}
+// CHECK:           %[[ARG3:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}
+// CHECK:           %[[ARG4:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}
+// CHECK-NEXT:    %[[V0:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
+// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFFtestEa"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG3]] {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
+// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFFtestEd"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:    %[[V4:.*]]:2 = hlfir.declare %[[ARG2]] {uniq_name = "_QFFtestEm"} : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> (!fir.box<!fir.array<?x!fir.logical<4>>>, !fir.box<!fir.array<?x!fir.logical<4>>>)
+// CHECK-NEXT:    %[[V5:.*]]:2 = hlfir.declare %[[ARG4]] {uniq_name = "_QFFtestEs"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %[[V6:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
+// CHECK-NEXT:    %c4_i32 = arith.constant 4 : i32
+// CHECK-NEXT:    %false = arith.constant false
+// CHECK-NEXT:    %[[V7:.*]] = fir.zero_bits !fir.heap<i32>
+// CHECK-NEXT:    %[[V8:.*]] = fir.embox %[[V7]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
+// CHECK-NEXT:    fir.store %[[V8]] to %[[V0]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+// CHECK:         %[[V10:.*]] = fir.convert %[[V0]] : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> !fir.ref<!fir.box<none>>
+// CHECK-NEXT:    %[[V11:.*]] = fir.convert %[[V1]]#1 : (!fir.box<!fir.array<?xi32>>) -> !fir.box<none>
+// CHECK:         %[[V14:.*]] = fir.convert %[[V4]]#1 : (!fir.box<!fir.array<?x!fir.logical<4>>>) -> !fir.box<none>
+// CHECK-NEXT:    %[[V15:.*]] = fir.call @_FortranAMinlocDim(%[[V10]], %[[V11]], %c4_i32, %[[V6]], {{.*}}, {{.*}}, %[[V14]], %false) fastmath<contract> : (!fir.ref<!fir.box<none>>, !fir.box<none>, i32, i32, !fir.ref<i8>, i32, !fir.box<none>, i1) -> none
+// CHECK-NEXT:    %[[V16:.*]] = fir.load %[[V0]] : !fir.ref<!fir.box<!fir.heap<i32>>>
+// CHECK-NEXT:    %[[V17:.*]] = fir.box_addr %[[V16]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+// CHECK-NEXT:    %[[V18:.*]] = fir.load %[[V17]] : !fir.heap<i32>
+// CHECK-NEXT:    fir.freemem %[[V17]] : !fir.heap<i32>
+// CHECK-NEXT:    hlfir.assign %[[V18]] to %[[V5]]#0 : i32, !fir.box<!fir.array<?xi32>>
+// CHECK-NEXT:    return
+
diff --git a/flang/test/HLFIR/minloc.fir b/flang/test/HLFIR/minloc.fir
new file mode 100644
index 00000000000000..9afb45f5bc198c
--- /dev/null
+++ b/flang/test/HLFIR/minloc.fir
@@ -0,0 +1,272 @@
+// Test hlfir.minloc operation parse, verify (no errors), and unparse
+
+// RUN: fir-opt %s | fir-opt | FileCheck %s
+
+// array is an expression of known shape (dim is a constant index, mask is a boxed scalar logical)
+func.func @minloc0(%arg0: !hlfir.expr<42xi32>) {
+  %mask = fir.alloca !fir.logical<4>
+  %c_1 = arith.constant 1 : index
+  %true = arith.constant true
+  %true_logical = fir.convert %true : (i1) -> !fir.logical<4>
+  fir.store %true_logical to %mask : !fir.ref<!fir.logical<4>>
+  %mask_box = fir.embox %mask : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %mask_box : (!hlfir.expr<42xi32>, index, !fir.box<!fir.logical<4>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc0(%[[ARRAY:.*]]: !hlfir.expr<42xi32>) {
+// CHECK-NEXT:   %[[MASK:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[TRUE:.*]] = arith.constant true
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.convert %[[TRUE]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:   fir.store %[[LOGICAL]] to %[[MASK]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOX:.*]] = fir.embox %[[MASK]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[BOX]] : (!hlfir.expr<42xi32>, index, !fir.box<!fir.logical<4>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// array is an expression of assumed shape (dynamic extent, !hlfir.expr<?xi32>)
+func.func @minloc1(%arg0: !hlfir.expr<?xi32>) {
+  %mask = fir.alloca !fir.logical<4>
+  %c_1 = arith.constant 1 : index
+  %true = arith.constant true
+  %true_logical = fir.convert %true : (i1) -> !fir.logical<4>
+  fir.store %true_logical to %mask : !fir.ref<!fir.logical<4>>
+  %mask_box = fir.embox %mask : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %mask_box : (!hlfir.expr<?xi32>, index, !fir.box<!fir.logical<4>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc1(%[[ARRAY:.*]]: !hlfir.expr<?xi32>) {
+// CHECK-NEXT:   %[[MASK:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[TRUE:.*]] = arith.constant true
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.convert %[[TRUE]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:   fir.store %[[LOGICAL]] to %[[MASK]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOX:.*]] = fir.embox %[[MASK]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[BOX]] : (!hlfir.expr<?xi32>, index, !fir.box<!fir.logical<4>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// boxed array of known shape (!fir.box<!fir.array<42xi32>>)
+func.func @minloc2(%arg0: !fir.box<!fir.array<42xi32>>) {
+  %mask = fir.alloca !fir.logical<4>
+  %c_1 = arith.constant 1 : index
+  %true = arith.constant true
+  %true_logical = fir.convert %true : (i1) -> !fir.logical<4>
+  fir.store %true_logical to %mask : !fir.ref<!fir.logical<4>>
+  %mask_box = fir.embox %mask : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %mask_box : (!fir.box<!fir.array<42xi32>>, index, !fir.box<!fir.logical<4>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc2(%[[ARRAY:.*]]: !fir.box<!fir.array<42xi32>>) {
+// CHECK-NEXT:   %[[MASK:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[TRUE:.*]] = arith.constant true
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.convert %[[TRUE]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:   fir.store %[[LOGICAL]] to %[[MASK]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOX:.*]] = fir.embox %[[MASK]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[BOX]] : (!fir.box<!fir.array<42xi32>>, index, !fir.box<!fir.logical<4>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// assumed shape boxed array (!fir.box<!fir.array<?xi32>>)
+func.func @minloc3(%arg0: !fir.box<!fir.array<?xi32>>) {
+  %mask = fir.alloca !fir.logical<4>
+  %c_1 = arith.constant 1 : index
+  %true = arith.constant true
+  %true_logical = fir.convert %true : (i1) -> !fir.logical<4>
+  fir.store %true_logical to %mask : !fir.ref<!fir.logical<4>>
+  %mask_box = fir.embox %mask : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %mask_box : (!fir.box<!fir.array<?xi32>>, index, !fir.box<!fir.logical<4>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc3(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>) {
+// CHECK-NEXT:   %[[MASK:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   %[[TRUE:.*]] = arith.constant true
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.convert %[[TRUE]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:   fir.store %[[LOGICAL]] to %[[MASK]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOX:.*]] = fir.embox %[[MASK]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[BOX]] : (!fir.box<!fir.array<?xi32>>, index, !fir.box<!fir.logical<4>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// known shape expr mask (42-element hlfir.expr of logicals); the array is an assumed-shape box
+func.func @minloc4(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !hlfir.expr<42x!fir.logical<4>>) {
+  %c_1 = arith.constant 1 : index
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %arg1 : (!fir.box<!fir.array<?xi32>>, index, !hlfir.expr<42x!fir.logical<4>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc4(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: !hlfir.expr<42x!fir.logical<4>>) {
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, index, !hlfir.expr<42x!fir.logical<4>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// assumed shape expr mask (hlfir.expr of logicals with a dynamic extent)
+func.func @minloc5(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !hlfir.expr<?x!fir.logical<4>>) {
+  %c_1 = arith.constant 1 : index
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %arg1 : (!fir.box<!fir.array<?xi32>>, index, !hlfir.expr<?x!fir.logical<4>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc5(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: !hlfir.expr<?x!fir.logical<4>>) {
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, index, !hlfir.expr<?x!fir.logical<4>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// known shape array mask (boxed 42-element logical array)
+func.func @minloc6(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.box<!fir.array<42x!fir.logical<4>>>) {
+  %c_1 = arith.constant 1 : index
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %arg1 : (!fir.box<!fir.array<?xi32>>, index, !fir.box<!fir.array<42x!fir.logical<4>>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc6(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: !fir.box<!fir.array<42x!fir.logical<4>>>) {
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, index, !fir.box<!fir.array<42x!fir.logical<4>>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// assumed shape array mask (boxed logical array with a dynamic extent)
+func.func @minloc7(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+  %c_1 = arith.constant 1 : index
+  %0 = hlfir.minloc %arg0 dim %c_1 mask %arg1 : (!fir.box<!fir.array<?xi32>>, index, !fir.box<!fir.array<?x!fir.logical<4>>>) -> i32
+  return
+}
+// CHECK:      func.func @minloc7(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>) {
+// CHECK-NEXT:   %[[C1:.*]] = arith.constant 1 : index
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[C1]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, index, !fir.box<!fir.array<?x!fir.logical<4>>>) -> i32
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// known shape expr return (2x2 array reduced along a runtime dim gives !hlfir.expr<2xi32>)
+func.func @minloc8(%arg0: !fir.box<!fir.array<2x2xi32>>, %arg1: i32) {
+  %mask = fir.alloca !fir.logical<4>
+  %true = arith.constant true
+  %true_logical = fir.convert %true : (i1) -> !fir.logical<4>
+  fir.store %true_logical to %mask : !fir.ref<!fir.logical<4>>
+  %mask_box = fir.embox %mask : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %mask_box : (!fir.box<!fir.array<2x2xi32>>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<2xi32>
+  return
+}
+// CHECK:      func.func @minloc8(%[[ARRAY:.*]]: !fir.box<!fir.array<2x2xi32>>, %[[DIM:.*]]: i32) {
+// CHECK-NEXT:   %[[MASK:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:   %[[TRUE:.*]] = arith.constant true
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.convert %[[TRUE]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:   fir.store %[[LOGICAL]] to %[[MASK]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOX:.*]] = fir.embox %[[MASK]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[DIM]] mask %[[BOX]] : (!fir.box<!fir.array<2x2xi32>>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<2xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// assumed shape expr return (dynamic-extent array reduced along a runtime dim gives !hlfir.expr<?xi32>)
+func.func @minloc9(%arg0: !fir.box<!fir.array<?x?xi32>>, %arg1: i32) {
+  %mask = fir.alloca !fir.logical<4>
+  %true = arith.constant true
+  %true_logical = fir.convert %true : (i1) -> !fir.logical<4>
+  fir.store %true_logical to %mask : !fir.ref<!fir.logical<4>>
+  %mask_box = fir.embox %mask : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+  %0 = hlfir.minloc %arg0 dim %arg1 mask %mask_box : (!fir.box<!fir.array<?x?xi32>>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<?xi32>
+  return
+}
+// CHECK:      func.func @minloc9(%[[ARRAY:.*]]: !fir.box<!fir.array<?x?xi32>>, %[[DIM:.*]]: i32) {
+// CHECK-NEXT:   %[[MASK:.*]] = fir.alloca !fir.logical<4>
+// CHECK-NEXT:   %[[TRUE:.*]] = arith.constant true
+// CHECK-NEXT:   %[[LOGICAL:.*]] = fir.convert %[[TRUE]] : (i1) -> !fir.logical<4>
+// CHECK-NEXT:   fir.store %[[LOGICAL]] to %[[MASK]] : !fir.ref<!fir.logical<4>>
+// CHECK-NEXT:   %[[BOX:.*]] = fir.embox %[[MASK]] : (!fir.ref<!fir.logical<4>>) -> !fir.box<!fir.logical<4>>
+// CHECK-NEXT:   hlfir.minloc %[[ARRAY]] dim %[[DIM]] mask %[[BOX]] : (!fir.box<!fir.array<?x?xi32>>, i32, !fir.box<!fir.logical<4>>) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with only an array argument (NOTE(review): rank-2 array but a 1-element result type; extent 2 would be expected without DIM - confirm)
+func.func @minloc10(%arg0: !fir.box<!fir.array<?x?xi32>>) {
+  %minloc = hlfir.minloc %arg0 : (!fir.box<!fir.array<?x?xi32>>) -> !hlfir.expr<1xi32>
+  return
+}
+// CHECK:      func.func @minloc10(%[[ARRAY:.*]]: !fir.box<!fir.array<?x?xi32>>
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] : (!fir.box<!fir.array<?x?xi32>>) -> !hlfir.expr<1xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with only a character array argument (NOTE(review): rank-2 array but a 1-element result type - confirm)
+func.func @minloc11(%arg0: !fir.box<!fir.array<?x?x!fir.char<1,?>>>) {
+  %minloc = hlfir.minloc %arg0 : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !hlfir.expr<1xi32>
+  return
+}
+// CHECK:      func.func @minloc11(%[[ARRAY:.*]]: !fir.box<!fir.array<?x?x!fir.char<1,?>>>
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>) -> !hlfir.expr<1xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with array and dim argument (i32 dim, no mask)
+func.func @minloc12(%arg0: !fir.box<!fir.array<?x?xi32>>, %arg1: i32) {
+  %minloc = hlfir.minloc %arg0 dim %arg1 : (!fir.box<!fir.array<?x?xi32>>, i32) -> !hlfir.expr<?xi32>
+  return
+}
+// CHECK:      func.func @minloc12(%[[ARRAY:.*]]: !fir.box<!fir.array<?x?xi32>>, %[[DIM:.*]]: i32
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] dim %[[DIM]] : (!fir.box<!fir.array<?x?xi32>>, i32) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with array and mask argument (mask is a scalar !fir.logical<4> value, no dim)
+func.func @minloc13(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.logical<4>) {
+  %minloc = hlfir.minloc %arg0 mask %arg1 : (!fir.box<!fir.array<?xi32>>, !fir.logical<4>) -> !hlfir.expr<1xi32>
+  return
+}
+// CHECK:      func.func @minloc13(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: !fir.logical<4>
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, !fir.logical<4>) -> !hlfir.expr<1xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with dim argument with an unusual type (index rather than i32)
+func.func @minloc14(%arg0: !fir.box<!fir.array<?x?xi32>>, %arg1: index) {
+  %minloc = hlfir.minloc %arg0 dim %arg1 : (!fir.box<!fir.array<?x?xi32>>, index) -> !hlfir.expr<?xi32>
+  return
+}
+// CHECK:      func.func @minloc14(%[[ARRAY:.*]]: !fir.box<!fir.array<?x?xi32>>, %[[DIM:.*]]: index
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] dim %[[DIM]] : (!fir.box<!fir.array<?x?xi32>>, index) -> !hlfir.expr<?xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with mask argument of unusual type (i1 rather than a !fir.logical)
+func.func @minloc15(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: i1) {
+  %minloc = hlfir.minloc %arg0 mask %arg1 : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<1xi32>
+  return
+}
+// CHECK:      func.func @minloc15(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: i1
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<1xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with mask argument of ref<array<>> type (a reference to a logical array, not a box)
+func.func @minloc16(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: !fir.ref<!fir.array<?x!fir.logical<4>>>) {
+  %minloc = hlfir.minloc %arg0 mask %arg1 : (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?x!fir.logical<4>>>) -> !hlfir.expr<1xi32>
+  return
+}
+// CHECK:      func.func @minloc16(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: !fir.ref<!fir.array<?x!fir.logical<4>>>
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?x!fir.logical<4>>>) -> !hlfir.expr<1xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+
+// hlfir.minloc with kind implied by the return type (i16 result elements here)
+func.func @minloc17(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: i1) {
+  %minloc = hlfir.minloc %arg0 mask %arg1 : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<1xi16>
+  return
+}
+// CHECK:      func.func @minloc17(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: i1
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] mask %[[MASK]] : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<1xi16>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
+
+// hlfir.minloc with back argument (a constant true i1, combined with an i1 mask)
+func.func @minloc18(%arg0: !fir.box<!fir.array<?xi32>>, %arg1: i1) {
+  %true = arith.constant true
+  %minloc = hlfir.minloc %arg0 mask %arg1 back %true : (!fir.box<!fir.array<?xi32>>, i1, i1) -> !hlfir.expr<1xi32>
+  return
+}
+}
+// CHECK:      func.func @minloc18(%[[ARRAY:.*]]: !fir.box<!fir.array<?xi32>>, %[[MASK:.*]]: i1
+// CHECK-NEXT:   %[[C2:.*]] = arith.constant true
+// CHECK-NEXT:   %[[minloc:.*]] = hlfir.minloc %[[ARRAY]] mask %[[MASK]] back %[[C2]] : (!fir.box<!fir.array<?xi32>>, i1, i1) -> !hlfir.expr<1xi32>
+// CHECK-NEXT:   return
+// CHECK-NEXT: }
\ No newline at end of file
diff --git a/flang/test/Lower/HLFIR/minloc.f90 b/flang/test/Lower/HLFIR/minloc.f90
new file mode 100644
index 00000000000000..c27430689ee020
--- /dev/null
+++ b/flang/test/Lower/HLFIR/minloc.f90
@@ -0,0 +1,370 @@
+! Test lowering of MINLOC intrinsic to HLFIR
+! RUN: bbc -emit-hlfir -o - %s 2>&1 | FileCheck %s
+
+! simple 1 argument MINLOC: only ARRAY is given (no DIM, MASK, KIND or BACK)
+subroutine minloc1(a, s)
+  integer :: a(:), s(:)
+  s = MINLOC(a)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc1(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi32>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! minloc with by-ref DIM argument (d is an integer dummy, so its value is loaded from a reference)
+subroutine minloc2(a, s, d)
+  integer :: a(:,:), s(:), d
+  s = MINLOC(a, d)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc2(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.ref<i32>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[DIM_REF:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[DIM:.*]] = fir.load %[[DIM_REF]]#0 : !fir.ref<i32>
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 dim %[[DIM]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x?xi32>>, i32) -> !hlfir.expr<?xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! minloc with scalar mask argument (m is a scalar logical dummy passed as MASK)
+subroutine minloc3(a, s, m)
+  integer :: a(:), s(:)
+  logical :: m
+  s = MINLOC(a, m)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc3(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.ref<!fir.logical<4>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[MASK:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 mask %[[MASK]]#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.logical<4>>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi32>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! minloc with array mask argument (m is an assumed-shape logical array)
+subroutine minloc4(a, s, m)
+  integer :: a(:), s(:)
+  logical :: m(:)
+  s = MINLOC(a, m)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc4(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[MASK:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 mask %[[MASK]]#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?x!fir.logical<4>>>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi32>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! minloc with all 3 arguments (ARRAY, DIM, MASK), dim is by-val, array isn't boxed
+subroutine minloc5(s)
+  integer :: s(2)
+  integer :: a(2,2) = reshape((/1, 2, 3, 4/), [2,2])
+  s = minloc(a, 1, .true.)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc5
+! CHECK:           %[[ARG0:.*]]: !fir.ref<!fir.array<2xi32>>
+! CHECK-DAG:     %[[ADDR:.*]] = fir.address_of({{.*}}) : !fir.ref<!fir.array<2x2xi32>>
+! CHECK-DAG:     %[[ARRAY_SHAPE:.*]] = fir.shape {{.*}} -> !fir.shape<2>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ADDR]](%[[ARRAY_SHAPE]])
+! CHECK-DAG:     %[[OUT_SHAPE:.*]] = fir.shape {{.*}} -> !fir.shape<1>
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG0]](%[[OUT_SHAPE]])
+! CHECK-DAG:     %[[TRUE:.*]] = arith.constant true
+! CHECK-DAG:     %[[C1:.*]] = arith.constant 1 : i32
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 dim %[[C1]] mask %[[TRUE]] {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<2x2xi32>>, i32, i1) -> !hlfir.expr<2xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<2xi32>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! back argument as .true. (a literal constant passed by keyword)
+subroutine minloc_back(a, s)
+  integer :: a(:), s(:)
+  s = MINLOC(a, BACK=.TRUE.)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc_back(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[C1:.*]] = arith.constant true
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 back %[[C1]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi32>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! back argument as logical (a non-optional scalar logical dummy)
+subroutine minloc_back2(a, s, b)
+  integer :: a(:), s(:)
+  logical :: b
+  s = MINLOC(a, BACK=b)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc_back2(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.ref<!fir.logical<4>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[BACKD:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-NEXT:    %[[BACK:.*]] = fir.load %[[BACKD]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 back %[[BACK]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !fir.logical<4>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi32>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! back argument as optional logical (the expected HLFIR substitutes false when b is absent)
+subroutine minloc_back3(a, s, b)
+  integer :: a(:), s(:)
+  logical, optional :: b
+  s = MINLOC(a, BACK=b)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc_back3(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b", fir.optional}) {
+! CHECK:        %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-NEXT:   %[[BACKD:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:   %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-NEXT:   %[[IFP:.*]] = fir.is_present %[[BACKD]]#0 : (!fir.ref<!fir.logical<4>>) -> i1
+! CHECK-NEXT:   %[[BACK:.*]] = fir.if %[[IFP]] -> (!fir.logical<4>) {
+! CHECK-NEXT:     %[[IFT:.*]] = fir.load %[[BACKD]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK-NEXT:     fir.result %[[IFT]] : !fir.logical<4>
+! CHECK-NEXT:   } else {
+! CHECK-NEXT:     %false = arith.constant false
+! CHECK-NEXT:     %[[IFE:.*]] = fir.convert %false : (i1) -> !fir.logical<4>
+! CHECK-NEXT:     fir.result %[[IFE]] : !fir.logical<4>
+! CHECK-NEXT:   }
+! CHECK-NEXT:   %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 back %[[BACK]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !fir.logical<4>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:   hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:   hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi32>
+! CHECK-NEXT:   return
+! CHECK-NEXT: }
+
+
+! kind = 2 (the expected HLFIR result has i16 elements, assigned to the default integer s)
+subroutine minloc_kind(a, s)
+  integer :: a(:), s(:)
+  s = MINLOC(a, KIND=2)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc_kind(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK:         %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>) -> !hlfir.expr<1xi16>
+! CHECK:         %[[ELM:.*]] = hlfir.elemental
+! CHECK:         hlfir.assign %[[ELM]] to %[[OUT]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[ELM]] : !hlfir.expr<?xi32>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]] : !hlfir.expr<1xi16>
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+subroutine minloc6(a, s, d)
+  integer, pointer :: d
+  integer s(:)
+  real :: a(:,:)
+  s = minloc(a, (d))  ! DIM is the parenthesized pointer d, i.e. an expression rather than a variable
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc6(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>>
+! CHECK:           %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK:           %[[ARG2:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[DIM_VAR:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:     %[[DIM_BOX:.*]] = fir.load %[[DIM_VAR]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>
+! CHECK-NEXT:    %[[DIM_ADDR:.*]] = fir.box_addr %[[DIM_BOX]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
+! CHECK-NEXT:    %[[DIM0:.*]] = fir.load %[[DIM_ADDR]] : !fir.ptr<i32>
+! CHECK-NEXT:    %[[DIM1:.*]] = hlfir.no_reassoc %[[DIM0]] : i32
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 dim %[[DIM1]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x?xf32>>, i32) -> !hlfir.expr<?xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! simple 1 argument MINLOC for character (assumed-length character array)
+subroutine minloc7(a, s)
+  character(*) :: a(:)
+  integer :: s(:)
+  s = MINLOC(a)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc7(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! minloc for character with by-ref DIM argument (rank-2 assumed-length character array)
+subroutine minloc8(a, s, d)
+  character(*) :: a(:,:)
+  integer :: d, s(:)
+  s = MINLOC(a, d)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc8(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x?x!fir.char<1,?>>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.ref<i32>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[DIM_REF:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[DIM:.*]] = fir.load %[[DIM_REF]]#0 : !fir.ref<i32>
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 dim %[[DIM]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x?x!fir.char<1,?>>>, i32) -> !hlfir.expr<?xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+! minloc for character with scalar mask argument (m is a scalar logical dummy)
+subroutine minloc9(a, s, m)
+  character(*) :: a(:)
+  integer :: s(:)
+  logical :: m
+  s = MINLOC(a, m)
+end subroutine
+! CHECK-LABEL: func.func @_QPminloc9(
+! CHECK:           %[[ARG0:.*]]: !fir.box<!fir.array<?x!fir.char<1,?>>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}, %[[ARG2:.*]]: !fir.ref<!fir.logical<4>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[OUT:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[MASK:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[EXPR:.*]] = hlfir.minloc %[[ARRAY]]#0 mask %[[MASK]]#0 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1,?>>>, !fir.ref<!fir.logical<4>>) -> !hlfir.expr<1xi32>
+! CHECK-NEXT:    hlfir.assign %[[EXPR]] to %[[OUT]]#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    hlfir.destroy %[[EXPR]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+subroutine testDynamicallyOptionalMask(array, mask, res)
+  integer :: array(:), res(:)
+  logical, allocatable :: mask(:)
+  res = MINLOC(array, mask=mask)  ! mask is allocatable; the expected HLFIR passes fir.absent when its address is null
+end subroutine
+! CHECK-LABEL: func.func @_QPtestdynamicallyoptionalmask(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>>
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[MASK:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[RES:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[MASK_LOAD:.*]] = fir.load %[[MASK]]#1
+! CHECK-NEXT:    %[[MASK_ADDR:.*]] = fir.box_addr %[[MASK_LOAD]]
+! CHECK-NEXT:    %[[MASK_ADDR_INT:.*]] = fir.convert %[[MASK_ADDR]]
+! CHECK-NEXT:    %[[C0:.*]] = arith.constant 0 : i64
+! CHECK-NEXT:    %[[CMP:.*]] = arith.cmpi ne, %[[MASK_ADDR_INT]], %[[C0]] : i64
+! It is unfortunate that a second load is emitted here: the first is generated
+! for PreparedActualArgument::isPresent, the second for the optional handling.
+! CHECK-NEXT:    %[[MASK_LOAD2:.*]] = fir.load %[[MASK]]#1
+! CHECK-NEXT:    %[[ABSENT:.*]] = fir.absent !fir.box<!fir.heap<!fir.array<?x!fir.logical<4>>>>
+! CHECK-NEXT:    %[[SELECT:.*]] = arith.select %[[CMP]], %[[MASK_LOAD2]], %[[ABSENT]]
+! CHECK-NEXT:    %[[MINLOC:.*]] = hlfir.minloc %[[ARRAY]]#0 mask %[[SELECT]]
+! CHECK-NEXT:    hlfir.assign %[[MINLOC]] to %[[RES]]#0
+! CHECK-NEXT:    hlfir.destroy %[[MINLOC]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+subroutine testAllocatableArray(array, mask, res)
+  integer, allocatable :: array(:)
+  integer :: res(:)
+  logical :: mask(:)
+  res = MINLOC(array, mask=mask)  ! array is allocatable; the expected HLFIR loads its box before hlfir.minloc
+end subroutine
+! CHECK-LABEL: func.func @_QPtestallocatablearray(
+! CHECK-SAME:      %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK-SAME:      %[[ARG1:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>>
+! CHECK-SAME:      %[[ARG2:.*]]: !fir.box<!fir.array<?xi32>>
+! CHECK-DAG:     %[[ARRAY:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-DAG:     %[[MASK:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-DAG:     %[[RES:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[LOADED_ARRAY:.*]] = fir.load %[[ARRAY]]#0
+! CHECK-NEXT:    %[[MINLOC:.*]] = hlfir.minloc %[[LOADED_ARRAY]] mask %[[MASK]]#0
+! CHECK-NEXT:    hlfir.assign %[[MINLOC]] to %[[RES]]#0
+! CHECK-NEXT:    hlfir.destroy %[[MINLOC]]
+! CHECK-NEXT:    return
+! CHECK-NEXT:  }
+
+function testOptionalScalar(array, mask)
+  integer :: array(:)
+  logical, optional :: mask
+  integer :: testOptionalScalar(1)
+  testOptionalScalar = minloc(array, mask)  ! mask is an OPTIONAL scalar dummy forwarded as MASK
+end function
+! CHECK-LABEL:   func.func @_QPtestoptionalscalar(
+! CHECK-SAME:                                     %[[ARRAY_ARG:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"},
+! CHECK-SAME:                                     %[[MASK_ARG:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "mask", fir.optional}) -> !fir.array<1xi32>
+! CHECK:           %[[ARRAY_VAR:.*]]:2 = hlfir.declare %[[ARRAY_ARG]]
+! CHECK:           %[[MASK_VAR:.*]]:2 = hlfir.declare %[[MASK_ARG]]
+! CHECK:           %[[RET_ALLOC:.*]] = fir.alloca !fir.array<1xi32> {bindc_name = "testoptionalscalar", uniq_name = "_QFtestoptionalscalarEtestoptionalscalar"}
+! CHECK:           %[[RET_VAR:.*]]:2 = hlfir.declare %[[RET_ALLOC]]
+! CHECK:           %[[MASK_IS_PRESENT:.*]] = fir.is_present %[[MASK_VAR]]#0 : (!fir.ref<!fir.logical<4>>) -> i1
+! CHECK:           %[[MASK_BOX:.*]] = fir.embox %[[MASK_VAR]]#1
+! CHECK:           %[[ABSENT:.*]] = fir.absent !fir.box<!fir.logical<4>>
+! CHECK:           %[[MASK_SELECT:.*]] = arith.select %[[MASK_IS_PRESENT]], %[[MASK_BOX]], %[[ABSENT]]
+! CHECK:           %[[RES:.*]] = hlfir.minloc %[[ARRAY_VAR]]#0 mask %[[MASK_SELECT]] {{.*}}: (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.logical<4>>) -> !hlfir.expr<1xi32>
+! CHECK:           hlfir.assign %[[RES]] to %[[RET_VAR]]#0
+! CHECK:           hlfir.destroy %[[RES]]
+! CHECK:           %[[RET:.*]] = fir.load %[[RET_VAR]]#1 : !fir.ref<!fir.array<1xi32>>
+! CHECK:           return %[[RET]] : !fir.array<1xi32>
+! CHECK:         }
+
+! Test that hlfir.minloc lowering inherits constant
+! character length from the argument, when the length
+! is unknown from the Fortran::evaluate expression type.
+subroutine test_unknown_char_len_result
+  character(len=3) :: array(3,3)
+  integer :: res(2)
+  res = minloc(array(:,:)(:))
+end subroutine test_unknown_char_len_result
+! CHECK-LABEL:   func.func @_QPtest_unknown_char_len_result() {
+! CHECK-DAG:       %[[C3:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[C3_0:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[C3_1:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[ARRAY_ALLOC:.*]] = fir.alloca !fir.array<3x3x!fir.char<1,3>>
+! CHECK-DAG:       %[[ARRAY_SHAPE:.*]] = fir.shape %[[C3_0]], %[[C3_1]] : (index, index) -> !fir.shape<2>
+! CHECK-DAG:       %[[ARRAY:.*]]:2 = hlfir.declare %[[ARRAY_ALLOC]](%[[ARRAY_SHAPE]]) typeparams %[[C3]]
+! CHECK-DAG:       %[[C2:.*]] = arith.constant 2 : index
+! CHECK-DAG:       %[[RES_ALLOC:.*]] = fir.alloca !fir.array<2xi32>
+! CHECK-DAG:       %[[RES_SHAPE:.*]] = fir.shape %[[C2]] : (index) -> !fir.shape<1>
+! CHECK-DAG:       %[[RES:.*]]:2 = hlfir.declare %[[RES_ALLOC]](%[[RES_SHAPE]])
+! CHECK-DAG:       %[[C1:.*]] = arith.constant 1 : index
+! CHECK-DAG:       %[[C1_3:.*]] = arith.constant 1 : index
+! CHECK-DAG:       %[[C3_4:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[C1_5:.*]] = arith.constant 1 : index
+! CHECK-DAG:       %[[C3_6:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[SHAPE:.*]] = fir.shape %[[C3_4]], %[[C3_6]] : (index, index) -> !fir.shape<2>
+! CHECK-DAG:       %[[C1_7:.*]] = arith.constant 1 : index
+! CHECK-DAG:       %[[C3_8:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[C3_9:.*]] = arith.constant 3 : index
+! CHECK-DAG:       %[[ARRAY_BOX:.*]] = hlfir.designate %[[ARRAY]]#0 (%[[C1]]:%[[C3_0]]:%[[C1_3]], %[[C1]]:%[[C3_1]]:%[[C1_5]]) substr %[[C1_7]], %[[C3_8]]  shape %[[SHAPE]] typeparams %[[C3_9]]
+! CHECK:           %[[EXPR:.*]] = hlfir.minloc %[[ARRAY_BOX]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<3x3x!fir.char<1,3>>>) -> !hlfir.expr<2xi32>
+! CHECK-NEXT:      hlfir.assign %[[EXPR]] to %[[RES]]#0 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+! CHECK-NEXT:      hlfir.destroy %[[EXPR]]
+! CHECK-NEXT:      return
+! CHECK-NEXT:    }
+
+
+subroutine scalar_dim1(a, d, m, b, s)
+  integer :: a(:), d
+  integer :: s(:)
+  logical :: m(:), b
+  s = MINLOC(a, dim=d, mask=m, kind=2, back=b)
+end subroutine
+! CHECK-LABEL:  func.func @_QPscalar_dim1(
+! CHECK:            %[[ARG0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "d"}, %[[ARG2:.*]]: !fir.box<!fir.array<?x!fir.logical<4>>> {fir.bindc_name = "m"}, %[[ARG3:.*]]: !fir.ref<!fir.logical<4>> {fir.bindc_name = "b"}, %[[ARG4:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "s"}) {
+! CHECK-NEXT:    %[[V0:.*]]:2 = hlfir.declare %[[ARG0]]
+! CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %[[ARG3]]
+! CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %[[ARG1]]
+! CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[ARG2]]
+! CHECK-NEXT:    %[[V4:.*]]:2 = hlfir.declare %[[ARG4]]
+! CHECK-NEXT:    %[[V5:.*]] = fir.load %[[V1]]#0 : !fir.ref<!fir.logical<4>>
+! CHECK-NEXT:    %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
+! CHECK-NEXT:    %[[V7:.*]] = hlfir.minloc %[[V0]]#0 dim %[[V6]] mask %[[V3]]#0 back %[[V5]] {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, i32, !fir.box<!fir.array<?x!fir.logical<4>>>, !fir.logical<4>) -> i16
+! CHECK-NEXT:    %[[V8:.*]] = fir.convert %[[V7]] : (i16) -> i32
+! CHECK-NEXT:    hlfir.assign %[[V8]] to %[[V4]]#0 : i32, !fir.box<!fir.array<?xi32>>
+! CHECK-NEXT:    return
diff --git a/flang/test/Lower/HLFIR/transformational.f90 b/flang/test/Lower/HLFIR/transformational.f90
index 22dfb420712569..5f113727733665 100644
--- a/flang/test/Lower/HLFIR/transformational.f90
+++ b/flang/test/Lower/HLFIR/transformational.f90
@@ -7,11 +7,7 @@ subroutine test_transformational_implemented_with_runtime_allocation(x)
   real :: x(10, 10)
   ! MINLOC result is allocated inside the runtime and returned in
   ! a descriptor that was passed by reference to the runtime.
-  ! Lowering does the following:
-  !  - declares the temp created by the runtime as an hlfir variable.
-  !  - "moves" this variable to an hlfir.expr
-  !  - associate the expression to takes_array_arg dummy argument
-  !  - destroys the expression after the call.
+  ! Lowering goes via a hlfir.minloc intrinsic.
 
   ! After bufferization, this will allow the buffer created by the
   ! runtime to be passed to takes_array_arg without creating any
@@ -19,17 +15,11 @@ subroutine test_transformational_implemented_with_runtime_allocation(x)
   call takes_array_arg(minloc(x))
 end subroutine
 ! CHECK-LABEL: func.func @_QPtest_transformational_implemented_with_runtime_allocation(
-! CHECK-SAME:                                                                          %[[VAL_0:.*]]: !fir.ref<!fir.array<10x10xf32>> {fir.bindc_name = "x"}) {
-! CHECK:  %[[VAL_1:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xi32>>>
-! CHECK:  %[[VAL_17:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<none>>
-! CHECK:  %[[VAL_22:.*]] = fir.call @_FortranAMinlocReal4(%[[VAL_17]], {{.*}}
-! CHECK:  %[[VAL_23:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-! CHECK:  %[[VAL_26:.*]] = fir.box_addr %[[VAL_23]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
-! CHECK:  %[[VAL_28:.*]]:2 = hlfir.declare %[[VAL_26]](%{{.*}}) {uniq_name = ".tmp.intrinsic_result"} : (!fir.heap<!fir.array<?xi32>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
-! CHECK:  %[[VAL_29:.*]] = arith.constant true
-! CHECK:  %[[VAL_30:.*]] = hlfir.as_expr %[[VAL_28]]#0 move %[[VAL_29]] : (!fir.box<!fir.array<?xi32>>, i1) -> !hlfir.expr<?xi32>
-! CHECK:  %[[VAL_32:.*]]:3 = hlfir.associate %[[VAL_30]](%{{.*}}) {adapt.valuebyref} : (!hlfir.expr<?xi32>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>, i1)
-! CHECK:  %[[VAL_33:.*]] = fir.convert %[[VAL_32]]#1 : (!fir.ref<!fir.array<?xi32>>) -> !fir.ref<!fir.array<2xi32>>
-! CHECK:  fir.call @_QPtakes_array_arg(%[[VAL_33]])
-! CHECK:  hlfir.end_associate %[[VAL_32]]#1, %[[VAL_32]]#2 : !fir.ref<!fir.array<?xi32>>, i1
-! CHECK:  hlfir.destroy %[[VAL_30]] : !hlfir.expr<?xi32>
+! CHECK-SAME:                                                                          %[[ARG0:.*]]: !fir.ref<!fir.array<10x10xf32>> {fir.bindc_name = "x"}) {
+! CHECK:  %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {uniq_name = "_QFtest_transformational_implemented_with_runtime_allocationEx"}
+! CHECK:  %[[VAL_2:.*]] = hlfir.minloc %[[VAL_1]]#0
+! CHECK:  %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_2]]
+! CHECK:  %[[VAL_4:.*]]:3 = hlfir.associate %[[VAL_2]](%[[VAL_3]]) {adapt.valuebyref}
+! CHECK:  fir.call @_QPtakes_array_arg(%[[VAL_4]]#1)
+! CHECK:  hlfir.end_associate %[[VAL_4]]#1, %[[VAL_4]]#2 : !fir.ref<!fir.array<2xi32>>, i1
+! CHECK:  hlfir.destroy %[[VAL_2]] : !hlfir.expr<2xi32>

>From bbdb55b9b0937e75c83bee4309267a231befd7b1 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Fri, 8 Dec 2023 11:19:02 +0000
Subject: [PATCH 2/2] [Flang] Minloc elemental intrinsic lowering

Currently the lowering of a minloc intrinsic with a mask will look something like
  %e = hlfir.elemental %shape ({
    ...
  })
  %m = hlfir.minloc %array mask %e
  hlfir.assign %m to %result
  hlfir.destroy %m
The elemental will be expanded into a temporary+loop, the minloc into a
FortranAMinloc call (which hopefully gets simplified to a specialized call that
can be inlined at the call site), and the assign might get expanded to a
FortranAAssign. The assign we could inline too, but it would be better to
generate the entire construct as a single loop if we can - one that performs the
minloc calculation with the mask elemental computed inline and assigns directly
to the output array.

This patch attempts to do that, adding a hlfir version of the expansion code
from SimplifyIntrinsics that turns an assign+minloc+elemental into a single
combined loop nest. It attempts to reuse the methods in genMinlocReductionLoop
for constructing the loop with a modified loop body. The declaration for the
function is currently in Optimizer/Support/Utils.h, but there might be a better
place for it.

It is currently added as part of the OptimizedBufferizationPass. I originally
had it as part of the SimplifyHLFIRIntrinsics pass, but there were already some
methods doing similar things in OptimizedBufferization. It just needs to happen
before the elementals are expanded. I think I would like to do a similar thing
for maxloc and any/all/count too if this looks OK. I will rebase over #74436
once that goes in.
---
 flang/include/flang/Optimizer/Support/Utils.h |  16 +
 .../Transforms/OptimizedBufferization.cpp     | 370 +++++++++++++-----
 .../Transforms/SimplifyIntrinsics.cpp         | 188 +++++----
 flang/test/HLFIR/minloc-elemental.fir         | 327 ++++++++++++++++
 flang/test/Transforms/simplifyintrinsics.fir  |   5 +-
 5 files changed, 720 insertions(+), 186 deletions(-)
 create mode 100644 flang/test/HLFIR/minloc-elemental.fir

diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 34c8e79173bcd4..93caa8b23d320c 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -133,6 +133,22 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
            fir::numericMlirTypeToFortran(builder, type, loc, intrinsicName) +
            " in " + intrinsicName);
 }
+
+using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
+    fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+    mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
+using InitValGeneratorTy = llvm::function_ref<mlir::Value(
+    fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
+
+// Produces a loop nest for a Minloc intrinsic. initVal builds the initial
+// reduction value; genBody emits the body of the innermost loop.
+void genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
+                            InitValGeneratorTy initVal,
+                            MinlocBodyOpGeneratorTy genBody, unsigned rank,
+                            mlir::Type elementType, mlir::Location loc,
+                            mlir::Type maskElemType, mlir::Value resultArr,
+                            bool maskMayBeLogicalScalar);
+
 } // namespace fir
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_UTILS_H
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 7abfa20493c736..218ddd2a6a7b7e 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,6 +20,7 @@
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/HLFIR/HLFIROps.h"
 #include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/PatternMatch.h"
@@ -98,7 +99,8 @@ class ElementalAssignBufferization
 /// the same block. If any operations with unknown effects are found,
 /// std::nullopt is returned
 static std::optional<mlir::SmallVector<mlir::MemoryEffects::EffectInstance>>
-getEffectsBetween(mlir::Operation *start, mlir::Operation *end) {
+getEffectsBetween(mlir::Operation *start, mlir::Operation *end,
+                  mlir::Operation *ignoring) {
   mlir::SmallVector<mlir::MemoryEffects::EffectInstance> ret;
   if (start == end)
     return ret;
@@ -108,6 +110,10 @@ getEffectsBetween(mlir::Operation *start, mlir::Operation *end) {
 
   mlir::Operation *nextOp = start;
   while (nextOp && nextOp != end) {
+    if (nextOp == ignoring) {
+      nextOp = nextOp->getNextNode();
+      continue;
+    }
     std::optional<mlir::SmallVector<mlir::MemoryEffects::EffectInstance>>
         effects = mlir::getEffectsRecursively(nextOp);
     if (!effects)
@@ -293,80 +299,10 @@ static bool areIdenticalOrDisjointSlices(mlir::Value ref1, mlir::Value ref2) {
   return false;
 }
 
-std::optional<ElementalAssignBufferization::MatchInfo>
-ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
-  mlir::Operation::user_range users = elemental->getUsers();
-  // the only uses of the elemental should be the assignment and the destroy
-  if (std::distance(users.begin(), users.end()) != 2) {
-    LLVM_DEBUG(llvm::dbgs() << "Too many uses of the elemental\n");
-    return std::nullopt;
-  }
-
-  // If the ElementalOp must produce a temporary (e.g. for
-  // finalization purposes), then we cannot inline it.
-  if (hlfir::elementalOpMustProduceTemp(elemental)) {
-    LLVM_DEBUG(llvm::dbgs() << "ElementalOp must produce a temp\n");
-    return std::nullopt;
-  }
-
-  MatchInfo match;
-  for (mlir::Operation *user : users)
-    mlir::TypeSwitch<mlir::Operation *, void>(user)
-        .Case([&](hlfir::AssignOp op) { match.assign = op; })
-        .Case([&](hlfir::DestroyOp op) { match.destroy = op; });
-
-  if (!match.assign || !match.destroy) {
-    LLVM_DEBUG(llvm::dbgs() << "Couldn't find assign or destroy\n");
-    return std::nullopt;
-  }
-
-  // the array is what the elemental is assigned into
-  // TODO: this could be extended to also allow hlfir.expr by first bufferizing
-  // the incoming expression
-  match.array = match.assign.getLhs();
-  mlir::Type arrayType = mlir::dyn_cast<fir::SequenceType>(
-      fir::unwrapPassByRefType(match.array.getType()));
-  if (!arrayType)
-    return std::nullopt;
-
-  // require that the array elements are trivial
-  // TODO: this is just to make the pass easier to think about. Not an inherent
-  // limitation
-  mlir::Type eleTy = hlfir::getFortranElementType(arrayType);
-  if (!fir::isa_trivial(eleTy))
-    return std::nullopt;
-
-  // the array must have the same shape as the elemental. CSE should have
-  // deduplicated the fir.shape operations where they are provably the same
-  // so we just have to check for the same ssa value
-  // TODO: add more ways of getting the shape of the array
-  mlir::Value arrayShape;
-  if (match.array.getDefiningOp())
-    arrayShape =
-        mlir::TypeSwitch<mlir::Operation *, mlir::Value>(
-            match.array.getDefiningOp())
-            .Case([](hlfir::DesignateOp designate) {
-              return designate.getShape();
-            })
-            .Case([](hlfir::DeclareOp declare) { return declare.getShape(); })
-            .Default([](mlir::Operation *) { return mlir::Value{}; });
-  if (!arrayShape) {
-    LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at "
-                            << elemental->getLoc() << "\n");
-    return std::nullopt;
-  }
-  if (arrayShape != elemental.getShape()) {
-    // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
-    // conformable unless the lhs is an allocatable array. In HLFIR we can
-    // see this from the presence or absence of the realloc attribute on
-    // hlfir.assign. If it is not a realloc assignment, we can trust that
-    // the shapes do conform
-    if (match.assign.getRealloc())
-      return std::nullopt;
-  }
-
-  // the transformation wants to apply the elemental in a do-loop at the
-  // hlfir.assign, check there are no effects which make this unsafe
+static bool checkForElementalEffectsBetween(hlfir::ElementalOp elemental,
+                                            hlfir::AssignOp assign,
+                                            mlir::Value array,
+                                            mlir::Operation *ignoring) {
 
   // keep track of any values written to in the elemental, as these can't be
   // read from between the elemental and the assignment
@@ -375,20 +311,21 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
   mlir::SmallVector<mlir::Value, 1> notToBeAccessedBeforeAssign;
   // any accesses to the array between the array and the assignment means it
   // would be unsafe to move the elemental to the assignment
-  notToBeAccessedBeforeAssign.push_back(match.array);
+  notToBeAccessedBeforeAssign.push_back(array);
 
   // 1) side effects in the elemental body - it isn't sufficient to just look
   // for ordered elementals because we also cannot support out of order reads
   std::optional<mlir::SmallVector<mlir::MemoryEffects::EffectInstance>>
-      effects = getEffectsBetween(&elemental.getBody()->front(),
-                                  elemental.getBody()->getTerminator());
+      effects =
+          getEffectsBetween(&elemental.getBody()->front(),
+                            elemental.getBody()->getTerminator(), nullptr);
   if (!effects) {
     LLVM_DEBUG(llvm::dbgs()
                << "operation with unknown effects inside elemental\n");
-    return std::nullopt;
+    return false;
   }
   for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
-    mlir::AliasResult res = containsReadOrWriteEffectOn(effect, match.array);
+    mlir::AliasResult res = containsReadOrWriteEffectOn(effect, array);
     if (res.isNo()) {
       if (mlir::isa<mlir::MemoryEffects::Write, mlir::MemoryEffects::Read>(
               effect.getEffect()))
@@ -402,7 +339,7 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
     // don't allow any aliasing writes in the elemental
     if (mlir::isa<mlir::MemoryEffects::Write>(effect.getEffect())) {
       LLVM_DEBUG(llvm::dbgs() << "write inside the elemental body\n");
-      return std::nullopt;
+      return false;
     }
 
     // allow if and only if the reads are from the elemental indices, in order
@@ -417,17 +354,17 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
     if (!res.isPartial()) {
       if (auto designate =
               effect.getValue().getDefiningOp<hlfir::DesignateOp>()) {
-        if (!areIdenticalOrDisjointSlices(match.array, designate.getMemref())) {
+        if (!areIdenticalOrDisjointSlices(array, designate.getMemref())) {
           LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate
                                   << " at " << elemental.getLoc() << "\n");
-          return std::nullopt;
+          return false;
         }
         auto indices = designate.getIndices();
         auto elementalIndices = elemental.getIndices();
         if (indices.size() != elementalIndices.size()) {
           LLVM_DEBUG(llvm::dbgs() << "possible read conflict: " << designate
                                   << " at " << elemental.getLoc() << "\n");
-          return std::nullopt;
+          return false;
         }
         if (std::equal(indices.begin(), indices.end(), elementalIndices.begin(),
                        elementalIndices.end()))
@@ -436,16 +373,16 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
     }
     LLVM_DEBUG(llvm::dbgs() << "disallowed side-effect: " << effect.getValue()
                             << " for " << elemental.getLoc() << "\n");
-    return std::nullopt;
+    return false;
   }
 
   // 2) look for conflicting effects between the elemental and the assignment
-  effects = getEffectsBetween(elemental->getNextNode(), match.assign);
+  effects = getEffectsBetween(elemental->getNextNode(), assign, ignoring);
   if (!effects) {
     LLVM_DEBUG(
         llvm::dbgs()
         << "operation with unknown effects between elemental and assign\n");
-    return std::nullopt;
+    return false;
   }
   for (const mlir::MemoryEffects::EffectInstance &effect : *effects) {
     // not safe to access anything written in the elemental as this write
@@ -456,11 +393,92 @@ ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
         LLVM_DEBUG(llvm::dbgs()
                    << "diasllowed side-effect: " << effect.getValue() << " for "
                    << elemental.getLoc() << "\n");
-        return std::nullopt;
+        return false;
       }
     }
   }
 
+  return true;
+}
+
+std::optional<ElementalAssignBufferization::MatchInfo>
+ElementalAssignBufferization::findMatch(hlfir::ElementalOp elemental) {
+  mlir::Operation::user_range users = elemental->getUsers();
+  // the only uses of the elemental should be the assignment and the destroy
+  if (std::distance(users.begin(), users.end()) != 2) {
+    LLVM_DEBUG(llvm::dbgs() << "Too many uses of the elemental\n");
+    return std::nullopt;
+  }
+
+  // If the ElementalOp must produce a temporary (e.g. for
+  // finalization purposes), then we cannot inline it.
+  if (hlfir::elementalOpMustProduceTemp(elemental)) {
+    LLVM_DEBUG(llvm::dbgs() << "ElementalOp must produce a temp\n");
+    return std::nullopt;
+  }
+
+  MatchInfo match;
+  for (mlir::Operation *user : users)
+    mlir::TypeSwitch<mlir::Operation *, void>(user)
+        .Case([&](hlfir::AssignOp op) { match.assign = op; })
+        .Case([&](hlfir::DestroyOp op) { match.destroy = op; });
+
+  if (!match.assign || !match.destroy) {
+    LLVM_DEBUG(llvm::dbgs() << "Couldn't find assign or destroy\n");
+    return std::nullopt;
+  }
+
+  // the array is what the elemental is assigned into
+  // TODO: this could be extended to also allow hlfir.expr by first bufferizing
+  // the incoming expression
+  match.array = match.assign.getLhs();
+  mlir::Type arrayType = mlir::dyn_cast<fir::SequenceType>(
+      fir::unwrapPassByRefType(match.array.getType()));
+  if (!arrayType)
+    return std::nullopt;
+
+  // require that the array elements are trivial
+  // TODO: this is just to make the pass easier to think about. Not an inherent
+  // limitation
+  mlir::Type eleTy = hlfir::getFortranElementType(arrayType);
+  if (!fir::isa_trivial(eleTy))
+    return std::nullopt;
+
+  // the array must have the same shape as the elemental. CSE should have
+  // deduplicated the fir.shape operations where they are provably the same
+  // so we just have to check for the same ssa value
+  // TODO: add more ways of getting the shape of the array
+  mlir::Value arrayShape;
+  if (match.array.getDefiningOp())
+    arrayShape =
+        mlir::TypeSwitch<mlir::Operation *, mlir::Value>(
+            match.array.getDefiningOp())
+            .Case([](hlfir::DesignateOp designate) {
+              return designate.getShape();
+            })
+            .Case([](hlfir::DeclareOp declare) { return declare.getShape(); })
+            .Default([](mlir::Operation *) { return mlir::Value{}; });
+  if (!arrayShape) {
+    LLVM_DEBUG(llvm::dbgs() << "Can't get shape of " << match.array << " at "
+                            << elemental->getLoc() << "\n");
+    return std::nullopt;
+  }
+  if (arrayShape != elemental.getShape()) {
+    // f2018 10.2.1.2 (3) requires the lhs and rhs of an assignment to be
+    // conformable unless the lhs is an allocatable array. In HLFIR we can
+    // see this from the presence or absence of the realloc attribute on
+    // hlfir.assign. If it is not a realloc assignment, we can trust that
+    // the shapes do conform
+    if (match.assign.getRealloc())
+      return std::nullopt;
+  }
+
+  // the transformation wants to apply the elemental in a do-loop at the
+  // hlfir.assign, check there are no effects which make this unsafe
+  if (!checkForElementalEffectsBetween(elemental, match.assign, match.array,
+                                       nullptr))
+    return std::nullopt;
+
   return match;
 }
 
@@ -659,6 +677,181 @@ mlir::LogicalResult VariableAssignBufferization::matchAndRewrite(
   return mlir::success();
 }
 
+// Look for assign(minloc(mask=elemental)) and generate the minloc loop with
+// inlined elemental and no extra temporaries:
+//  %e = hlfir.elemental %shape ({ ... })
+//  %m = hlfir.minloc %array mask %e
+//  hlfir.assign %m to %result
+//  hlfir.destroy %m
+// Only matches a boxed array of trivial elements, with no dim or back operand.
+class AssignMinMaxlocElementalConversion
+    : public mlir::OpRewritePattern<hlfir::AssignOp> {
+public:
+  using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(hlfir::AssignOp assign,
+                  mlir::PatternRewriter &rewriter) const override {
+    auto minloc = assign.getOperand(0).getDefiningOp<hlfir::MinlocOp>();
+    if (!minloc || !minloc.getMask() || minloc.getDim() || minloc.getBack())
+      return rewriter.notifyMatchFailure(
+          assign, "Did not find minloc with a mask and no dim/back");
+
+    auto elemental = minloc.getMask().getDefiningOp<hlfir::ElementalOp>();
+    if (!elemental || hlfir::elementalOpMustProduceTemp(elemental))
+      return rewriter.notifyMatchFailure(assign, "Did not find elemental");
+
+    // Apart from this assign, the minloc may only be used by a destroy.
+    mlir::Operation::user_range users = minloc->getUsers();
+    if (std::distance(users.begin(), users.end()) != 2)
+      return rewriter.notifyMatchFailure(assign, "Did not find minloc users");
+    auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(
+        (*users.begin()) == assign ? *++users.begin() : *users.begin());
+    if (!destroy)
+      return rewriter.notifyMatchFailure(assign, "Did not find destroy");
+
+    if (!checkForElementalEffectsBetween(elemental, assign, minloc.getArray(),
+                                         minloc))
+      return rewriter.notifyMatchFailure(assign, "Had unhandled effects");
+
+    mlir::Value resultArr = assign.getOperand(1);
+    mlir::Value array = minloc.getArray();
+
+    unsigned rank = mlir::cast<hlfir::ExprType>(minloc.getType()).getShape()[0];
+    mlir::Type arrayType = array.getType();
+    if (!arrayType.isa<fir::BoxType>())
+      return rewriter.notifyMatchFailure(
+          assign, "Currently requires a boxed type input");
+    mlir::Type elementType = hlfir::getFortranElementType(arrayType);
+    if (!fir::isa_trivial(elementType))
+      return rewriter.notifyMatchFailure(
+          assign, "Character arrays are currently not handled");
+
+    auto init = [](fir::FirOpBuilder builder, mlir::Location loc,
+                   mlir::Type elementType) {
+      if (auto ty = elementType.dyn_cast<mlir::FloatType>()) {
+        const llvm::fltSemantics &sem = ty.getFloatSemantics();
+        return builder.createRealConstant(
+            loc, elementType,
+            llvm::APFloat::getLargest(sem, /*Negative=*/false));
+      }
+      unsigned bits = elementType.getIntOrFloatBitWidth();
+      int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+      return builder.createIntegerConstant(loc, elementType, maxInt);
+    };
+
+    auto genBodyOp =
+        [&rank, &resultArr, &elemental](
+            fir::FirOpBuilder builder, mlir::Location loc,
+            mlir::Type elementType, mlir::Value array, mlir::Value flagRef,
+            mlir::Value reduction,
+            const llvm::SmallVectorImpl<mlir::Value> &indices)
+        -> mlir::Value {
+      // We are in the innermost loop: generate the elemental inline
+      mlir::Value oneIdx =
+          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
+      llvm::SmallVector<mlir::Value> oneBasedIndices;
+      llvm::transform(
+          indices, std::back_inserter(oneBasedIndices), [&](mlir::Value V) {
+            return builder.create<mlir::arith::AddIOp>(loc, V, oneIdx);
+          });
+      hlfir::YieldElementOp yield =
+          hlfir::inlineElementalOp(loc, builder, elemental, oneBasedIndices);
+      mlir::Value maskElem = yield.getElementValue();
+      yield->erase();
+
+      mlir::Value ifCompatElem = builder.create<fir::ConvertOp>(
+          loc, builder.getI1Type(), maskElem);
+
+      fir::IfOp maskIfOp =
+          builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+                                    /*withElseRegion=*/true);
+      builder.setInsertionPointToStart(&maskIfOp.getThenRegion().front());
+
+      // Set flag that mask was true at some point
+      mlir::Value flagSet = builder.createIntegerConstant(
+          loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
+      builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+      mlir::Type eleRefTy = builder.getRefType(elementType);
+      mlir::Value addr =
+          builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+      mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
+      // Compare with the current reduction value
+      mlir::Value cmp;
+      if (elementType.isa<mlir::FloatType>()) {
+        cmp = builder.create<mlir::arith::CmpFOp>(
+            loc, mlir::arith::CmpFPredicate::OLT, elem, reduction);
+      } else if (elementType.isa<mlir::IntegerType>()) {
+        cmp = builder.create<mlir::arith::CmpIOp>(
+            loc, mlir::arith::CmpIPredicate::slt, elem, reduction);
+      } else {
+        llvm_unreachable("unsupported type");
+      }
+
+      // Set the new coordinate to the result
+      fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
+                                                 /*withElseRegion*/ true);
+
+      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+      mlir::Type resultElemTy =
+          hlfir::getFortranElementType(resultArr.getType());
+      mlir::Type returnRefTy = builder.getRefType(resultElemTy);
+      mlir::IndexType idxTy = builder.getIndexType();
+
+      mlir::Value one = builder.createIntegerConstant(loc, resultElemTy, 1);
+
+      for (unsigned int i = 0; i < rank; ++i) {
+        mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
+        mlir::Value resultElemAddr = builder.create<fir::CoordinateOp>(
+            loc, returnRefTy, resultArr, index);
+        mlir::Value convert =
+            builder.create<fir::ConvertOp>(loc, resultElemTy, indices[i]);
+        mlir::Value fortranIndex =
+            builder.create<mlir::arith::AddIOp>(loc, convert, one);
+        builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
+      }
+      builder.create<fir::ResultOp>(loc, elem);
+      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+      builder.create<fir::ResultOp>(loc, reduction);
+      builder.setInsertionPointAfter(ifOp);
+
+      // Close the mask if
+      builder.create<fir::ResultOp>(loc, ifOp.getResult(0));
+      builder.setInsertionPointToStart(&maskIfOp.getElseRegion().front());
+      builder.create<fir::ResultOp>(loc, reduction);
+      builder.setInsertionPointAfter(maskIfOp);
+
+      return maskIfOp.getResult(0);
+    };
+
+    mlir::Location loc = assign.getLoc();
+    fir::FirOpBuilder builder{rewriter, assign.getOperation()};
+
+    // Initialize the result
+    mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
+    mlir::Type resultRefTy = builder.getRefType(resultElemTy);
+    mlir::Value returnValue =
+        builder.createIntegerConstant(loc, resultElemTy, 0);
+    for (unsigned int i = 0; i < rank; ++i) {
+      mlir::Value index =
+          builder.createIntegerConstant(loc, builder.getIndexType(), i);
+      mlir::Value resultElemAddr =
+          builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
+      builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
+    }
+
+    fir::genMinlocReductionLoop(builder, array, init, genBodyOp, rank,
+                                elementType, loc, builder.getI1Type(),
+                                resultArr, false);
+
+    rewriter.eraseOp(assign);
+    rewriter.eraseOp(destroy);
+    rewriter.eraseOp(minloc);
+    return mlir::success();
+  }
+};
+
 class OptimizedBufferizationPass
     : public hlfir::impl::OptimizedBufferizationBase<
           OptimizedBufferizationPass> {
@@ -681,6 +874,7 @@ class OptimizedBufferizationPass
     patterns.insert<ElementalAssignBufferization>(context);
     patterns.insert<BroadcastAssignBufferization>(context);
     patterns.insert<VariableAssignBufferization>(context);
+    patterns.insert<AssignMinMaxlocElementalConversion>(context);
 
     if (mlir::failed(mlir::applyPatternsAndFoldGreedily(
             func, std::move(patterns), config))) {
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index 3eddb9e61ae3b3..c88b71baf202e8 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -32,6 +32,7 @@
 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
 #include "flang/Optimizer/HLFIR/HLFIRDialect.h"
 #include "flang/Optimizer/Transforms/Passes.h"
+#include "flang/Optimizer/Support/Utils.h"
 #include "flang/Runtime/entry-names.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Matchers.h"
@@ -243,8 +244,6 @@ static std::optional<mlir::Type> getArgElementType(mlir::Value val) {
 using BodyOpGeneratorTy = llvm::function_ref<mlir::Value(
     fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
     mlir::Value)>;
-using InitValGeneratorTy = llvm::function_ref<mlir::Value(
-    fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
 using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
     fir::FirOpBuilder &, mlir::Location, mlir::Value)>;
 
@@ -266,7 +265,7 @@ using ContinueLoopGenTy = llvm::function_ref<llvm::SmallVector<mlir::Value>(
 template <typename OP, typename T, int resultIndex>
 static void
 genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
-                 InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
+                 fir::InitValGeneratorTy initVal, ContinueLoopGenTy loopCond,
                  T unorderedOrInitialLoopCond, BodyOpGeneratorTy genBody,
                  unsigned rank, mlir::Type elementType, mlir::Location loc) {
 
@@ -353,29 +352,23 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   // Return the reduction value from the function.
   builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
 }
-using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
-    fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
-    mlir::Value, llvm::SmallVector<mlir::Value, Fortran::common::maxRank> &)>;
-
-static void
-genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
-                       InitValGeneratorTy initVal,
-                       MinlocBodyOpGeneratorTy genBody, unsigned rank,
-                       mlir::Type elementType, mlir::Location loc, bool hasMask,
-                       mlir::Type maskElemType, mlir::Value resultArr) {
 
+void fir::genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
+                                 fir::InitValGeneratorTy initVal,
+                                 fir::MinlocBodyOpGeneratorTy genBody,
+                                 unsigned rank, mlir::Type elementType,
+                                 mlir::Location loc, mlir::Type maskElemType,
+                                 mlir::Value resultArr,
+                                 bool maskMayBeLogicalScalar) {
   mlir::IndexType idxTy = builder.getIndexType();
 
-  mlir::Block::BlockArgListType args = funcOp.front().getArguments();
-  mlir::Value arg = args[1];
-
   mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
 
   fir::SequenceType::Shape flatShape(rank,
                                      fir::SequenceType::getUnknownExtent());
   mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
   mlir::Type boxArrTy = fir::BoxType::get(arrTy);
-  mlir::Value array = builder.create<fir::ConvertOp>(loc, boxArrTy, arg);
+  array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
 
   mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
   mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
@@ -383,13 +376,6 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
   builder.create<fir::StoreOp>(loc, zero, flagRef);
 
-  mlir::Value mask;
-  if (hasMask) {
-    mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
-    mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
-    mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, args[2]);
-  }
-
   mlir::Value init = initVal(builder, loc, elementType);
   llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
 
@@ -432,44 +418,8 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   // Reverse the indices such that they are ordered as:
   //   <dim-0-idx, dim-1-idx, ...>
   std::reverse(indices.begin(), indices.end());
-  // We are in the innermost loop: generate the reduction body.
-  if (hasMask) {
-    mlir::Type logicalRef = builder.getRefType(maskElemType);
-    mlir::Value maskAddr =
-        builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
-    mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
-
-    // fir::IfOp requires argument to be I1 - won't accept logical or any other
-    // Integer.
-    mlir::Type ifCompatType = builder.getI1Type();
-    mlir::Value ifCompatElem =
-        builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
-
-    llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
-    fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
-                                               /*withElseRegion=*/true);
-    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-  }
-
-  // Set flag that mask was true at some point
-  builder.create<fir::StoreOp>(loc, flagSet, flagRef);
-  mlir::Type eleRefTy = builder.getRefType(elementType);
-  mlir::Value addr =
-      builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
-  mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
-
   mlir::Value reductionVal =
-      genBody(builder, loc, elementType, elem, init, indices);
-
-  if (hasMask) {
-    fir::IfOp ifOp =
-        mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
-    builder.create<fir::ResultOp>(loc, reductionVal);
-    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-    builder.create<fir::ResultOp>(loc, init);
-    reductionVal = ifOp.getResult(0);
-    builder.setInsertionPointAfter(ifOp);
-  }
+      genBody(builder, loc, elementType, array, flagRef, init, indices);
 
   // Unwind the loop nest and insert ResultOp on each level
   // to return the updated value of the reduction to the enclosing
@@ -484,13 +434,15 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
     builder.setInsertionPointAfter(loop.getOperation());
   }
   // End of loop nest. The insertion point is after the outermost loop.
-  if (fir::IfOp ifOp =
-          mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
-    builder.create<fir::ResultOp>(loc, reductionVal);
-    builder.setInsertionPointAfter(ifOp);
-    // Redefine flagSet to escape scope of ifOp
-    flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
-    reductionVal = ifOp.getResult(0);
+  if (maskMayBeLogicalScalar) {
+    if (fir::IfOp ifOp =
+            mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
+      builder.create<fir::ResultOp>(loc, reductionVal);
+      builder.setInsertionPointAfter(ifOp);
+      // Redefine flagSet to escape scope of ifOp
+      flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+      reductionVal = ifOp.getResult(0);
+    }
   }
 
   // Check for case where array was full of max values.
@@ -523,27 +475,12 @@ genMinlocReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
   // Load output array with 1s instead of 0s
   for (unsigned int i = 0; i < rank; ++i) {
     mlir::Type resultRefTy = builder.getRefType(resultElemType);
-    // mlir::Value one = builder.createIntegerConstant(loc, resultElemType, 1);
     mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
     mlir::Value resultElemAddr =
         builder.create<fir::CoordinateOp>(loc, resultRefTy, resultArr, index);
     builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
   }
   builder.setInsertionPointAfter(ifMaskTrueOp);
-  // Store newly created output array to the reference passed in
-  fir::SequenceType::Shape resultShape(1, rank);
-  mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemType);
-  mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
-  mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
-  mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
-
-  mlir::Value outputArrNone = args[0];
-  mlir::Value outputArr =
-      builder.create<fir::ConvertOp>(loc, outputRefTy, outputArrNone);
-
-  // Store nearly created array to output array
-  builder.create<fir::StoreOp>(loc, resultArr, outputArr);
-  builder.create<mlir::func::ReturnOp>(loc);
 }
 
 static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
@@ -789,6 +726,14 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
 
   mlir::Type resultRefTy = builder.getRefType(resultElemTy);
 
+  if (maskRank > 0) {
+    fir::SequenceType::Shape flatShape(rank,
+                                       fir::SequenceType::getUnknownExtent());
+    mlir::Type maskTy = fir::SequenceType::get(flatShape, maskElemType);
+    mlir::Type boxMaskTy = fir::BoxType::get(maskTy);
+    mask = builder.create<fir::ConvertOp>(loc, boxMaskTy, mask);
+  }
+
   for (unsigned int i = 0; i < rank; ++i) {
     mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
     mlir::Value resultElemAddr =
@@ -797,18 +742,46 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
   }
 
   auto genBodyOp =
-      [&rank, &resultArr](
+      [&rank, &resultArr, &mask, &maskElemType, &maskRank](
           fir::FirOpBuilder builder, mlir::Location loc, mlir::Type elementType,
-          mlir::Value elem1, mlir::Value elem2,
-          llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices)
+          mlir::Value array, mlir::Value flagRef, mlir::Value reduction,
+          const llvm::SmallVectorImpl<mlir::Value> &indices)
       -> mlir::Value {
+    // We are in the innermost loop: generate the reduction body.
+    if (maskRank > 0) {
+      mlir::Type logicalRef = builder.getRefType(maskElemType);
+      mlir::Value maskAddr =
+          builder.create<fir::CoordinateOp>(loc, logicalRef, mask, indices);
+      mlir::Value maskElem = builder.create<fir::LoadOp>(loc, maskAddr);
+
+      // fir::IfOp requires argument to be I1 - won't accept logical or any
+      // other Integer.
+      mlir::Type ifCompatType = builder.getI1Type();
+      mlir::Value ifCompatElem =
+          builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
+
+      llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
+      fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
+                                                 /*withElseRegion=*/true);
+      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+    }
+
+    // Set flag that mask was true at some point
+    mlir::Value flagSet = builder.createIntegerConstant(
+        loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
+    builder.create<fir::StoreOp>(loc, flagSet, flagRef);
+    mlir::Type eleRefTy = builder.getRefType(elementType);
+    mlir::Value addr =
+        builder.create<fir::CoordinateOp>(loc, eleRefTy, array, indices);
+    mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
+
     mlir::Value cmp;
     if (elementType.isa<mlir::FloatType>()) {
       cmp = builder.create<mlir::arith::CmpFOp>(
-          loc, mlir::arith::CmpFPredicate::OLT, elem1, elem2);
+          loc, mlir::arith::CmpFPredicate::OLT, elem, reduction);
     } else if (elementType.isa<mlir::IntegerType>()) {
       cmp = builder.create<mlir::arith::CmpIOp>(
-          loc, mlir::arith::CmpIPredicate::slt, elem1, elem2);
+          loc, mlir::arith::CmpIPredicate::slt, elem, reduction);
     } else {
       llvm_unreachable("unsupported type");
     }
@@ -833,11 +806,24 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
           builder.create<mlir::arith::AddIOp>(loc, convert, one);
       builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
     }
-    builder.create<fir::ResultOp>(loc, elem1);
+    builder.create<fir::ResultOp>(loc, elem);
     builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-    builder.create<fir::ResultOp>(loc, elem2);
+    builder.create<fir::ResultOp>(loc, reduction);
     builder.setInsertionPointAfter(ifOp);
-    return ifOp.getResult(0);
+    mlir::Value reductionVal = ifOp.getResult(0);
+
+    // Close the mask if needed
+    if (maskRank > 0) {
+      fir::IfOp ifOp =
+          mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp());
+      builder.create<fir::ResultOp>(loc, reductionVal);
+      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+      builder.create<fir::ResultOp>(loc, reduction);
+      reductionVal = ifOp.getResult(0);
+      builder.setInsertionPointAfter(ifOp);
+    }
+
+    return reductionVal;
   };
 
   // if mask is a logical scalar, we can check its value before the main loop
@@ -872,12 +858,22 @@ static void genRuntimeMinlocBody(fir::FirOpBuilder &builder,
     builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
   }
 
-  // bit of a hack - maskRank is set to -1 for absent mask arg, so don't
-  // generate high level mask or element by element mask.
-  bool hasMask = maskRank > 0;
+  genMinlocReductionLoop(builder, funcOp.front().getArgument(1), init,
+                         genBodyOp, rank, elementType, loc, maskElemType,
+                         resultArr, maskRank == 0);
 
-  genMinlocReductionLoop(builder, funcOp, init, genBodyOp, rank, elementType,
-                         loc, hasMask, maskElemType, resultArr);
+  // Store newly created output array to the reference passed in
+  fir::SequenceType::Shape resultShape(1, rank);
+  mlir::Type outputArrTy = fir::SequenceType::get(resultShape, resultElemTy);
+  mlir::Type outputHeapTy = fir::HeapType::get(outputArrTy);
+  mlir::Type outputBoxTy = fir::BoxType::get(outputHeapTy);
+  mlir::Type outputRefTy = builder.getRefType(outputBoxTy);
+  mlir::Value outputArr = builder.create<fir::ConvertOp>(
+      loc, outputRefTy, funcOp.front().getArgument(0));
+
+  // Store newly created array to output array
+  builder.create<fir::StoreOp>(loc, resultArr, outputArr);
+  builder.create<mlir::func::ReturnOp>(loc);
 }
 
 /// Generate function type for the simplified version of RTNAME(DotProduct)
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
new file mode 100644
index 00000000000000..2375a1529cf923
--- /dev/null
+++ b/flang/test/HLFIR/minloc-elemental.fir
@@ -0,0 +1,327 @@
+// RUN: fir-opt %s -opt-bufferization | FileCheck %s
+
+func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.load %2#0 : !fir.ref<i32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %9 = fir.load %8 : !fir.ref<i32>
+    %10 = arith.cmpi sge, %9, %3 : i32
+    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %11 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %7 : !hlfir.expr<1xi32>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %c1 = arith.constant 1 : index
+// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
+// CHECK-NEXT:    %c0_i32 = arith.constant 0 : i32
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %0 = fir.alloca i32
+// CHECK-NEXT:    %1:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %2:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %3:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:    %4 = fir.load %3#0 : !fir.ref<i32>
+// CHECK-NEXT:    %5:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    %6 = fir.shape %5#1 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %7 = hlfir.elemental %6 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK-NEXT:    ^bb0(%arg3: index):
+// CHECK-NEXT:      %14 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %15 = fir.load %14 : !fir.ref<i32>
+// CHECK-NEXT:      %16 = arith.cmpi sge, %15, %4 : i32
+// CHECK-NEXT:      %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+// CHECK-NEXT:      hlfir.yield_element %17 : !fir.logical<4>
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %8 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:    fir.store %c0_i32 to %8 : !fir.ref<i32>
+// CHECK-NEXT:    fir.store %c0_i32 to %0 : !fir.ref<i32>
+// CHECK-NEXT:    %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    %10 = arith.subi %9#1, %c1 : index
+// CHECK-NEXT:    %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT:      %14 = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:      %15 = hlfir.designate %1#0 (%14)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %16 = fir.load %15 : !fir.ref<i32>
+// CHECK-NEXT:      %17 = arith.cmpi sge, %16, %4 : i32
+// CHECK-NEXT:      %18 = fir.if %17 -> (i32) {
+// CHECK-NEXT:        fir.store %c1_i32 to %0 : !fir.ref<i32>
+// CHECK-NEXT:        %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:        %20 = fir.load %19 : !fir.ref<i32>
+// CHECK-NEXT:        %21 = arith.cmpi slt, %20, %arg4 : i32
+// CHECK-NEXT:        %22 = fir.if %21 -> (i32) {
+// CHECK-NEXT:          %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:          %24 = fir.convert %arg3 : (index) -> i32
+// CHECK-NEXT:          %25 = arith.addi %24, %c1_i32 : i32
+// CHECK-NEXT:          fir.store %25 to %23 : !fir.ref<i32>
+// CHECK-NEXT:          fir.result %20 : i32
+// CHECK-NEXT:        } else {
+// CHECK-NEXT:          fir.result %arg4 : i32
+// CHECK-NEXT:        }
+// CHECK-NEXT:        fir.result %22 : i32
+// CHECK-NEXT:      } else {
+// CHECK-NEXT:        fir.result %arg4 : i32
+// CHECK-NEXT:      }
+// CHECK-NEXT:      fir.result %18 : i32
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %12 = fir.load %0 : !fir.ref<i32>
+// CHECK-NEXT:    %13 = arith.cmpi eq, %12, %c1_i32 : i32
+// CHECK-NEXT:    fir.if %13 {
+// CHECK-NEXT:      %14 = arith.cmpi eq, %11, %c2147483647_i32 : i32
+// CHECK-NEXT:      fir.if %14 {
+// CHECK-NEXT:        %15 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:        fir.store %c1_i32 to %15 : !fir.ref<i32>
+// CHECK-NEXT:      }
+// CHECK-NEXT:    }
+// CHECK-NEXT:    hlfir.destroy %7 : !hlfir.expr<?x!fir.logical<4>>
+// CHECK-NEXT:    return
+// CHECK-NEXT:  }
+
+
+func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.load %2#0 : !fir.ref<i32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %9 = fir.load %8 : !fir.ref<i32>
+    %10 = arith.cmpi sge, %9, %3 : i32
+    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %11 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi16>, !fir.box<!fir.array<?xi16>>
+  hlfir.destroy %7 : !hlfir.expr<1xi16>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL:  func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
+// CHECK-NEXT:    %c1 = arith.constant 1 : index
+// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
+// CHECK-NEXT:    %c1_i16 = arith.constant 1 : i16
+// CHECK-NEXT:    %c0_i16 = arith.constant 0 : i16
+// CHECK-NEXT:    %c0 = arith.constant 0 : index
+// CHECK-NEXT:    %0 = fir.alloca i16
+// CHECK-NEXT:    %1:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+// CHECK-NEXT:    %2:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
+// CHECK-NEXT:    %3:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+// CHECK-NEXT:    %4 = fir.load %3#0 : !fir.ref<i32>
+// CHECK-NEXT:    %5:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    %6 = fir.shape %5#1 : (index) -> !fir.shape<1>
+// CHECK-NEXT:    %7 = hlfir.elemental %6 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+// CHECK-NEXT:    ^bb0(%arg3: index):
+// CHECK-NEXT:      %14 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %15 = fir.load %14 : !fir.ref<i32>
+// CHECK-NEXT:      %16 = arith.cmpi sge, %15, %4 : i32
+// CHECK-NEXT:      %17 = fir.convert %16 : (i1) -> !fir.logical<4>
+// CHECK-NEXT:      hlfir.yield_element %17 : !fir.logical<4>
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %8 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:    fir.store %c0_i16 to %8 : !fir.ref<i16>
+// CHECK-NEXT:    fir.store %c0_i16 to %0 : !fir.ref<i16>
+// CHECK-NEXT:    %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+// CHECK-NEXT:    %10 = arith.subi %9#1, %c1 : index
+// CHECK-NEXT:    %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
+// CHECK-NEXT:      %14 = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:      %15 = hlfir.designate %1#0 (%14)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:      %16 = fir.load %15 : !fir.ref<i32>
+// CHECK-NEXT:      %17 = arith.cmpi sge, %16, %4 : i32
+// CHECK-NEXT:      %18 = fir.if %17 -> (i32) {
+// CHECK-NEXT:        fir.store %c1_i16 to %0 : !fir.ref<i16>
+// CHECK-NEXT:        %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:        %20 = fir.load %19 : !fir.ref<i32>
+// CHECK-NEXT:        %21 = arith.cmpi slt, %20, %arg4 : i32
+// CHECK-NEXT:        %22 = fir.if %21 -> (i32) {
+// CHECK-NEXT:          %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:          %24 = fir.convert %arg3 : (index) -> i16
+// CHECK-NEXT:          %25 = arith.addi %24, %c1_i16 : i16
+// CHECK-NEXT:          fir.store %25 to %23 : !fir.ref<i16>
+// CHECK-NEXT:          fir.result %20 : i32
+// CHECK-NEXT:        } else {
+// CHECK-NEXT:          fir.result %arg4 : i32
+// CHECK-NEXT:        }
+// CHECK-NEXT:        fir.result %22 : i32
+// CHECK-NEXT:      } else {
+// CHECK-NEXT:        fir.result %arg4 : i32
+// CHECK-NEXT:      }
+// CHECK-NEXT:      fir.result %18 : i32
+// CHECK-NEXT:    }
+// CHECK-NEXT:    %12 = fir.load %0 : !fir.ref<i16>
+// CHECK-NEXT:    %13 = arith.cmpi eq, %12, %c1_i16 : i16
+// CHECK-NEXT:    fir.if %13 {
+// CHECK-NEXT:      %14 = arith.cmpi eq, %11, %c2147483647_i32 : i32
+// CHECK-NEXT:      fir.if %14 {
+// CHECK-NEXT:        %15 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi16>>, index) -> !fir.ref<i16>
+// CHECK-NEXT:        fir.store %c1_i16 to %15 : !fir.ref<i16>
+// CHECK-NEXT:      }
+// CHECK-NEXT:    }
+// CHECK-NEXT:    hlfir.destroy %7 : !hlfir.expr<?x!fir.logical<4>>
+// CHECK-NEXT:    return
+
+
+func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+  %c1 = arith.constant 1 : index
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3 = fir.load %2#0 : !fir.ref<i32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %10 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %11 = fir.load %10 : !fir.ref<i32>
+    %12 = arith.cmpi sge, %11, %3 : i32
+    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %13 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %9 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
+  ^bb0(%arg3: index):
+    %10 = hlfir.apply %7, %arg3 : (!hlfir.expr<1xi16>, index) -> i16
+    %11 = fir.convert %10 : (i16) -> i32
+    hlfir.yield_element %11 : i32
+  }
+  hlfir.assign %9 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %9 : !hlfir.expr<?xi32>
+  hlfir.destroy %7 : !hlfir.expr<1xi16>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// Not transformed: the minloc result is converted (i16 -> i32) by an elemental before the assign
+// CHECK-LABEL: _QPtest_kind2_convert
+// CHECK: hlfir.minloc
+
+
+func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
+  %c0 = arith.constant 0 : index
+  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %3 = fir.load %2#0 : !fir.ref<f32>
+  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
+  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
+  ^bb0(%arg3: index):
+    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+    %9 = fir.load %8 : !fir.ref<f32>
+    %10 = arith.cmpf oge, %9, %3 : f32
+    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %11 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
+  hlfir.destroy %7 : !hlfir.expr<1xi32>
+  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
+  return
+}
+// CHECK-LABEL: _QPtest_float
+// CHECK:        %11 = fir.do_loop %arg3 = %c0 to %10 step %c1 iter_args(%arg4 = %cst) -> (f32) {
+// CHECK-NEXT:     %14 = arith.addi %arg3, %c1 : index
+// CHECK-NEXT:     %15 = hlfir.designate %1#0 (%14)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT:     %16 = fir.load %15 : !fir.ref<f32>
+// CHECK-NEXT:     %17 = arith.cmpf oge, %16, %4 : f32
+// CHECK-NEXT:     %18 = fir.if %17 -> (f32) {
+// CHECK-NEXT:       fir.store %c1_i32 to %0 : !fir.ref<i32>
+// CHECK-NEXT:       %19 = fir.coordinate_of %1#0, %arg3 : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK-NEXT:       %20 = fir.load %19 : !fir.ref<f32>
+// CHECK-NEXT:       %21 = arith.cmpf olt, %20, %arg4 : f32
+// CHECK-NEXT:       %22 = fir.if %21 -> (f32) {
+// CHECK-NEXT:         %23 = fir.coordinate_of %2#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+// CHECK-NEXT:         %24 = fir.convert %arg3 : (index) -> i32
+// CHECK-NEXT:         %25 = arith.addi %24, %c1_i32 : i32
+// CHECK-NEXT:         fir.store %25 to %23 : !fir.ref<i32>
+// CHECK-NEXT:         fir.result %20 : f32
+// CHECK-NEXT:       } else {
+// CHECK-NEXT:         fir.result %arg4 : f32
+// CHECK-NEXT:       }
+// CHECK-NEXT:       fir.result %22 : f32
+// CHECK-NEXT:     } else {
+// CHECK-NEXT:       fir.result %arg4 : f32
+// CHECK-NEXT:     }
+// CHECK-NEXT:     fir.result %18 : f32
+// CHECK-NEXT:   }
+
+
+func.func @_QPtest_assignshape(%arg0: !fir.ref<!fir.array<3x3xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.ref<!fir.array<3xi32>> {fir.bindc_name = "m"}) { // Masked minloc over a 3x3 array; the 2-element result is assigned to the section m(1:2).
+  %c2 = arith.constant 2 : index
+  %c1 = arith.constant 1 : index
+  %c3 = arith.constant 3 : index
+  %0 = fir.shape %c3, %c3 : (index, index) -> !fir.shape<2> // 3x3 shape shared by the input array and the mask
+  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFtestEarray"} : (!fir.ref<!fir.array<3x3xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x3xf32>>, !fir.ref<!fir.array<3x3xf32>>)
+  %2 = fir.shape %c3 : (index) -> !fir.shape<1>
+  %3:2 = hlfir.declare %arg2(%2) {uniq_name = "_QFtestEm"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
+  %4:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+  %5 = fir.load %4#0 : !fir.ref<f32> // scalar "val" that every array element is compared against
+  %6 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<3x3x!fir.logical<4>> { // mask expression: one logical per array element
+  ^bb0(%arg3: index, %arg4: index):
+    %10 = hlfir.designate %1#0 (%arg3, %arg4)  : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
+    %11 = fir.load %10 : !fir.ref<f32>
+    %12 = arith.cmpf oge, %11, %5 : f32 // array(i, j) >= val
+    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %13 : !fir.logical<4>
+  }
+  %7 = hlfir.minloc %1#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<3x3xf32>>, !hlfir.expr<3x3x!fir.logical<4>>) -> !hlfir.expr<2xi32> // rank-2 input, so the result holds two i32 values
+  %8 = fir.shape %c2 : (index) -> !fir.shape<1>
+  %9 = hlfir.designate %3#0 (%c1:%c2:%c1)  shape %8 : (!fir.ref<!fir.array<3xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>> // section m(1:2:1) that receives the result
+  hlfir.assign %7 to %9 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
+  hlfir.destroy %7 : !hlfir.expr<2xi32>
+  hlfir.destroy %6 : !hlfir.expr<3x3x!fir.logical<4>>
+  return
+}
+// Not supported as the input is not a box
+// CHECK-LABEL: _QPtest_assignshape
+// CHECK: hlfir.minloc
+
+
+func.func @_QFPtest_character(%arg0: !fir.box<!fir.array<?x!fir.char<1>>> {fir.bindc_name = "b"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "c"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 { // Masked minloc over the character array "b"; the single result element is returned.
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %0:2 = hlfir.declare %arg0 typeparams %c1 {uniq_name = "_QFFtestEb"} : (!fir.box<!fir.array<?x!fir.char<1>>>, index) -> (!fir.box<!fir.array<?x!fir.char<1>>>, !fir.box<!fir.array<?x!fir.char<1>>>)
+  %1:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEc"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
+  %2 = fir.alloca !fir.array<1xi32> {bindc_name = "m", uniq_name = "_QFFtestEm"} // local one-element array "m" that receives the minloc result
+  %3 = fir.shape %c1 : (index) -> !fir.shape<1>
+  %4:2 = hlfir.declare %2(%3) {uniq_name = "_QFFtestEm"} : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
+  %5 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"} // local "test", whose value is returned at the end
+  %6:2 = hlfir.declare %5 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %7:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %8 = fir.load %7#0 : !fir.ref<i32> // scalar "val" that every element of "c" is compared against
+  %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index) // query dimension 0 of "c"
+  %10 = fir.shape %9#1 : (index) -> !fir.shape<1> // the mask takes its shape from "c", not from "b"
+  %11 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> { // mask expression: one logical per element of "c"
+  ^bb0(%arg3: index):
+    %16 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
+    %17 = fir.load %16 : !fir.ref<i32>
+    %18 = arith.cmpi eq, %17, %8 : i32 // c(i) == val
+    %19 = fir.convert %18 : (i1) -> !fir.logical<4>
+    hlfir.yield_element %19 : !fir.logical<4>
+  }
+  %12 = hlfir.minloc %0#0 mask %11 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1>>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> // character-typed input combined with the elemental mask
+  hlfir.assign %12 to %4#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
+  hlfir.destroy %12 : !hlfir.expr<1xi32>
+  hlfir.destroy %11 : !hlfir.expr<?x!fir.logical<4>>
+  %13 = hlfir.designate %4#0 (%c1)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32> // address of m(1)
+  %14 = fir.load %13 : !fir.ref<i32>
+  hlfir.assign %14 to %6#0 : i32, !fir.ref<i32> // copy m(1) into "test"
+  %15 = fir.load %6#1 : !fir.ref<i32>
+  return %15 : i32
+}
+// Characters are not supported at the moment
+// CHECK-LABEL: _QFPtest_character
+// CHECK: hlfir.minloc
diff --git a/flang/test/Transforms/simplifyintrinsics.fir b/flang/test/Transforms/simplifyintrinsics.fir
index 39483a9cc18fe8..da54dcbedc0432 100644
--- a/flang/test/Transforms/simplifyintrinsics.fir
+++ b/flang/test/Transforms/simplifyintrinsics.fir
@@ -1760,6 +1760,7 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:           %[[OUTARR:.*]] = fir.allocmem !fir.array<1xi32>
 // CHECK:           %[[OUTARR_SHAPE:.*]] = fir.shape %[[OUTARR_SIZE]] : (index) -> !fir.shape<1>
 // CHECK:           %[[BOX_OUTARR:.*]] = fir.embox %[[OUTARR]](%[[OUTARR_SHAPE]]) : (!fir.heap<!fir.array<1xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<1xi32>>>
+// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
 // CHECK:           %[[OUTARR_IDX0:.*]] = arith.constant 0 : index
 // CHECK:           %[[OUTARR_ITEM0:.*]] = fir.coordinate_of %[[BOX_OUTARR]], %[[OUTARR_IDX0]] : (!fir.box<!fir.heap<!fir.array<1xi32>>>, index) -> !fir.ref<i32>
 // CHECK:           fir.store %[[INIT_OUT_IDX]] to %[[OUTARR_ITEM0]] : !fir.ref<i32>
@@ -1768,7 +1769,6 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:           %[[FLAG_SET:.*]] = arith.constant 1 : i32
 // CHECK:           %[[FLAG_EMPTY:.*]] = arith.constant 0 : i32
 // CHECK:           fir.store %[[FLAG_EMPTY]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
-// CHECK:           %[[BOX_MASK:.*]] = fir.convert %[[BOX_MASK_NONE]] : (!fir.box<none>) -> !fir.box<!fir.array<?x!fir.logical<4>>>
 // CHECK:           %[[MAX:.*]] = arith.constant 2147483647 : i32
 // CHECK:           %[[CINDEX_1:.*]] = arith.constant 1 : index
 // CHECK:           %[[DIM_INDEX0:.*]] = arith.constant 0 : index
@@ -1779,7 +1779,8 @@ func.func @_QPtestminloc_works1d(%arg0: !fir.ref<!fir.array<10xi32>> {fir.bindc_
 // CHECK:             %[[MASK_ITEMVAL:.*]] = fir.load %[[MASK_ITEM]] : !fir.ref<!fir.logical<4>>
 // CHECK:             %[[MASK_IF_ITEM:.*]] = fir.convert %[[MASK_ITEMVAL]] : (!fir.logical<4>) -> i1
 // CHECK:             %[[IF_MASK:.*]] = fir.if %[[MASK_IF_ITEM]] -> (i32) {
-// CHECK:               fir.store %[[FLAG_SET]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
+// CHECK:               %[[FLAG_SET2:.*]] = arith.constant 1 : i32
+// CHECK:               fir.store %[[FLAG_SET2]] to %[[FLAG_ALLOC]] : !fir.ref<i32>
 // CHECK:               %[[INARR_ITEM:.*]] = fir.coordinate_of %[[BOX_INARR]], %[[ITER]] : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
 // CHECK:               %[[INARR_ITEMVAL:.*]] = fir.load %[[INARR_ITEM]] : !fir.ref<i32>
 // CHECK:               %[[NEW_MIN:.*]] = arith.cmpi slt, %[[INARR_ITEMVAL]], %[[MIN]] : i32



More information about the flang-commits mailing list