[flang-commits] [flang] [flang] Generalized simplification of HLFIR reduction ops. (PR #136071)

Thu Apr 17 12:25:35 PDT 2025

https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/136071

>From cbd7410e006efc2ae7256bfbb795f8854956ebb8 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 11 Apr 2025 15:22:47 -0700
Subject: [PATCH 1/3] [flang] Generalized simplification of HLFIR reduction
 ops.

This change generalizes the SumAsElemental inlining in the
SimplifyHLFIRIntrinsics pass so that it can be applied
to ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL, and SUM.

This change makes the special handling of the reduction
operations in OptimizedBufferization redundant: once HLFIR
operations are inlined, the hlfir.elemental inlining should
do the rest of the job.
---
 .../flang/Optimizer/Builder/HLFIRTools.h      |    5 +
 flang/lib/Optimizer/Builder/HLFIRTools.cpp    |   27 +
 .../Transforms/OptimizedBufferization.cpp     |  465 -------
 .../Transforms/SimplifyHLFIRIntrinsics.cpp    | 1079 ++++++++++++++---
 flang/test/HLFIR/all-elemental.fir            |   91 --
 flang/test/HLFIR/any-elemental.fir            |  190 ---
 flang/test/HLFIR/count-elemental.fir          |  314 -----
 flang/test/HLFIR/maxloc-elemental.fir         |  133 --
 flang/test/HLFIR/maxval-elemental.fir         |  117 --
 flang/test/HLFIR/minloc-elemental.fir         |  397 ------
 flang/test/HLFIR/minval-elemental.fir         |   95 --
 .../HLFIR/simplify-hlfir-intrinsics-all.fir   |  123 ++
 .../HLFIR/simplify-hlfir-intrinsics-any.fir   |  123 ++
 .../HLFIR/simplify-hlfir-intrinsics-count.fir |  127 ++
 .../simplify-hlfir-intrinsics-maxloc.fir      |  343 ++++++
 .../simplify-hlfir-intrinsics-maxval.fir      |  177 +++
 .../simplify-hlfir-intrinsics-minloc.fir      |  343 ++++++
 .../simplify-hlfir-intrinsics-minval.fir      |  177 +++
 18 files changed, 2325 insertions(+), 2001 deletions(-)
 delete mode 100644 flang/test/HLFIR/all-elemental.fir
 delete mode 100644 flang/test/HLFIR/any-elemental.fir
 delete mode 100644 flang/test/HLFIR/count-elemental.fir
 delete mode 100644 flang/test/HLFIR/maxloc-elemental.fir
 delete mode 100644 flang/test/HLFIR/maxval-elemental.fir
 delete mode 100644 flang/test/HLFIR/minloc-elemental.fir
 delete mode 100644 flang/test/HLFIR/minval-elemental.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
 create mode 100644 flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir

diff --git a/flang/include/flang/Optimizer/Builder/HLFIRTools.h b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
index ac80873dc374f..cd259b9dc6071 100644
--- a/flang/include/flang/Optimizer/Builder/HLFIRTools.h
+++ b/flang/include/flang/Optimizer/Builder/HLFIRTools.h
@@ -301,6 +301,11 @@ mlir::Value genExtent(mlir::Location loc, fir::FirOpBuilder &builder,
 mlir::Value genLBound(mlir::Location loc, fir::FirOpBuilder &builder,
                       hlfir::Entity entity, unsigned dim);
 
+/// Compute the lower bounds of \p entity, which is an array of known rank.
+llvm::SmallVector<mlir::Value> genLBounds(mlir::Location loc,
+                                          fir::FirOpBuilder &builder,
+                                          hlfir::Entity entity);
+
 /// Generate a vector of extents with index type from a fir.shape
 /// of fir.shape_shift value.
 llvm::SmallVector<mlir::Value> getIndexExtents(mlir::Location loc,
diff --git a/flang/lib/Optimizer/Builder/HLFIRTools.cpp b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
index 558ebcb876ddb..2a5e136c57c62 100644
--- a/flang/lib/Optimizer/Builder/HLFIRTools.cpp
+++ b/flang/lib/Optimizer/Builder/HLFIRTools.cpp
@@ -659,6 +659,33 @@ mlir::Value hlfir::genLBound(mlir::Location loc, fir::FirOpBuilder &builder,
   return dimInfo.getLowerBound();
 }
 
+llvm::SmallVector<mlir::Value> hlfir::genLBounds(mlir::Location loc,
+                                                 fir::FirOpBuilder &builder,
+                                                 hlfir::Entity entity) {
+  assert(!entity.isAssumedRank() &&
+         "cannot compute all lower bounds for assumed rank");
+  assert(!entity.isScalar() && "expected an array entity");
+  int rank = entity.getRank();
+  mlir::Type idxTy = builder.getIndexType();
+  if (!entity.mayHaveNonDefaultLowerBounds())
+    return {static_cast<std::size_t>(rank),
+            builder.createIntegerConstant(loc, idxTy, 1)};
+
+  if (auto shape = tryRetrievingShapeOrShift(entity)) {
+    auto lbounds = getExplicitLboundsFromShape(shape);
+    if (!lbounds.empty())
+      return lbounds;
+  }
+
+  if (entity.isMutableBox())
+    entity = hlfir::derefPointersAndAllocatables(loc, builder, entity);
+
+  llvm::SmallVector<mlir::Value, Fortran::common::maxRank> lbounds;
+  fir::factory::genDimInfoFromBox(builder, loc, entity, &lbounds,
+                                  /*extents=*/nullptr, /*strides=*/nullptr);
+  return lbounds;
+}
+
 void hlfir::genLengthParameters(mlir::Location loc, fir::FirOpBuilder &builder,
                                 Entity entity,
                                 llvm::SmallVectorImpl<mlir::Value> &result) {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index c489450384a35..79aabd2981e1a 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -772,458 +772,6 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
   return mlir::success();
 }
 
-using GenBodyFn =
-    std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
-                              const llvm::SmallVectorImpl<mlir::Value> &)>;
-static mlir::Value generateReductionLoop(fir::FirOpBuilder &builder,
-                                         mlir::Location loc, mlir::Value init,
-                                         mlir::Value shape, GenBodyFn genBody) {
-  auto extents = hlfir::getIndexExtents(loc, builder, shape);
-  mlir::Value reduction = init;
-  mlir::IndexType idxTy = builder.getIndexType();
-  mlir::Value oneIdx = builder.createIntegerConstant(loc, idxTy, 1);
-
-  // Create a reduction loop nest. We use one-based indices so that they can be
-  // passed to the elemental, and reverse the order so that they can be
-  // generated in column-major order for better performance.
-  llvm::SmallVector<mlir::Value> indices(extents.size(), mlir::Value{});
-  for (unsigned i = 0; i < extents.size(); ++i) {
-    auto loop = builder.create<fir::DoLoopOp>(
-        loc, oneIdx, extents[extents.size() - i - 1], oneIdx, false,
-        /*finalCountValue=*/false, reduction);
-    reduction = loop.getRegionIterArgs()[0];
-    indices[extents.size() - i - 1] = loop.getInductionVar();
-    // Set insertion point to the loop body so that the next loop
-    // is inserted inside the current one.
-    builder.setInsertionPointToStart(loop.getBody());
-  }
-
-  // Generate the body
-  reduction = genBody(builder, loc, reduction, indices);
-
-  // Unwind the loop nest.
-  for (unsigned i = 0; i < extents.size(); ++i) {
-    auto result = builder.create<fir::ResultOp>(loc, reduction);
-    auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
-    reduction = loop.getResult(0);
-    // Set insertion point after the loop operation that we have
-    // just processed.
-    builder.setInsertionPointAfter(loop.getOperation());
-  }
-
-  return reduction;
-}
-
-auto makeMinMaxInitValGenerator(bool isMax) {
-  return [isMax](fir::FirOpBuilder builder, mlir::Location loc,
-                 mlir::Type elementType) -> mlir::Value {
-    if (auto ty = mlir::dyn_cast<mlir::FloatType>(elementType)) {
-      const llvm::fltSemantics &sem = ty.getFloatSemantics();
-      llvm::APFloat limit = llvm::APFloat::getInf(sem, /*Negative=*/isMax);
-      return builder.createRealConstant(loc, elementType, limit);
-    }
-    unsigned bits = elementType.getIntOrFloatBitWidth();
-    int64_t limitInt =
-        isMax ? llvm::APInt::getSignedMinValue(bits).getSExtValue()
-              : llvm::APInt::getSignedMaxValue(bits).getSExtValue();
-    return builder.createIntegerConstant(loc, elementType, limitInt);
-  };
-}
-
-mlir::Value generateMinMaxComparison(fir::FirOpBuilder builder,
-                                     mlir::Location loc, mlir::Value elem,
-                                     mlir::Value reduction, bool isMax) {
-  if (mlir::isa<mlir::FloatType>(reduction.getType())) {
-    // For FP reductions we want the first smallest value to be used, that
-    // is not NaN. A OGL/OLT condition will usually work for this unless all
-    // the values are Nan or Inf. This follows the same logic as
-    // NumericCompare for Minloc/Maxlox in extrema.cpp.
-    mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
-        loc,
-        isMax ? mlir::arith::CmpFPredicate::OGT
-              : mlir::arith::CmpFPredicate::OLT,
-        elem, reduction);
-    mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>(
-        loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction);
-    mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>(
-        loc, mlir::arith::CmpFPredicate::OEQ, elem, elem);
-    cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2);
-    return builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan);
-  } else if (mlir::isa<mlir::IntegerType>(reduction.getType())) {
-    return builder.create<mlir::arith::CmpIOp>(
-        loc,
-        isMax ? mlir::arith::CmpIPredicate::sgt
-              : mlir::arith::CmpIPredicate::slt,
-        elem, reduction);
-  }
-  llvm_unreachable("unsupported type");
-}
-
-/// Given a reduction operation with an elemental/designate source, attempt to
-/// generate a do-loop to perform the operation inline.
-///   %e = hlfir.elemental %shape unordered
-///   %r = hlfir.count %e
-/// =>
-///   %r = for.do_loop %arg = 1 to bound(%shape) step 1 iter_args(%arg2 = init)
-///     %i = <inline elemental>
-///     %c = <reduce count> %i
-///     fir.result %c
-template <typename Op>
-class ReductionConversion : public mlir::OpRewritePattern<Op> {
-public:
-  using mlir::OpRewritePattern<Op>::OpRewritePattern;
-
-  llvm::LogicalResult
-  matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override {
-    mlir::Location loc = op.getLoc();
-    // Select source and validate its arguments.
-    mlir::Value source;
-    bool valid = false;
-    if constexpr (std::is_same_v<Op, hlfir::AnyOp> ||
-                  std::is_same_v<Op, hlfir::AllOp> ||
-                  std::is_same_v<Op, hlfir::CountOp>) {
-      source = op.getMask();
-      valid = !op.getDim();
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxvalOp> ||
-                         std::is_same_v<Op, hlfir::MinvalOp>) {
-      source = op.getArray();
-      valid = !op.getDim() && !op.getMask();
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxlocOp> ||
-                         std::is_same_v<Op, hlfir::MinlocOp>) {
-      source = op.getArray();
-      valid = !op.getDim() && !op.getMask() && !op.getBack();
-    }
-    if (!valid)
-      return rewriter.notifyMatchFailure(
-          op, "Currently does not accept optional arguments");
-
-    hlfir::ElementalOp elemental;
-    hlfir::DesignateOp designate;
-    mlir::Value shape;
-    if ((elemental = source.template getDefiningOp<hlfir::ElementalOp>())) {
-      shape = elemental.getOperand(0);
-    } else if ((designate =
-                    source.template getDefiningOp<hlfir::DesignateOp>())) {
-      shape = designate.getShape();
-    } else {
-      return rewriter.notifyMatchFailure(op, "Did not find valid argument");
-    }
-
-    auto inlineSource =
-        [elemental,
-         &designate](fir::FirOpBuilder builder, mlir::Location loc,
-                     const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-        -> mlir::Value {
-      if (elemental) {
-        // Inline the elemental and get the value from it.
-        auto yield =
-            inlineElementalOp(loc, builder, elemental, oneBasedIndices);
-        auto tmp = yield.getElementValue();
-        yield->erase();
-        return tmp;
-      }
-      if (designate) {
-        // Create a designator over the array designator, then load the
-        // reference.
-        mlir::Value elementAddr = hlfir::getElementAt(
-            loc, builder, hlfir::Entity{designate.getResult()},
-            oneBasedIndices);
-        return builder.create<fir::LoadOp>(loc, elementAddr);
-      }
-      llvm_unreachable("unsupported type");
-    };
-
-    fir::FirOpBuilder builder{rewriter, op.getOperation()};
-
-    mlir::Value init;
-    GenBodyFn genBodyFn;
-    if constexpr (std::is_same_v<Op, hlfir::AnyOp>) {
-      init = builder.createIntegerConstant(loc, builder.getI1Type(), 0);
-      genBodyFn = [inlineSource](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        // Conditionally set the reduction variable.
-        mlir::Value cond = builder.create<fir::ConvertOp>(
-            loc, builder.getI1Type(),
-            inlineSource(builder, loc, oneBasedIndices));
-        return builder.create<mlir::arith::OrIOp>(loc, reduction, cond);
-      };
-    } else if constexpr (std::is_same_v<Op, hlfir::AllOp>) {
-      init = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
-      genBodyFn = [inlineSource](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        // Conditionally set the reduction variable.
-        mlir::Value cond = builder.create<fir::ConvertOp>(
-            loc, builder.getI1Type(),
-            inlineSource(builder, loc, oneBasedIndices));
-        return builder.create<mlir::arith::AndIOp>(loc, reduction, cond);
-      };
-    } else if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
-      init = builder.createIntegerConstant(loc, op.getType(), 0);
-      genBodyFn = [inlineSource](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        // Conditionally add one to the current value
-        mlir::Value cond = builder.create<fir::ConvertOp>(
-            loc, builder.getI1Type(),
-            inlineSource(builder, loc, oneBasedIndices));
-        mlir::Value one =
-            builder.createIntegerConstant(loc, reduction.getType(), 1);
-        mlir::Value add1 =
-            builder.create<mlir::arith::AddIOp>(loc, reduction, one);
-        return builder.create<mlir::arith::SelectOp>(loc, cond, add1,
-                                                     reduction);
-      };
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxlocOp> ||
-                         std::is_same_v<Op, hlfir::MinlocOp>) {
-      // TODO: implement minloc/maxloc conversion.
-      return rewriter.notifyMatchFailure(
-          op, "Currently minloc/maxloc is not handled");
-    } else if constexpr (std::is_same_v<Op, hlfir::MaxvalOp> ||
-                         std::is_same_v<Op, hlfir::MinvalOp>) {
-      mlir::Type ty = op.getType();
-      if (!(mlir::isa<mlir::FloatType>(ty) ||
-            mlir::isa<mlir::IntegerType>(ty))) {
-        return rewriter.notifyMatchFailure(
-            op, "Type is not supported for Maxval or Minval yet");
-      }
-
-      bool isMax = std::is_same_v<Op, hlfir::MaxvalOp>;
-      init = makeMinMaxInitValGenerator(isMax)(builder, loc, ty);
-      genBodyFn = [inlineSource, isMax](
-                      fir::FirOpBuilder builder, mlir::Location loc,
-                      mlir::Value reduction,
-                      const llvm::SmallVectorImpl<mlir::Value> &oneBasedIndices)
-          -> mlir::Value {
-        mlir::Value val = inlineSource(builder, loc, oneBasedIndices);
-        mlir::Value cmp =
-            generateMinMaxComparison(builder, loc, val, reduction, isMax);
-        return builder.create<mlir::arith::SelectOp>(loc, cmp, val, reduction);
-      };
-    } else {
-      llvm_unreachable("unsupported type");
-    }
-
-    mlir::Value res =
-        generateReductionLoop(builder, loc, init, shape, genBodyFn);
-    if (res.getType() != op.getType())
-      res = builder.create<fir::ConvertOp>(loc, op.getType(), res);
-
-    // Check if the op was the only user of the source (apart from a destroy),
-    // and remove it if so.
-    mlir::Operation *sourceOp = source.getDefiningOp();
-    mlir::Operation::user_range srcUsers = sourceOp->getUsers();
-    hlfir::DestroyOp srcDestroy;
-    if (std::distance(srcUsers.begin(), srcUsers.end()) == 2) {
-      srcDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*srcUsers.begin());
-      if (!srcDestroy)
-        srcDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++srcUsers.begin());
-    }
-
-    rewriter.replaceOp(op, res);
-    if (srcDestroy) {
-      rewriter.eraseOp(srcDestroy);
-      rewriter.eraseOp(sourceOp);
-    }
-    return mlir::success();
-  }
-};
-
-// Look for minloc(mask=elemental) and generate the minloc loop with
-// inlined elemental.
-//  %e = hlfir.elemental %shape ({ ... })
-//  %m = hlfir.minloc %array mask %e
-template <typename Op>
-class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
-public:
-  using mlir::OpRewritePattern<Op>::OpRewritePattern;
-
-  llvm::LogicalResult
-  matchAndRewrite(Op mloc, mlir::PatternRewriter &rewriter) const override {
-    if (!mloc.getMask() || mloc.getDim() || mloc.getBack())
-      return rewriter.notifyMatchFailure(mloc,
-                                         "Did not find valid minloc/maxloc");
-
-    bool isMax = std::is_same_v<Op, hlfir::MaxlocOp>;
-
-    auto elemental =
-        mloc.getMask().template getDefiningOp<hlfir::ElementalOp>();
-    if (!elemental || hlfir::elementalOpMustProduceTemp(elemental))
-      return rewriter.notifyMatchFailure(mloc, "Did not find elemental");
-
-    mlir::Value array = mloc.getArray();
-
-    unsigned rank = mlir::cast<hlfir::ExprType>(mloc.getType()).getShape()[0];
-    mlir::Type arrayType = array.getType();
-    if (!mlir::isa<fir::BoxType>(arrayType))
-      return rewriter.notifyMatchFailure(
-          mloc, "Currently requires a boxed type input");
-    mlir::Type elementType = hlfir::getFortranElementType(arrayType);
-    if (!fir::isa_trivial(elementType))
-      return rewriter.notifyMatchFailure(
-          mloc, "Character arrays are currently not handled");
-
-    mlir::Location loc = mloc.getLoc();
-    fir::FirOpBuilder builder{rewriter, mloc.getOperation()};
-    mlir::Value resultArr = builder.createTemporary(
-        loc, fir::SequenceType::get(
-                 rank, hlfir::getFortranElementType(mloc.getType())));
-
-    auto init = makeMinMaxInitValGenerator(isMax);
-
-    auto genBodyOp =
-        [&rank, &resultArr, &elemental, isMax](
-            fir::FirOpBuilder builder, mlir::Location loc,
-            mlir::Type elementType, mlir::Value array, mlir::Value flagRef,
-            mlir::Value reduction,
-            const llvm::SmallVectorImpl<mlir::Value> &indices) -> mlir::Value {
-      // We are in the innermost loop: generate the elemental inline
-      mlir::Value oneIdx =
-          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
-      llvm::SmallVector<mlir::Value> oneBasedIndices;
-      llvm::transform(
-          indices, std::back_inserter(oneBasedIndices), [&](mlir::Value V) {
-            return builder.create<mlir::arith::AddIOp>(loc, V, oneIdx);
-          });
-      hlfir::YieldElementOp yield =
-          hlfir::inlineElementalOp(loc, builder, elemental, oneBasedIndices);
-      mlir::Value maskElem = yield.getElementValue();
-      yield->erase();
-
-      mlir::Type ifCompatType = builder.getI1Type();
-      mlir::Value ifCompatElem =
-          builder.create<fir::ConvertOp>(loc, ifCompatType, maskElem);
-
-      llvm::SmallVector<mlir::Type> resultsTy = {elementType, elementType};
-      fir::IfOp maskIfOp =
-          builder.create<fir::IfOp>(loc, elementType, ifCompatElem,
-                                    /*withElseRegion=*/true);
-      builder.setInsertionPointToStart(&maskIfOp.getThenRegion().front());
-
-      // Set flag that mask was true at some point
-      mlir::Value flagSet = builder.createIntegerConstant(
-          loc, mlir::cast<fir::ReferenceType>(flagRef.getType()).getEleTy(), 1);
-      mlir::Value isFirst = builder.create<fir::LoadOp>(loc, flagRef);
-      mlir::Value addr = hlfir::getElementAt(loc, builder, hlfir::Entity{array},
-                                             oneBasedIndices);
-      mlir::Value elem = builder.create<fir::LoadOp>(loc, addr);
-
-      // Compare with the max reduction value
-      mlir::Value cmp =
-          generateMinMaxComparison(builder, loc, elem, reduction, isMax);
-
-      // The condition used for the loop is isFirst || <the condition above>.
-      isFirst = builder.create<fir::ConvertOp>(loc, cmp.getType(), isFirst);
-      isFirst = builder.create<mlir::arith::XOrIOp>(
-          loc, isFirst, builder.createIntegerConstant(loc, cmp.getType(), 1));
-      cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, isFirst);
-
-      // Set the new coordinate to the result
-      fir::IfOp ifOp = builder.create<fir::IfOp>(loc, elementType, cmp,
-                                                 /*withElseRegion*/ true);
-
-      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-      builder.create<fir::StoreOp>(loc, flagSet, flagRef);
-      mlir::Type resultElemTy =
-          hlfir::getFortranElementType(resultArr.getType());
-      mlir::Type returnRefTy = builder.getRefType(resultElemTy);
-      mlir::IndexType idxTy = builder.getIndexType();
-
-      for (unsigned int i = 0; i < rank; ++i) {
-        mlir::Value index = builder.createIntegerConstant(loc, idxTy, i + 1);
-        mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
-            loc, returnRefTy, resultArr, index);
-        mlir::Value fortranIndex = builder.create<fir::ConvertOp>(
-            loc, resultElemTy, oneBasedIndices[i]);
-        builder.create<fir::StoreOp>(loc, fortranIndex, resultElemAddr);
-      }
-      builder.create<fir::ResultOp>(loc, elem);
-      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-      builder.create<fir::ResultOp>(loc, reduction);
-      builder.setInsertionPointAfter(ifOp);
-
-      // Close the mask if
-      builder.create<fir::ResultOp>(loc, ifOp.getResult(0));
-      builder.setInsertionPointToStart(&maskIfOp.getElseRegion().front());
-      builder.create<fir::ResultOp>(loc, reduction);
-      builder.setInsertionPointAfter(maskIfOp);
-
-      return maskIfOp.getResult(0);
-    };
-    auto getAddrFn = [](fir::FirOpBuilder builder, mlir::Location loc,
-                        const mlir::Type &resultElemType, mlir::Value resultArr,
-                        mlir::Value index) {
-      mlir::Type resultRefTy = builder.getRefType(resultElemType);
-      mlir::Value oneIdx =
-          builder.createIntegerConstant(loc, builder.getIndexType(), 1);
-      index = builder.create<mlir::arith::AddIOp>(loc, index, oneIdx);
-      return builder.create<hlfir::DesignateOp>(loc, resultRefTy, resultArr,
-                                                index);
-    };
-
-    // Initialize the result
-    mlir::Type resultElemTy = hlfir::getFortranElementType(resultArr.getType());
-    mlir::Type resultRefTy = builder.getRefType(resultElemTy);
-    mlir::Value returnValue =
-        builder.createIntegerConstant(loc, resultElemTy, 0);
-    for (unsigned int i = 0; i < rank; ++i) {
-      mlir::Value index =
-          builder.createIntegerConstant(loc, builder.getIndexType(), i + 1);
-      mlir::Value resultElemAddr = builder.create<hlfir::DesignateOp>(
-          loc, resultRefTy, resultArr, index);
-      builder.create<fir::StoreOp>(loc, returnValue, resultElemAddr);
-    }
-
-    fir::genMinMaxlocReductionLoop(builder, array, init, genBodyOp, getAddrFn,
-                                   rank, elementType, loc, builder.getI1Type(),
-                                   resultArr, false);
-
-    mlir::Value asExpr = builder.create<hlfir::AsExprOp>(
-        loc, resultArr, builder.createBool(loc, false));
-
-    // Check all the users - the destroy is no longer required, and any assign
-    // can use resultArr directly so that InlineHLFIRAssign pass
-    // can optimize the results. Other operations are replaced with an AsExpr
-    // for the temporary resultArr.
-    llvm::SmallVector<hlfir::DestroyOp> destroys;
-    llvm::SmallVector<hlfir::AssignOp> assigns;
-    for (auto user : mloc->getUsers()) {
-      if (auto destroy = mlir::dyn_cast<hlfir::DestroyOp>(user))
-        destroys.push_back(destroy);
-      else if (auto assign = mlir::dyn_cast<hlfir::AssignOp>(user))
-        assigns.push_back(assign);
-    }
-
-    // Check if the minloc/maxloc was the only user of the elemental (apart from
-    // a destroy), and remove it if so.
-    mlir::Operation::user_range elemUsers = elemental->getUsers();
-    hlfir::DestroyOp elemDestroy;
-    if (std::distance(elemUsers.begin(), elemUsers.end()) == 2) {
-      elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*elemUsers.begin());
-      if (!elemDestroy)
-        elemDestroy = mlir::dyn_cast<hlfir::DestroyOp>(*++elemUsers.begin());
-    }
-
-    for (auto d : destroys)
-      rewriter.eraseOp(d);
-    for (auto a : assigns)
-      a.setOperand(0, resultArr);
-    rewriter.replaceOp(mloc, asExpr);
-    if (elemDestroy) {
-      rewriter.eraseOp(elemDestroy);
-      rewriter.eraseOp(elemental);
-    }
-    return mlir::success();
-  }
-};
-
 class EvaluateIntoMemoryAssignBufferization
     : public mlir::OpRewritePattern<hlfir::EvaluateInMemoryOp> {
 
@@ -1340,19 +888,6 @@ class OptimizedBufferizationPass
     patterns.insert<ElementalAssignBufferization>(context);
     patterns.insert<BroadcastAssignBufferization>(context);
     patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
-    patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::AllOp>>(context);
-    // TODO: implement basic minloc/maxloc conversion.
-    // patterns.insert<ReductionConversion<hlfir::MaxlocOp>>(context);
-    // patterns.insert<ReductionConversion<hlfir::MinlocOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::MaxvalOp>>(context);
-    patterns.insert<ReductionConversion<hlfir::MinvalOp>>(context);
-    patterns.insert<ReductionMaskConversion<hlfir::MinlocOp>>(context);
-    patterns.insert<ReductionMaskConversion<hlfir::MaxlocOp>>(context);
-    // TODO: implement masked minval/maxval conversion.
-    // patterns.insert<ReductionMaskConversion<hlfir::MaxvalOp>>(context);
-    // patterns.insert<ReductionMaskConversion<hlfir::MinvalOp>>(context);
 
     if (mlir::failed(mlir::applyPatternsGreedily(
             getOperation(), std::move(patterns), config))) {
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index bac10121a881b..5614474767565 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -173,245 +173,918 @@ class TransposeAsElementalConversion
   }
 };
 
-// Expand the SUM(DIM=CONSTANT) operation into .
-class SumAsElementalConversion : public mlir::OpRewritePattern<hlfir::SumOp> {
+/// CRTP class for converting reduction-like operations into
+/// a reduction loop[-nest] optionally wrapped into hlfir.elemental.
+/// It is used to handle operations produced for ALL, ANY, COUNT,
+/// MAXLOC, MAXVAL, MINLOC, MINVAL, SUM intrinsics.
+///
+/// All of these operations take an input array, and optional
+/// dim, mask arguments. ALL, ANY, COUNT do not have mask argument.
+template <typename T>
+class ReductionAsElementalConverter {
 public:
-  using mlir::OpRewritePattern<hlfir::SumOp>::OpRewritePattern;
+  ReductionAsElementalConverter(mlir::Operation *op,
+                                mlir::PatternRewriter &rewriter)
+      : op{op}, rewriter{rewriter}, loc{op->getLoc()}, builder{rewriter, op} {
+    assert(op->getNumResults() == 1);
+  }
 
-  llvm::LogicalResult
-  matchAndRewrite(hlfir::SumOp sum,
-                  mlir::PatternRewriter &rewriter) const override {
-    hlfir::Entity array = hlfir::Entity{sum.getArray()};
-    bool isTotalReduction = hlfir::Entity{sum}.getRank() == 0;
-    mlir::Value dim = sum.getDim();
+  /// Do the actual conversion or return mlir::failure(),
+  /// if conversion is not possible.
+  mlir::LogicalResult convert();
+
+private:
+  /// Return an instance of the derived class that implements
+  /// the interface.
+  T &impl() { return *static_cast<T *>(this); }
+  const T &impl() const { return *static_cast<const T *>(this); }
+
+  // Return fir.shape specifying the shape of the result
+  // of a reduction with DIM=dimVal. The second return value
+  // is the extent of the DIM dimension.
+  std::tuple<mlir::Value, mlir::Value>
+  genResultShapeForPartialReduction(hlfir::Entity array, int64_t dimVal);
+
+  /// \p mask is a scalar or array logical mask.
+  /// If \p isPresentPred is not nullptr, it is a dynamic predicate value
+  /// identifying whether the mask's variable is present.
+  /// \p indices is a range of one-based indices to access \p mask
+  /// when it is an array.
+  ///
+  /// The method returns the scalar mask value to guard the access
+  /// to a single element of the input array.
+  mlir::Value genMaskValue(mlir::Value mask, mlir::Value isPresentPred,
+                           mlir::ValueRange indices);
+
+protected:
+  // Methods below must be implemented by the derived type.
+
+  /// Return the input array.
+  mlir::Value getSource() const {
+    llvm_unreachable("derived type must provide getSource()");
+  }
+
+  /// Return DIM or nullptr, if it is not present.
+  mlir::Value getDim() const {
+    llvm_unreachable("derived type must provide getDim()");
+  }
+
+  /// Return MASK or nullptr, if it is not present.
+  mlir::Value getMask() const {
+    llvm_unreachable("derived type must provide getMask()");
+  }
+
+  /// Return FastMathFlags attached to the operation
+  /// or arith::FastMathFlags::none, if the operation
+  /// does not support FastMathFlags (e.g. ALL, ANY, COUNT).
+  mlir::arith::FastMathFlags getFastMath() const {
+    llvm_unreachable("derived type must provide getFastMath()");
+  }
+
+  /// Generates initial values for the reduction values used
+  /// by the reduction loop. In general, there is a single
+  /// loop-carried reduction value (e.g. for SUM), but, for example,
+  /// MAXLOC/MINLOC implementation uses multiple reductions.
+  llvm::SmallVector<mlir::Value> genReductionInitValues() {
+    llvm_unreachable("derived type must provide genReductionInitValues()");
+  }
+
+  /// Perform reduction(s) update given a single input array's element
+  /// identified by \p array and \p oneBasedIndices coordinates.
+  /// \p currentValue specifies the current value(s) of the reduction(s)
+  /// inside the reduction loop body.
+  llvm::SmallVector<mlir::Value>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+    llvm_unreachable("derived type must provide reduceOneElement()");
+  }
+
+  /// Given reduction value(s) in \p reductionResults produced
+  /// by the reduction loop, apply any required updates and return
+  /// new reduction value(s) to be used after the reduction loop
+  /// (e.g. as the result yield of the wrapping hlfir.elemental).
+  /// NOTE: if the reduction loop is wrapped in hlfir.elemental,
+  /// the insertion point of any generated code is inside hlfir.elemental.
+  hlfir::Entity
+  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+    llvm_unreachable("derived type must provide genFinalResult()");
+  }
+
+  // Methods below may be shadowed by the derived type.
+
+  /// Return mlir::success(), if the operation can be converted.
+  /// The default implementation always returns mlir::success().
+  /// The derived type may shadow the default implementation
+  /// with its own definition.
+  mlir::LogicalResult isConvertible() const { return mlir::success(); }
+
+  // Default implementation of isTotalReduction() just checks
+  // if the result of the operation is a scalar.
+  // True result indicates that the reduction has to be done
+  // across all elements, false result indicates that
+  // the result is an array expression produced by an hlfir.elemental
+  // operation with a single reduction loop across the DIM dimension.
+  //
+  // MAXLOC/MINLOC must override this.
+  bool isTotalReduction() const { return getResultRank() == 0; }
+
+  // Return true, if the reduction loop[-nest] may be unordered.
+  // In general, FP reductions may only be unordered when
+  // FastMathFlags::reassoc transformations are allowed.
+  //
+  // Some derived types may need to override this.
+  bool isUnordered() const {
+    mlir::Type elemType = getSourceElementType();
+    if (mlir::isa<mlir::IntegerType, fir::LogicalType, fir::CharacterType>(
+            elemType))
+      return true;
+    return static_cast<bool>(impl().getFastMath() &
+                             mlir::arith::FastMathFlags::reassoc);
+  }
+
+  // Methods below are utilities that are not supposed to be
+  // overridden by the derived type.
+
+  /// Return 0, if DIM is not present or its value does not matter
+  /// (for example, a reduction of 1D array does not care about
+  /// the DIM value, assuming that it is a valid program).
+  /// Return mlir::failure(), if DIM is a constant known
+  /// to be invalid for the given array.
+  /// Otherwise, return DIM constant value.
+  mlir::FailureOr<int64_t> getConstDim() const {
     int64_t dimVal = 0;
-    if (!isTotalReduction) {
+    if (!impl().isTotalReduction()) {
       // In case of partial reduction we should ignore the operations
       // with invalid DIM values. They may appear in dead code
       // after constant propagation.
-      auto constDim = fir::getIntIfConstant(dim);
+      auto constDim = fir::getIntIfConstant(impl().getDim());
       if (!constDim)
-        return rewriter.notifyMatchFailure(sum, "Nonconstant DIM for SUM");
+        return rewriter.notifyMatchFailure(op, "Nonconstant DIM");
       dimVal = *constDim;
 
-      if ((dimVal <= 0 || dimVal > array.getRank()))
-        return rewriter.notifyMatchFailure(
-            sum, "Invalid DIM for partial SUM reduction");
+      if ((dimVal <= 0 || dimVal > getSourceRank()))
+        return rewriter.notifyMatchFailure(op,
+                                           "Invalid DIM for partial reduction");
     }
+    return dimVal;
+  }
 
-    mlir::Location loc = sum.getLoc();
-    fir::FirOpBuilder builder{rewriter, sum.getOperation()};
-    mlir::Type elementType = hlfir::getFortranElementType(sum.getType());
-    mlir::Value mask = sum.getMask();
+  /// Return hlfir::Entity of the result.
+  hlfir::Entity getResultEntity() const {
+    return hlfir::Entity{op->getResult(0)};
+  }
 
-    mlir::Value resultShape, dimExtent;
-    llvm::SmallVector<mlir::Value> arrayExtents;
-    if (isTotalReduction)
-      arrayExtents = hlfir::genExtentsVector(loc, builder, array);
-    else
-      std::tie(resultShape, dimExtent) =
-          genResultShapeForPartialReduction(loc, builder, array, dimVal);
-
-    // If the mask is present and is a scalar, then we'd better load its value
-    // outside of the reduction loop making the loop unswitching easier.
-    mlir::Value isPresentPred, maskValue;
-    if (mask) {
-      if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
-        // MASK represented by a box might be dynamically optional,
-        // so we have to check for its presence before accessing it.
-        isPresentPred =
-            builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask);
-      }
+  /// Return type of the result (e.g. !hlfir.expr<?xi32>).
+  mlir::Type getResultType() const { return getResultEntity().getType(); }
 
-      if (hlfir::Entity{mask}.isScalar())
-        maskValue = genMaskValue(loc, builder, mask, isPresentPred, {});
-    }
+  /// Return the element type of the result (e.g. i32).
+  mlir::Type getResultElementType() const {
+    return hlfir::getFortranElementType(getResultType());
+  }
 
-    auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                         mlir::ValueRange inputIndices) -> hlfir::Entity {
-      // Loop over all indices in the DIM dimension, and reduce all values.
-      // If DIM is not present, do total reduction.
-
-      // Initial value for the reduction.
-      mlir::Value reductionInitValue =
-          fir::factory::createZeroValue(builder, loc, elementType);
-
-      // The reduction loop may be unordered if FastMathFlags::reassoc
-      // transformations are allowed. The integer reduction is always
-      // unordered.
-      bool isUnordered = mlir::isa<mlir::IntegerType>(elementType) ||
-                         static_cast<bool>(sum.getFastmath() &
-                                           mlir::arith::FastMathFlags::reassoc);
+  /// Return rank of the result.
+  unsigned getResultRank() const { return getResultEntity().getRank(); }
 
-      llvm::SmallVector<mlir::Value> extents;
-      if (isTotalReduction)
-        extents = arrayExtents;
-      else
-        extents.push_back(
-            builder.createConvert(loc, builder.getIndexType(), dimExtent));
+  /// Return the element type of the input array.
+  mlir::Type getSourceElementType() const {
+    return hlfir::getFortranElementType(impl().getSource().getType());
+  }
 
-      auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
-                         mlir::ValueRange oneBasedIndices,
-                         mlir::ValueRange reductionArgs)
-          -> llvm::SmallVector<mlir::Value, 1> {
-        // Generate the reduction loop-nest body.
-        // The initial reduction value in the innermost loop
-        // is passed via reductionArgs[0].
-        llvm::SmallVector<mlir::Value> indices;
-        if (isTotalReduction) {
-          indices = oneBasedIndices;
-        } else {
-          indices = inputIndices;
-          indices.insert(indices.begin() + dimVal - 1, oneBasedIndices[0]);
-        }
+  /// Return rank of the input array.
+  unsigned getSourceRank() const {
+    return hlfir::Entity{impl().getSource()}.getRank();
+  }
 
-        mlir::Value reductionValue = reductionArgs[0];
-        fir::IfOp ifOp;
-        if (mask) {
-          // Make the reduction value update conditional on the value
-          // of the mask.
-          if (!maskValue) {
-            // If the mask is an array, use the elemental and the loop indices
-            // to address the proper mask element.
-            maskValue =
-                genMaskValue(loc, builder, mask, isPresentPred, indices);
-          }
-          mlir::Value isUnmasked = builder.create<fir::ConvertOp>(
-              loc, builder.getI1Type(), maskValue);
-          ifOp = builder.create<fir::IfOp>(loc, elementType, isUnmasked,
-                                           /*withElseRegion=*/true);
-          // In the 'else' block return the current reduction value.
-          builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-          builder.create<fir::ResultOp>(loc, reductionValue);
-
-          // In the 'then' block do the actual addition.
-          builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
-        }
+  /// The reduction operation.
+  mlir::Operation *op;
 
-        hlfir::Entity element =
-            hlfir::getElementAt(loc, builder, array, indices);
-        hlfir::Entity elementValue =
-            hlfir::loadTrivialScalar(loc, builder, element);
-        // NOTE: we can use "Kahan summation" same way as the runtime
-        // (e.g. when fast-math is not allowed), but let's start with
-        // the simple version.
-        reductionValue =
-            genScalarAdd(loc, builder, reductionValue, elementValue);
-
-        if (ifOp) {
-          builder.create<fir::ResultOp>(loc, reductionValue);
-          builder.setInsertionPointAfter(ifOp);
-          reductionValue = ifOp.getResult(0);
-        }
+  mlir::PatternRewriter &rewriter;
+  mlir::Location loc;
+  fir::FirOpBuilder builder;
+};
 
-        return {reductionValue};
-      };
+/// Generate initialization value for MIN or MAX reduction
+/// of the given \p type.
+template <bool IS_MAX>
+static mlir::Value genMinMaxInitValue(mlir::Location loc,
+                                      fir::FirOpBuilder &builder,
+                                      mlir::Type type) {
+  if (auto ty = mlir::dyn_cast<mlir::FloatType>(type)) {
+    const llvm::fltSemantics &sem = ty.getFloatSemantics();
+    // We must not use +/-INF here. If the reduction input is empty,
+    // the result of reduction must be +/-LARGEST.
+    llvm::APFloat limit = llvm::APFloat::getLargest(sem, /*Negative=*/IS_MAX);
+    return builder.createRealConstant(loc, type, limit);
+  }
+  unsigned bits = type.getIntOrFloatBitWidth();
+  int64_t limitInt = IS_MAX
+                         ? llvm::APInt::getSignedMinValue(bits).getSExtValue()
+                         : llvm::APInt::getSignedMaxValue(bits).getSExtValue();
+  return builder.createIntegerConstant(loc, type, limitInt);
+}
+
+/// Generate a comparison of an array element value \p elem
+/// and the current reduction value \p reduction for MIN/MAX reduction.
+template <bool IS_MAX>
+static mlir::Value
+genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder,
+                    mlir::Value elem, mlir::Value reduction) {
+  if (mlir::isa<mlir::FloatType>(reduction.getType())) {
+    // For FP reductions we want the first smallest (MIN) or largest (MAX)
+    // value that is not NaN. An OGT/OLT condition will usually work for
+    // this unless all the values are NaN or Inf. This follows the same logic
+    // as NumericCompare for Minloc/Maxloc in extrema.cpp.
+    mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
+        loc,
+        IS_MAX ? mlir::arith::CmpFPredicate::OGT
+               : mlir::arith::CmpFPredicate::OLT,
+        elem, reduction);
+    mlir::Value cmpNan = builder.create<mlir::arith::CmpFOp>(
+        loc, mlir::arith::CmpFPredicate::UNE, reduction, reduction);
+    mlir::Value cmpNan2 = builder.create<mlir::arith::CmpFOp>(
+        loc, mlir::arith::CmpFPredicate::OEQ, elem, elem);
+    cmpNan = builder.create<mlir::arith::AndIOp>(loc, cmpNan, cmpNan2);
+    return builder.create<mlir::arith::OrIOp>(loc, cmp, cmpNan);
+  } else if (mlir::isa<mlir::IntegerType>(reduction.getType())) {
+    return builder.create<mlir::arith::CmpIOp>(
+        loc,
+        IS_MAX ? mlir::arith::CmpIPredicate::sgt
+               : mlir::arith::CmpIPredicate::slt,
+        elem, reduction);
+  }
+  llvm_unreachable("unsupported type");
+}
+
+/// Implementation of ReductionAsElementalConverter interface
+/// for MAXLOC/MINLOC.
+template <typename T>
+class MinMaxlocAsElementalConverter
+    : public ReductionAsElementalConverter<MinMaxlocAsElementalConverter<T>> {
+  static_assert(std::is_same_v<T, hlfir::MaxlocOp> ||
+                std::is_same_v<T, hlfir::MinlocOp>);
+  static constexpr unsigned maxRank = Fortran::common::maxRank;
+  // We have the following reduction values in the reduction loop:
+  //   * N integer coordinates, where N is:
+  //     - RANK(ARRAY) for total reductions.
+  //     - 1 for partial reductions.
+  //   * 1 reduction value holding the current MIN/MAX.
+  //   * 1 boolean indicating whether it is the first time
+  //     the mask is true.
+  static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2;
+  static constexpr bool isMax = std::is_same_v<T, hlfir::MaxlocOp>;
+  using Base =
+      typename MinMaxlocAsElementalConverter<T>::ReductionAsElementalConverter;
 
-      llvm::SmallVector<mlir::Value, 1> reductionFinalValues =
-          hlfir::genLoopNestWithReductions(loc, builder, extents,
-                                           {reductionInitValue}, genBody,
-                                           isUnordered);
-      return hlfir::Entity{reductionFinalValues[0]};
-    };
+public:
+  MinMaxlocAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
+      : Base{op.getOperation(), rewriter} {}
+
+  mlir::Value getSource() const { return getOp().getArray(); }
+  mlir::Value getDim() const { return getOp().getDim(); }
+  mlir::Value getMask() const { return getOp().getMask(); }
+  mlir::arith::FastMathFlags getFastMath() const {
+    return getOp().getFastmath();
+  }
 
-    if (isTotalReduction) {
-      hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{});
-      rewriter.replaceOp(sum, result);
-      return mlir::success();
+  mlir::LogicalResult isConvertible() const {
+    if (getOp().getBack())
+      return this->rewriter.notifyMatchFailure(
+          getOp(), "BACK is not supported for MINLOC/MAXLOC inlining");
+    if (mlir::isa<fir::CharacterType>(this->getSourceElementType()))
+      return this->rewriter.notifyMatchFailure(
+          getOp(),
+          "CHARACTER type is not supported for MINLOC/MAXLOC inlining");
+    return mlir::success();
+  }
+
+  // If the result is scalar, then DIM does not matter,
+  // and this is a total reduction.
+  // If DIM is not present, this is a total reduction.
+  bool isTotalReduction() const {
+    return this->getResultRank() == 0 || !getDim();
+  }
+
+  llvm::SmallVector<mlir::Value, maxNumReductions> genReductionInitValues();
+  llvm::SmallVector<mlir::Value, maxNumReductions>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices);
+  hlfir::Entity
+  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults);
+
+private:
+  T getOp() const { return mlir::cast<T>(this->op); }
+
+  unsigned getNumCoors() const {
+    return isTotalReduction() ? this->getSourceRank() : 1;
+  }
+
+  void
+  checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    assert(reductions.size() == getNumCoors() + 2 &&
+           "invalid number of reductions for MINLOC/MAXLOC");
+  }
+
+  mlir::Value
+  getCurrentMinMax(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    checkReductions(reductions);
+    return reductions[getNumCoors()];
+  }
+
+  mlir::Value
+  getIsFirst(const llvm::SmallVectorImpl<mlir::Value> &reductions) const {
+    checkReductions(reductions);
+    return reductions[getNumCoors() + 1];
+  }
+};
+
+template <typename T>
+llvm::SmallVector<mlir::Value,
+                  MinMaxlocAsElementalConverter<T>::maxNumReductions>
+MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
+  fir::FirOpBuilder &builder = this->builder;
+  mlir::Location loc = this->loc;
+  // Initial value for the coordinate(s) is zero.
+  mlir::Value zeroCoor =
+      fir::factory::createZeroValue(builder, loc, this->getResultElementType());
+  llvm::SmallVector<mlir::Value, maxNumReductions> result(getNumCoors(),
+                                                          zeroCoor);
+
+  // Initial value for the MIN/MAX value.
+  mlir::Value minMaxInit =
+      genMinMaxInitValue<isMax>(loc, builder, this->getSourceElementType());
+  result.push_back(minMaxInit);
+
+  // Initial value for isFirst predicate. It is switched to false,
+  // when the reduction update dynamically happens inside the reduction
+  // loop.
+  mlir::Value trueVal = builder.createBool(loc, true);
+  result.push_back(trueVal);
+
+  return result;
+}
+
+template <typename T>
+llvm::SmallVector<mlir::Value,
+                  MinMaxlocAsElementalConverter<T>::maxNumReductions>
+MinMaxlocAsElementalConverter<T>::reduceOneElement(
+    const llvm::SmallVectorImpl<mlir::Value> &currentValue, hlfir::Entity array,
+    mlir::ValueRange oneBasedIndices) {
+  checkReductions(currentValue);
+  fir::FirOpBuilder &builder = this->builder;
+  mlir::Location loc = this->loc;
+  hlfir::Entity elementValue =
+      hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+  mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue,
+                                               getCurrentMinMax(currentValue));
+  // If isFirst is true, then do the reduction update regardless
+  // of the FP comparison.
+  cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
+
+  llvm::SmallVector<mlir::Value, maxNumReductions> newIndices;
+  for (unsigned coorIdx = 0; coorIdx < getNumCoors(); ++coorIdx) {
+    mlir::Value currentCoor = currentValue[coorIdx];
+    mlir::Value newCoor = builder.createConvert(loc, currentCoor.getType(),
+                                                oneBasedIndices[coorIdx]);
+    mlir::Value update =
+        builder.create<mlir::arith::SelectOp>(loc, cmp, newCoor, currentCoor);
+    newIndices.push_back(update);
+  }
+
+  mlir::Value newMinMax = builder.create<mlir::arith::SelectOp>(
+      loc, cmp, elementValue, getCurrentMinMax(currentValue));
+  newIndices.push_back(newMinMax);
+
+  mlir::Value newIsFirst = builder.createBool(loc, false);
+  newIndices.push_back(newIsFirst);
+
+  assert(currentValue.size() == newIndices.size() &&
+         "invalid number of updated reductions");
+
+  return newIndices;
+}
+
+template <typename T>
+hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult(
+    const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+  // Identification of the final result of MINLOC/MAXLOC:
+  //   * If DIM is absent, the result is rank-one array.
+  //   * If DIM is present:
+  //     - The result is scalar for rank-one input.
+  //     - The result is an array of rank RANK(ARRAY)-1.
+  checkReductions(reductionResults);
+
+  fir::FirOpBuilder &builder = this->builder;
+  mlir::Location loc = this->loc;
+  // We need to adjust the one-based indices to real array indices.
+  // The adjustment must only be done, if there was an actual update
+  // of the coordinates in the reduction loop. For this check we only
+  // need to compare if any of the reduction results is not zero.
+  mlir::Value zero = fir::factory::createZeroValue(
+      builder, loc, reductionResults[0].getType());
+  mlir::Value doAdjust = builder.create<mlir::arith::CmpIOp>(
+      loc, mlir::arith::CmpIPredicate::ne, reductionResults[0], zero);
+  mlir::Type indexType = builder.getIndexType();
+  mlir::Value one = builder.createIntegerConstant(loc, indexType, 1);
+
+  auto adjustCoor = [&](mlir::Value coor, mlir::Value lbound) {
+    assert(mlir::isa<mlir::IndexType>(lbound.getType()));
+    mlir::Value coorAsIndex = builder.createConvert(loc, indexType, coor);
+    mlir::Value tmp =
+        builder.create<mlir::arith::AddIOp>(loc, coorAsIndex, lbound);
+    tmp = builder.create<mlir::arith::SubIOp>(loc, tmp, one);
+    tmp =
+        builder.create<mlir::arith::SelectOp>(loc, doAdjust, tmp, coorAsIndex);
+    return builder.createConvert(loc, coor.getType(), tmp);
+  };
+
+  // For partial reductions, the final result of the reduction
+  // loop is just a scalar - the coordinate within DIM dimension.
+  if (this->getResultRank() == 0 || !isTotalReduction()) {
+    // The result is a scalar, so just return the scalar.
+    assert(getNumCoors() == 1 &&
+           "unpexpected number of coordinates for scalar result");
+
+    int64_t dim = 1;
+    if (!isTotalReduction()) {
+      auto dimVal = this->getConstDim();
+      assert(mlir::succeeded(dimVal) &&
+             "partial MINLOC/MAXLOC reduction with invalid DIM");
+      dim = *dimVal;
     }
+    mlir::Value dimLbound =
+        hlfir::genLBound(loc, builder, hlfir::Entity{getSource()}, dim - 1);
+    return hlfir::Entity{adjustCoor(reductionResults[0], dimLbound)};
+  }
+  // This is a total reduction, and there is no wrapping hlfir.elemental.
+  // We have to pack the reduced coordinates into a rank-one array.
+  unsigned rank = this->getSourceRank();
+  // TODO: in order to avoid introducing new memory effects
+  // we should not use a temporary in memory.
+  // We can use hlfir.elemental with a switch to pack all the coordinates
+  // into an array expression, or we can have a dedicated HLFIR operation
+  // for this.
+  mlir::Value tempArray = builder.createTemporary(
+      loc, fir::SequenceType::get(rank, this->getResultElementType()));
+  llvm::SmallVector<mlir::Value, maxRank> arrayLbounds =
+      hlfir::genLBounds(loc, builder, hlfir::Entity(getSource()));
+  for (unsigned i = 0; i < rank; ++i) {
+    mlir::Value coor = adjustCoor(reductionResults[i], arrayLbounds[i]);
+    mlir::Value idx = builder.createIntegerConstant(loc, indexType, i + 1);
+    mlir::Value resultElement =
+        hlfir::getElementAt(loc, builder, hlfir::Entity{tempArray}, {idx});
+    builder.create<hlfir::AssignOp>(loc, coor, resultElement);
+  }
+  mlir::Value tempExpr = builder.create<hlfir::AsExprOp>(
+      loc, tempArray, builder.createBool(loc, false));
+  return hlfir::Entity{tempExpr};
+}
+
+/// Base class for numeric reductions like MAXVAL, MINVAL, SUM.
+template <typename OpT, typename ConverterT>
+class NumericReductionAsElementalConverterBase
+    : public ReductionAsElementalConverter<ConverterT> {
+  using Base = typename NumericReductionAsElementalConverterBase<
+      OpT, ConverterT>::ReductionAsElementalConverter;
 
-    hlfir::ElementalOp elementalOp = hlfir::genElementalOp(
-        loc, builder, elementType, resultShape, {}, genKernel,
-        /*isUnordered=*/true, /*polymorphicMold=*/nullptr,
-        sum.getResult().getType());
+public:
+  NumericReductionAsElementalConverterBase(OpT op,
+                                           mlir::PatternRewriter &rewriter)
+      : Base{op.getOperation(), rewriter} {}
+
+  mlir::Value getSource() const { return getOp().getArray(); }
+  mlir::Value getDim() const { return getOp().getDim(); }
+  mlir::Value getMask() const { return getOp().getMask(); }
+  mlir::arith::FastMathFlags getFastMath() const {
+    return getOp().getFastmath();
+  }
 
-    // it wouldn't be safe to replace block arguments with a different
-    // hlfir.expr type. Types can differ due to differing amounts of shape
-    // information
-    assert(elementalOp.getResult().getType() == sum.getResult().getType());
+  hlfir::Entity
+  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+    checkReductions(reductionResults);
+    return hlfir::Entity{reductionResults[0]};
+  }
 
-    rewriter.replaceOp(sum, elementalOp);
+protected:
+  OpT getOp() const { return mlir::cast<OpT>(this->op); }
+
+  void checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) {
+    assert(reductions.size() == 1 && "reduction must produce single value");
+  }
+};
+
+/// Reduction converter for MAXVAL/MINVAL.
+template <typename T>
+class MinMaxvalAsElementalConverter
+    : public NumericReductionAsElementalConverterBase<
+          T, MinMaxvalAsElementalConverter<T>> {
+  static_assert(std::is_same_v<T, hlfir::MaxvalOp> ||
+                std::is_same_v<T, hlfir::MinvalOp>);
+  static constexpr bool isMax = std::is_same_v<T, hlfir::MaxvalOp>;
+  using Base = typename MinMaxvalAsElementalConverter<
+      T>::NumericReductionAsElementalConverterBase;
+
+public:
+  MinMaxvalAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+  mlir::LogicalResult isConvertible() const {
+    if (mlir::isa<fir::CharacterType>(this->getSourceElementType()))
+      return this->rewriter.notifyMatchFailure(
+          this->getOp(),
+          "CHARACTER type is not supported for MINVAL/MAXVAL inlining");
     return mlir::success();
   }
 
-private:
-  // Return fir.shape specifying the shape of the result
-  // of a SUM reduction with DIM=dimVal. The second return value
-  // is the extent of the DIM dimension.
-  static std::tuple<mlir::Value, mlir::Value>
-  genResultShapeForPartialReduction(mlir::Location loc,
-                                    fir::FirOpBuilder &builder,
-                                    hlfir::Entity array, int64_t dimVal) {
-    llvm::SmallVector<mlir::Value> inExtents =
-        hlfir::genExtentsVector(loc, builder, array);
-    assert(dimVal > 0 && dimVal <= static_cast<int64_t>(inExtents.size()) &&
-           "DIM must be present and a positive constant not exceeding "
-           "the array's rank");
+  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+    return {genMinMaxInitValue<isMax>(this->loc, this->builder,
+                                      this->getResultElementType())};
+  }
+  llvm::SmallVector<mlir::Value, 1>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+    this->checkReductions(currentValue);
+    fir::FirOpBuilder &builder = this->builder;
+    mlir::Location loc = this->loc;
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    mlir::Value cmp =
+        genMinMaxComparison<isMax>(loc, builder, elementValue, currentValue[0]);
+    return {builder.create<mlir::arith::SelectOp>(loc, cmp, elementValue,
+                                                  currentValue[0])};
+  }
+};
+
+/// Reduction converter for SUM.
+class SumAsElementalConverter
+    : public NumericReductionAsElementalConverterBase<hlfir::SumOp,
+                                                      SumAsElementalConverter> {
+  using Base = typename SumAsElementalConverter::
+      NumericReductionAsElementalConverterBase;
+
+public:
+  SumAsElementalConverter(hlfir::SumOp op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
 
-    mlir::Value dimExtent = inExtents[dimVal - 1];
-    inExtents.erase(inExtents.begin() + dimVal - 1);
-    return {builder.create<fir::ShapeOp>(loc, inExtents), dimExtent};
+  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+    return {
+        fir::factory::createZeroValue(builder, loc, getResultElementType())};
+  }
+  llvm::SmallVector<mlir::Value, 1>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+    checkReductions(currentValue);
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    // NOTE: we can use "Kahan summation" same way as the runtime
+    // (e.g. when fast-math is not allowed), but let's start with
+    // the simple version.
+    return {genScalarAdd(currentValue[0], elementValue)};
   }
 
+private:
   // Generate scalar addition of the two values (of the same data type).
-  static mlir::Value genScalarAdd(mlir::Location loc,
-                                  fir::FirOpBuilder &builder,
-                                  mlir::Value value1, mlir::Value value2) {
-    mlir::Type ty = value1.getType();
-    assert(ty == value2.getType() && "reduction values' types do not match");
-    if (mlir::isa<mlir::FloatType>(ty))
-      return builder.create<mlir::arith::AddFOp>(loc, value1, value2);
-    else if (mlir::isa<mlir::ComplexType>(ty))
-      return builder.create<fir::AddcOp>(loc, value1, value2);
-    else if (mlir::isa<mlir::IntegerType>(ty))
-      return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
-
-    llvm_unreachable("unsupported SUM reduction type");
-  }
-
-  static mlir::Value genMaskValue(mlir::Location loc,
-                                  fir::FirOpBuilder &builder, mlir::Value mask,
-                                  mlir::Value isPresentPred,
-                                  mlir::ValueRange indices) {
-    mlir::OpBuilder::InsertionGuard guard(builder);
-    fir::IfOp ifOp;
-    mlir::Type maskType =
-        hlfir::getFortranElementType(fir::unwrapPassByRefType(mask.getType()));
-    if (isPresentPred) {
-      ifOp = builder.create<fir::IfOp>(loc, maskType, isPresentPred,
-                                       /*withElseRegion=*/true);
-
-      // Use 'true', if the mask is not present.
-      builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
-      mlir::Value trueValue = builder.createBool(loc, true);
-      trueValue = builder.createConvert(loc, maskType, trueValue);
-      builder.create<fir::ResultOp>(loc, trueValue);
-
-      // Load the mask value, if the mask is present.
-      builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  mlir::Value genScalarAdd(mlir::Value value1, mlir::Value value2);
+};
+
+/// Base class for logical reductions like ALL, ANY, COUNT.
+/// They do not have MASK and FastMathFlags.
+template <typename OpT, typename ConverterT>
+class LogicalReductionAsElementalConverterBase
+    : public ReductionAsElementalConverter<ConverterT> {
+  using Base = typename LogicalReductionAsElementalConverterBase<
+      OpT, ConverterT>::ReductionAsElementalConverter;
+
+public:
+  LogicalReductionAsElementalConverterBase(OpT op,
+                                           mlir::PatternRewriter &rewriter)
+      : Base{op.getOperation(), rewriter} {}
+
+  OpT getOp() const { return mlir::cast<OpT>(this->op); }
+
+  void checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) {
+    assert(reductions.size() == 1 && "reduction must produce single value");
+  }
+
+  mlir::Value getSource() const { return getOp().getMask(); }
+  mlir::Value getDim() const { return getOp().getDim(); }
+  mlir::Value getMask() const { return nullptr; }
+  mlir::arith::FastMathFlags getFastMath() const {
+    return mlir::arith::FastMathFlags::none;
+  }
+
+  hlfir::Entity
+  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+    checkReductions(reductionResults);
+    return hlfir::Entity{reductionResults[0]};
+  }
+};
+
+/// Reduction converter for ALL/ANY.
+template <typename T>
+class AllAnyAsElementalConverter
+    : public LogicalReductionAsElementalConverterBase<
+          T, AllAnyAsElementalConverter<T>> {
+  static_assert(std::is_same_v<T, hlfir::AllOp> ||
+                std::is_same_v<T, hlfir::AnyOp>);
+  static constexpr bool isAll = std::is_same_v<T, hlfir::AllOp>;
+  using Base = typename AllAnyAsElementalConverter<
+      T>::LogicalReductionAsElementalConverterBase;
+
+public:
+  AllAnyAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+    return {this->builder.createBool(this->loc, isAll ? true : false)};
+  }
+  llvm::SmallVector<mlir::Value, 1>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+    this->checkReductions(currentValue);
+    fir::FirOpBuilder &builder = this->builder;
+    mlir::Location loc = this->loc;
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    mlir::Value mask =
+        builder.createConvert(loc, builder.getI1Type(), elementValue);
+    if constexpr (isAll)
+      return {builder.create<mlir::arith::AndIOp>(loc, mask, currentValue[0])};
+    else
+      return {builder.create<mlir::arith::OrIOp>(loc, mask, currentValue[0])};
+  }
+
+  hlfir::Entity
+  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionValues) {
+    this->checkReductions(reductionValues);
+    return hlfir::Entity{this->builder.createConvert(
+        this->loc, this->getResultElementType(), reductionValues[0])};
+  }
+};
+
+/// Reduction converter for COUNT.
+class CountAsElementalConverter
+    : public LogicalReductionAsElementalConverterBase<
+          hlfir::CountOp, CountAsElementalConverter> {
+  using Base = typename CountAsElementalConverter::
+      LogicalReductionAsElementalConverterBase;
+
+public:
+  CountAsElementalConverter(hlfir::CountOp op, mlir::PatternRewriter &rewriter)
+      : Base{op, rewriter} {}
+
+  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+    return {
+        fir::factory::createZeroValue(builder, loc, getResultElementType())};
+  }
+  llvm::SmallVector<mlir::Value, 1>
+  reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+    checkReductions(currentValue);
+    hlfir::Entity elementValue =
+        hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
+    mlir::Value cond =
+        builder.createConvert(loc, builder.getI1Type(), elementValue);
+    mlir::Value one =
+        builder.createIntegerConstant(loc, getResultElementType(), 1);
+    mlir::Value add1 =
+        builder.create<mlir::arith::AddIOp>(loc, currentValue[0], one);
+    return {builder.create<mlir::arith::SelectOp>(loc, cond, add1,
+                                                  currentValue[0])};
+  }
+};
+
+template <typename T>
+mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
+  mlir::LogicalResult canConvert(impl().isConvertible());
+
+  if (mlir::failed(canConvert))
+    return canConvert;
+
+  hlfir::Entity array = hlfir::Entity{impl().getSource()};
+  bool isTotalReduction = impl().isTotalReduction();
+  auto dimVal = impl().getConstDim();
+  if (mlir::failed(dimVal))
+    return dimVal;
+  mlir::Value mask = impl().getMask();
+  mlir::Value resultShape, dimExtent;
+  llvm::SmallVector<mlir::Value> arrayExtents;
+  if (isTotalReduction)
+    arrayExtents = hlfir::genExtentsVector(loc, builder, array);
+  else
+    std::tie(resultShape, dimExtent) =
+        genResultShapeForPartialReduction(array, *dimVal);
+
+  // If the mask is present and is a scalar, then we'd better load its value
+  // outside of the reduction loop making the loop unswitching easier.
+  mlir::Value isPresentPred, maskValue;
+  if (mask) {
+    if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
+      // MASK represented by a box might be dynamically optional,
+      // so we have to check for its presence before accessing it.
+      isPresentPred =
+          builder.create<fir::IsPresentOp>(loc, builder.getI1Type(), mask);
     }
 
-    hlfir::Entity maskVar{mask};
-    if (maskVar.isScalar()) {
-      if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
-        // MASK may be a boxed scalar.
-        mlir::Value addr = hlfir::genVariableRawAddress(loc, builder, maskVar);
-        mask = builder.create<fir::LoadOp>(loc, hlfir::Entity{addr});
+    if (hlfir::Entity{mask}.isScalar())
+      maskValue = genMaskValue(mask, isPresentPred, {});
+  }
+
+  auto genKernel = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                       mlir::ValueRange inputIndices) -> hlfir::Entity {
+    // Loop over all indices in the DIM dimension, and reduce all values.
+    // If DIM is not present, do total reduction.
+
+    // Initial value for the reduction.
+    llvm::SmallVector<mlir::Value, 1> reductionInitValues =
+        impl().genReductionInitValues();
+
+    llvm::SmallVector<mlir::Value> extents;
+    if (isTotalReduction)
+      extents = arrayExtents;
+    else
+      extents.push_back(
+          builder.createConvert(loc, builder.getIndexType(), dimExtent));
+
+    auto genBody = [&](mlir::Location loc, fir::FirOpBuilder &builder,
+                       mlir::ValueRange oneBasedIndices,
+                       mlir::ValueRange reductionArgs)
+        -> llvm::SmallVector<mlir::Value, 1> {
+      // Generate the reduction loop-nest body.
+      // The initial reduction values in the innermost loop
+      // are passed via reductionArgs.
+      llvm::SmallVector<mlir::Value> indices;
+      if (isTotalReduction) {
+        indices = oneBasedIndices;
       } else {
-        mask = hlfir::loadTrivialScalar(loc, builder, maskVar);
+        indices = inputIndices;
+        indices.insert(indices.begin() + *dimVal - 1, oneBasedIndices[0]);
       }
+
+      llvm::SmallVector<mlir::Value, 1> reductionValues = reductionArgs;
+      llvm::SmallVector<mlir::Type, 1> reductionTypes;
+      llvm::transform(reductionValues, std::back_inserter(reductionTypes),
+                      [](mlir::Value v) { return v.getType(); });
+      fir::IfOp ifOp;
+      if (mask) {
+        // Make the reduction value update conditional on the value
+        // of the mask.
+        if (!maskValue) {
+          // If the mask is an array, use the elemental and the loop indices
+          // to address the proper mask element.
+          maskValue = genMaskValue(mask, isPresentPred, indices);
+        }
+        mlir::Value isUnmasked =
+            builder.create<fir::ConvertOp>(loc, builder.getI1Type(), maskValue);
+        ifOp = builder.create<fir::IfOp>(loc, reductionTypes, isUnmasked,
+                                         /*withElseRegion=*/true);
+        // In the 'else' block return the current reduction value.
+        builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+        builder.create<fir::ResultOp>(loc, reductionValues);
+
+        // In the 'then' block do the actual reduction.
+        builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+      }
+      reductionValues =
+          impl().reduceOneElement(reductionValues, array, indices);
+      if (ifOp) {
+        builder.create<fir::ResultOp>(loc, reductionValues);
+        builder.setInsertionPointAfter(ifOp);
+        reductionValues = ifOp.getResults();
+      }
+
+      return reductionValues;
+    };
+
+    llvm::SmallVector<mlir::Value, 1> reductionFinalValues =
+        hlfir::genLoopNestWithReductions(
+            loc, builder, extents, reductionInitValues, genBody, isUnordered());
+    return impl().genFinalResult(reductionFinalValues);
+  };
+
+  if (isTotalReduction) {
+    hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{});
+    rewriter.replaceOp(op, result);
+    return mlir::success();
+  }
+
+  hlfir::ElementalOp elementalOp =
+      hlfir::genElementalOp(loc, builder, impl().getResultElementType(),
+                            resultShape, /*typeParams=*/{}, genKernel,
+                            /*isUnordered=*/true, /*polymorphicMold=*/nullptr,
+                            impl().getResultType());
+
+  // It wouldn't be safe to replace block arguments with a different
+  // hlfir.expr type. Types can differ due to differing amounts of shape
+  // information.
+  assert(elementalOp.getResult().getType() == op->getResult(0).getType());
+
+  rewriter.replaceOp(op, elementalOp);
+  return mlir::success();
+}
+
+template <typename T>
+std::tuple<mlir::Value, mlir::Value>
+ReductionAsElementalConverter<T>::genResultShapeForPartialReduction(
+    hlfir::Entity array, int64_t dimVal) {
+  llvm::SmallVector<mlir::Value> inExtents =
+      hlfir::genExtentsVector(loc, builder, array);
+  assert(dimVal > 0 && dimVal <= static_cast<int64_t>(inExtents.size()) &&
+         "DIM must be present and a positive constant not exceeding "
+         "the array's rank");
+
+  mlir::Value dimExtent = inExtents[dimVal - 1];
+  inExtents.erase(inExtents.begin() + dimVal - 1);
+  return {builder.create<fir::ShapeOp>(loc, inExtents), dimExtent};
+}
+
+mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1,
+                                                  mlir::Value value2) {
+  mlir::Type ty = value1.getType();
+  assert(ty == value2.getType() && "reduction values' types do not match");
+  if (mlir::isa<mlir::FloatType>(ty))
+    return builder.create<mlir::arith::AddFOp>(loc, value1, value2);
+  else if (mlir::isa<mlir::ComplexType>(ty))
+    return builder.create<fir::AddcOp>(loc, value1, value2);
+  else if (mlir::isa<mlir::IntegerType>(ty))
+    return builder.create<mlir::arith::AddIOp>(loc, value1, value2);
+
+  llvm_unreachable("unsupported SUM reduction type");
+}
+
+template <typename T>
+mlir::Value ReductionAsElementalConverter<T>::genMaskValue(
+    mlir::Value mask, mlir::Value isPresentPred, mlir::ValueRange indices) {
+  mlir::OpBuilder::InsertionGuard guard(builder);
+  fir::IfOp ifOp;
+  mlir::Type maskType =
+      hlfir::getFortranElementType(fir::unwrapPassByRefType(mask.getType()));
+  if (isPresentPred) {
+    ifOp = builder.create<fir::IfOp>(loc, maskType, isPresentPred,
+                                     /*withElseRegion=*/true);
+
+    // Use 'true', if the mask is not present.
+    builder.setInsertionPointToStart(&ifOp.getElseRegion().front());
+    mlir::Value trueValue = builder.createBool(loc, true);
+    trueValue = builder.createConvert(loc, maskType, trueValue);
+    builder.create<fir::ResultOp>(loc, trueValue);
+
+    // Load the mask value, if the mask is present.
+    builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
+  }
+
+  hlfir::Entity maskVar{mask};
+  if (maskVar.isScalar()) {
+    if (mlir::isa<fir::BaseBoxType>(mask.getType())) {
+      // MASK may be a boxed scalar.
+      mlir::Value addr = hlfir::genVariableRawAddress(loc, builder, maskVar);
+      mask = builder.create<fir::LoadOp>(loc, hlfir::Entity{addr});
     } else {
-      // Load from the mask array.
-      assert(!indices.empty() && "no indices for addressing the mask array");
-      maskVar = hlfir::getElementAt(loc, builder, maskVar, indices);
       mask = hlfir::loadTrivialScalar(loc, builder, maskVar);
     }
+  } else {
+    // Load from the mask array.
+    assert(!indices.empty() && "no indices for addressing the mask array");
+    maskVar = hlfir::getElementAt(loc, builder, maskVar, indices);
+    mask = hlfir::loadTrivialScalar(loc, builder, maskVar);
+  }
 
-    if (!isPresentPred)
-      return mask;
+  if (!isPresentPred)
+    return mask;
 
-    builder.create<fir::ResultOp>(loc, mask);
-    return ifOp.getResult(0);
+  builder.create<fir::ResultOp>(loc, mask);
+  return ifOp.getResult(0);
+}
+
+/// Convert an operation that is a partial or total reduction
+/// over an array of values into a reduction loop[-nest]
+/// optionally wrapped into hlfir.elemental.
+template <typename Op>
+class ReductionConversion : public mlir::OpRewritePattern<Op> {
+public:
+  using mlir::OpRewritePattern<Op>::OpRewritePattern;
+
+  llvm::LogicalResult
+  matchAndRewrite(Op op, mlir::PatternRewriter &rewriter) const override {
+    if constexpr (std::is_same_v<Op, hlfir::MaxlocOp> ||
+                  std::is_same_v<Op, hlfir::MinlocOp>) {
+      MinMaxlocAsElementalConverter<Op> converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::MaxvalOp> ||
+                         std::is_same_v<Op, hlfir::MinvalOp>) {
+      MinMaxvalAsElementalConverter<Op> converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::CountOp>) {
+      CountAsElementalConverter converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::AllOp> ||
+                         std::is_same_v<Op, hlfir::AnyOp>) {
+      AllAnyAsElementalConverter<Op> converter(op, rewriter);
+      return converter.convert();
+    } else if constexpr (std::is_same_v<Op, hlfir::SumOp>) {
+      SumAsElementalConverter converter{op, rewriter};
+      return converter.convert();
+    }
+    return rewriter.notifyMatchFailure(op, "unexpected reduction operation");
   }
 };
 
@@ -1481,10 +2154,18 @@ class SimplifyHLFIRIntrinsics
 
     mlir::RewritePatternSet patterns(context);
     patterns.insert<TransposeAsElementalConversion>(context);
-    patterns.insert<SumAsElementalConversion>(context);
+    patterns.insert<ReductionConversion<hlfir::SumOp>>(context);
     patterns.insert<CShiftConversion>(context);
     patterns.insert<MatmulConversion<hlfir::MatmulTransposeOp>>(context);
 
+    patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::AllOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MaxlocOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MinlocOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MaxvalOp>>(context);
+    patterns.insert<ReductionConversion<hlfir::MinvalOp>>(context);
+
     // If forceMatmulAsElemental is false, then hlfir.matmul inlining
     // will introduce hlfir.eval_in_mem operation with new memory side
     // effects. This conflicts with CSE and optimized bufferization, e.g.:
diff --git a/flang/test/HLFIR/all-elemental.fir b/flang/test/HLFIR/all-elemental.fir
deleted file mode 100644
index 1ba8bb1b7a5fb..0000000000000
--- a/flang/test/HLFIR/all-elemental.fir
+++ /dev/null
@@ -1,91 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.logical<4> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.all %11 : (!hlfir.expr<7x!fir.logical<4>>) -> !fir.logical<4>
-  hlfir.assign %12 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<!fir.logical<4>>
-  return %13 : !fir.logical<4>
-}
-// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-// CHECK-NEXT:     %true = arith.constant true
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca !fir.logical<4>
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %true) -> (i1) {
-// CHECK-NEXT:       %[[V14:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = fir.load %[[V14]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V16:.*]] = arith.cmpi sge, %[[V15]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V17:.*]] = arith.andi %arg4, %[[V16]] : i1
-// CHECK-NEXT:       fir.result %[[V17]] : i1
-// CHECK-NEXT:     }
-// CHECK-NEXT:     %[[V12:.*]] = fir.convert %[[V11]] : (i1) -> !fir.logical<4>
-// CHECK-NEXT:     hlfir.assign %[[V12]] to %[[V4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     %[[V13:.*]] = fir.load %[[V4]]#1 : !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     return %[[V13]] : !fir.logical<4>
-
-
-func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<4x!fir.logical<4>> {
-  %c2_i32 = arith.constant 2 : i32
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.array<4x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.ref<!fir.array<4x!fir.logical<4>>>)
-  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
-  %8 = fir.load %6#0 : !fir.ref<i32>
-  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
-    %13 = fir.load %12 : !fir.ref<i32>
-    %14 = arith.cmpi sge, %13, %8 : i32
-    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %15 : !fir.logical<4>
-  }
-  %10 = hlfir.all %9 dim %c2_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<4x!fir.logical<4>>
-  hlfir.assign %10 to %5#0 : !hlfir.expr<4x!fir.logical<4>>, !fir.ref<!fir.array<4x!fir.logical<4>>>
-  hlfir.destroy %10 : !hlfir.expr<4x!fir.logical<4>>
-  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
-  %11 = fir.load %5#1 : !fir.ref<!fir.array<4x!fir.logical<4>>>
-  return %11 : !fir.array<4x!fir.logical<4>>
-}
-// CHECK-LABEL:  func.func @_QFPtest_dim(
-// CHECK: %10 = hlfir.all %9 dim %c2_i32
\ No newline at end of file
diff --git a/flang/test/HLFIR/any-elemental.fir b/flang/test/HLFIR/any-elemental.fir
deleted file mode 100644
index a7c559679d965..0000000000000
--- a/flang/test/HLFIR/any-elemental.fir
+++ /dev/null
@@ -1,190 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.logical<4> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.any %11 : (!hlfir.expr<7x!fir.logical<4>>) -> !fir.logical<4>
-  hlfir.assign %12 to %4#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<!fir.logical<4>>
-  return %13 : !fir.logical<4>
-}
-// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.logical<4> {
-// CHECK-NEXT:     %false = arith.constant false
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca !fir.logical<4>
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %false) -> (i1) {
-// CHECK-NEXT:       %[[V14:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = fir.load %[[V14]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V16:.*]] = arith.cmpi sge, %[[V15]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V17:.*]] = arith.ori %arg4, %[[V16]] : i1
-// CHECK-NEXT:       fir.result %[[V17]] : i1
-// CHECK-NEXT:     }
-// CHECK-NEXT:     %[[V12:.*]] = fir.convert %[[V11]] : (i1) -> !fir.logical<4>
-// CHECK-NEXT:     hlfir.assign %[[V12]] to %[[V4]]#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     %[[V13:.*]] = fir.load %[[V4]]#1 : !fir.ref<!fir.logical<4>>
-// CHECK-NEXT:     return %[[V13]] : !fir.logical<4>
-
-
-func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<4x!fir.logical<4>> {
-  %c2_i32 = arith.constant 2 : i32
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.array<4x!fir.logical<4>> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<4x!fir.logical<4>>>, !fir.ref<!fir.array<4x!fir.logical<4>>>)
-  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
-  %8 = fir.load %6#0 : !fir.ref<i32>
-  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
-    %13 = fir.load %12 : !fir.ref<i32>
-    %14 = arith.cmpi sge, %13, %8 : i32
-    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %15 : !fir.logical<4>
-  }
-  %10 = hlfir.any %9 dim %c2_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<4x!fir.logical<4>>
-  hlfir.assign %10 to %5#0 : !hlfir.expr<4x!fir.logical<4>>, !fir.ref<!fir.array<4x!fir.logical<4>>>
-  hlfir.destroy %10 : !hlfir.expr<4x!fir.logical<4>>
-  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
-  %11 = fir.load %5#1 : !fir.ref<!fir.array<4x!fir.logical<4>>>
-  return %11 : !fir.array<4x!fir.logical<4>>
-}
-// CHECK-LABEL:  func.func @_QFPtest_dim(
-// CHECK: {{.*}} = hlfir.any {{.*}} dim %c2_i32
-
-
-func.func @_Qtest_recursive() attributes {fir.bindc_name = "test"} {
-  %c1 = arith.constant 1 : index
-  %true = arith.constant true
-  %false = arith.constant false
-  %c0_i64 = arith.constant 0 : i64
-  %c2_i32 = arith.constant 2 : i32
-  %c0 = arith.constant 0 : index
-  %c1_i32 = arith.constant 1 : i32
-  %0 = fir.address_of(@_QFEa) : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-  %1:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>)
-  %2 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %4 = fir.alloca i32 {bindc_name = "n", uniq_name = "_QFEn"}
-  %5:2 = hlfir.declare %4 {uniq_name = "_QFEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.alloca !fir.array<1x!fir.logical<4>> {bindc_name = "ra", uniq_name = "_QFEra"}
-  %7 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %8:2 = hlfir.declare %6(%7) {uniq_name = "_QFEra"} : (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1x!fir.logical<4>>>, !fir.ref<!fir.array<1x!fir.logical<4>>>)
-  %9 = fir.alloca !fir.logical<4> {bindc_name = "rs", uniq_name = "_QFErs"}
-  %10:2 = hlfir.declare %9 {uniq_name = "_QFErs"} : (!fir.ref<!fir.logical<4>>) -> (!fir.ref<!fir.logical<4>>, !fir.ref<!fir.logical<4>>)
-  %11 = fir.allocmem !fir.array<?xi32>, %c1 {fir.must_be_heap = true, uniq_name = "_QFEa.alloc"}
-  %12 = fir.embox %11(%7) : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.heap<!fir.array<?xi32>>>
-  fir.store %12 to %1#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-  hlfir.assign %c1_i32 to %5#0 : i32, !fir.ref<i32>
-  %13 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-  %14:3 = fir.box_dims %13, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-  fir.do_loop %arg0 = %c1 to %14#1 step %c1 unordered {
-    %27:3 = fir.box_dims %13, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-    %28 = arith.subi %27#0, %c1 : index
-    %29 = arith.addi %arg0, %28 : index
-    %30 = hlfir.designate %13 (%29)  : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> !fir.ref<i32>
-    hlfir.assign %c2_i32 to %30 : i32, !fir.ref<i32>
-  }
-  %15 = fir.load %5#0 : !fir.ref<i32>
-  %16 = fir.convert %15 : (i32) -> i64
-  %17 = arith.cmpi sgt, %16, %c0_i64 : i64
-  %18 = arith.select %17, %16, %c0_i64 : i64
-  %19 = fir.convert %18 : (i64) -> index
-  %20 = fir.shape %19 : (index) -> !fir.shape<1>
-  %21 = hlfir.elemental %20 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg0: index):
-    %27 = fir.load %1#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-    %28:3 = fir.box_dims %27, %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-    %29 = arith.addi %28#0, %28#1 : index
-    %30 = arith.subi %29, %c1 : index
-    %31 = arith.subi %30, %28#0 : index
-    %32 = arith.addi %31, %c1 : index
-    %33 = arith.cmpi sgt, %32, %c0 : index
-    %34 = arith.select %33, %32, %c0 : index
-    %35 = fir.shape %34 : (index) -> !fir.shape<1>
-    %36 = hlfir.designate %27 (%28#0:%30:%c1)  shape %35 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
-    %37 = hlfir.elemental %35 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-    ^bb0(%arg1: index):
-      %39 = hlfir.designate %36 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-      %40 = fir.load %39 : !fir.ref<i32>
-      %41 = arith.cmpi eq, %40, %c1_i32 : i32
-      %42 = fir.convert %41 : (i1) -> !fir.logical<4>
-      hlfir.yield_element %42 : !fir.logical<4>
-    }
-    %38 = hlfir.any %37 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
-    hlfir.destroy %37 : !hlfir.expr<?x!fir.logical<4>>
-    hlfir.yield_element %38 : !fir.logical<4>
-  }
-  %22 = hlfir.any %21 : (!hlfir.expr<?x!fir.logical<4>>) -> !fir.logical<4>
-  hlfir.assign %22 to %10#0 : !fir.logical<4>, !fir.ref<!fir.logical<4>>
-  hlfir.destroy %21 : !hlfir.expr<?x!fir.logical<4>>
-  %23 = fir.load %10#0 : !fir.ref<!fir.logical<4>>
-  %24 = fir.convert %23 : (!fir.logical<4>) -> i1
-  %25 = arith.xori %24, %true : i1
-  cf.cond_br %25, ^bb1, ^bb2
-^bb1:  // pred: ^bb0
-  fir.call @_FortranAStopStatement(%c2_i32, %false, %false) fastmath<contract> : (i32, i1, i1) -> ()
-  fir.unreachable
-^bb2:  // pred: ^bb0
-  return
-}
-// CHECK-LABEL: func.func @_Qtest_recursive()
-// CHECK:    %[[V20:.*]] = fir.do_loop %arg0 = %c1 to %{{.*}} step %c1 iter_args(%arg1 = %false) -> (i1) {
-// CHECK:      %[[V26:.*]] = fir.load %[[V1]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-// CHECK:      %[[V27:.*]]:3 = fir.box_dims %[[V26]], %c0 : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index) -> (index, index, index)
-// CHECK:      %[[V28:.*]] = arith.addi %[[V27]]#0, %[[V27]]#1 : index
-// CHECK:      %[[V29:.*]] = arith.subi %[[V28]], %c1 : index
-// CHECK:      %[[V30:.*]] = arith.subi %[[V29]], %[[V27]]#0 : index
-// CHECK:      %[[V31:.*]] = arith.addi %[[V30]], %c1 : index
-// CHECK:      %[[V32:.*]] = arith.cmpi sgt, %[[V31]], %c0 : index
-// CHECK:      %[[V33:.*]] = arith.select %[[V32]], %[[V31]], %c0 : index
-// CHECK:      %[[V34:.*]] = fir.shape %[[V33]] : (index) -> !fir.shape<1>
-// CHECK:      %[[V35:.*]] = hlfir.designate %[[V26]] (%[[V27]]#0:%[[V29]]:%c1)  shape %[[V34]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
-// CHECK:      %[[V36:.*]] = fir.do_loop %arg2 = %c1 to %[[V33]] step %c1 iter_args(%arg3 = %false) -> (i1) {
-// CHECK:        %[[V38:.*]] = hlfir.designate %[[V35]] (%arg2)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:        %[[V39:.*]] = fir.load %[[V38]] : !fir.ref<i32>
-// CHECK:        %[[V40:.*]] = arith.cmpi eq, %[[V39]], %c1_i32 : i32
-// CHECK:        %[[V41:.*]] = arith.ori %arg3, %[[V40]] : i1
-// CHECK:        fir.result %[[V41]] : i1
-// CHECK:      }
-// CHECK:      %[[V37:.*]] = arith.ori %arg1, %[[V36]] : i1
-// CHECK:      fir.result %[[V37]] : i1
-// CHECK:    }
diff --git a/flang/test/HLFIR/count-elemental.fir b/flang/test/HLFIR/count-elemental.fir
deleted file mode 100644
index 0df5cc3c031ea..0000000000000
--- a/flang/test/HLFIR/count-elemental.fir
+++ /dev/null
@@ -1,314 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-  hlfir.assign %12 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<i32>
-  return %13 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK-NEXT:     %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:     %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i32
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK-NEXT:       %[[V13:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V16:.*]] = arith.addi %arg4, %c1_i32 : i32
-// CHECK-NEXT:       %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i32
-// CHECK-NEXT:       fir.result %[[V17]] : i32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     hlfir.assign %[[V11]] to %[[V4]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:     %[[V12:.*]] = fir.load %[[V4]]#1 : !fir.ref<i32>
-// CHECK-NEXT:     return %[[V12]] : i32
-
-func.func @_QFPtest_kind2(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i16 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i16 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %14 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %15 = fir.load %14 : !fir.ref<i32>
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %12 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i16
-  hlfir.assign %12 to %4#0 : i16, !fir.ref<i16>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %13 = fir.load %4#1 : !fir.ref<i16>
-  return %13 : i16
-}
-// CHECK-LABEL:  func.func @_QFPtest_kind2(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i16 {
-// CHECK-NEXT:     %c1_i16 = arith.constant 1 : i16
-// CHECK-NEXT:     %c0_i16 = arith.constant 0 : i16
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]])
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i16
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]]
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2
-// CHECK-NEXT:     %[[V6:.*]] = fir.load %[[V2]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V7:.*]] = fir.convert %[[V6]] : (i32) -> i64
-// CHECK-NEXT:     %[[V8:.*]] = fir.shape %c7 : (index) -> !fir.shape<1>
-// CHECK-NEXT:     %[[V9:.*]] = hlfir.designate %[[V1]]#0 (%[[V7]], %c1:%c7:%c1)  shape %[[V8]] : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-// CHECK-NEXT:     %[[V10:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i16) -> (i16) {
-// CHECK-NEXT:       %[[V13:.*]] = hlfir.designate %[[V9]] (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:       %[[V14:.*]] = fir.load %[[V13]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
-// CHECK-NEXT:       %[[V16:.*]] = arith.addi %arg4, %c1_i16 : i16
-// CHECK-NEXT:       %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i16
-// CHECK-NEXT:       fir.result %[[V17]] : i16
-// CHECK-NEXT:     }
-// CHECK-NEXT:     hlfir.assign %[[V11]] to %[[V4]]#0 : i16, !fir.ref<i16>
-// CHECK-NEXT:     %[[V12:.*]] = fir.load %[[V4]]#1 : !fir.ref<i16>
-// CHECK-NEXT:     return %[[V12]] : i16
-
-func.func @_QFPtest_dim(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> !fir.array<7xi32> {
-  %c1_i32 = arith.constant 1 : i32
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca !fir.array<7xi32> {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %5:2 = hlfir.declare %3(%4) {uniq_name = "_QFFtestEtest"} : (!fir.ref<!fir.array<7xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<7xi32>>, !fir.ref<!fir.array<7xi32>>)
-  %6:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1)  shape %0 : (!fir.ref<!fir.array<4x7xi32>>, index, index, index, index, index, index, !fir.shape<2>) -> !fir.ref<!fir.array<4x7xi32>>
-  %8 = fir.load %6#0 : !fir.ref<i32>
-  %9 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<4x7x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %12 = hlfir.designate %7 (%arg3, %arg4)  : (!fir.ref<!fir.array<4x7xi32>>, index, index) -> !fir.ref<i32>
-    %13 = fir.load %12 : !fir.ref<i32>
-    %14 = arith.cmpi sge, %13, %8 : i32
-    %15 = fir.convert %14 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %15 : !fir.logical<4>
-  }
-  %10 = hlfir.count %9 dim %c1_i32 : (!hlfir.expr<4x7x!fir.logical<4>>, i32) -> !hlfir.expr<7xi32>
-  hlfir.assign %10 to %5#0 : !hlfir.expr<7xi32>, !fir.ref<!fir.array<7xi32>>
-  hlfir.destroy %10 : !hlfir.expr<7xi32>
-  hlfir.destroy %9 : !hlfir.expr<4x7x!fir.logical<4>>
-  %11 = fir.load %5#1 : !fir.ref<!fir.array<7xi32>>
-  return %11 : !fir.array<7xi32>
-}
-// CHECK-LABEL:  func.func @_QFPtest_dim(
-// CHECK: %{{.*}} = hlfir.count %{{.*}} dim %c1_i32
-
-
-func.func @_QFPtest_multi(%arg0: !fir.ref<!fir.array<4x7x2xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %c2 = arith.constant 2 : index
-  %0 = fir.shape %c4, %c7, %c2 : (index, index, index) -> !fir.shape<3>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7x2xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<4x7x2xi32>>, !fir.ref<!fir.array<4x7x2xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = hlfir.designate %1#0 (%c1:%c4:%c1, %c1:%c7:%c1, %c1:%c2:%c1)  shape %0 : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.ref<!fir.array<4x7x2xi32>>
-  %7 = fir.load %5#0 : !fir.ref<i32>
-  %8 = hlfir.elemental %0 unordered : (!fir.shape<3>) -> !hlfir.expr<4x7x2x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index, %arg5: index):
-    %11 = hlfir.designate %6 (%arg3, %arg4, %arg5)  : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index) -> !fir.ref<i32>
-    %12 = fir.load %11 : !fir.ref<i32>
-    %13 = arith.cmpi sge, %12, %7 : i32
-    %14 = fir.convert %13 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %14 : !fir.logical<4>
-  }
-  %9 = hlfir.count %8 : (!hlfir.expr<4x7x2x!fir.logical<4>>) -> i32
-  hlfir.assign %9 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %8 : !hlfir.expr<4x7x2x!fir.logical<4>>
-  %10 = fir.load %4#1 : !fir.ref<i32>
-  return %10 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest_multi(%arg0: !fir.ref<!fir.array<4x7x2xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK-NEXT:     %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:     %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:     %c1 = arith.constant 1 : index
-// CHECK-NEXT:     %c4 = arith.constant 4 : index
-// CHECK-NEXT:     %c7 = arith.constant 7 : index
-// CHECK-NEXT:     %c2 = arith.constant 2 : index
-// CHECK-NEXT:     %[[V0:.*]] = fir.shape %c4, %c7, %c2 : (index, index, index) -> !fir.shape<3>
-// CHECK-NEXT:     %[[V1:.*]]:2 = hlfir.declare %arg0(%[[V0]]) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7x2xi32>>, !fir.shape<3>) -> (!fir.ref<!fir.array<4x7x2xi32>>, !fir.ref<!fir.array<4x7x2xi32>>)
-// CHECK-NEXT:     %[[V2:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:     %[[V3:.*]] = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-// CHECK-NEXT:     %[[V4:.*]]:2 = hlfir.declare %[[V3]] {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:     %[[V5:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:     %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%c1:%c4:%c1, %c1:%c7:%c1, %c1:%c2:%c1)  shape %[[V0]] : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.ref<!fir.array<4x7x2xi32>>
-// CHECK-NEXT:     %[[V7:.*]] = fir.load %[[V5]]#0 : !fir.ref<i32>
-// CHECK-NEXT:     %[[V8:.*]] = fir.do_loop %arg3 = %c1 to %c2 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK-NEXT:       %[[V10:.*]] = fir.do_loop %arg5 = %c1 to %c7 step %c1 iter_args(%arg6 = %arg4) -> (i32) {
-// CHECK-NEXT:         %[[V11:.*]] = fir.do_loop %arg7 = %c1 to %c4 step %c1 iter_args(%arg8 = %arg6) -> (i32) {
-// CHECK-NEXT:           %[[V12:.*]] = hlfir.designate %[[V6]] (%arg7, %arg5, %arg3)  : (!fir.ref<!fir.array<4x7x2xi32>>, index, index, index) -> !fir.ref<i32>
-// CHECK-NEXT:           %[[V13:.*]] = fir.load %[[V12]] : !fir.ref<i32>
-// CHECK-NEXT:           %[[V14:.*]] = arith.cmpi sge, %[[V13]], %[[V7]] : i32
-// CHECK-NEXT:           %[[V15:.*]] = arith.addi %arg8, %c1_i32 : i32
-// CHECK-NEXT:           %[[V16:.*]] = arith.select %[[V14]], %[[V15]], %arg8 : i32
-// CHECK-NEXT:           fir.result %[[V16]] : i32
-// CHECK-NEXT:         }
-// CHECK-NEXT:         fir.result %[[V11]] : i32
-// CHECK-NEXT:       }
-// CHECK-NEXT:       fir.result %[[V10]] : i32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     hlfir.assign %[[V8]] to %[[V4]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:     %[[V9:.*]] = fir.load %[[V4]]#1 : !fir.ref<i32>
-// CHECK-NEXT:     return %[[V9]] : i32
-
-
-
-
-
-func.func @_QFPtest_rec_sum(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7xi32> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %16 = fir.load %15 : !fir.ref<i32>
-    hlfir.yield_element %16 : i32
-  }
-  %12 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.sum %11 : (!hlfir.expr<7xi32>) -> i32
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %13 = hlfir.count %12 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-  hlfir.assign %13 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %12 : !hlfir.expr<7x!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7xi32>
-  %14 = fir.load %4#1 : !fir.ref<i32>
-  return %14 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest_rec_sum(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK:    %[[V12:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK:      %[[V14:.*]] = hlfir.sum %[[V11]] : (!hlfir.expr<7xi32>) -> i32
-// CHECK:      %[[V15:.*]] = arith.cmpi sge, %[[V14]], %[[V10]] : i32
-// CHECK:      %[[V16:.*]] = arith.addi %arg4, %c1_i32 : i32
-// CHECK:      %[[V17:.*]] = arith.select %[[V15]], %[[V16]], %arg4 : i32
-// CHECK:      fir.result %[[V17]] : i32
-// CHECK:    }
-
-
-
-
-func.func @_QFPtest_rec_count(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c1 = arith.constant 1 : index
-  %c4 = arith.constant 4 : index
-  %c7 = arith.constant 7 : index
-  %0 = fir.shape %c4, %c7 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFFtestEb"} : (!fir.ref<!fir.array<4x7xi32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<4x7xi32>>, !fir.ref<!fir.array<4x7xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestErow"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %4:2 = hlfir.declare %3 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %5:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %6 = fir.load %2#0 : !fir.ref<i32>
-  %7 = fir.convert %6 : (i32) -> i64
-  %8 = fir.shape %c7 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %1#0 (%7, %c1:%c7:%c1)  shape %8 : (!fir.ref<!fir.array<4x7xi32>>, i64, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<7xi32>>
-  %10 = fir.load %5#0 : !fir.ref<i32>
-  %11 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.designate %9 (%arg3)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-    %16 = fir.load %15 : !fir.ref<i32>
-    %17 = arith.cmpi sge, %16, %10 : i32
-    %18 = fir.convert %17 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %18 : !fir.logical<4>
-  }
-  %12 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<7x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %15 = hlfir.count %11 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-    %16 = arith.cmpi sge, %15, %10 : i32
-    %17 = fir.convert %16 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %17 : !fir.logical<4>
-  }
-  %13 = hlfir.count %12 : (!hlfir.expr<7x!fir.logical<4>>) -> i32
-  hlfir.assign %13 to %4#0 : i32, !fir.ref<i32>
-  hlfir.destroy %12 : !hlfir.expr<7x!fir.logical<4>>
-  hlfir.destroy %11 : !hlfir.expr<7x!fir.logical<4>>
-  %14 = fir.load %4#1 : !fir.ref<i32>
-  return %14 : i32
-}
-// CHECK-LABEL:  func.func @_QFPtest_rec_count(%arg0: !fir.ref<!fir.array<4x7xi32>> {fir.bindc_name = "b"}, %arg1: !fir.ref<i32> {fir.bindc_name = "row"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-// CHECK:    %[[V11:.*]] = fir.do_loop %arg3 = %c1 to %c7 step %c1 iter_args(%arg4 = %c0_i32) -> (i32) {
-// CHECK:      %[[V13:.*]] = fir.do_loop %arg5 = %c1 to %c7 step %c1 iter_args(%arg6 = %c0_i32) -> (i32) {
-// CHECK:        %[[V17:.*]] = hlfir.designate %[[V9]] (%arg5)  : (!fir.box<!fir.array<7xi32>>, index) -> !fir.ref<i32>
-// CHECK:        %[[V18:.*]] = fir.load %[[V17]] : !fir.ref<i32>
-// CHECK:        %[[V19:.*]] = arith.cmpi sge, %[[V18]], %[[V10]] : i32
-// CHECK:        %[[V20:.*]] = arith.addi %arg6, %c1_i32 : i32
-// CHECK:        %[[V21:.*]] = arith.select %[[V19]], %[[V20]], %arg6 : i32
-// CHECK:        fir.result %[[V21]] : i32
-// CHECK:      }
-// CHECK:      %[[V14:.*]] = arith.cmpi sge, %[[V13]], %[[V10]] : i32
-// CHECK:      %[[V15:.*]] = arith.addi %arg4, %c1_i32 : i32
-// CHECK:      %[[V16:.*]] = arith.select %[[V14]], %[[V15]], %arg4 : i32
-// CHECK:      fir.result %[[V16]] : i32
-// CHECK:    }
diff --git a/flang/test/HLFIR/maxloc-elemental.fir b/flang/test/HLFIR/maxloc-elemental.fir
deleted file mode 100644
index c9210a59f0340..0000000000000
--- a/flang/test/HLFIR/maxloc-elemental.fir
+++ /dev/null
@@ -1,133 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.cmpi sge, %9, %3 : i32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.maxloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT:    %true = arith.constant true
-// CHECK-NEXT:    %c-2147483648_i32 = arith.constant -2147483648 : i32
-// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i32
-// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi32>
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
-// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V8]] : !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
-// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c-2147483648_i32) -> (i32) {
-// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
-// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi sgt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
-// CHECK-NEXT:          fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:          fir.result %[[V20]] : i32
-// CHECK-NEXT:        } else {
-// CHECK-NEXT:          fir.result %arg4 : i32
-// CHECK-NEXT:        }
-// CHECK-NEXT:        fir.result %[[V22]] : i32
-// CHECK-NEXT:      } else {
-// CHECK-NEXT:        fir.result %arg4 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      fir.result %[[V18]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi32>>, !fir.box<!fir.array<?xi32>>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %3 = fir.load %2#0 : !fir.ref<f32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-    %9 = fir.load %8 : !fir.ref<f32>
-    %10 = arith.cmpf oge, %9, %3 : f32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.maxloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: _QPtest_float
-// CHECK:        %cst = arith.constant 0xFF800000 : f32
-// CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
-// CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
-// CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
-// CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
-// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0:.*]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
-// CHECK-NEXT:       %[[NEW_MIN:.*]] = arith.cmpf ogt, %[[V20]], %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDELEM:.*]] = arith.cmpf oeq, %[[V20]], %[[V20]] fastmath<contract> : f32
-// CHECK-NEXT:       %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
-// CHECK-NEXT:       %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
-// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[ORCOND]] -> (f32) {
-// CHECK-NEXT:         fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:         fir.result %[[V20]] : f32
-// CHECK-NEXT:       } else {
-// CHECK-NEXT:         fir.result %arg4 : f32
-// CHECK-NEXT:       }
-// CHECK-NEXT:       fir.result %[[V22]] : f32
-// CHECK-NEXT:     } else {
-// CHECK-NEXT:       fir.result %arg4 : f32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     fir.result %[[V18]] : f32
-// CHECK-NEXT:   }
-
diff --git a/flang/test/HLFIR/maxval-elemental.fir b/flang/test/HLFIR/maxval-elemental.fir
deleted file mode 100644
index a21b4858412de..0000000000000
--- a/flang/test/HLFIR/maxval-elemental.fir
+++ /dev/null
@@ -1,117 +0,0 @@
-// Test maxval inlining for both elemental and designate
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-// subroutine test(array)
-//   integer :: array(:), x
-//   x = maxval(abs(array))
-// end subroutine test
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-  %c31_i32 = arith.constant 31 : i32
-  %c0 = arith.constant 0 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
-  ^bb0(%arg1: index):
-    %8 = hlfir.designate %1#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.shrsi %9, %c31_i32 : i32
-    %11 = arith.xori %9, %10 : i32
-    %12 = arith.subi %11, %10 : i32
-    hlfir.yield_element %12 : i32
-  }
-  %7 = hlfir.maxval %6 {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>) -> i32
-  hlfir.assign %7 to %3#0 : i32, !fir.ref<i32>
-  hlfir.destroy %6 : !hlfir.expr<?xi32>
-  return
-}
-
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c-2147483648_i32 = arith.constant -2147483648 : i32
-// CHECK-NEXT:    %c31_i32 = arith.constant 31 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %[[V0:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 dummy_scope %[[V0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[V2]] {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V5:.*]] = fir.do_loop %arg1 = %c1 to %[[V4]]#1 step %c1 iter_args(%arg2 = %c-2147483648_i32) -> (i32) {
-// CHECK-NEXT:      %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V7:.*]] = fir.load %[[V6]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V8:.*]] = arith.shrsi %[[V7]], %c31_i32 : i32
-// CHECK-NEXT:      %[[V9:.*]] = arith.xori %[[V7]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V10:.*]] = arith.subi %[[V9]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpi sgt, %[[V10]], %arg2 : i32
-// CHECK-NEXT:      %[[V12:.*]] = arith.select %[[V11]], %[[V10]], %arg2 : i32
-// CHECK-NEXT:      fir.result %[[V12]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V5]] to %[[V3]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-// subroutine test(array)
-//   real :: array(:), x
-//   x = maxval(array(3:6))
-// end subroutine test
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}) {
-  %c4 = arith.constant 4 : index
-  %c1 = arith.constant 1 : index
-  %c6 = arith.constant 6 : index
-  %c3 = arith.constant 3 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %2 = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5 = hlfir.designate %1#0 (%c3:%c6:%c1)  shape %4 : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-  %6 = hlfir.maxval %5 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<4xf32>>) -> f32
-  hlfir.assign %6 to %3#0 : f32, !fir.ref<f32>
-  return
-}
-
-// CHECK-LABEL: _QPtest_float
-// CHECK:       %cst = arith.constant 0xFF800000 : f32
-// CHECK:       %[[V4:.*]] = fir.shape %c4 : (index) -> !fir.shape<1>
-// CHECK-NEXT:  %[[V5:.*]] = hlfir.designate %{{.*}} (%c3:%c6:%c1)  shape %[[V4]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-// CHECK-NEXT:  %[[V6:.*]] = fir.do_loop %arg1 = %c1 to %c4 step %c1 iter_args(%arg2 = %cst) -> (f32) {
-// CHECK-NEXT:      %[[V7:.*]] = hlfir.designate %[[V5]] (%arg1)  : (!fir.box<!fir.array<4xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:      %[[V8:.*]] = fir.load %[[V7]] : !fir.ref<f32>
-// CHECK-NEXT:      %[[V9:.*]] = arith.cmpf ogt, %[[V8]], %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V10:.*]] = arith.cmpf une, %arg2, %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpf oeq, %[[V8]], %[[V8]] fastmath<contract> : f32
-// CHECK-NEXT:      %[[V12:.*]] = arith.andi %[[V10]], %[[V11]] : i1
-// CHECK-NEXT:      %[[V13:.*]] = arith.ori %[[V9]], %[[V12]] : i1
-// CHECK-NEXT:      %[[V14:.*]] = arith.select %[[V13]], %[[V8]], %arg2 : f32
-// CHECK-NEXT:      fir.result %[[V14]] : f32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V6]] to %3#0 : f32, !fir.ref<f32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-// Verify that lower bounds of designator are applied in the indexing inside
-// the generated loop (hlfir.designate takes indices relative to the base lower
-// bounds).
-func.func @component_lower_bounds(%arg0: !fir.ref<!fir.type<sometype{i:!fir.array<10xi32>}>>) -> i32 {
-  %c10 = arith.constant 10 : index
-  %c101 = arith.constant 101 : index
-  %4 = fir.shape_shift %c101, %c10 : (index, index) -> !fir.shapeshift<1>
-  %5 = hlfir.designate %arg0{"i"}   shape %4 : (!fir.ref<!fir.type<sometype{i:!fir.array<10xi32>}>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<10xi32>>
-  %6 = hlfir.maxval %5 : (!fir.box<!fir.array<10xi32>>) -> i32
-  return %6 : i32
-}
-// CHECK-LABEL:   func.func @component_lower_bounds(
-// CHECK:  %[[VAL_1:.*]] = arith.constant 100 : index
-// CHECK:  %[[VAL_2:.*]] = arith.constant 1 : index
-// CHECK:  %[[VAL_4:.*]] = arith.constant 10 : index
-// CHECK:  %[[VAL_5:.*]] = arith.constant 101 : index
-// CHECK:  %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_4]] : (index, index) -> !fir.shapeshift<1>
-// CHECK:  %[[VAL_7:.*]] = hlfir.designate %{{.*}}{"i"}   shape %[[VAL_6]] : (!fir.ref<!fir.type<sometype{i:!fir.array<10xi32>}>>, !fir.shapeshift<1>) -> !fir.box<!fir.array<10xi32>>
-// CHECK:  %[[VAL_8:.*]] = fir.do_loop %[[VAL_9:.*]] = %[[VAL_2]] to %[[VAL_4]] {{.*}}
-// CHECK:    %[[VAL_11:.*]] = arith.addi %[[VAL_9]], %[[VAL_1]] : index
-// CHECK:    hlfir.designate %[[VAL_7]] (%[[VAL_11]])  : (!fir.box<!fir.array<10xi32>>, index) -> !fir.ref<i32>
diff --git a/flang/test/HLFIR/minloc-elemental.fir b/flang/test/HLFIR/minloc-elemental.fir
deleted file mode 100644
index 9453a335b4fbf..0000000000000
--- a/flang/test/HLFIR/minloc-elemental.fir
+++ /dev/null
@@ -1,397 +0,0 @@
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.cmpi sge, %9, %3 : i32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT:    %true = arith.constant true
-// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
-// CHECK-NEXT:    %c1_i32 = arith.constant 1 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c0_i32 = arith.constant 0 : i32
-// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i32
-// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi32>
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
-// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V8]] : !fir.ref<i32>
-// CHECK-NEXT:    fir.store %c0_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
-// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
-// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
-// CHECK-NEXT:          fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:          fir.result %[[V20]] : i32
-// CHECK-NEXT:        } else {
-// CHECK-NEXT:          fir.result %arg4 : i32
-// CHECK-NEXT:        }
-// CHECK-NEXT:        fir.result %[[V22]] : i32
-// CHECK-NEXT:      } else {
-// CHECK-NEXT:        fir.result %arg4 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      fir.result %[[V18]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi32>>, !fir.box<!fir.array<?xi32>>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-
-func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.cmpi sge, %9, %3 : i32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi16>, !fir.box<!fir.array<?xi16>>
-  hlfir.destroy %7 : !hlfir.expr<1xi16>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL:  func.func @_QPtest_kind2(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi16>> {fir.bindc_name = "m"}) {
-// CHECK-NEXT:    %true = arith.constant true
-// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
-// CHECK-NEXT:    %c1_i16 = arith.constant 1 : i16
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c0_i16 = arith.constant 0 : i16
-// CHECK-NEXT:    %[[V0:.*]] = fir.alloca i16
-// CHECK-NEXT:    %[[RES:.*]] = fir.alloca !fir.array<1xi16>
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]]:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi16>>) -> (!fir.box<!fir.array<?xi16>>, !fir.box<!fir.array<?xi16>>)
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]] = fir.load %[[V3]]#0 : !fir.ref<i32>
-// CHECK-NEXT:    %[[V8:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:    fir.store %c0_i16 to %[[V8]] : !fir.ref<i16>
-// CHECK-NEXT:    fir.store %c0_i16 to %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:    %[[V9:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V10:.*]] = arith.subi %[[V9]]#1, %c1 : index
-// CHECK-NEXT:    %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10]] step %c1 iter_args(%arg4 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT:      %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:      %[[V15:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V17:.*]] = arith.cmpi sge, %[[V16]], %[[V4]] : i32
-// CHECK-NEXT:      %[[V18:.*]] = fir.if %[[V17]] -> (i32) {
-// CHECK-NEXT:        %[[ISFIRST:.*]] = fir.load %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:        %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:        %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<i32>
-// CHECK-NEXT:        %[[V21:.*]] = arith.cmpi slt, %[[V20]], %arg4 : i32
-// CHECK-NEXT:        %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i16) -> i1
-// CHECK-NEXT:        %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:        %[[ORCOND:.*]] = arith.ori %[[V21]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:        %[[V22:.*]] = fir.if %[[ORCOND]] -> (i32) {
-// CHECK-NEXT:          fir.store %c1_i16 to %[[V0]] : !fir.ref<i16>
-// CHECK-NEXT:          %[[V23:.*]] = hlfir.designate %[[RES]] (%c1) : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK-NEXT:          %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i16
-// CHECK-NEXT:          fir.store %[[V24]] to %[[V23]] : !fir.ref<i16>
-// CHECK-NEXT:          fir.result %[[V20]] : i32
-// CHECK-NEXT:        } else {
-// CHECK-NEXT:          fir.result %arg4 : i32
-// CHECK-NEXT:        }
-// CHECK-NEXT:        fir.result %[[V22]] : i32
-// CHECK-NEXT:      } else {
-// CHECK-NEXT:        fir.result %arg4 : i32
-// CHECK-NEXT:      }
-// CHECK-NEXT:      fir.result %[[V18]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[RES]] to %[[V2]]#0 : !fir.ref<!fir.array<1xi16>>, !fir.box<!fir.array<?xi16>>
-// CHECK-NEXT:    return
-
-
-func.func @_QPtest_kind2_convert(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<i32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c1 = arith.constant 1 : index
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %3 = fir.load %2#0 : !fir.ref<i32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %10 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %11 = fir.load %10 : !fir.ref<i32>
-    %12 = arith.cmpi sge, %11, %3 : i32
-    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %13 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
-  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %9 = hlfir.elemental %8 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
-  ^bb0(%arg3: index):
-    %10 = hlfir.apply %7, %arg3 : (!hlfir.expr<1xi16>, index) -> i16
-    %11 = fir.convert %10 : (i16) -> i32
-    hlfir.yield_element %11 : i32
-  }
-  hlfir.assign %9 to %1#0 : !hlfir.expr<?xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %9 : !hlfir.expr<?xi32>
-  hlfir.destroy %7 : !hlfir.expr<1xi16>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL:   func.func @_QPtest_kind2_convert(
-// CHECK-SAME:                                     %[[VAL_0:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"},
-// CHECK-SAME:                                     %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "val"},
-// CHECK-SAME:                                     %[[VAL_2:.*]]: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-// CHECK:           %[[VAL_3:.*]] = arith.constant false
-// CHECK:           %[[VAL_4:.*]] = arith.constant true
-// CHECK:           %[[VAL_5:.*]] = arith.constant 2147483647 : i32
-// CHECK:           %[[VAL_6:.*]] = arith.constant 1 : i16
-// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
-// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i16
-// CHECK:           %[[VAL_9:.*]] = arith.constant 1 : index
-// CHECK:           %[[VAL_10:.*]] = fir.alloca i16
-// CHECK:           %[[VAL_11:.*]] = fir.alloca !fir.array<1xi16>
-// CHECK:           %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK:           %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK:           %[[VAL_15:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref<i32>
-// CHECK:           %[[VAL_16:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK:           fir.store %[[VAL_8]] to %[[VAL_16]] : !fir.ref<i16>
-// CHECK:           fir.store %[[VAL_8]] to %[[VAL_10]] : !fir.ref<i16>
-// CHECK:           %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_12]]#0, %[[VAL_7]] : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK:           %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#1, %[[VAL_9]] : index
-// CHECK:           %[[VAL_19:.*]] = fir.do_loop %[[VAL_20:.*]] = %[[VAL_7]] to %[[VAL_18]] step %[[VAL_9]] iter_args(%[[VAL_21:.*]] = %[[VAL_5]]) -> (i32) {
-// CHECK:             %[[VAL_22:.*]] = arith.addi %[[VAL_20]], %[[VAL_9]] : index
-// CHECK:             %[[VAL_23:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:             %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<i32>
-// CHECK:             %[[VAL_25:.*]] = arith.cmpi sge, %[[VAL_24]], %[[VAL_15]] : i32
-// CHECK:             %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (i32) {
-// CHECK:               %[[VAL_27:.*]] = fir.load %[[VAL_10]] : !fir.ref<i16>
-// CHECK:               %[[VAL_28:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_22]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:               %[[VAL_29:.*]] = fir.load %[[VAL_28]] : !fir.ref<i32>
-// CHECK:               %[[VAL_30:.*]] = arith.cmpi slt, %[[VAL_29]], %[[VAL_21]] : i32
-// CHECK:               %[[VAL_31:.*]] = fir.convert %[[VAL_27]] : (i16) -> i1
-// CHECK:               %[[VAL_32:.*]] = arith.xori %[[VAL_31]], %[[VAL_4]] : i1
-// CHECK:               %[[VAL_33:.*]] = arith.ori %[[VAL_30]], %[[VAL_32]] : i1
-// CHECK:               %[[VAL_34:.*]] = fir.if %[[VAL_33]] -> (i32) {
-// CHECK:                 fir.store %[[VAL_6]] to %[[VAL_10]] : !fir.ref<i16>
-// CHECK:                 %[[VAL_35:.*]] = hlfir.designate %[[VAL_11]] (%[[VAL_9]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
-// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_22]] : (index) -> i16
-// CHECK:                 fir.store %[[VAL_36]] to %[[VAL_35]] : !fir.ref<i16>
-// CHECK:                 fir.result %[[VAL_29]] : i32
-// CHECK:               } else {
-// CHECK:                 fir.result %[[VAL_21]] : i32
-// CHECK:               }
-// CHECK:               fir.result %[[VAL_34]] : i32
-// CHECK:             } else {
-// CHECK:               fir.result %[[VAL_21]] : i32
-// CHECK:             }
-// CHECK:             fir.result %[[VAL_26]] : i32
-// CHECK:           }
-// CHECK:           %[[VAL_37:.*]] = hlfir.as_expr %[[VAL_11]] move %[[VAL_3]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
-// CHECK:           fir.do_loop %[[VAL_38:.*]] = %[[VAL_9]] to %[[VAL_9]] step %[[VAL_9]] unordered {
-// CHECK:             %[[VAL_39:.*]] = hlfir.apply %[[VAL_37]], %[[VAL_38]] : (!hlfir.expr<1xi16>, index) -> i16
-// CHECK:             %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (i16) -> i32
-// CHECK:             %[[VAL_41:.*]] = hlfir.designate %[[VAL_13]]#0 (%[[VAL_38]])  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK:             hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i32, !fir.ref<i32>
-// CHECK:           }
-// CHECK:           return
-// CHECK:         }
-
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "m"}) {
-  %c0 = arith.constant 0 : index
-  %0:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %1:2 = hlfir.declare %arg2 {uniq_name = "_QFtestEm"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %3 = fir.load %2#0 : !fir.ref<f32>
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %8 = hlfir.designate %0#0 (%arg3)  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-    %9 = fir.load %8 : !fir.ref<f32>
-    %10 = arith.cmpf oge, %9, %3 : f32
-    %11 = fir.convert %10 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %11 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %0#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %7 to %1#0 : !hlfir.expr<1xi32>, !fir.box<!fir.array<?xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  return
-}
-// CHECK-LABEL: _QPtest_float
-// CHECK:        %cst = arith.constant 0x7F800000 : f32
-// CHECK:        %[[V11:.*]] = fir.do_loop %arg3 = %c0 to %[[V10:.*]] step %c1 iter_args(%arg4 = %cst) -> (f32) {
-// CHECK-NEXT:     %[[V14:.*]] = arith.addi %arg3, %c1 : index
-// CHECK-NEXT:     %[[V15:.*]] = hlfir.designate %[[V1:.*]]#0 (%[[V14]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:     %[[V16:.*]] = fir.load %[[V15]] : !fir.ref<f32>
-// CHECK-NEXT:     %[[V17:.*]] = arith.cmpf oge, %[[V16]], %[[V4:.*]] : f32
-// CHECK-NEXT:     %[[V18:.*]] = fir.if %[[V17]] -> (f32) {
-// CHECK-NEXT:       %[[ISFIRST:.*]] = fir.load %[[V0:.*]] : !fir.ref<i32>
-// CHECK-NEXT:       %[[V19:.*]] = hlfir.designate %[[V1]]#0 (%[[V14]]) : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:       %[[V20:.*]] = fir.load %[[V19]] : !fir.ref<f32>
-// CHECK-NEXT:       %[[NEW_MIN:.*]] = arith.cmpf olt, %[[V20]], %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDRED:.*]] = arith.cmpf une, %arg4, %arg4 fastmath<contract> : f32
-// CHECK-NEXT:       %[[CONDELEM:.*]] = arith.cmpf oeq, %[[V20]], %[[V20]] fastmath<contract> : f32
-// CHECK-NEXT:       %[[ANDCOND:.*]] = arith.andi %[[CONDRED]], %[[CONDELEM]] : i1
-// CHECK-NEXT:       %[[NEW_MIN2:.*]] = arith.ori %[[NEW_MIN]], %[[ANDCOND]] : i1
-// CHECK-NEXT:       %[[ISFIRSTL:.*]] = fir.convert %[[ISFIRST]] : (i32) -> i1
-// CHECK-NEXT:       %[[ISFIRSTNOT:.*]] = arith.xori %[[ISFIRSTL]], %true : i1
-// CHECK-NEXT:       %[[ORCOND:.*]] = arith.ori %[[NEW_MIN2]], %[[ISFIRSTNOT]] : i1
-// CHECK-NEXT:       %[[V22:.*]] = fir.if %[[ORCOND]] -> (f32) {
-// CHECK-NEXT:         fir.store %c1_i32 to %[[V0]] : !fir.ref<i32>
-// CHECK-NEXT:         %[[V23:.*]] = hlfir.designate %{{.}} (%c1) : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:         %[[V24:.*]] = fir.convert %[[V14]] : (index) -> i32
-// CHECK-NEXT:         fir.store %[[V24]] to %[[V23]] : !fir.ref<i32>
-// CHECK-NEXT:         fir.result %[[V20]] : f32
-// CHECK-NEXT:       } else {
-// CHECK-NEXT:         fir.result %arg4 : f32
-// CHECK-NEXT:       }
-// CHECK-NEXT:       fir.result %[[V22]] : f32
-// CHECK-NEXT:     } else {
-// CHECK-NEXT:       fir.result %arg4 : f32
-// CHECK-NEXT:     }
-// CHECK-NEXT:     fir.result %[[V18]] : f32
-// CHECK-NEXT:   }
-
-
-func.func @_QPtest_assignshape(%arg0: !fir.ref<!fir.array<3x3xf32>> {fir.bindc_name = "array"}, %arg1: !fir.ref<f32> {fir.bindc_name = "val"}, %arg2: !fir.ref<!fir.array<3xi32>> {fir.bindc_name = "m"}) {
-  %c2 = arith.constant 2 : index
-  %c1 = arith.constant 1 : index
-  %c3 = arith.constant 3 : index
-  %0 = fir.shape %c3, %c3 : (index, index) -> !fir.shape<2>
-  %1:2 = hlfir.declare %arg0(%0) {uniq_name = "_QFtestEarray"} : (!fir.ref<!fir.array<3x3xf32>>, !fir.shape<2>) -> (!fir.ref<!fir.array<3x3xf32>>, !fir.ref<!fir.array<3x3xf32>>)
-  %2 = fir.shape %c3 : (index) -> !fir.shape<1>
-  %3:2 = hlfir.declare %arg2(%2) {uniq_name = "_QFtestEm"} : (!fir.ref<!fir.array<3xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3xi32>>, !fir.ref<!fir.array<3xi32>>)
-  %4:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEval"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %5 = fir.load %4#0 : !fir.ref<f32>
-  %6 = hlfir.elemental %0 unordered : (!fir.shape<2>) -> !hlfir.expr<3x3x!fir.logical<4>> {
-  ^bb0(%arg3: index, %arg4: index):
-    %10 = hlfir.designate %1#0 (%arg3, %arg4)  : (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
-    %11 = fir.load %10 : !fir.ref<f32>
-    %12 = arith.cmpf oge, %11, %5 : f32
-    %13 = fir.convert %12 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %13 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %1#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.ref<!fir.array<3x3xf32>>, !hlfir.expr<3x3x!fir.logical<4>>) -> !hlfir.expr<2xi32>
-  %8 = fir.shape %c2 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %3#0 (%c1:%c2:%c1)  shape %8 : (!fir.ref<!fir.array<3xi32>>, index, index, index, !fir.shape<1>) -> !fir.ref<!fir.array<2xi32>>
-  hlfir.assign %7 to %9 : !hlfir.expr<2xi32>, !fir.ref<!fir.array<2xi32>>
-  hlfir.destroy %7 : !hlfir.expr<2xi32>
-  hlfir.destroy %6 : !hlfir.expr<3x3x!fir.logical<4>>
-  return
-}
-// Not supported as the input is not a box
-// CHECK-LABEL: _QPtest_assignshape
-// CHECK: hlfir.minloc
-
-
-func.func @_QFPtest_character(%arg0: !fir.box<!fir.array<?x!fir.char<1>>> {fir.bindc_name = "b"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "c"}, %arg2: !fir.ref<i32> {fir.bindc_name = "val"}) -> i32 {
-  %c0 = arith.constant 0 : index
-  %c1 = arith.constant 1 : index
-  %0:2 = hlfir.declare %arg0 typeparams %c1 {uniq_name = "_QFFtestEb"} : (!fir.box<!fir.array<?x!fir.char<1>>>, index) -> (!fir.box<!fir.array<?x!fir.char<1>>>, !fir.box<!fir.array<?x!fir.char<1>>>)
-  %1:2 = hlfir.declare %arg1 {uniq_name = "_QFFtestEc"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2 = fir.alloca !fir.array<1xi32> {bindc_name = "m", uniq_name = "_QFFtestEm"}
-  %3 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %4:2 = hlfir.declare %2(%3) {uniq_name = "_QFFtestEm"} : (!fir.ref<!fir.array<1xi32>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1xi32>>, !fir.ref<!fir.array<1xi32>>)
-  %5 = fir.alloca i32 {bindc_name = "test", uniq_name = "_QFFtestEtest"}
-  %6:2 = hlfir.declare %5 {uniq_name = "_QFFtestEtest"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %7:2 = hlfir.declare %arg2 {uniq_name = "_QFFtestEval"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %8 = fir.load %7#0 : !fir.ref<i32>
-  %9:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %10 = fir.shape %9#1 : (index) -> !fir.shape<1>
-  %11 = hlfir.elemental %10 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg3: index):
-    %16 = hlfir.designate %1#0 (%arg3)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %17 = fir.load %16 : !fir.ref<i32>
-    %18 = arith.cmpi eq, %17, %8 : i32
-    %19 = fir.convert %18 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %19 : !fir.logical<4>
-  }
-  %12 = hlfir.minloc %0#0 mask %11 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?x!fir.char<1>>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  hlfir.assign %12 to %4#0 : !hlfir.expr<1xi32>, !fir.ref<!fir.array<1xi32>>
-  hlfir.destroy %12 : !hlfir.expr<1xi32>
-  hlfir.destroy %11 : !hlfir.expr<?x!fir.logical<4>>
-  %13 = hlfir.designate %4#0 (%c1)  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
-  %14 = fir.load %13 : !fir.ref<i32>
-  hlfir.assign %14 to %6#0 : i32, !fir.ref<i32>
-  %15 = fir.load %6#1 : !fir.ref<i32>
-  return %15 : i32
-}
-// Characters are not supported at the moment
-// CHECK-LABEL: _QFPtest_character
-// CHECK: hlfir.minloc
-
-
-func.func @_QPtest_parts(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "x"}, %arg1: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "mask"}) -> f32 {
-  %c1 = arith.constant 1 : index
-  %c5 = arith.constant 5 : index
-  %c0 = arith.constant 0 : index
-  %c5_i32 = arith.constant 5 : i32
-  %0:2 = hlfir.declare %arg1 {uniq_name = "_QFtestEmask"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %1 = fir.alloca f32 {bindc_name = "test", uniq_name = "_QFtestEtest"}
-  %2:2 = hlfir.declare %1 {uniq_name = "_QFtestEtest"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %3:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEx"} : (!fir.box<!fir.array<?xi32>>) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %4:3 = fir.box_dims %0#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?x!fir.logical<4>> {
-  ^bb0(%arg2: index):
-    %11 = hlfir.designate %0#0 (%arg2)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %12 = fir.load %11 : !fir.ref<i32>
-    %13 = arith.cmpi sge, %12, %c5_i32 : i32
-    %14 = fir.convert %13 : (i1) -> !fir.logical<4>
-    hlfir.yield_element %14 : !fir.logical<4>
-  }
-  %7 = hlfir.minloc %3#0 mask %6 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xi32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
-  %8 = fir.shape %c1 : (index) -> !fir.shape<1>
-  %9 = hlfir.designate %3#0 (%c5:%c5:%c1)  shape %8 : (!fir.box<!fir.array<?xi32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<1xi32>>
-  hlfir.assign %7 to %9 : !hlfir.expr<1xi32>, !fir.box<!fir.array<1xi32>>
-  hlfir.destroy %7 : !hlfir.expr<1xi32>
-  hlfir.destroy %6 : !hlfir.expr<?x!fir.logical<4>>
-  %10 = fir.load %2#1 : !fir.ref<f32>
-  return %10 : f32
-}
-// Characters are not supported at the moment
-// CHECK-LABEL: _QPtest_parts
-// CHECK: fir.do_loop %{{.*}} = %c0 to %{{.*}} step %c1 iter_args(%{{.*}} = %c2147483647_i32) -> (i32) {
-
diff --git a/flang/test/HLFIR/minval-elemental.fir b/flang/test/HLFIR/minval-elemental.fir
deleted file mode 100644
index 64cd5403ec558..0000000000000
--- a/flang/test/HLFIR/minval-elemental.fir
+++ /dev/null
@@ -1,95 +0,0 @@
-// Test maxval inlining for both elemental and designate
-// RUN: fir-opt %s -opt-bufferization | FileCheck %s
-
-// subroutine test(array)
-//   integer :: array(:), x
-//   x = minval(abs(array))
-// end subroutine test
-
-func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-  %c31_i32 = arith.constant 31 : i32
-  %c0 = arith.constant 0 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-  %2 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-  %4:3 = fir.box_dims %1#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-  %5 = fir.shape %4#1 : (index) -> !fir.shape<1>
-  %6 = hlfir.elemental %5 unordered : (!fir.shape<1>) -> !hlfir.expr<?xi32> {
-  ^bb0(%arg1: index):
-    %8 = hlfir.designate %1#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-    %9 = fir.load %8 : !fir.ref<i32>
-    %10 = arith.shrsi %9, %c31_i32 : i32
-    %11 = arith.xori %9, %10 : i32
-    %12 = arith.subi %11, %10 : i32
-    hlfir.yield_element %12 : i32
-  }
-  %7 = hlfir.minval %6 {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>) -> i32
-  hlfir.assign %7 to %3#0 : i32, !fir.ref<i32>
-  hlfir.destroy %6 : !hlfir.expr<?xi32>
-  return
-}
-
-// CHECK-LABEL: func.func @_QPtest(%arg0: !fir.box<!fir.array<?xi32>> {fir.bindc_name = "array"}) {
-// CHECK-NEXT:    %c1 = arith.constant 1 : index
-// CHECK-NEXT:    %c2147483647_i32 = arith.constant 2147483647 : i32
-// CHECK-NEXT:    %c31_i32 = arith.constant 31 : i32
-// CHECK-NEXT:    %c0 = arith.constant 0 : index
-// CHECK-NEXT:    %[[V0:.*]] = fir.dummy_scope : !fir.dscope
-// CHECK-NEXT:    %[[V1:.*]]:2 = hlfir.declare %arg0 dummy_scope %[[V0]] {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xi32>>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>)
-// CHECK-NEXT:    %[[V2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-// CHECK-NEXT:    %[[V3:.*]]:2 = hlfir.declare %[[V2]] {uniq_name = "_QFtestEx"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-// CHECK-NEXT:    %[[V4:.*]]:3 = fir.box_dims %[[V1]]#0, %c0 : (!fir.box<!fir.array<?xi32>>, index) -> (index, index, index)
-// CHECK-NEXT:    %[[V5:.*]] = fir.do_loop %arg1 = %c1 to %[[V4]]#1 step %c1 iter_args(%arg2 = %c2147483647_i32) -> (i32) {
-// CHECK-NEXT:      %[[V6:.*]] = hlfir.designate %[[V1]]#0 (%arg1)  : (!fir.box<!fir.array<?xi32>>, index) -> !fir.ref<i32>
-// CHECK-NEXT:      %[[V7:.*]] = fir.load %[[V6]] : !fir.ref<i32>
-// CHECK-NEXT:      %[[V8:.*]] = arith.shrsi %[[V7]], %c31_i32 : i32
-// CHECK-NEXT:      %[[V9:.*]] = arith.xori %[[V7]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V10:.*]] = arith.subi %[[V9]], %[[V8]] : i32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpi slt, %[[V10]], %arg2 : i32
-// CHECK-NEXT:      %[[V12:.*]] = arith.select %[[V11]], %[[V10]], %arg2 : i32
-// CHECK-NEXT:      fir.result %[[V12]] : i32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V5]] to %[[V3]]#0 : i32, !fir.ref<i32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
-
-// subroutine test(array)
-//   real :: array(:), x
-//   x = minval(array(3:6))
-// end subroutine test
-
-func.func @_QPtest_float(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "array"}) {
-  %c4 = arith.constant 4 : index
-  %c1 = arith.constant 1 : index
-  %c6 = arith.constant 6 : index
-  %c3 = arith.constant 3 : index
-  %0 = fir.dummy_scope : !fir.dscope
-  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtestEarray"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
-  %2 = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFtestEx"}
-  %3:2 = hlfir.declare %2 {uniq_name = "_QFtestEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
-  %4 = fir.shape %c4 : (index) -> !fir.shape<1>
-  %5 = hlfir.designate %1#0 (%c3:%c6:%c1)  shape %4 : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-  %6 = hlfir.minval %5 {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<4xf32>>) -> f32
-  hlfir.assign %6 to %3#0 : f32, !fir.ref<f32>
-  return
-}
-
-// CHECK-LABEL: _QPtest_float
-// CHECK:       %cst = arith.constant 0x7F800000 : f32
-// CHECK:       %[[V4:.*]] = fir.shape %c4 : (index) -> !fir.shape<1>
-// CHECK-NEXT:  %[[V5:.*]] = hlfir.designate %{{.*}} (%c3:%c6:%c1)  shape %[[V4]] : (!fir.box<!fir.array<?xf32>>, index, index, index, !fir.shape<1>) -> !fir.box<!fir.array<4xf32>>
-// CHECK-NEXT:  %[[V6:.*]] = fir.do_loop %arg1 = %c1 to %c4 step %c1 iter_args(%arg2 = %cst) -> (f32) {
-// CHECK-NEXT:      %[[V7:.*]] = hlfir.designate %[[V5]] (%arg1)  : (!fir.box<!fir.array<4xf32>>, index) -> !fir.ref<f32>
-// CHECK-NEXT:      %[[V8:.*]] = fir.load %[[V7]] : !fir.ref<f32>
-// CHECK-NEXT:      %[[V9:.*]] = arith.cmpf olt, %[[V8]], %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V10:.*]] = arith.cmpf une, %arg2, %arg2 fastmath<contract> : f32
-// CHECK-NEXT:      %[[V11:.*]] = arith.cmpf oeq, %[[V8]], %[[V8]] fastmath<contract> : f32
-// CHECK-NEXT:      %[[V12:.*]] = arith.andi %[[V10]], %[[V11]] : i1
-// CHECK-NEXT:      %[[V13:.*]] = arith.ori %[[V9]], %[[V12]] : i1
-// CHECK-NEXT:      %[[V14:.*]] = arith.select %[[V13]], %[[V8]], %arg2 : f32
-// CHECK-NEXT:      fir.result %[[V14]] : f32
-// CHECK-NEXT:    }
-// CHECK-NEXT:    hlfir.assign %[[V6]] to %3#0 : f32, !fir.ref<f32>
-// CHECK-NEXT:    return
-// CHECK-NEXT:  }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir
new file mode 100644
index 0000000000000..fb0f39811ea74
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-all.fir
@@ -0,0 +1,123 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%arg0: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+  %0 = hlfir.all %arg0 : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_7]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_14:.*]] = arith.andi %[[VAL_13]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_14]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_15]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_expr(%arg0: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.all %arg0 dim %dim : (!hlfir.expr<?x?x?x!fir.logical<1>>, i32) -> !hlfir.expr<?x?x!fir.logical<1>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<1>>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?x!fir.logical<1>>) -> !fir.shape<3>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_4]], %[[VAL_6]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index):
+// CHECK:             %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_13:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:               %[[VAL_14:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_9]], %[[VAL_12]], %[[VAL_10]] : (!hlfir.expr<?x?x?x!fir.logical<1>>, index, index, index) -> !fir.logical<1>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]] = arith.andi %[[VAL_15]], %[[VAL_13]] : i1
+// CHECK:               fir.result %[[VAL_16]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_11]] : (i1) -> !fir.logical<1>
+// CHECK:             hlfir.yield_element %[[VAL_17]] : !fir.logical<1>
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?x?x!fir.logical<1>>
+// CHECK:         }
+
+func.func @test_total_var(%arg0: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+  %0 = hlfir.all %arg0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant true
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_10]], %[[VAL_14]] : index
+// CHECK:               %[[VAL_16:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_7]], %[[VAL_16]] : index
+// CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_15]], %[[VAL_17]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_21:.*]] = arith.andi %[[VAL_20]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_21]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_22]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_var(%arg0: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.all %arg0 dim %dim : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, i32) -> !hlfir.expr<?x?x!fir.logical<2>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<2>>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant true
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_7]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_11]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]], %[[VAL_21]], %[[VAL_23]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index, index, index) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<2>) -> i1
+// CHECK:               %[[VAL_27:.*]] = arith.andi %[[VAL_26]], %[[VAL_14]] : i1
+// CHECK:               fir.result %[[VAL_27]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<2>
+// CHECK:             hlfir.yield_element %[[VAL_28]] : !fir.logical<2>
+// CHECK:           }
+// CHECK:           return %[[VAL_9]] : !hlfir.expr<?x?x!fir.logical<2>>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir
new file mode 100644
index 0000000000000..5bd76f3d24927
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-any.fir
@@ -0,0 +1,123 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%arg0: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+  %0 = hlfir.any %arg0 : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.shape<2>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_4]] step %[[VAL_1]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]], %[[VAL_7]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_14:.*]] = arith.ori %[[VAL_13]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_14]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_15:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_15]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_expr(%arg0: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.any %arg0 dim %dim : (!hlfir.expr<?x?x?x!fir.logical<1>>, i32) -> !hlfir.expr<?x?x!fir.logical<1>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<1>>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?x!fir.logical<1>>) -> !fir.shape<3>
+// CHECK:           %[[VAL_4:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_3]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_4]], %[[VAL_6]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<1>> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index, %[[VAL_10:.*]]: index):
+// CHECK:             %[[VAL_11:.*]] = fir.do_loop %[[VAL_12:.*]] = %[[VAL_1]] to %[[VAL_5]] step %[[VAL_1]] unordered iter_args(%[[VAL_13:.*]] = %[[VAL_2]]) -> (i1) {
+// CHECK:               %[[VAL_14:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_9]], %[[VAL_12]], %[[VAL_10]] : (!hlfir.expr<?x?x?x!fir.logical<1>>, index, index, index) -> !fir.logical<1>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]] = arith.ori %[[VAL_15]], %[[VAL_13]] : i1
+// CHECK:               fir.result %[[VAL_16]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_17:.*]] = fir.convert %[[VAL_11]] : (i1) -> !fir.logical<1>
+// CHECK:             hlfir.yield_element %[[VAL_17]] : !fir.logical<1>
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?x?x!fir.logical<1>>
+// CHECK:         }
+
+func.func @test_total_var(%arg0: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+  %0 = hlfir.any %arg0 : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4>
+  return %0 : !fir.logical<4>
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> !fir.logical<4> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]] = fir.do_loop %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_5]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_8:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:             %[[VAL_9:.*]] = fir.do_loop %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_4]]#1 step %[[VAL_2]] unordered iter_args(%[[VAL_11:.*]] = %[[VAL_8]]) -> (i1) {
+// CHECK:               %[[VAL_12:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_13:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_14:.*]] = arith.subi %[[VAL_12]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_15:.*]] = arith.addi %[[VAL_10]], %[[VAL_14]] : index
+// CHECK:               %[[VAL_16:.*]] = arith.subi %[[VAL_13]]#0, %[[VAL_2]] : index
+// CHECK:               %[[VAL_17:.*]] = arith.addi %[[VAL_7]], %[[VAL_16]] : index
+// CHECK:               %[[VAL_18:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_15]], %[[VAL_17]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_21:.*]] = arith.ori %[[VAL_20]], %[[VAL_11]] : i1
+// CHECK:               fir.result %[[VAL_21]] : i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_9]] : i1
+// CHECK:           }
+// CHECK:           %[[VAL_22:.*]] = fir.convert %[[VAL_6]] : (i1) -> !fir.logical<4>
+// CHECK:           return %[[VAL_22]] : !fir.logical<4>
+// CHECK:         }
+
+func.func @test_partial_var(%arg0: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.any %arg0 dim %dim : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, i32) -> !hlfir.expr<?x?x!fir.logical<2>>
+  return %0 : !hlfir.expr<?x?x!fir.logical<2>>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           %[[VAL_1:.*]] = arith.constant false
+// CHECK:           %[[VAL_2:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_5]]#1, %[[VAL_7]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_9:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?x!fir.logical<2>> {
+// CHECK:           ^bb0(%[[VAL_10:.*]]: index, %[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_1]]) -> (i1) {
+// CHECK:               %[[VAL_15:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_15]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:               %[[VAL_22:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_23:.*]] = arith.addi %[[VAL_11]], %[[VAL_22]] : index
+// CHECK:               %[[VAL_24:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]], %[[VAL_21]], %[[VAL_23]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index, index, index) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_25:.*]] = fir.load %[[VAL_24]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[VAL_26:.*]] = fir.convert %[[VAL_25]] : (!fir.logical<2>) -> i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_26]], %[[VAL_14]] : i1
+// CHECK:               fir.result %[[VAL_27]] : i1
+// CHECK:             }
+// CHECK:             %[[VAL_28:.*]] = fir.convert %[[VAL_12]] : (i1) -> !fir.logical<2>
+// CHECK:             hlfir.yield_element %[[VAL_28]] : !fir.logical<2>
+// CHECK:           }
+// CHECK:           return %[[VAL_9]] : !hlfir.expr<?x?x!fir.logical<2>>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir
new file mode 100644
index 0000000000000..44594c646a368
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-count.fir
@@ -0,0 +1,127 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+// Total COUNT of a rank-2 logical(4) expression: expect two nested unordered loops carrying an i32 counter.
+func.func @test_total_expr(%mask: !hlfir.expr<?x?x!fir.logical<4>>) -> i32 {
+  %res = hlfir.count %mask : (!hlfir.expr<?x?x!fir.logical<4>>) -> i32
+  return %res : i32
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[MASK:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> i32 {
+// CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[ZERO:.*]] = arith.constant 0 : i32
+// CHECK:           %[[SHAPE:.*]] = hlfir.shape_of %[[MASK]] : (!hlfir.expr<?x?x!fir.logical<4>>) -> !fir.shape<2>
+// CHECK:           %[[EXT0:.*]] = hlfir.get_extent %[[SHAPE]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[EXT1:.*]] = hlfir.get_extent %[[SHAPE]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[OUTER:.*]] = fir.do_loop %[[J:.*]] = %[[C1]] to %[[EXT1]] step %[[C1]] unordered iter_args(%[[OUTER_ACC:.*]] = %[[ZERO]]) -> (i32) {
+// CHECK:             %[[INNER:.*]] = fir.do_loop %[[I:.*]] = %[[C1]] to %[[EXT0]] step %[[C1]] unordered iter_args(%[[ACC:.*]] = %[[OUTER_ACC]]) -> (i32) {
+// CHECK:               %[[ELT:.*]] = hlfir.apply %[[MASK]], %[[I]], %[[J]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[COND:.*]] = fir.convert %[[ELT]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[INC:.*]] = arith.addi %[[ACC]], %[[ONE]] : i32
+// CHECK:               %[[NEXT:.*]] = arith.select %[[COND]], %[[INC]], %[[ACC]] : i32
+// CHECK:               fir.result %[[NEXT]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[INNER]] : i32
+// CHECK:           }
+// CHECK:           return %[[OUTER]] : i32
+// CHECK:         }
+// COUNT along DIM=2 of a rank-3 logical(1) expression: expect an hlfir.elemental over the two kept dimensions wrapping an i16 counting loop.
+func.func @test_partial_expr(%mask: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?xi16> {
+  %dim = arith.constant 2 : i32
+  %res = hlfir.count %mask dim %dim : (!hlfir.expr<?x?x?x!fir.logical<1>>, i32) -> !hlfir.expr<?x?xi16>
+  return %res : !hlfir.expr<?x?xi16>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[MASK:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<1>>) -> !hlfir.expr<?x?xi16> {
+// CHECK:           %[[ONE:.*]] = arith.constant 1 : i16
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[ZERO:.*]] = arith.constant 0 : i16
+// CHECK:           %[[SHAPE:.*]] = hlfir.shape_of %[[MASK]] : (!hlfir.expr<?x?x?x!fir.logical<1>>) -> !fir.shape<3>
+// CHECK:           %[[EXT0:.*]] = hlfir.get_extent %[[SHAPE]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[EXT1:.*]] = hlfir.get_extent %[[SHAPE]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[EXT2:.*]] = hlfir.get_extent %[[SHAPE]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[RES_SHAPE:.*]] = fir.shape %[[EXT0]], %[[EXT2]] : (index, index) -> !fir.shape<2>
+// CHECK:           %[[ELEMENTAL:.*]] = hlfir.elemental %[[RES_SHAPE]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi16> {
+// CHECK:           ^bb0(%[[I:.*]]: index, %[[K:.*]]: index):
+// CHECK:             %[[LOOP:.*]] = fir.do_loop %[[J:.*]] = %[[C1]] to %[[EXT1]] step %[[C1]] unordered iter_args(%[[ACC:.*]] = %[[ZERO]]) -> (i16) {
+// CHECK:               %[[ELT:.*]] = hlfir.apply %[[MASK]], %[[I]], %[[J]], %[[K]] : (!hlfir.expr<?x?x?x!fir.logical<1>>, index, index, index) -> !fir.logical<1>
+// CHECK:               %[[COND:.*]] = fir.convert %[[ELT]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[INC:.*]] = arith.addi %[[ACC]], %[[ONE]] : i16
+// CHECK:               %[[NEXT:.*]] = arith.select %[[COND]], %[[INC]], %[[ACC]] : i16
+// CHECK:               fir.result %[[NEXT]] : i16
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[LOOP]] : i16
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL]] : !hlfir.expr<?x?xi16>
+// CHECK:         }
+// Total COUNT of a rank-2 boxed logical(4) array: each loop index is offset by result #0 of fir.box_dims before hlfir.designate.
+func.func @test_total_var(%mask: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> i32 {
+  %res = hlfir.count %mask : (!fir.box<!fir.array<?x?x!fir.logical<4>>>) -> i32
+  return %res : i32
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[MASK:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x!fir.logical<4>>>) -> i32 {
+// CHECK:           %[[ONE:.*]] = arith.constant 1 : i32
+// CHECK:           %[[ZERO:.*]] = arith.constant 0 : i32
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS0:.*]]:3 = fir.box_dims %[[MASK]], %[[C0]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[DIMS1:.*]]:3 = fir.box_dims %[[MASK]], %[[C1]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:           %[[OUTER:.*]] = fir.do_loop %[[J:.*]] = %[[C1]] to %[[DIMS1]]#1 step %[[C1]] unordered iter_args(%[[OUTER_ACC:.*]] = %[[ZERO]]) -> (i32) {
+// CHECK:             %[[INNER:.*]] = fir.do_loop %[[I:.*]] = %[[C1]] to %[[DIMS0]]#1 step %[[C1]] unordered iter_args(%[[ACC:.*]] = %[[OUTER_ACC]]) -> (i32) {
+// CHECK:               %[[ELT_DIMS0:.*]]:3 = fir.box_dims %[[MASK]], %[[C0]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[ELT_DIMS1:.*]]:3 = fir.box_dims %[[MASK]], %[[C1]] : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:               %[[OFF0:.*]] = arith.subi %[[ELT_DIMS0]]#0, %[[C1]] : index
+// CHECK:               %[[IDX0:.*]] = arith.addi %[[I]], %[[OFF0]] : index
+// CHECK:               %[[OFF1:.*]] = arith.subi %[[ELT_DIMS1]]#0, %[[C1]] : index
+// CHECK:               %[[IDX1:.*]] = arith.addi %[[J]], %[[OFF1]] : index
+// CHECK:               %[[ADDR:.*]] = hlfir.designate %[[MASK]] (%[[IDX0]], %[[IDX1]])  : (!fir.box<!fir.array<?x?x!fir.logical<4>>>, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:               %[[ELT:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.logical<4>>
+// CHECK:               %[[COND:.*]] = fir.convert %[[ELT]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[INC:.*]] = arith.addi %[[ACC]], %[[ONE]] : i32
+// CHECK:               %[[NEXT:.*]] = arith.select %[[COND]], %[[INC]], %[[ACC]] : i32
+// CHECK:               fir.result %[[NEXT]] : i32
+// CHECK:             }
+// CHECK:             fir.result %[[INNER]] : i32
+// CHECK:           }
+// CHECK:           return %[[OUTER]] : i32
+// CHECK:         }
+// COUNT along DIM=2 of a rank-3 boxed logical(2) array: expect an hlfir.elemental over the two kept dimensions wrapping an i64 counting loop.
+func.func @test_partial_var(%mask: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?xi64> {
+  %dim = arith.constant 2 : i32
+  %res = hlfir.count %mask dim %dim : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, i32) -> !hlfir.expr<?x?xi64>
+  return %res : !hlfir.expr<?x?xi64>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[MASK:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<2>>>) -> !hlfir.expr<?x?xi64> {
+// CHECK:           %[[ONE:.*]] = arith.constant 1 : i64
+// CHECK:           %[[ZERO:.*]] = arith.constant 0 : i64
+// CHECK:           %[[C2:.*]] = arith.constant 2 : index
+// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[C0:.*]] = arith.constant 0 : index
+// CHECK:           %[[DIMS0:.*]]:3 = fir.box_dims %[[MASK]], %[[C0]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[DIMS1:.*]]:3 = fir.box_dims %[[MASK]], %[[C1]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[DIMS2:.*]]:3 = fir.box_dims %[[MASK]], %[[C2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:           %[[RES_SHAPE:.*]] = fir.shape %[[DIMS0]]#1, %[[DIMS2]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[ELEMENTAL:.*]] = hlfir.elemental %[[RES_SHAPE]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi64> {
+// CHECK:           ^bb0(%[[I:.*]]: index, %[[K:.*]]: index):
+// CHECK:             %[[LOOP:.*]] = fir.do_loop %[[J:.*]] = %[[C1]] to %[[DIMS1]]#1 step %[[C1]] unordered iter_args(%[[ACC:.*]] = %[[ZERO]]) -> (i64) {
+// CHECK:               %[[ELT_DIMS0:.*]]:3 = fir.box_dims %[[MASK]], %[[C0]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[ELT_DIMS1:.*]]:3 = fir.box_dims %[[MASK]], %[[C1]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[ELT_DIMS2:.*]]:3 = fir.box_dims %[[MASK]], %[[C2]] : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index) -> (index, index, index)
+// CHECK:               %[[OFF0:.*]] = arith.subi %[[ELT_DIMS0]]#0, %[[C1]] : index
+// CHECK:               %[[IDX0:.*]] = arith.addi %[[I]], %[[OFF0]] : index
+// CHECK:               %[[OFF1:.*]] = arith.subi %[[ELT_DIMS1]]#0, %[[C1]] : index
+// CHECK:               %[[IDX1:.*]] = arith.addi %[[J]], %[[OFF1]] : index
+// CHECK:               %[[OFF2:.*]] = arith.subi %[[ELT_DIMS2]]#0, %[[C1]] : index
+// CHECK:               %[[IDX2:.*]] = arith.addi %[[K]], %[[OFF2]] : index
+// CHECK:               %[[ADDR:.*]] = hlfir.designate %[[MASK]] (%[[IDX0]], %[[IDX1]], %[[IDX2]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<2>>>, index, index, index) -> !fir.ref<!fir.logical<2>>
+// CHECK:               %[[ELT:.*]] = fir.load %[[ADDR]] : !fir.ref<!fir.logical<2>>
+// CHECK:               %[[COND:.*]] = fir.convert %[[ELT]] : (!fir.logical<2>) -> i1
+// CHECK:               %[[INC:.*]] = arith.addi %[[ACC]], %[[ONE]] : i64
+// CHECK:               %[[NEXT:.*]] = arith.select %[[COND]], %[[INC]], %[[ACC]] : i64
+// CHECK:               fir.result %[[NEXT]] : i64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[LOOP]] : i64
+// CHECK:           }
+// CHECK:           return %[[ELEMENTAL]] : !hlfir.expr<?x?xi64>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
new file mode 100644
index 0000000000000..de631e0f208d2
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
@@ -0,0 +1,343 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+// Masked MAXLOC of a 1-D i32 expression without DIM: the location is stored into a 1-element i32 temporary returned via hlfir.as_expr.
+func.func @test_1d_total_expr(%input: !hlfir.expr<?xi32>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+  %0 = hlfir.maxloc %input mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_1d_total_expr(
+// CHECK-SAME:                                  %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xi32>,
+// CHECK-SAME:                                  %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -2147483648 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<1xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i32, i32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i32, i32, i1) {
+// CHECK:               %[[VAL_18:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]] : (!hlfir.expr<?xi32>, index) -> i32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpi sgt, %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_19]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_20]], %[[VAL_21]], %[[VAL_12]] : i32
+// CHECK:               %[[VAL_23:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               fir.result %[[VAL_22]], %[[VAL_23]], %[[VAL_2]] : i32, i32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i32, i32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_17]]#0, %[[VAL_17]]#1, %[[VAL_17]]#2 : i32, i32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_10]]#0 : (i32) -> index
+// CHECK:           %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK:           %[[VAL_28:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_27]] to %[[VAL_28]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_29:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi32>>, i1) -> !hlfir.expr<1xi32>
+// CHECK:           return %[[VAL_29]] : !hlfir.expr<1xi32>
+// CHECK:         }
+// Masked MAXLOC of a 1-D f32 expression with DIM=1: the result is a scalar i32 location produced by an ordered loop.
+func.func @test_1d_dim_expr(%input: !hlfir.expr<?xf32>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+  %dim = arith.constant 1 : i16
+  %0 = hlfir.maxloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xf32>, i16, !hlfir.expr<?x!fir.logical<4>>) -> i32
+  return %0 : i32
+}
+// CHECK-LABEL:   func.func @test_1d_dim_expr(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xf32>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xf32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i32, f32, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i32, f32, i1) {
+// CHECK:               %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr<?xf32>, index) -> f32
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf ogt, %[[VAL_17]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_20:.*]] = arith.cmpf oeq, %[[VAL_17]], %[[VAL_17]] fastmath<contract> : f32
+// CHECK:               %[[VAL_21:.*]] = arith.andi %[[VAL_19]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.ori %[[VAL_18]], %[[VAL_21]] : i1
+// CHECK:               %[[VAL_23:.*]] = arith.ori %[[VAL_22]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_10]] : (index) -> i32
+// CHECK:               %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_11]] : i32
+// CHECK:               %[[VAL_26:.*]] = arith.select %[[VAL_23]], %[[VAL_17]], %[[VAL_12]] : f32
+// CHECK:               fir.result %[[VAL_25]], %[[VAL_26]], %[[VAL_2]] : i32, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_16]]#0, %[[VAL_16]]#1, %[[VAL_16]]#2 : i32, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_28:.*]] = fir.convert %[[VAL_9]]#0 : (i32) -> index
+// CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_28]] : (index) -> i32
+// CHECK:           return %[[VAL_30]] : i32
+// CHECK:         }
+// Masked MAXLOC of a 1-D boxed f32 array without DIM: a non-zero location is adjusted by result #0 of fir.box_dims before being stored.
+func.func @test_1d_total_var(%input: !fir.box<!fir.array<?xf32>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+  %0 = hlfir.maxloc %input mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  return %0 : !hlfir.expr<1xi16>
+}
+// CHECK-LABEL:   func.func @test_1d_total_var(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i16
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.array<1xi16>
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]]#1 step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i16, f32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i16, f32, i1) {
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf ogt, %[[VAL_22]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf une, %[[VAL_13]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_25:.*]] = arith.cmpf oeq, %[[VAL_22]], %[[VAL_22]] fastmath<contract> : f32
+// CHECK:               %[[VAL_26:.*]] = arith.andi %[[VAL_24]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_23]], %[[VAL_26]] : i1
+// CHECK:               %[[VAL_28:.*]] = arith.ori %[[VAL_27]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_11]] : (index) -> i16
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_28]], %[[VAL_29]], %[[VAL_12]] : i16
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_28]], %[[VAL_22]], %[[VAL_13]] : f32
+// CHECK:               fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_2]] : i16, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i16, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_17]]#0, %[[VAL_17]]#1, %[[VAL_17]]#2 : i16, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_33:.*]] = arith.cmpi ne, %[[VAL_10]]#0, %[[VAL_6]] : i16
+// CHECK:           %[[VAL_35:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_36:.*]] = fir.convert %[[VAL_10]]#0 : (i16) -> index
+// CHECK:           %[[VAL_37:.*]] = arith.addi %[[VAL_36]], %[[VAL_35]]#0 : index
+// CHECK:           %[[VAL_38:.*]] = arith.subi %[[VAL_37]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_39:.*]] = arith.select %[[VAL_33]], %[[VAL_38]], %[[VAL_36]] : index
+// CHECK:           %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (index) -> i16
+// CHECK:           %[[VAL_41:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK:           hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i16, !fir.ref<i16>
+// CHECK:           %[[VAL_42:.*]] = hlfir.as_expr %[[VAL_8]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
+// CHECK:           return %[[VAL_42]] : !hlfir.expr<1xi16>
+// CHECK:         }
+// Masked MAXLOC of a 1-D boxed f64 array with DIM=1: the result is a scalar i64 location adjusted by result #0 of fir.box_dims when non-zero.
+func.func @test_1d_dim_var(%input: !fir.box<!fir.array<?xf64>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.maxloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf64>>, i32, !hlfir.expr<?x!fir.logical<4>>) -> i64
+  return %0 : i64
+}
+// CHECK-LABEL:   func.func @test_1d_dim_var(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]]#1 step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i64, f64, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i64, f64, i1) {
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]])  : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<f64>
+// CHECK:               %[[VAL_22:.*]] = arith.cmpf ogt, %[[VAL_21]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf oeq, %[[VAL_21]], %[[VAL_21]] fastmath<contract> : f64
+// CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+// CHECK:               %[[VAL_26:.*]] = arith.ori %[[VAL_22]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_26]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:               %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_11]] : i64
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_27]], %[[VAL_21]], %[[VAL_12]] : f64
+// CHECK:               fir.result %[[VAL_29]], %[[VAL_30]], %[[VAL_2]] : i64, f64, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i64, f64, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_16]]#0, %[[VAL_16]]#1, %[[VAL_16]]#2 : i64, f64, i1
+// CHECK:           }
+// CHECK:           %[[VAL_32:.*]] = arith.cmpi ne, %[[VAL_9]]#0, %[[VAL_6]] : i64
+// CHECK:           %[[VAL_34:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_35:.*]] = fir.convert %[[VAL_9]]#0 : (i64) -> index
+// CHECK:           %[[VAL_36:.*]] = arith.addi %[[VAL_35]], %[[VAL_34]]#0 : index
+// CHECK:           %[[VAL_37:.*]] = arith.subi %[[VAL_36]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_38:.*]] = arith.select %[[VAL_32]], %[[VAL_37]], %[[VAL_35]] : index
+// CHECK:           %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i64
+// CHECK:           return %[[VAL_39]] : i64
+// CHECK:         }
+// Masked MAXLOC of a rank-3 f32 expression without DIM: three nested loops carry three i32 coordinates, the running maximum and an i1 flag that starts true.
+func.func @test_total_expr(%input: !hlfir.expr<?x?x?xf32>, %mask: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+  %0 = hlfir.maxloc %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>, !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32>
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant false
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant true
+// CHECK:           %[[VAL_7:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]]:5 = fir.do_loop %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_5]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_8]], %[[VAL_17:.*]] = %[[VAL_8]], %[[VAL_18:.*]] = %[[VAL_8]], %[[VAL_19:.*]] = %[[VAL_7]], %[[VAL_20:.*]] = %[[VAL_6]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:             %[[VAL_21:.*]]:5 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_5]] to %[[VAL_12]] step %[[VAL_5]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_16]], %[[VAL_24:.*]] = %[[VAL_17]], %[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_19]], %[[VAL_27:.*]] = %[[VAL_20]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:               %[[VAL_28:.*]]:5 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_5]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_24]], %[[VAL_32:.*]] = %[[VAL_25]], %[[VAL_33:.*]] = %[[VAL_26]], %[[VAL_34:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:                 %[[VAL_35:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?x!fir.logical<4>>, index, index, index) -> !fir.logical<4>
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1
+// CHECK:                 %[[VAL_37:.*]]:5 = fir.if %[[VAL_36]] -> (i32, i32, i32, f32, i1) {
+// CHECK:                   %[[VAL_38:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                   %[[VAL_39:.*]] = arith.cmpf ogt, %[[VAL_38]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_40:.*]] = arith.cmpf une, %[[VAL_33]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_41:.*]] = arith.cmpf oeq, %[[VAL_38]], %[[VAL_38]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1
+// CHECK:                   %[[VAL_43:.*]] = arith.ori %[[VAL_39]], %[[VAL_42]] : i1
+// CHECK:                   %[[VAL_44:.*]] = arith.ori %[[VAL_43]], %[[VAL_34]] : i1
+// CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                   %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_30]] : i32
+// CHECK:                   %[[VAL_47:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                   %[[VAL_48:.*]] = arith.select %[[VAL_44]], %[[VAL_47]], %[[VAL_31]] : i32
+// CHECK:                   %[[VAL_49:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                   %[[VAL_50:.*]] = arith.select %[[VAL_44]], %[[VAL_49]], %[[VAL_32]] : i32
+// CHECK:                   %[[VAL_51:.*]] = arith.select %[[VAL_44]], %[[VAL_38]], %[[VAL_33]] : f32
+// CHECK:                   fir.result %[[VAL_46]], %[[VAL_48]], %[[VAL_50]], %[[VAL_51]], %[[VAL_4]] : i32, i32, i32, f32, i1
+// CHECK:                 } else {
+// CHECK:                   fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_32]], %[[VAL_33]], %[[VAL_34]] : i32, i32, i32, f32, i1
+// CHECK:                 }
+// CHECK:                 fir.result %[[VAL_37]]#0, %[[VAL_37]]#1, %[[VAL_37]]#2, %[[VAL_37]]#3, %[[VAL_37]]#4 : i32, i32, i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_28]]#0, %[[VAL_28]]#1, %[[VAL_28]]#2, %[[VAL_28]]#3, %[[VAL_28]]#4 : i32, i32, i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_21]]#0, %[[VAL_21]]#1, %[[VAL_21]]#2, %[[VAL_21]]#3, %[[VAL_21]]#4 : i32, i32, i32, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_55:.*]] = fir.convert %[[VAL_14]]#0 : (i32) -> index
+// CHECK:           %[[VAL_57:.*]] = fir.convert %[[VAL_55]] : (index) -> i32
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_5]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_57]] to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_59:.*]] = fir.convert %[[VAL_14]]#1 : (i32) -> index
+// CHECK:           %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (index) -> i32
+// CHECK:           %[[VAL_61:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_60]] to %[[VAL_61]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_62:.*]] = fir.convert %[[VAL_14]]#2 : (i32) -> index
+// CHECK:           %[[VAL_63:.*]] = fir.convert %[[VAL_62]] : (index) -> i32
+// CHECK:           %[[VAL_64:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_63]] to %[[VAL_64]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_65:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_4]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_65]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+  %c2_i32 = arith.constant 2 : i32 // DIM operand: partial reduction along dimension 2 of the rank-3 input.
+  %loc = hlfir.maxloc %input dim %c2_i32 mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32, !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32>
+  return %loc : !hlfir.expr<?x?xi32>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_9]]#1, %[[VAL_11]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_13:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> i1
+// CHECK:           %[[VAL_14:.*]] = hlfir.elemental %[[VAL_12]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_15:.*]]: index, %[[VAL_16:.*]]: index):
+// CHECK:             %[[VAL_17:.*]]:3 = fir.do_loop %[[VAL_18:.*]] = %[[VAL_7]] to %[[VAL_10]]#1 step %[[VAL_7]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_5]], %[[VAL_20:.*]] = %[[VAL_4]], %[[VAL_21:.*]] = %[[VAL_3]]) -> (i32, f32, i1) {
+// CHECK:               %[[VAL_22:.*]] = fir.if %[[VAL_13]] -> (!fir.logical<4>) {
+// CHECK:                 %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_26:.*]] = arith.subi %[[VAL_23]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_27:.*]] = arith.addi %[[VAL_15]], %[[VAL_26]] : index
+// CHECK:                 %[[VAL_28:.*]] = arith.subi %[[VAL_24]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_28]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.addi %[[VAL_16]], %[[VAL_30]] : index
+// CHECK:                 %[[VAL_32:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_27]], %[[VAL_29]], %[[VAL_31]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:                 %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<!fir.logical<4>>
+// CHECK:                 fir.result %[[VAL_33]] : !fir.logical<4>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_34:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+// CHECK:                 fir.result %[[VAL_34]] : !fir.logical<4>
+// CHECK:               }
+// CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_36:.*]]:3 = fir.if %[[VAL_35]] -> (i32, f32, i1) {
+// CHECK:                 %[[VAL_37:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_38:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_39:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_40:.*]] = arith.subi %[[VAL_37]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_41:.*]] = arith.addi %[[VAL_15]], %[[VAL_40]] : index
+// CHECK:                 %[[VAL_42:.*]] = arith.subi %[[VAL_38]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_43:.*]] = arith.addi %[[VAL_18]], %[[VAL_42]] : index
+// CHECK:                 %[[VAL_44:.*]] = arith.subi %[[VAL_39]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_45:.*]] = arith.addi %[[VAL_16]], %[[VAL_44]] : index
+// CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_41]], %[[VAL_43]], %[[VAL_45]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_48:.*]] = arith.cmpf ogt, %[[VAL_47]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_49:.*]] = arith.cmpf une, %[[VAL_20]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_50:.*]] = arith.cmpf oeq, %[[VAL_47]], %[[VAL_47]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1
+// CHECK:                 %[[VAL_52:.*]] = arith.ori %[[VAL_48]], %[[VAL_51]] : i1
+// CHECK:                 %[[VAL_53:.*]] = arith.ori %[[VAL_52]], %[[VAL_21]] : i1
+// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_18]] : (index) -> i32
+// CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_19]] : i32
+// CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_53]], %[[VAL_47]], %[[VAL_20]] : f32
+// CHECK:                 fir.result %[[VAL_55]], %[[VAL_56]], %[[VAL_2]] : i32, f32, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_19]], %[[VAL_20]], %[[VAL_21]] : i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_57:.*]]#0, %[[VAL_57]]#1, %[[VAL_57]]#2 : i32, f32, i1
+// CHECK:             }
+// CHECK:             %[[VAL_58:.*]] = arith.cmpi ne, %[[VAL_59:.*]]#0, %[[VAL_5]] : i32
+// CHECK:             %[[VAL_60:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_61:.*]] = fir.convert %[[VAL_59]]#0 : (i32) -> index
+// CHECK:             %[[VAL_62:.*]] = arith.addi %[[VAL_61]], %[[VAL_60]]#0 : index
+// CHECK:             %[[VAL_63:.*]] = arith.subi %[[VAL_62]], %[[VAL_7]] : index
+// CHECK:             %[[VAL_64:.*]] = arith.select %[[VAL_58]], %[[VAL_63]], %[[VAL_61]] : index
+// CHECK:             %[[VAL_65:.*]] = fir.convert %[[VAL_64]] : (index) -> i32
+// CHECK:             hlfir.yield_element %[[VAL_65]] : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
+// Character comparisons are not supported yet.
+func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
+  %loc = hlfir.maxloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> // Total reduction over a character array.
+  return %loc : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_character(
+// CHECK:           hlfir.maxloc
+
+// BACK is not supported yet.
+func.func @test_back(%input: !hlfir.expr<?xi32>) -> !hlfir.expr<1xi32> {
+  %true = arith.constant true // Value passed as the BACK operand.
+  %loc = hlfir.maxloc %input back %true : (!hlfir.expr<?xi32>, i1) -> !hlfir.expr<1xi32>
+  return %loc : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_back(
+// CHECK:           hlfir.maxloc
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
new file mode 100644
index 0000000000000..4022b7d3b2b77
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxval.fir
@@ -0,0 +1,177 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%input: !hlfir.expr<?x?xf32>, %mask: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+  %max = hlfir.maxval %input mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?x?xf32>, !hlfir.expr<?x?x!fir.logical<4>>) -> f32 // No DIM operand: scalar result.
+  return %max : f32
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant -3.40282347E+38 : f32
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf32>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_2]] iter_args(%[[VAL_9:.*]] = %[[VAL_3]]) -> (f32) {
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_14]] -> (f32) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?xf32>, index, index) -> f32
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_12]] : f32
+// CHECK:                 fir.result %[[VAL_22]] : f32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]] : f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]] : f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]] : f32
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : f32
+// CHECK:         }
+
+func.func @test_partial_expr(%input: !hlfir.expr<?x?xf64>, %mask: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+  %c1_i32 = arith.constant 1 : i32 // DIM operand: partial reduction along dimension 1.
+  %max = hlfir.maxval %input dim %c1_i32 mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32, !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64>
+  return %max : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant -1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_3]]) -> (f64) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_14]] -> (f64) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf ogt, %[[VAL_16]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_12]] : f64
+// CHECK:                 fir.result %[[VAL_22]] : f64
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]] : f64
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_10]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+  %max = hlfir.maxval %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 // No DIM operand: scalar result.
+  return %max : f16
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                              %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant -6.550400e+04 : f16
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_2]]) -> (f16) {
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_5]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f16) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_11]], %[[VAL_8]])  : (!fir.ref<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (f16) {
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_19:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_22:.*]] = arith.addi %[[VAL_8]], %[[VAL_21]] : index
+// CHECK:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_25:.*]] = arith.cmpf ogt, %[[VAL_24]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_26:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_27:.*]] = arith.cmpf oeq, %[[VAL_24]], %[[VAL_24]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_28:.*]] = arith.andi %[[VAL_26]], %[[VAL_27]] : i1
+// CHECK:                 %[[VAL_29:.*]] = arith.ori %[[VAL_25]], %[[VAL_28]] : i1
+// CHECK:                 %[[VAL_30:.*]] = arith.select %[[VAL_29]], %[[VAL_24]], %[[VAL_12]] : f16
+// CHECK:                 fir.result %[[VAL_30]] : f16
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]] : f16
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_16]] : f16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : f16
+// CHECK:         }
+
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+  %c2_i32 = arith.constant 2 : i32 // DIM operand: partial reduction along dimension 2.
+  %max = hlfir.maxval %input dim %c2_i32 mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, i32, !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16>
+  return %max : !hlfir.expr<?xf16>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant -6.550400e+04 : f16
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>) -> i1
+// CHECK:           %[[VAL_10:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf16> {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_7]]#1 step %[[VAL_4]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (f16) {
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_9]] -> (!fir.logical<1>) {
+// CHECK:                 %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_19:.*]] = arith.addi %[[VAL_11]], %[[VAL_18]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_19]], %[[VAL_21]])  : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.logical<1>>
+// CHECK:                 fir.result %[[VAL_23]] : !fir.logical<1>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<1>
+// CHECK:                 fir.result %[[VAL_24]] : !fir.logical<1>
+// CHECK:               }
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (f16) {
+// CHECK:                 %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_28:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_29:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.addi %[[VAL_11]], %[[VAL_29]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.subi %[[VAL_28]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_32:.*]] = arith.addi %[[VAL_13]], %[[VAL_31]] : index
+// CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_30]], %[[VAL_32]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_35:.*]] = arith.cmpf ogt, %[[VAL_34]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_36:.*]] = arith.cmpf une, %[[VAL_14]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_37:.*]] = arith.cmpf oeq, %[[VAL_34]], %[[VAL_34]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_38:.*]] = arith.andi %[[VAL_36]], %[[VAL_37]] : i1
+// CHECK:                 %[[VAL_39:.*]] = arith.ori %[[VAL_35]], %[[VAL_38]] : i1
+// CHECK:                 %[[VAL_40:.*]] = arith.select %[[VAL_39]], %[[VAL_34]], %[[VAL_14]] : f16
+// CHECK:                 fir.result %[[VAL_40]] : f16
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_14]] : f16
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_26]] : f16
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_12]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
+// CHECK:         }
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
new file mode 100644
index 0000000000000..161cb99486622
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
@@ -0,0 +1,343 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_1d_total_expr(%input: !hlfir.expr<?xi32>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> { // Total MINLOC (no DIM operand) of a rank-1 i32 expression with an array MASK.
+  %0 = hlfir.minloc %input mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xi32>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32> // The location is returned as a one-element i32 array expression.
+}
+// CHECK-LABEL:   func.func @test_1d_total_expr(
+// CHECK-SAME:                                  %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xi32>,
+// CHECK-SAME:                                  %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 2147483647 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = fir.alloca !fir.array<1xi32>
+// CHECK:           %[[VAL_8:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xi32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = hlfir.get_extent %[[VAL_8]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]] step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i32, i32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i32, i32, i1) {
+// CHECK:               %[[VAL_18:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]] : (!hlfir.expr<?xi32>, index) -> i32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpi slt, %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               %[[VAL_20:.*]] = arith.ori %[[VAL_19]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_21:.*]] = fir.convert %[[VAL_11]] : (index) -> i32
+// CHECK:               %[[VAL_22:.*]] = arith.select %[[VAL_20]], %[[VAL_21]], %[[VAL_12]] : i32
+// CHECK:               %[[VAL_23:.*]] = arith.select %[[VAL_20]], %[[VAL_18]], %[[VAL_13]] : i32
+// CHECK:               fir.result %[[VAL_22]], %[[VAL_23]], %[[VAL_2]] : i32, i32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i32, i32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_24:.*]]#0, %[[VAL_24]]#1, %[[VAL_24]]#2 : i32, i32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_25:.*]] = fir.convert %[[VAL_26:.*]]#0 : (i32) -> index
+// CHECK:           %[[VAL_27:.*]] = fir.convert %[[VAL_25]] : (index) -> i32
+// CHECK:           %[[VAL_28:.*]] = hlfir.designate %[[VAL_7]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_27]] to %[[VAL_28]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_29:.*]] = hlfir.as_expr %[[VAL_7]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi32>>, i1) -> !hlfir.expr<1xi32>
+// CHECK:           return %[[VAL_29]] : !hlfir.expr<1xi32>
+// CHECK:         }
+
+func.func @test_1d_dim_expr(%input: !hlfir.expr<?xf32>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i32 { // MINLOC of a rank-1 f32 expression with an explicit DIM and an array MASK.
+  %dim = arith.constant 1 : i16 // DIM operand: constant 1, deliberately typed i16.
+  %0 = hlfir.minloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?xf32>, i16, !hlfir.expr<?x!fir.logical<4>>) -> i32
+  return %0 : i32 // With DIM on a rank-1 input the result is a scalar i32.
+}
+// CHECK-LABEL:   func.func @test_1d_dim_expr(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?xf32>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i32 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_7:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?xf32>) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.get_extent %[[VAL_7]] {dim = 0 : index} : (!fir.shape<1>) -> index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i32, f32, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i32, f32, i1) {
+// CHECK:               %[[VAL_17:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_10]] : (!hlfir.expr<?xf32>, index) -> f32
+// CHECK:               %[[VAL_18:.*]] = arith.cmpf olt, %[[VAL_17]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_19:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:               %[[VAL_20:.*]] = arith.cmpf oeq, %[[VAL_17]], %[[VAL_17]] fastmath<contract> : f32
+// CHECK:               %[[VAL_21:.*]] = arith.andi %[[VAL_19]], %[[VAL_20]] : i1
+// CHECK:               %[[VAL_22:.*]] = arith.ori %[[VAL_18]], %[[VAL_21]] : i1
+// CHECK:               %[[VAL_23:.*]] = arith.ori %[[VAL_22]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_24:.*]] = fir.convert %[[VAL_10]] : (index) -> i32
+// CHECK:               %[[VAL_25:.*]] = arith.select %[[VAL_23]], %[[VAL_24]], %[[VAL_11]] : i32
+// CHECK:               %[[VAL_26:.*]] = arith.select %[[VAL_23]], %[[VAL_17]], %[[VAL_12]] : f32
+// CHECK:               fir.result %[[VAL_25]], %[[VAL_26]], %[[VAL_2]] : i32, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_27:.*]]#0, %[[VAL_27]]#1, %[[VAL_27]]#2 : i32, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_28:.*]] = fir.convert %[[VAL_29:.*]]#0 : (i32) -> index
+// CHECK:           %[[VAL_30:.*]] = fir.convert %[[VAL_28]] : (index) -> i32
+// CHECK:           return %[[VAL_30]] : i32
+// CHECK:         }
+
+func.func @test_1d_total_var(%input: !fir.box<!fir.array<?xf32>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> { // Total MINLOC of a boxed rank-1 f32 array (a variable rather than an hlfir.expr) with an array MASK.
+  %0 = hlfir.minloc %input mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf32>>, !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16>
+  return %0 : !hlfir.expr<1xi16> // The location is returned as a one-element i16 array expression.
+}
+// CHECK-LABEL:   func.func @test_1d_total_var(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf32>>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> !hlfir.expr<1xi16> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i16
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]] = fir.alloca !fir.array<1xi16>
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_9]]#1 step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_6]], %[[VAL_13:.*]] = %[[VAL_5]], %[[VAL_14:.*]] = %[[VAL_4]]) -> (i16, f32, i1) {
+// CHECK:             %[[VAL_15:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_16:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_17:.*]]:3 = fir.if %[[VAL_16]] -> (i16, f32, i1) {
+// CHECK:               %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_19:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:               %[[VAL_21:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]])  : (!fir.box<!fir.array<?xf32>>, index) -> !fir.ref<f32>
+// CHECK:               %[[VAL_22:.*]] = fir.load %[[VAL_21]] : !fir.ref<f32>
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf olt, %[[VAL_22]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf une, %[[VAL_13]], %[[VAL_13]] fastmath<contract> : f32
+// CHECK:               %[[VAL_25:.*]] = arith.cmpf oeq, %[[VAL_22]], %[[VAL_22]] fastmath<contract> : f32
+// CHECK:               %[[VAL_26:.*]] = arith.andi %[[VAL_24]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_23]], %[[VAL_26]] : i1
+// CHECK:               %[[VAL_28:.*]] = arith.ori %[[VAL_27]], %[[VAL_14]] : i1
+// CHECK:               %[[VAL_29:.*]] = fir.convert %[[VAL_11]] : (index) -> i16
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_28]], %[[VAL_29]], %[[VAL_12]] : i16
+// CHECK:               %[[VAL_31:.*]] = arith.select %[[VAL_28]], %[[VAL_22]], %[[VAL_13]] : f32
+// CHECK:               fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_2]] : i16, f32, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_12]], %[[VAL_13]], %[[VAL_14]] : i16, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_32:.*]]#0, %[[VAL_32]]#1, %[[VAL_32]]#2 : i16, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_33:.*]] = arith.cmpi ne, %[[VAL_34:.*]]#0, %[[VAL_6]] : i16
+// CHECK:           %[[VAL_35:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_36:.*]] = fir.convert %[[VAL_34]]#0 : (i16) -> index
+// CHECK:           %[[VAL_37:.*]] = arith.addi %[[VAL_36]], %[[VAL_35]]#0 : index
+// CHECK:           %[[VAL_38:.*]] = arith.subi %[[VAL_37]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_39:.*]] = arith.select %[[VAL_33]], %[[VAL_38]], %[[VAL_36]] : index
+// CHECK:           %[[VAL_40:.*]] = fir.convert %[[VAL_39]] : (index) -> i16
+// CHECK:           %[[VAL_41:.*]] = hlfir.designate %[[VAL_8]] (%[[VAL_3]])  : (!fir.ref<!fir.array<1xi16>>, index) -> !fir.ref<i16>
+// CHECK:           hlfir.assign %[[VAL_40]] to %[[VAL_41]] : i16, !fir.ref<i16>
+// CHECK:           %[[VAL_42:.*]] = hlfir.as_expr %[[VAL_8]] move %[[VAL_2]] : (!fir.ref<!fir.array<1xi16>>, i1) -> !hlfir.expr<1xi16>
+// CHECK:           return %[[VAL_42]] : !hlfir.expr<1xi16>
+// CHECK:         }
+
+func.func @test_1d_dim_var(%input: !fir.box<!fir.array<?xf64>>, %mask: !hlfir.expr<?x!fir.logical<4>>) -> i64 { // MINLOC of a boxed rank-1 f64 array with an explicit DIM and an array MASK.
+  %dim = arith.constant 1 : i32 // DIM operand: constant 1 of type i32.
+  %0 = hlfir.minloc %input dim %dim mask %mask {fastmath = #arith.fastmath<contract>} : (!fir.box<!fir.array<?xf64>>, i32, !hlfir.expr<?x!fir.logical<4>>) -> i64
+  return %0 : i64 // With DIM on a rank-1 input the result is a scalar i64.
+}
+// CHECK-LABEL:   func.func @test_1d_dim_var(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?xf64>>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x!fir.logical<4>>) -> i64 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant true
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_6:.*]] = arith.constant 0 : i64
+// CHECK:           %[[VAL_7:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_8:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_9:.*]]:3 = fir.do_loop %[[VAL_10:.*]] = %[[VAL_3]] to %[[VAL_8]]#1 step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_6]], %[[VAL_12:.*]] = %[[VAL_5]], %[[VAL_13:.*]] = %[[VAL_4]]) -> (i64, f64, i1) {
+// CHECK:             %[[VAL_14:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_10]] : (!hlfir.expr<?x!fir.logical<4>>, index) -> !fir.logical<4>
+// CHECK:             %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<4>) -> i1
+// CHECK:             %[[VAL_16:.*]]:3 = fir.if %[[VAL_15]] -> (i64, f64, i1) {
+// CHECK:               %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:               %[[VAL_18:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:               %[[VAL_19:.*]] = arith.addi %[[VAL_10]], %[[VAL_18]] : index
+// CHECK:               %[[VAL_20:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_19]])  : (!fir.box<!fir.array<?xf64>>, index) -> !fir.ref<f64>
+// CHECK:               %[[VAL_21:.*]] = fir.load %[[VAL_20]] : !fir.ref<f64>
+// CHECK:               %[[VAL_22:.*]] = arith.cmpf olt, %[[VAL_21]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_23:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f64
+// CHECK:               %[[VAL_24:.*]] = arith.cmpf oeq, %[[VAL_21]], %[[VAL_21]] fastmath<contract> : f64
+// CHECK:               %[[VAL_25:.*]] = arith.andi %[[VAL_23]], %[[VAL_24]] : i1
+// CHECK:               %[[VAL_26:.*]] = arith.ori %[[VAL_22]], %[[VAL_25]] : i1
+// CHECK:               %[[VAL_27:.*]] = arith.ori %[[VAL_26]], %[[VAL_13]] : i1
+// CHECK:               %[[VAL_28:.*]] = fir.convert %[[VAL_10]] : (index) -> i64
+// CHECK:               %[[VAL_29:.*]] = arith.select %[[VAL_27]], %[[VAL_28]], %[[VAL_11]] : i64
+// CHECK:               %[[VAL_30:.*]] = arith.select %[[VAL_27]], %[[VAL_21]], %[[VAL_12]] : f64
+// CHECK:               fir.result %[[VAL_29]], %[[VAL_30]], %[[VAL_2]] : i64, f64, i1
+// CHECK:             } else {
+// CHECK:               fir.result %[[VAL_11]], %[[VAL_12]], %[[VAL_13]] : i64, f64, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_31:.*]]#0, %[[VAL_31]]#1, %[[VAL_31]]#2 : i64, f64, i1
+// CHECK:           }
+// CHECK:           %[[VAL_32:.*]] = arith.cmpi ne, %[[VAL_33:.*]]#0, %[[VAL_6]] : i64
+// CHECK:           %[[VAL_34:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?xf64>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_35:.*]] = fir.convert %[[VAL_33]]#0 : (i64) -> index
+// CHECK:           %[[VAL_36:.*]] = arith.addi %[[VAL_35]], %[[VAL_34]]#0 : index
+// CHECK:           %[[VAL_37:.*]] = arith.subi %[[VAL_36]], %[[VAL_3]] : index
+// CHECK:           %[[VAL_38:.*]] = arith.select %[[VAL_32]], %[[VAL_37]], %[[VAL_35]] : index
+// CHECK:           %[[VAL_39:.*]] = fir.convert %[[VAL_38]] : (index) -> i64
+// CHECK:           return %[[VAL_39]] : i64
+// CHECK:         }
+
+func.func @test_total_expr(%input: !hlfir.expr<?x?x?xf32>, %mask: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> { // Total MINLOC of a rank-3 f32 expression with a rank-3 MASK, using the reassoc fastmath flag.
+  %0 = hlfir.minloc %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?x?xf32>, !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32>
+  return %0 : !hlfir.expr<3xi32> // One i32 coordinate per input dimension, packed into a three-element array expression.
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x?x!fir.logical<4>>) -> !hlfir.expr<3xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 3 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant false
+// CHECK:           %[[VAL_5:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_6:.*]] = arith.constant true
+// CHECK:           %[[VAL_7:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_9:.*]] = fir.alloca !fir.array<3xi32>
+// CHECK:           %[[VAL_10:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?x?xf32>) -> !fir.shape<3>
+// CHECK:           %[[VAL_11:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 0 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_12:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 1 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_13:.*]] = hlfir.get_extent %[[VAL_10]] {dim = 2 : index} : (!fir.shape<3>) -> index
+// CHECK:           %[[VAL_14:.*]]:5 = fir.do_loop %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_5]] unordered iter_args(%[[VAL_16:.*]] = %[[VAL_8]], %[[VAL_17:.*]] = %[[VAL_8]], %[[VAL_18:.*]] = %[[VAL_8]], %[[VAL_19:.*]] = %[[VAL_7]], %[[VAL_20:.*]] = %[[VAL_6]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:             %[[VAL_21:.*]]:5 = fir.do_loop %[[VAL_22:.*]] = %[[VAL_5]] to %[[VAL_12]] step %[[VAL_5]] unordered iter_args(%[[VAL_23:.*]] = %[[VAL_16]], %[[VAL_24:.*]] = %[[VAL_17]], %[[VAL_25:.*]] = %[[VAL_18]], %[[VAL_26:.*]] = %[[VAL_19]], %[[VAL_27:.*]] = %[[VAL_20]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:               %[[VAL_28:.*]]:5 = fir.do_loop %[[VAL_29:.*]] = %[[VAL_5]] to %[[VAL_11]] step %[[VAL_5]] unordered iter_args(%[[VAL_30:.*]] = %[[VAL_23]], %[[VAL_31:.*]] = %[[VAL_24]], %[[VAL_32:.*]] = %[[VAL_25]], %[[VAL_33:.*]] = %[[VAL_26]], %[[VAL_34:.*]] = %[[VAL_27]]) -> (i32, i32, i32, f32, i1) {
+// CHECK:                 %[[VAL_35:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?x!fir.logical<4>>, index, index, index) -> !fir.logical<4>
+// CHECK:                 %[[VAL_36:.*]] = fir.convert %[[VAL_35]] : (!fir.logical<4>) -> i1
+// CHECK:                 %[[VAL_37:.*]]:5 = fir.if %[[VAL_36]] -> (i32, i32, i32, f32, i1) {
+// CHECK:                   %[[VAL_38:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_29]], %[[VAL_22]], %[[VAL_15]] : (!hlfir.expr<?x?x?xf32>, index, index, index) -> f32
+// CHECK:                   %[[VAL_39:.*]] = arith.cmpf olt, %[[VAL_38]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_40:.*]] = arith.cmpf une, %[[VAL_33]], %[[VAL_33]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_41:.*]] = arith.cmpf oeq, %[[VAL_38]], %[[VAL_38]] fastmath<reassoc> : f32
+// CHECK:                   %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1
+// CHECK:                   %[[VAL_43:.*]] = arith.ori %[[VAL_39]], %[[VAL_42]] : i1
+// CHECK:                   %[[VAL_44:.*]] = arith.ori %[[VAL_43]], %[[VAL_34]] : i1
+// CHECK:                   %[[VAL_45:.*]] = fir.convert %[[VAL_29]] : (index) -> i32
+// CHECK:                   %[[VAL_46:.*]] = arith.select %[[VAL_44]], %[[VAL_45]], %[[VAL_30]] : i32
+// CHECK:                   %[[VAL_47:.*]] = fir.convert %[[VAL_22]] : (index) -> i32
+// CHECK:                   %[[VAL_48:.*]] = arith.select %[[VAL_44]], %[[VAL_47]], %[[VAL_31]] : i32
+// CHECK:                   %[[VAL_49:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                   %[[VAL_50:.*]] = arith.select %[[VAL_44]], %[[VAL_49]], %[[VAL_32]] : i32
+// CHECK:                   %[[VAL_51:.*]] = arith.select %[[VAL_44]], %[[VAL_38]], %[[VAL_33]] : f32
+// CHECK:                   fir.result %[[VAL_46]], %[[VAL_48]], %[[VAL_50]], %[[VAL_51]], %[[VAL_4]] : i32, i32, i32, f32, i1
+// CHECK:                 } else {
+// CHECK:                   fir.result %[[VAL_30]], %[[VAL_31]], %[[VAL_32]], %[[VAL_33]], %[[VAL_34]] : i32, i32, i32, f32, i1
+// CHECK:                 }
+// CHECK:                 fir.result %[[VAL_52:.*]]#0, %[[VAL_52]]#1, %[[VAL_52]]#2, %[[VAL_52]]#3, %[[VAL_52]]#4 : i32, i32, i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_53:.*]]#0, %[[VAL_53]]#1, %[[VAL_53]]#2, %[[VAL_53]]#3, %[[VAL_53]]#4 : i32, i32, i32, f32, i1
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_54:.*]]#0, %[[VAL_54]]#1, %[[VAL_54]]#2, %[[VAL_54]]#3, %[[VAL_54]]#4 : i32, i32, i32, f32, i1
+// CHECK:           }
+// CHECK:           %[[VAL_55:.*]] = fir.convert %[[VAL_56:.*]]#0 : (i32) -> index
+// CHECK:           %[[VAL_57:.*]] = fir.convert %[[VAL_55]] : (index) -> i32
+// CHECK:           %[[VAL_58:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_5]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_57]] to %[[VAL_58]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_59:.*]] = fir.convert %[[VAL_56]]#1 : (i32) -> index
+// CHECK:           %[[VAL_60:.*]] = fir.convert %[[VAL_59]] : (index) -> i32
+// CHECK:           %[[VAL_61:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_3]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_60]] to %[[VAL_61]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_62:.*]] = fir.convert %[[VAL_56]]#2 : (i32) -> index
+// CHECK:           %[[VAL_63:.*]] = fir.convert %[[VAL_62]] : (index) -> i32
+// CHECK:           %[[VAL_64:.*]] = hlfir.designate %[[VAL_9]] (%[[VAL_2]])  : (!fir.ref<!fir.array<3xi32>>, index) -> !fir.ref<i32>
+// CHECK:           hlfir.assign %[[VAL_63]] to %[[VAL_64]] : i32, !fir.ref<i32>
+// CHECK:           %[[VAL_65:.*]] = hlfir.as_expr %[[VAL_9]] move %[[VAL_4]] : (!fir.ref<!fir.array<3xi32>>, i1) -> !hlfir.expr<3xi32>
+// CHECK:           return %[[VAL_65]] : !hlfir.expr<3xi32>
+// CHECK:         }
+
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> { // Partial MINLOC of a boxed rank-3 f32 array along DIM=2 with a boxed MASK.
+  %dim = arith.constant 2 : i32 // DIM operand: reduce along the second dimension, leaving a rank-2 result.
+  %0 = hlfir.minloc %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?x?xf32>>, i32, !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32>
+  return %0 : !hlfir.expr<?x?xi32>
+} // NOTE(review): the expected output for this test converts the first hlfir.elemental index, not the reduction-loop index, into the result coordinate; confirm this is intended for DIM=2.
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?xf32>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant false
+// CHECK:           %[[VAL_3:.*]] = arith.constant true
+// CHECK:           %[[VAL_4:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : i32
+// CHECK:           %[[VAL_6:.*]] = arith.constant 2 : index
+// CHECK:           %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_9:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_11:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_12:.*]] = fir.shape %[[VAL_9]]#1, %[[VAL_11]]#1 : (index, index) -> !fir.shape<2>
+// CHECK:           %[[VAL_13:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>) -> i1
+// CHECK:           %[[VAL_14:.*]] = hlfir.elemental %[[VAL_12]] unordered : (!fir.shape<2>) -> !hlfir.expr<?x?xi32> {
+// CHECK:           ^bb0(%[[VAL_15:.*]]: index, %[[VAL_16:.*]]: index):
+// CHECK:             %[[VAL_17:.*]]:3 = fir.do_loop %[[VAL_18:.*]] = %[[VAL_7]] to %[[VAL_10]]#1 step %[[VAL_7]] unordered iter_args(%[[VAL_19:.*]] = %[[VAL_5]], %[[VAL_20:.*]] = %[[VAL_4]], %[[VAL_21:.*]] = %[[VAL_3]]) -> (i32, f32, i1) {
+// CHECK:               %[[VAL_22:.*]] = fir.if %[[VAL_13]] -> (!fir.logical<4>) {
+// CHECK:                 %[[VAL_23:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_24:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_25:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_26:.*]] = arith.subi %[[VAL_23]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_27:.*]] = arith.addi %[[VAL_15]], %[[VAL_26]] : index
+// CHECK:                 %[[VAL_28:.*]] = arith.subi %[[VAL_24]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_29:.*]] = arith.addi %[[VAL_18]], %[[VAL_28]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.subi %[[VAL_25]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.addi %[[VAL_16]], %[[VAL_30]] : index
+// CHECK:                 %[[VAL_32:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_27]], %[[VAL_29]], %[[VAL_31]])  : (!fir.box<!fir.array<?x?x?x!fir.logical<4>>>, index, index, index) -> !fir.ref<!fir.logical<4>>
+// CHECK:                 %[[VAL_33:.*]] = fir.load %[[VAL_32]] : !fir.ref<!fir.logical<4>>
+// CHECK:                 fir.result %[[VAL_33]] : !fir.logical<4>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_34:.*]] = fir.convert %[[VAL_3]] : (i1) -> !fir.logical<4>
+// CHECK:                 fir.result %[[VAL_34]] : !fir.logical<4>
+// CHECK:               }
+// CHECK:               %[[VAL_35:.*]] = fir.convert %[[VAL_22]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_36:.*]]:3 = fir.if %[[VAL_35]] -> (i32, f32, i1) {
+// CHECK:                 %[[VAL_37:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_8]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_38:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_39:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_40:.*]] = arith.subi %[[VAL_37]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_41:.*]] = arith.addi %[[VAL_15]], %[[VAL_40]] : index
+// CHECK:                 %[[VAL_42:.*]] = arith.subi %[[VAL_38]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_43:.*]] = arith.addi %[[VAL_18]], %[[VAL_42]] : index
+// CHECK:                 %[[VAL_44:.*]] = arith.subi %[[VAL_39]]#0, %[[VAL_7]] : index
+// CHECK:                 %[[VAL_45:.*]] = arith.addi %[[VAL_16]], %[[VAL_44]] : index
+// CHECK:                 %[[VAL_46:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_41]], %[[VAL_43]], %[[VAL_45]])  : (!fir.box<!fir.array<?x?x?xf32>>, index, index, index) -> !fir.ref<f32>
+// CHECK:                 %[[VAL_47:.*]] = fir.load %[[VAL_46]] : !fir.ref<f32>
+// CHECK:                 %[[VAL_48:.*]] = arith.cmpf olt, %[[VAL_47]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_49:.*]] = arith.cmpf une, %[[VAL_20]], %[[VAL_20]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_50:.*]] = arith.cmpf oeq, %[[VAL_47]], %[[VAL_47]] fastmath<reassoc> : f32
+// CHECK:                 %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1
+// CHECK:                 %[[VAL_52:.*]] = arith.ori %[[VAL_48]], %[[VAL_51]] : i1
+// CHECK:                 %[[VAL_53:.*]] = arith.ori %[[VAL_52]], %[[VAL_21]] : i1
+// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_19]] : i32
+// CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_53]], %[[VAL_47]], %[[VAL_20]] : f32
+// CHECK:                 fir.result %[[VAL_55]], %[[VAL_56]], %[[VAL_2]] : i32, f32, i1
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_19]], %[[VAL_20]], %[[VAL_21]] : i32, f32, i1
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_57:.*]]#0, %[[VAL_57]]#1, %[[VAL_57]]#2 : i32, f32, i1
+// CHECK:             }
+// CHECK:             %[[VAL_58:.*]] = arith.cmpi ne, %[[VAL_59:.*]]#0, %[[VAL_5]] : i32
+// CHECK:             %[[VAL_60:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_7]] : (!fir.box<!fir.array<?x?x?xf32>>, index) -> (index, index, index)
+// CHECK:             %[[VAL_61:.*]] = fir.convert %[[VAL_59]]#0 : (i32) -> index
+// CHECK:             %[[VAL_62:.*]] = arith.addi %[[VAL_61]], %[[VAL_60]]#0 : index
+// CHECK:             %[[VAL_63:.*]] = arith.subi %[[VAL_62]], %[[VAL_7]] : index
+// CHECK:             %[[VAL_64:.*]] = arith.select %[[VAL_58]], %[[VAL_63]], %[[VAL_61]] : index
+// CHECK:             %[[VAL_65:.*]] = fir.convert %[[VAL_64]] : (index) -> i32
+// CHECK:             hlfir.yield_element %[[VAL_65]] : i32
+// CHECK:           }
+// CHECK:           return %[[VAL_14]] : !hlfir.expr<?x?xi32>
+// CHECK:         }
+
+// Character comparisons are not supported yet, so hlfir.minloc on a character array is expected to be left in place.
+func.func @test_character(%input: !fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32> {
+  %0 = hlfir.minloc %input : (!fir.box<!fir.array<?x!fir.char<1>>>) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_character(
+// CHECK:           hlfir.minloc
+
+// BACK is not supported yet, so hlfir.minloc with a BACK operand is expected to be left in place.
+func.func @test_back(%input: !hlfir.expr<?xi32>) -> !hlfir.expr<1xi32> {
+  %back = arith.constant true // BACK operand: constant true.
+  %0 = hlfir.minloc %input back %back : (!hlfir.expr<?xi32>, i1) -> !hlfir.expr<1xi32>
+  return %0 : !hlfir.expr<1xi32>
+}
+// CHECK-LABEL:   func.func @test_back(
+// CHECK:           hlfir.minloc
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
new file mode 100644
index 0000000000000..4278ac7abd457
--- /dev/null
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minval.fir
@@ -0,0 +1,177 @@
+// RUN: fir-opt %s --simplify-hlfir-intrinsics | FileCheck %s
+
+func.func @test_total_expr(%input: !hlfir.expr<?x?xf32>, %mask: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+  %0 = hlfir.minval %input mask %mask {fastmath = #arith.fastmath<contract>} : (!hlfir.expr<?x?xf32>, !hlfir.expr<?x?x!fir.logical<4>>) -> f32
+  return %0 : f32
+}
+// CHECK-LABEL:   func.func @test_total_expr(
+// CHECK-SAME:                               %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf32>,
+// CHECK-SAME:                               %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> f32 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 3.40282347E+38 : f32
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf32>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_2]] to %[[VAL_6]] step %[[VAL_2]] iter_args(%[[VAL_9:.*]] = %[[VAL_3]]) -> (f32) {
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_14]] -> (f32) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_8]] : (!hlfir.expr<?x?xf32>, index, index) -> f32
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<contract> : f32
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_12]] : f32
+// CHECK:                 fir.result %[[VAL_22]] : f32
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]] : f32
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]] : f32
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]] : f32
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : f32
+// CHECK:         }
+
+func.func @test_partial_expr(%input: !hlfir.expr<?x?xf64>, %mask: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+  %dim = arith.constant 1 : i32
+  %0 = hlfir.minval %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!hlfir.expr<?x?xf64>, i32, !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64>
+  return %0 : !hlfir.expr<?xf64>
+}
+// CHECK-LABEL:   func.func @test_partial_expr(
+// CHECK-SAME:                                 %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?xf64>,
+// CHECK-SAME:                                 %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !hlfir.expr<?x?x!fir.logical<4>>) -> !hlfir.expr<?xf64> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1.7976931348623157E+308 : f64
+// CHECK:           %[[VAL_4:.*]] = hlfir.shape_of %[[VAL_0]] : (!hlfir.expr<?x?xf64>) -> !fir.shape<2>
+// CHECK:           %[[VAL_5:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 0 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_6:.*]] = hlfir.get_extent %[[VAL_4]] {dim = 1 : index} : (!fir.shape<2>) -> index
+// CHECK:           %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_8:.*]] = hlfir.elemental %[[VAL_7]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf64> {
+// CHECK:           ^bb0(%[[VAL_9:.*]]: index):
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_2]] to %[[VAL_5]] step %[[VAL_2]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_3]]) -> (f64) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.apply %[[VAL_1]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?x!fir.logical<4>>, index, index) -> !fir.logical<4>
+// CHECK:               %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (!fir.logical<4>) -> i1
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_14]] -> (f64) {
+// CHECK:                 %[[VAL_16:.*]] = hlfir.apply %[[VAL_0]], %[[VAL_11]], %[[VAL_9]] : (!hlfir.expr<?x?xf64>, index, index) -> f64
+// CHECK:                 %[[VAL_17:.*]] = arith.cmpf olt, %[[VAL_16]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_18:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_19:.*]] = arith.cmpf oeq, %[[VAL_16]], %[[VAL_16]] fastmath<reassoc> : f64
+// CHECK:                 %[[VAL_20:.*]] = arith.andi %[[VAL_18]], %[[VAL_19]] : i1
+// CHECK:                 %[[VAL_21:.*]] = arith.ori %[[VAL_17]], %[[VAL_20]] : i1
+// CHECK:                 %[[VAL_22:.*]] = arith.select %[[VAL_21]], %[[VAL_16]], %[[VAL_12]] : f64
+// CHECK:                 fir.result %[[VAL_22]] : f64
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]] : f64
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_15]] : f64
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_10]] : f64
+// CHECK:           }
+// CHECK:           return %[[VAL_8]] : !hlfir.expr<?xf64>
+// CHECK:         }
+
+func.func @test_total_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+  %0 = hlfir.minval %input mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16
+  return %0 : f16
+}
+// CHECK-LABEL:   func.func @test_total_var(
+// CHECK-SAME:                              %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                              %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.ref<!fir.array<2x2x!fir.logical<1>>>) -> f16 {
+// CHECK:           %[[VAL_2:.*]] = arith.constant 6.550400e+04 : f16
+// CHECK:           %[[VAL_3:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]] = fir.do_loop %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_6]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_9:.*]] = %[[VAL_2]]) -> (f16) {
+// CHECK:             %[[VAL_10:.*]] = fir.do_loop %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_5]]#1 step %[[VAL_3]] unordered iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f16) {
+// CHECK:               %[[VAL_13:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_11]], %[[VAL_8]])  : (!fir.ref<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_14:.*]] = fir.load %[[VAL_13]] : !fir.ref<!fir.logical<1>>
+// CHECK:               %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_16:.*]] = fir.if %[[VAL_15]] -> (f16) {
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_19:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.addi %[[VAL_11]], %[[VAL_19]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.subi %[[VAL_18]]#0, %[[VAL_3]] : index
+// CHECK:                 %[[VAL_22:.*]] = arith.addi %[[VAL_8]], %[[VAL_21]] : index
+// CHECK:                 %[[VAL_23:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_20]], %[[VAL_22]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_24:.*]] = fir.load %[[VAL_23]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_25:.*]] = arith.cmpf olt, %[[VAL_24]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_26:.*]] = arith.cmpf une, %[[VAL_12]], %[[VAL_12]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_27:.*]] = arith.cmpf oeq, %[[VAL_24]], %[[VAL_24]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_28:.*]] = arith.andi %[[VAL_26]], %[[VAL_27]] : i1
+// CHECK:                 %[[VAL_29:.*]] = arith.ori %[[VAL_25]], %[[VAL_28]] : i1
+// CHECK:                 %[[VAL_30:.*]] = arith.select %[[VAL_29]], %[[VAL_24]], %[[VAL_12]] : f16
+// CHECK:                 fir.result %[[VAL_30]] : f16
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_12]] : f16
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_16]] : f16
+// CHECK:             }
+// CHECK:             fir.result %[[VAL_10]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_7]] : f16
+// CHECK:         }
+
+func.func @test_partial_var(%input: !fir.box<!fir.array<?x?xf16>>, %mask: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+  %dim = arith.constant 2 : i32
+  %0 = hlfir.minval %input dim %dim mask %mask {fastmath = #arith.fastmath<reassoc>} : (!fir.box<!fir.array<?x?xf16>>, i32, !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16>
+  return %0 : !hlfir.expr<?xf16>
+}
+// CHECK-LABEL:   func.func @test_partial_var(
+// CHECK-SAME:                                %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<?x?xf16>>,
+// CHECK-SAME:                                %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !fir.box<!fir.array<2x2x!fir.logical<1>>>) -> !hlfir.expr<?xf16> {
+// CHECK:           %[[VAL_2:.*]] = arith.constant true
+// CHECK:           %[[VAL_3:.*]] = arith.constant 6.550400e+04 : f16
+// CHECK:           %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK:           %[[VAL_5:.*]] = arith.constant 0 : index
+// CHECK:           %[[VAL_6:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_7:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:           %[[VAL_8:.*]] = fir.shape %[[VAL_6]]#1 : (index) -> !fir.shape<1>
+// CHECK:           %[[VAL_9:.*]] = fir.is_present %[[VAL_1]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>) -> i1
+// CHECK:           %[[VAL_10:.*]] = hlfir.elemental %[[VAL_8]] unordered : (!fir.shape<1>) -> !hlfir.expr<?xf16> {
+// CHECK:           ^bb0(%[[VAL_11:.*]]: index):
+// CHECK:             %[[VAL_12:.*]] = fir.do_loop %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_7]]#1 step %[[VAL_4]] unordered iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (f16) {
+// CHECK:               %[[VAL_15:.*]] = fir.if %[[VAL_9]] -> (!fir.logical<1>) {
+// CHECK:                 %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_5]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_17:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_4]] : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_18:.*]] = arith.subi %[[VAL_16]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_19:.*]] = arith.addi %[[VAL_11]], %[[VAL_18]] : index
+// CHECK:                 %[[VAL_20:.*]] = arith.subi %[[VAL_17]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_21:.*]] = arith.addi %[[VAL_13]], %[[VAL_20]] : index
+// CHECK:                 %[[VAL_22:.*]] = hlfir.designate %[[VAL_1]] (%[[VAL_19]], %[[VAL_21]])  : (!fir.box<!fir.array<2x2x!fir.logical<1>>>, index, index) -> !fir.ref<!fir.logical<1>>
+// CHECK:                 %[[VAL_23:.*]] = fir.load %[[VAL_22]] : !fir.ref<!fir.logical<1>>
+// CHECK:                 fir.result %[[VAL_23]] : !fir.logical<1>
+// CHECK:               } else {
+// CHECK:                 %[[VAL_24:.*]] = fir.convert %[[VAL_2]] : (i1) -> !fir.logical<1>
+// CHECK:                 fir.result %[[VAL_24]] : !fir.logical<1>
+// CHECK:               }
+// CHECK:               %[[VAL_25:.*]] = fir.convert %[[VAL_15]] : (!fir.logical<1>) -> i1
+// CHECK:               %[[VAL_26:.*]] = fir.if %[[VAL_25]] -> (f16) {
+// CHECK:                 %[[VAL_27:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_5]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_28:.*]]:3 = fir.box_dims %[[VAL_0]], %[[VAL_4]] : (!fir.box<!fir.array<?x?xf16>>, index) -> (index, index, index)
+// CHECK:                 %[[VAL_29:.*]] = arith.subi %[[VAL_27]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_30:.*]] = arith.addi %[[VAL_11]], %[[VAL_29]] : index
+// CHECK:                 %[[VAL_31:.*]] = arith.subi %[[VAL_28]]#0, %[[VAL_4]] : index
+// CHECK:                 %[[VAL_32:.*]] = arith.addi %[[VAL_13]], %[[VAL_31]] : index
+// CHECK:                 %[[VAL_33:.*]] = hlfir.designate %[[VAL_0]] (%[[VAL_30]], %[[VAL_32]])  : (!fir.box<!fir.array<?x?xf16>>, index, index) -> !fir.ref<f16>
+// CHECK:                 %[[VAL_34:.*]] = fir.load %[[VAL_33]] : !fir.ref<f16>
+// CHECK:                 %[[VAL_35:.*]] = arith.cmpf olt, %[[VAL_34]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_36:.*]] = arith.cmpf une, %[[VAL_14]], %[[VAL_14]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_37:.*]] = arith.cmpf oeq, %[[VAL_34]], %[[VAL_34]] fastmath<reassoc> : f16
+// CHECK:                 %[[VAL_38:.*]] = arith.andi %[[VAL_36]], %[[VAL_37]] : i1
+// CHECK:                 %[[VAL_39:.*]] = arith.ori %[[VAL_35]], %[[VAL_38]] : i1
+// CHECK:                 %[[VAL_40:.*]] = arith.select %[[VAL_39]], %[[VAL_34]], %[[VAL_14]] : f16
+// CHECK:                 fir.result %[[VAL_40]] : f16
+// CHECK:               } else {
+// CHECK:                 fir.result %[[VAL_14]] : f16
+// CHECK:               }
+// CHECK:               fir.result %[[VAL_26]] : f16
+// CHECK:             }
+// CHECK:             hlfir.yield_element %[[VAL_12]] : f16
+// CHECK:           }
+// CHECK:           return %[[VAL_10]] : !hlfir.expr<?xf16>
+// CHECK:         }

>From b1101ba186c303ebb0dfd26bb286cd237fcb0db9 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Wed, 16 Apr 2025 21:22:40 -0700
Subject: [PATCH 2/3] Fixed the coordinate update for MAXLOC/MINLOC.

In case of a partial reduction, the new coordinate is the one-based index
of the DIM dimension, i.e. oneBasedIndices[DIM - 1] in the C++ code.
This is the induction variable (IV) of the loop over the DIM dimension.
---
 .../HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp   | 14 ++++++++++++--
 .../HLFIR/simplify-hlfir-intrinsics-maxloc.fir     |  2 +-
 .../HLFIR/simplify-hlfir-intrinsics-minloc.fir     |  2 +-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 5614474767565..1ecd9a4294cb4 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -541,10 +541,20 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
   cmp = builder.create<mlir::arith::OrIOp>(loc, cmp, getIsFirst(currentValue));
 
   llvm::SmallVector<mlir::Value, maxNumReductions> newIndices;
+  int64_t dim = 1;
+  if (!isTotalReduction()) {
+    auto dimVal = this->getConstDim();
+    assert(mlir::succeeded(dimVal) &&
+           "partial MINLOC/MAXLOC reduction with invalid DIM");
+    dim = *dimVal;
+    assert(getNumCoors() == 1 &&
+           "partial MAXLOC/MINLOC reduction must compute one coordinate");
+  }
+
   for (unsigned coorIdx = 0; coorIdx < getNumCoors(); ++coorIdx) {
     mlir::Value currentCoor = currentValue[coorIdx];
-    mlir::Value newCoor = builder.createConvert(loc, currentCoor.getType(),
-                                                oneBasedIndices[coorIdx]);
+    mlir::Value newCoor = builder.createConvert(
+        loc, currentCoor.getType(), oneBasedIndices[coorIdx + dim - 1]);
     mlir::Value update =
         builder.create<mlir::arith::SelectOp>(loc, cmp, newCoor, currentCoor);
     newIndices.push_back(update);
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
index de631e0f208d2..49e3bb6936338 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-maxloc.fir
@@ -304,7 +304,7 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir
 // CHECK:                 %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1
 // CHECK:                 %[[VAL_52:.*]] = arith.ori %[[VAL_48]], %[[VAL_51]] : i1
 // CHECK:                 %[[VAL_53:.*]] = arith.ori %[[VAL_52]], %[[VAL_21]] : i1
-// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_18]] : (index) -> i32
 // CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_19]] : i32
 // CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_53]], %[[VAL_47]], %[[VAL_20]] : f32
 // CHECK:                 fir.result %[[VAL_55]], %[[VAL_56]], %[[VAL_2]] : i32, f32, i1
diff --git a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
index 161cb99486622..96dcebaf9a2cd 100644
--- a/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
+++ b/flang/test/HLFIR/simplify-hlfir-intrinsics-minloc.fir
@@ -304,7 +304,7 @@ func.func @test_partial_var(%input: !fir.box<!fir.array<?x?x?xf32>>, %mask: !fir
 // CHECK:                 %[[VAL_51:.*]] = arith.andi %[[VAL_49]], %[[VAL_50]] : i1
 // CHECK:                 %[[VAL_52:.*]] = arith.ori %[[VAL_48]], %[[VAL_51]] : i1
 // CHECK:                 %[[VAL_53:.*]] = arith.ori %[[VAL_52]], %[[VAL_21]] : i1
-// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_15]] : (index) -> i32
+// CHECK:                 %[[VAL_54:.*]] = fir.convert %[[VAL_18]] : (index) -> i32
 // CHECK:                 %[[VAL_55:.*]] = arith.select %[[VAL_53]], %[[VAL_54]], %[[VAL_19]] : i32
 // CHECK:                 %[[VAL_56:.*]] = arith.select %[[VAL_53]], %[[VAL_47]], %[[VAL_20]] : f32
 // CHECK:                 fir.result %[[VAL_55]], %[[VAL_56]], %[[VAL_2]] : i32, f32, i1

>From b2a283322d1f02e0fce20e8c901b536f9d303479 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Thu, 17 Apr 2025 12:23:08 -0700
Subject: [PATCH 3/3] Switched to virtual interface.

---
 .../Transforms/SimplifyHLFIRIntrinsics.cpp    | 290 ++++++++----------
 1 file changed, 122 insertions(+), 168 deletions(-)

diff --git a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
index 1ecd9a4294cb4..d660cf3845f9a 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/SimplifyHLFIRIntrinsics.cpp
@@ -173,14 +173,13 @@ class TransposeAsElementalConversion
   }
 };
 
-/// CRTP class for converting reduction-like operations into
+/// Base class for converting reduction-like operations into
 /// a reduction loop[-nest] optionally wrapped into hlfir.elemental.
 /// It is used to handle operations produced for ALL, ANY, COUNT,
 /// MAXLOC, MAXVAL, MINLOC, MINVAL, SUM intrinsics.
 ///
 /// All of these operations take an input array, and optional
 /// dim, mask arguments. ALL, ANY, COUNT do not have mask argument.
-template <typename T>
 class ReductionAsElementalConverter {
 public:
   ReductionAsElementalConverter(mlir::Operation *op,
@@ -188,17 +187,13 @@ class ReductionAsElementalConverter {
       : op{op}, rewriter{rewriter}, loc{op->getLoc()}, builder{rewriter, op} {
     assert(op->getNumResults() == 1);
   }
+  virtual ~ReductionAsElementalConverter() {}
 
   /// Do the actual conversion or return mlir::failure(),
   /// if conversion is not possible.
   mlir::LogicalResult convert();
 
 private:
-  /// Return an instance of the derived class that implements
-  /// the interface.
-  T &impl() { return *static_cast<T *>(this); }
-  const T &impl() const { return *static_cast<const T *>(this); }
-
   // Return fir.shape specifying the shape of the result
   // of a reduction with DIM=dimVal. The second return value
   // is the extent of the DIM dimension.
@@ -217,47 +212,35 @@ class ReductionAsElementalConverter {
                            mlir::ValueRange indices);
 
 protected:
-  // Methods below must be implemented by the derived type.
-
   /// Return the input array.
-  mlir::Value getSource() const {
-    llvm_unreachable("derived type must provide getSource()");
-  }
+  virtual mlir::Value getSource() const = 0;
 
   /// Return DIM or nullptr, if it is not present.
-  mlir::Value getDim() const {
-    llvm_unreachable("derived type must provide getDim()");
-  }
+  virtual mlir::Value getDim() const = 0;
 
   /// Return MASK or nullptr, if it is not present.
-  mlir::Value getMask() const {
-    llvm_unreachable("derived type must provide getMask()");
-  }
+  virtual mlir::Value getMask() const { return nullptr; }
 
   /// Return FastMathFlags attached to the operation
   /// or arith::FastMathFlags::none, if the operation
   /// does not support FastMathFlags (e.g. ALL, ANY, COUNT).
-  mlir::arith::FastMathFlags getFastMath() const {
-    llvm_unreachable("derived type must provide getFastMath()");
+  virtual mlir::arith::FastMathFlags getFastMath() const {
+    return mlir::arith::FastMathFlags::none;
   }
 
   /// Generates initial values for the reduction values used
   /// by the reduction loop. In general, there is a single
   /// loop-carried reduction value (e.g. for SUM), but, for example,
   /// MAXLOC/MINLOC implementation uses multiple reductions.
-  llvm::SmallVector<mlir::Value> genReductionInitValues() {
-    llvm_unreachable("derived type must provide genReductionInitValues()");
-  }
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() = 0;
 
   /// Perform reduction(s) update given a single input array's element
   /// identified by \p array and \p oneBasedIndices coordinates.
   /// \p currentValue specifies the current value(s) of the reduction(s)
   /// inside the reduction loop body.
-  llvm::SmallVector<mlir::Value>
+  virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
-                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
-    llvm_unreachable("derived type must provide reduceOneElement()");
-  }
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) = 0;
 
   /// Given reduction value(s) in \p reductionResults produced
   /// by the reduction loop, apply any required updates and return
@@ -265,18 +248,19 @@ class ReductionAsElementalConverter {
   /// (e.g. as the result yield of the wrapping hlfir.elemental).
   /// NOTE: if the reduction loop is wrapped in hlfir.elemental,
   /// the insertion point of any generated code is inside hlfir.elemental.
-  hlfir::Entity
+  virtual hlfir::Entity
   genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
-    llvm_unreachable("derived type must provide genFinalResult()");
+    assert(reductionResults.size() == 1 &&
+           "default implementation of genFinalResult expect a single reduction "
+           "value");
+    return hlfir::Entity{reductionResults[0]};
   }
 
-  // Methods below may be shadowed by the derived type.
-
   /// Return mlir::success(), if the operation can be converted.
   /// The default implementation always returns mlir::success().
-  /// The derived type may shadow the default implementation
+  /// The derived type may override the default implementation
   /// with its own definition.
-  mlir::LogicalResult isConvertible() const { return mlir::success(); }
+  virtual mlir::LogicalResult isConvertible() const { return mlir::success(); }
 
   // Default implementation of isTotalReduction() just checks
   // if the result of the operation is a scalar.
@@ -286,25 +270,22 @@ class ReductionAsElementalConverter {
   // operation with a single reduction loop across the DIM dimension.
   //
   // MAXLOC/MINLOC must override this.
-  bool isTotalReduction() const { return getResultRank() == 0; }
+  virtual bool isTotalReduction() const { return getResultRank() == 0; }
 
   // Return true, if the reduction loop[-nest] may be unordered.
   // In general, FP reductions may only be unordered when
   // FastMathFlags::reassoc transformations are allowed.
   //
   // Some dervied types may need to override this.
-  bool isUnordered() const {
+  virtual bool isUnordered() const {
     mlir::Type elemType = getSourceElementType();
     if (mlir::isa<mlir::IntegerType, fir::LogicalType, fir::CharacterType>(
             elemType))
       return true;
-    return static_cast<bool>(impl().getFastMath() &
+    return static_cast<bool>(getFastMath() &
                              mlir::arith::FastMathFlags::reassoc);
   }
 
-  // Methods below are utilities that are not supposed to be
-  // overridden by the derived type.
-
   /// Return 0, if DIM is not present or its values does not matter
   /// (for example, a reduction of 1D array does not care about
   /// the DIM value, assuming that it is a valid program).
@@ -313,11 +294,11 @@ class ReductionAsElementalConverter {
   /// Otherwise, return DIM constant value.
   mlir::FailureOr<int64_t> getConstDim() const {
     int64_t dimVal = 0;
-    if (!impl().isTotalReduction()) {
+    if (!isTotalReduction()) {
       // In case of partial reduction we should ignore the operations
       // with invalid DIM values. They may appear in dead code
       // after constant propagation.
-      auto constDim = fir::getIntIfConstant(impl().getDim());
+      auto constDim = fir::getIntIfConstant(getDim());
       if (!constDim)
         return rewriter.notifyMatchFailure(op, "Nonconstant DIM");
       dimVal = *constDim;
@@ -345,14 +326,14 @@ class ReductionAsElementalConverter {
   /// Return rank of the result.
   unsigned getResultRank() const { return getResultEntity().getRank(); }
 
-  /// Return the element type of the result.
+  /// Return the element type of the source.
   mlir::Type getSourceElementType() const {
-    return hlfir::getFortranElementType(impl().getSource().getType());
+    return hlfir::getFortranElementType(getSource().getType());
   }
 
   /// Return rank of the input array.
   unsigned getSourceRank() const {
-    return hlfir::Entity{impl().getSource()}.getRank();
+    return hlfir::Entity{getSource()}.getRank();
   }
 
   /// The reduction operation.
@@ -418,8 +399,7 @@ genMinMaxComparison(mlir::Location loc, fir::FirOpBuilder &builder,
 /// Implementation of ReductionAsElementalConverter interface
 /// for MAXLOC/MINLOC.
 template <typename T>
-class MinMaxlocAsElementalConverter
-    : public ReductionAsElementalConverter<MinMaxlocAsElementalConverter<T>> {
+class MinMaxlocAsElementalConverter : public ReductionAsElementalConverter {
   static_assert(std::is_same_v<T, hlfir::MaxlocOp> ||
                 std::is_same_v<T, hlfir::MinlocOp>);
   static constexpr unsigned maxRank = Fortran::common::maxRank;
@@ -432,26 +412,26 @@ class MinMaxlocAsElementalConverter
   //     the mask is true.
   static constexpr unsigned maxNumReductions = Fortran::common::maxRank + 2;
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxlocOp>;
-  using Base =
-      typename MinMaxlocAsElementalConverter<T>::ReductionAsElementalConverter;
+  using Base = ReductionAsElementalConverter;
 
 public:
   MinMaxlocAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
       : Base{op.getOperation(), rewriter} {}
 
-  mlir::Value getSource() const { return getOp().getArray(); }
-  mlir::Value getDim() const { return getOp().getDim(); }
-  mlir::Value getMask() const { return getOp().getMask(); }
-  mlir::arith::FastMathFlags getFastMath() const {
+private:
+  virtual mlir::Value getSource() const final { return getOp().getArray(); }
+  virtual mlir::Value getDim() const final { return getOp().getDim(); }
+  virtual mlir::Value getMask() const final { return getOp().getMask(); }
+  virtual mlir::arith::FastMathFlags getFastMath() const final {
     return getOp().getFastmath();
   }
 
-  mlir::LogicalResult isConvertible() const {
+  virtual mlir::LogicalResult isConvertible() const final {
     if (getOp().getBack())
-      return this->rewriter.notifyMatchFailure(
+      return rewriter.notifyMatchFailure(
           getOp(), "BACK is not supported for MINLOC/MAXLOC inlining");
-    if (mlir::isa<fir::CharacterType>(this->getSourceElementType()))
-      return this->rewriter.notifyMatchFailure(
+    if (mlir::isa<fir::CharacterType>(getSourceElementType()))
+      return rewriter.notifyMatchFailure(
           getOp(),
           "CHARACTER type is not supported for MINLOC/MAXLOC inlining");
     return mlir::success();
@@ -460,22 +440,22 @@ class MinMaxlocAsElementalConverter
   // If the result is scalar, then DIM does not matter,
   // and this is a total reduction.
   // If DIM is not present, this is a total reduction.
-  bool isTotalReduction() const {
-    return this->getResultRank() == 0 || !getDim();
+  virtual bool isTotalReduction() const final {
+    return getResultRank() == 0 || !getDim();
   }
 
-  llvm::SmallVector<mlir::Value, maxNumReductions> genReductionInitValues();
-  llvm::SmallVector<mlir::Value, maxNumReductions>
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final;
+  virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
-                   hlfir::Entity array, mlir::ValueRange oneBasedIndices);
-  hlfir::Entity
-  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults);
+                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) final;
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionResults) final;
 
 private:
-  T getOp() const { return mlir::cast<T>(this->op); }
+  T getOp() const { return mlir::cast<T>(op); }
 
   unsigned getNumCoors() const {
-    return isTotalReduction() ? this->getSourceRank() : 1;
+    return isTotalReduction() ? getSourceRank() : 1;
   }
 
   void
@@ -498,20 +478,17 @@ class MinMaxlocAsElementalConverter
 };
 
 template <typename T>
-llvm::SmallVector<mlir::Value,
-                  MinMaxlocAsElementalConverter<T>::maxNumReductions>
+llvm::SmallVector<mlir::Value>
 MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
-  fir::FirOpBuilder &builder = this->builder;
-  mlir::Location loc = this->loc;
   // Initial value for the coordinate(s) is zero.
   mlir::Value zeroCoor =
-      fir::factory::createZeroValue(builder, loc, this->getResultElementType());
+      fir::factory::createZeroValue(builder, loc, getResultElementType());
   llvm::SmallVector<mlir::Value, maxNumReductions> result(getNumCoors(),
                                                           zeroCoor);
 
   // Initial value for the MIN/MAX value.
   mlir::Value minMaxInit =
-      genMinMaxInitValue<isMax>(loc, builder, this->getSourceElementType());
+      genMinMaxInitValue<isMax>(loc, builder, getSourceElementType());
   result.push_back(minMaxInit);
 
   // Initial value for isFirst predicate. It is switched to false,
@@ -524,14 +501,11 @@ MinMaxlocAsElementalConverter<T>::genReductionInitValues() {
 }
 
 template <typename T>
-llvm::SmallVector<mlir::Value,
-                  MinMaxlocAsElementalConverter<T>::maxNumReductions>
+llvm::SmallVector<mlir::Value>
 MinMaxlocAsElementalConverter<T>::reduceOneElement(
     const llvm::SmallVectorImpl<mlir::Value> &currentValue, hlfir::Entity array,
     mlir::ValueRange oneBasedIndices) {
   checkReductions(currentValue);
-  fir::FirOpBuilder &builder = this->builder;
-  mlir::Location loc = this->loc;
   hlfir::Entity elementValue =
       hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
   mlir::Value cmp = genMinMaxComparison<isMax>(loc, builder, elementValue,
@@ -543,7 +517,7 @@ MinMaxlocAsElementalConverter<T>::reduceOneElement(
   llvm::SmallVector<mlir::Value, maxNumReductions> newIndices;
   int64_t dim = 1;
   if (!isTotalReduction()) {
-    auto dimVal = this->getConstDim();
+    auto dimVal = getConstDim();
     assert(mlir::succeeded(dimVal) &&
            "partial MINLOC/MAXLOC reduction with invalid DIM");
     dim = *dimVal;
@@ -583,8 +557,6 @@ hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult(
   //     - The result is an array of rank RANK(ARRAY)-1.
   checkReductions(reductionResults);
 
-  fir::FirOpBuilder &builder = this->builder;
-  mlir::Location loc = this->loc;
   // We need to adjust the one-based indices to real array indices.
   // The adjustment must only be done, if there was an actual update
   // of the coordinates in the reduction loop. For this check we only
@@ -609,14 +581,14 @@ hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult(
 
   // For partial reductions, the final result of the reduction
   // loop is just a scalar - the coordinate within DIM dimension.
-  if (this->getResultRank() == 0 || !isTotalReduction()) {
+  if (getResultRank() == 0 || !isTotalReduction()) {
     // The result is a scalar, so just return the scalar.
     assert(getNumCoors() == 1 &&
            "unpexpected number of coordinates for scalar result");
 
     int64_t dim = 1;
     if (!isTotalReduction()) {
-      auto dimVal = this->getConstDim();
+      auto dimVal = getConstDim();
       assert(mlir::succeeded(dimVal) &&
              "partial MINLOC/MAXLOC reduction with invalid DIM");
       dim = *dimVal;
@@ -627,14 +599,14 @@ hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult(
   }
   // This is a total reduction, and there is no wrapping hlfir.elemental.
   // We have to pack the reduced coordinates into a rank-one array.
-  unsigned rank = this->getSourceRank();
+  unsigned rank = getSourceRank();
   // TODO: in order to avoid introducing new memory effects
   // we should not use a temporary in memory.
   // We can use hlfir.elemental with a switch to pack all the coordinates
   // into an array expression, or we can have a dedicated HLFIR operation
   // for this.
   mlir::Value tempArray = builder.createTemporary(
-      loc, fir::SequenceType::get(rank, this->getResultElementType()));
+      loc, fir::SequenceType::get(rank, getResultElementType()));
   llvm::SmallVector<mlir::Value, maxRank> arrayLbounds =
       hlfir::genLBounds(loc, builder, hlfir::Entity(getSource()));
   for (unsigned i = 0; i < rank; ++i) {
@@ -650,32 +622,24 @@ hlfir::Entity MinMaxlocAsElementalConverter<T>::genFinalResult(
 }
 
 /// Base class for numeric reductions like MAXVAL, MINVAL, SUM.
-template <typename OpT, typename ConverterT>
+template <typename OpT>
 class NumericReductionAsElementalConverterBase
-    : public ReductionAsElementalConverter<ConverterT> {
-  using Base = typename NumericReductionAsElementalConverterBase<
-      OpT, ConverterT>::ReductionAsElementalConverter;
+    : public ReductionAsElementalConverter {
+  using Base = ReductionAsElementalConverter;
 
-public:
+protected:
   NumericReductionAsElementalConverterBase(OpT op,
                                            mlir::PatternRewriter &rewriter)
       : Base{op.getOperation(), rewriter} {}
 
-  mlir::Value getSource() const { return getOp().getArray(); }
-  mlir::Value getDim() const { return getOp().getDim(); }
-  mlir::Value getMask() const { return getOp().getMask(); }
-  mlir::arith::FastMathFlags getFastMath() const {
+  virtual mlir::Value getSource() const final { return getOp().getArray(); }
+  virtual mlir::Value getDim() const final { return getOp().getDim(); }
+  virtual mlir::Value getMask() const final { return getOp().getMask(); }
+  virtual mlir::arith::FastMathFlags getFastMath() const final {
     return getOp().getFastmath();
   }
 
-  hlfir::Entity
-  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
-    checkReductions(reductionResults);
-    return hlfir::Entity{reductionResults[0]};
-  }
-
-protected:
-  OpT getOp() const { return mlir::cast<OpT>(this->op); }
+  OpT getOp() const { return mlir::cast<OpT>(op); }
 
   void checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) {
     assert(reductions.size() == 1 && "reduction must produce single value");
@@ -685,19 +649,18 @@ class NumericReductionAsElementalConverterBase
 /// Reduction converter for MAXVAL/MINVAL.
 template <typename T>
 class MinMaxvalAsElementalConverter
-    : public NumericReductionAsElementalConverterBase<
-          T, MinMaxvalAsElementalConverter<T>> {
+    : public NumericReductionAsElementalConverterBase<T> {
   static_assert(std::is_same_v<T, hlfir::MaxvalOp> ||
                 std::is_same_v<T, hlfir::MinvalOp>);
   static constexpr bool isMax = std::is_same_v<T, hlfir::MaxvalOp>;
-  using Base = typename MinMaxvalAsElementalConverter<
-      T>::NumericReductionAsElementalConverterBase;
+  using Base = NumericReductionAsElementalConverterBase<T>;
 
 public:
   MinMaxvalAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
       : Base{op, rewriter} {}
 
-  mlir::LogicalResult isConvertible() const {
+private:
+  virtual mlir::LogicalResult isConvertible() const final {
     if (mlir::isa<fir::CharacterType>(this->getSourceElementType()))
       return this->rewriter.notifyMatchFailure(
           this->getOp(),
@@ -705,13 +668,14 @@ class MinMaxvalAsElementalConverter
     return mlir::success();
   }
 
-  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
     return {genMinMaxInitValue<isMax>(this->loc, this->builder,
                                       this->getResultElementType())};
   }
-  llvm::SmallVector<mlir::Value, 1>
+  virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
-                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
     this->checkReductions(currentValue);
     fir::FirOpBuilder &builder = this->builder;
     mlir::Location loc = this->loc;
@@ -726,22 +690,22 @@ class MinMaxvalAsElementalConverter
 
 /// Reduction converter for SUM.
 class SumAsElementalConverter
-    : public NumericReductionAsElementalConverterBase<hlfir::SumOp,
-                                                      SumAsElementalConverter> {
-  using Base = typename SumAsElementalConverter::
-      NumericReductionAsElementalConverterBase;
+    : public NumericReductionAsElementalConverterBase<hlfir::SumOp> {
+  using Base = NumericReductionAsElementalConverterBase;
 
 public:
   SumAsElementalConverter(hlfir::SumOp op, mlir::PatternRewriter &rewriter)
       : Base{op, rewriter} {}
 
-  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
     return {
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
-  llvm::SmallVector<mlir::Value, 1>
+  virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
-                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
     checkReductions(currentValue);
     hlfir::Entity elementValue =
         hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
@@ -751,39 +715,34 @@ class SumAsElementalConverter
     return {genScalarAdd(currentValue[0], elementValue)};
   }
 
-private:
   // Generate scalar addition of the two values (of the same data type).
   mlir::Value genScalarAdd(mlir::Value value1, mlir::Value value2);
 };
 
 /// Base class for logical reductions like ALL, ANY, COUNT.
 /// They do not have MASK and FastMathFlags.
-template <typename OpT, typename ConverterT>
+template <typename OpT>
 class LogicalReductionAsElementalConverterBase
-    : public ReductionAsElementalConverter<ConverterT> {
-  using Base = typename LogicalReductionAsElementalConverterBase<
-      OpT, ConverterT>::ReductionAsElementalConverter;
+    : public ReductionAsElementalConverter {
+  using Base = ReductionAsElementalConverter;
 
 public:
   LogicalReductionAsElementalConverterBase(OpT op,
                                            mlir::PatternRewriter &rewriter)
       : Base{op.getOperation(), rewriter} {}
 
-  OpT getOp() const { return mlir::cast<OpT>(this->op); }
+protected:
+  OpT getOp() const { return mlir::cast<OpT>(op); }
 
   void checkReductions(const llvm::SmallVectorImpl<mlir::Value> &reductions) {
     assert(reductions.size() == 1 && "reduction must produce single value");
   }
 
-  mlir::Value getSource() const { return getOp().getMask(); }
-  mlir::Value getDim() const { return getOp().getDim(); }
-  mlir::Value getMask() const { return nullptr; }
-  mlir::arith::FastMathFlags getFastMath() const {
-    return mlir::arith::FastMathFlags::none;
-  }
+  virtual mlir::Value getSource() const final { return getOp().getMask(); }
+  virtual mlir::Value getDim() const final { return getOp().getDim(); }
 
-  hlfir::Entity
-  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionResults) {
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionResults) override {
     checkReductions(reductionResults);
     return hlfir::Entity{reductionResults[0]};
   }
@@ -792,24 +751,24 @@ class LogicalReductionAsElementalConverterBase
 /// Reduction converter for ALL/ANY.
 template <typename T>
 class AllAnyAsElementalConverter
-    : public LogicalReductionAsElementalConverterBase<
-          T, AllAnyAsElementalConverter<T>> {
+    : public LogicalReductionAsElementalConverterBase<T> {
   static_assert(std::is_same_v<T, hlfir::AllOp> ||
                 std::is_same_v<T, hlfir::AnyOp>);
   static constexpr bool isAll = std::is_same_v<T, hlfir::AllOp>;
-  using Base = typename AllAnyAsElementalConverter<
-      T>::LogicalReductionAsElementalConverterBase;
+  using Base = LogicalReductionAsElementalConverterBase<T>;
 
 public:
   AllAnyAsElementalConverter(T op, mlir::PatternRewriter &rewriter)
       : Base{op, rewriter} {}
 
-  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
     return {this->builder.createBool(this->loc, isAll ? true : false)};
   }
-  llvm::SmallVector<mlir::Value, 1>
+  virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
-                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
     this->checkReductions(currentValue);
     fir::FirOpBuilder &builder = this->builder;
     mlir::Location loc = this->loc;
@@ -823,8 +782,8 @@ class AllAnyAsElementalConverter
       return {builder.create<mlir::arith::OrIOp>(loc, mask, currentValue[0])};
   }
 
-  hlfir::Entity
-  genFinalResult(const llvm::SmallVectorImpl<mlir::Value> &reductionValues) {
+  virtual hlfir::Entity genFinalResult(
+      const llvm::SmallVectorImpl<mlir::Value> &reductionValues) final {
     this->checkReductions(reductionValues);
     return hlfir::Entity{this->builder.createConvert(
         this->loc, this->getResultElementType(), reductionValues[0])};
@@ -833,22 +792,22 @@ class AllAnyAsElementalConverter
 
 /// Reduction converter for COUNT.
 class CountAsElementalConverter
-    : public LogicalReductionAsElementalConverterBase<
-          hlfir::CountOp, CountAsElementalConverter> {
-  using Base = typename CountAsElementalConverter::
-      LogicalReductionAsElementalConverterBase;
+    : public LogicalReductionAsElementalConverterBase<hlfir::CountOp> {
+  using Base = LogicalReductionAsElementalConverterBase<hlfir::CountOp>;
 
 public:
   CountAsElementalConverter(hlfir::CountOp op, mlir::PatternRewriter &rewriter)
       : Base{op, rewriter} {}
 
-  llvm::SmallVector<mlir::Value, 1> genReductionInitValues() {
+private:
+  virtual llvm::SmallVector<mlir::Value> genReductionInitValues() final {
     return {
         fir::factory::createZeroValue(builder, loc, getResultElementType())};
   }
-  llvm::SmallVector<mlir::Value, 1>
+  virtual llvm::SmallVector<mlir::Value>
   reduceOneElement(const llvm::SmallVectorImpl<mlir::Value> &currentValue,
-                   hlfir::Entity array, mlir::ValueRange oneBasedIndices) {
+                   hlfir::Entity array,
+                   mlir::ValueRange oneBasedIndices) final {
     checkReductions(currentValue);
     hlfir::Entity elementValue =
         hlfir::loadElementAt(loc, builder, array, oneBasedIndices);
@@ -863,22 +822,21 @@ class CountAsElementalConverter
   }
 };
 
-template <typename T>
-mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
-  mlir::LogicalResult canConvert(impl().isConvertible());
+mlir::LogicalResult ReductionAsElementalConverter::convert() {
+  mlir::LogicalResult canConvert(isConvertible());
 
   if (mlir::failed(canConvert))
     return canConvert;
 
-  hlfir::Entity array = hlfir::Entity{impl().getSource()};
-  bool isTotalReduction = impl().isTotalReduction();
-  auto dimVal = impl().getConstDim();
+  hlfir::Entity array = hlfir::Entity{getSource()};
+  bool isTotalReduce = isTotalReduction();
+  auto dimVal = getConstDim();
   if (mlir::failed(dimVal))
     return dimVal;
-  mlir::Value mask = impl().getMask();
+  mlir::Value mask = getMask();
   mlir::Value resultShape, dimExtent;
   llvm::SmallVector<mlir::Value> arrayExtents;
-  if (isTotalReduction)
+  if (isTotalReduce)
     arrayExtents = hlfir::genExtentsVector(loc, builder, array);
   else
     std::tie(resultShape, dimExtent) =
@@ -906,10 +864,10 @@ mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
 
     // Initial value for the reduction.
     llvm::SmallVector<mlir::Value, 1> reductionInitValues =
-        impl().genReductionInitValues();
+        genReductionInitValues();
 
     llvm::SmallVector<mlir::Value> extents;
-    if (isTotalReduction)
+    if (isTotalReduce)
       extents = arrayExtents;
     else
       extents.push_back(
@@ -923,7 +881,7 @@ mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
       // The initial reduction value in the innermost loop
       // is passed via reductionArgs[0].
       llvm::SmallVector<mlir::Value> indices;
-      if (isTotalReduction) {
+      if (isTotalReduce) {
         indices = oneBasedIndices;
       } else {
         indices = inputIndices;
@@ -954,8 +912,7 @@ mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
         // In the 'then' block do the actual addition.
         builder.setInsertionPointToStart(&ifOp.getThenRegion().front());
       }
-      reductionValues =
-          impl().reduceOneElement(reductionValues, array, indices);
+      reductionValues = reduceOneElement(reductionValues, array, indices);
       if (ifOp) {
         builder.create<fir::ResultOp>(loc, reductionValues);
         builder.setInsertionPointAfter(ifOp);
@@ -968,20 +925,19 @@ mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
     llvm::SmallVector<mlir::Value, 1> reductionFinalValues =
         hlfir::genLoopNestWithReductions(
             loc, builder, extents, reductionInitValues, genBody, isUnordered());
-    return impl().genFinalResult(reductionFinalValues);
+    return genFinalResult(reductionFinalValues);
   };
 
-  if (isTotalReduction) {
+  if (isTotalReduce) {
     hlfir::Entity result = genKernel(loc, builder, mlir::ValueRange{});
     rewriter.replaceOp(op, result);
     return mlir::success();
   }
 
-  hlfir::ElementalOp elementalOp =
-      hlfir::genElementalOp(loc, builder, impl().getResultElementType(),
-                            resultShape, /*typeParams=*/{}, genKernel,
-                            /*isUnordered=*/true, /*polymorphicMold=*/nullptr,
-                            impl().getResultType());
+  hlfir::ElementalOp elementalOp = hlfir::genElementalOp(
+      loc, builder, getResultElementType(), resultShape, /*typeParams=*/{},
+      genKernel,
+      /*isUnordered=*/true, /*polymorphicMold=*/nullptr, getResultType());
 
   // it wouldn't be safe to replace block arguments with a different
   // hlfir.expr type. Types can differ due to differing amounts of shape
@@ -992,9 +948,8 @@ mlir::LogicalResult ReductionAsElementalConverter<T>::convert() {
   return mlir::success();
 }
 
-template <typename T>
 std::tuple<mlir::Value, mlir::Value>
-ReductionAsElementalConverter<T>::genResultShapeForPartialReduction(
+ReductionAsElementalConverter::genResultShapeForPartialReduction(
     hlfir::Entity array, int64_t dimVal) {
   llvm::SmallVector<mlir::Value> inExtents =
       hlfir::genExtentsVector(loc, builder, array);
@@ -1021,8 +976,7 @@ mlir::Value SumAsElementalConverter::genScalarAdd(mlir::Value value1,
   llvm_unreachable("unsupported SUM reduction type");
 }
 
-template <typename T>
-mlir::Value ReductionAsElementalConverter<T>::genMaskValue(
+mlir::Value ReductionAsElementalConverter::genMaskValue(
     mlir::Value mask, mlir::Value isPresentPred, mlir::ValueRange indices) {
   mlir::OpBuilder::InsertionGuard guard(builder);
   fir::IfOp ifOp;