[flang-commits] [flang] 8d672c0 - [flang] Implement IPARITY, PARITY, and FINDLOC reductions

Tue Apr 20 12:27:08 PDT 2021

Author: peter klausler
Date: 2021-04-20T12:25:42-07:00
New Revision: 8d672c0b3e70b060e215cbd13536a10b50f6fab2

URL: https://github.com/llvm/llvm-project/commit/8d672c0b3e70b060e215cbd13536a10b50f6fab2
DIFF: https://github.com/llvm/llvm-project/commit/8d672c0b3e70b060e215cbd13536a10b50f6fab2.diff

LOG: [flang] Implement IPARITY, PARITY, and FINDLOC reductions

Define APIs for, and implement, these three more recently-introduced
standard reduction transformational intrinsic functions to the runtime.

Differential Revision: https://reviews.llvm.org/D100863

Added: 
    

Modified: 
    flang/runtime/reduction.cpp
    flang/runtime/reduction.h
    flang/runtime/terminator.h
    flang/runtime/tools.h
    flang/unittests/RuntimeGTest/Reduction.cpp

Removed: 
    


################################################################################
diff  --git a/flang/runtime/reduction.cpp b/flang/runtime/reduction.cpp
index 1f4ed50251f9..50fbc5b6d6f6 100644

--- a/flang/runtime/reduction.cpp
+++ b/flang/runtime/reduction.cpp
@@ -6,9 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-// Implements ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL, PRODUCT, and SUM
-// for all required operand types and shapes and (for MAXLOC & MINLOC) kinds of
-// results.
+// Implements ALL, ANY, COUNT, FINDLOC, IPARITY, MAXLOC, MAXVAL, MINLOC, MINVAL,
+// PARITY, PRODUCT, and SUM for all required operand types and shapes and,
+// for FINDLOC, MAXLOC, & MINLOC, kinds of results.
 //
 // * Real and complex SUM reductions attempt to reduce floating-point
 //   cancellation on intermediate results by adding up partial sums
@@ -16,7 +16,7 @@
 // * Partial reductions (i.e., those with DIM= arguments that are not
 //   required to be 1 by the rank of the argument) return arrays that
 //   are dynamically allocated in a caller-supplied descriptor.
-// * Total reductions (i.e., no DIM= argument) with MAXLOC & MINLOC
+// * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
 //   return integer vectors of some kind, not scalars; a caller-supplied
 //   descriptor is used
 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
@@ -46,8 +46,8 @@ namespace Fortran::runtime {
 // member function that copies a final result into its destination.
 
 // Total reduction of the array argument to a scalar (or to a vector in the
-// cases of MAXLOC & MINLOC).  These are the cases without DIM= or cases
-// where the argument has rank 1 and DIM=, if present, must be 1.
+// cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
+// cases where the argument has rank 1 and DIM=, if present, must be 1.
 template <typename TYPE, typename ACCUMULATOR>
 inline void DoTotalReduction(const Descriptor &x, int dim,
     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
@@ -122,8 +122,7 @@ inline void GetExpandedSubscripts(SubscriptValue at[],
 
 template <typename TYPE, typename ACCUMULATOR>
 inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
-    SubscriptValue subscripts[], TYPE *result) {
-  ACCUMULATOR accumulator{x};
+    SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
   SubscriptValue xAt[maxRank];
   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
   const auto &dim{x.GetDimension(zeroBasedDim)};
@@ -143,8 +142,8 @@ inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
 
 template <typename TYPE, typename ACCUMULATOR>
 inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
-    SubscriptValue subscripts[], const Descriptor &mask, TYPE *result) {
-  ACCUMULATOR accumulator{x};
+    SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
+    ACCUMULATOR &accumulator) {
   SubscriptValue xAt[maxRank], maskAt[maxRank];
   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
@@ -201,7 +200,8 @@ static void CreatePartialReductionResult(Descriptor &result,
 
 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
 inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
-    const Descriptor *mask, Terminator &terminator, const char *intrinsic) {
+    const Descriptor *mask, Terminator &terminator, const char *intrinsic,
+    ACCUMULATOR &accumulator) {
   CreatePartialReductionResult(
       result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
   SubscriptValue at[maxRank];
@@ -213,13 +213,14 @@ inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
     SubscriptValue maskAt[maxRank]; // contents unused
     if (mask->rank() > 0) {
       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+        accumulator.Reinitialize();
         ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
-            x, dim - 1, at, *mask, result.Element<CppType>(at));
+            x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
       }
       return;
     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
       // scalar MASK=.FALSE.
-      ACCUMULATOR accumulator{x};
+      accumulator.Reinitialize();
       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
         accumulator.GetResult(result.Element<CppType>(at));
       }
@@ -228,11 +229,54 @@ inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
   }
   // No MASK= or scalar MASK=.TRUE.
   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+    accumulator.Reinitialize();
     ReduceDimToScalar<CppType, ACCUMULATOR>(
-        x, dim - 1, at, result.Element<CppType>(at));
+        x, dim - 1, at, result.Element<CppType>(at), accumulator);
   }
 }
 
+template <template <typename> class ACCUM>
+struct PartialIntegerReductionHelper {
+  template <int KIND> struct Functor {
+    static constexpr int Intermediate{
+        std::max(KIND, 4)}; // use at least "int" for intermediate results
+    void operator()(Descriptor &result, const Descriptor &x, int dim,
+        const Descriptor *mask, Terminator &terminator,
+        const char *intrinsic) const {
+      using Accumulator =
+          ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
+      Accumulator accumulator{x};
+      PartialReduction<Accumulator, TypeCategory::Integer, KIND>(
+          result, x, dim, mask, terminator, intrinsic, accumulator);
+    }
+  };
+};
+
+template <template <typename> class INTEGER_ACCUM>
+inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
+    int dim, int kind, const Descriptor *mask, const char *intrinsic,
+    Terminator &terminator) {
+  ApplyIntegerKind<
+      PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
+      kind, terminator, result, x, dim, mask, terminator, intrinsic);
+}
+
+template <TypeCategory CAT, template <typename> class ACCUM>
+struct PartialFloatingReductionHelper {
+  template <int KIND> struct Functor {
+    static constexpr int Intermediate{
+        std::max(KIND, 8)}; // use at least "double" for intermediate results
+    void operator()(Descriptor &result, const Descriptor &x, int dim,
+        const Descriptor *mask, Terminator &terminator,
+        const char *intrinsic) const {
+      using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
+      Accumulator accumulator{x};
+      PartialReduction<Accumulator, CAT, KIND>(
+          result, x, dim, mask, terminator, intrinsic, accumulator);
+    }
+  };
+};
+
 template <template <typename> class INTEGER_ACCUM,
     template <typename> class REAL_ACCUM,
     template <typename> class COMPLEX_ACCUM>
@@ -244,98 +288,24 @@ inline void TypedPartialNumericReduction(Descriptor &result,
   RUNTIME_CHECK(terminator, catKind.has_value());
   switch (catKind->first) {
   case TypeCategory::Integer:
-    switch (catKind->second) {
-    case 1:
-      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
-          TypeCategory::Integer, 1>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-    case 2:
-      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
-          TypeCategory::Integer, 2>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-    case 4:
-      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
-          TypeCategory::Integer, 4>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-    case 8:
-      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 8>>,
-          TypeCategory::Integer, 8>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-#ifdef __SIZEOF_INT128__
-    case 16:
-      PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 16>>,
-          TypeCategory::Integer, 16>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-#endif
-    }
+    PartialIntegerReduction<INTEGER_ACCUM>(
+        result, x, dim, catKind->second, mask, intrinsic, terminator);
     break;
   case TypeCategory::Real:
-    switch (catKind->second) {
-#if 0 // TODO
-    case 2:
-    case 3:
-#endif
-    case 4:
-      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
-          TypeCategory::Real, 4>(result, x, dim, mask, terminator, intrinsic);
-      return;
-    case 8:
-      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
-          TypeCategory::Real, 8>(result, x, dim, mask, terminator, intrinsic);
-      return;
-#if LONG_DOUBLE == 80
-    case 10:
-      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
-          TypeCategory::Real, 10>(result, x, dim, mask, terminator, intrinsic);
-      return;
-#elif LONG_DOUBLE == 128
-    case 16:
-      PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
-          TypeCategory::Real, 16>(result, x, dim, mask, terminator, intrinsic);
-      return;
-#endif
-    }
+    ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
+                               REAL_ACCUM>::template Functor,
+        void>(catKind->second, terminator, result, x, dim, mask, terminator,
+        intrinsic);
     break;
   case TypeCategory::Complex:
-    switch (catKind->second) {
-#if 0 // TODO
-    case 2:
-    case 3:
-#endif
-    case 4:
-      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
-          TypeCategory::Complex, 4>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-    case 8:
-      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
-          TypeCategory::Complex, 8>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-#if LONG_DOUBLE == 80
-    case 10:
-      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
-          TypeCategory::Complex, 10>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-#elif LONG_DOUBLE == 128
-    case 16:
-      PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
-          TypeCategory::Complex, 16>(
-          result, x, dim, mask, terminator, intrinsic);
-      return;
-#endif
-    }
+    ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
+                               COMPLEX_ACCUM>::template Functor,
+        void>(catKind->second, terminator, result, x, dim, mask, terminator,
+        intrinsic);
     break;
   default:
-    break;
+    terminator.Crash("%s: invalid type code %d", intrinsic, x.type().raw());
   }
-  terminator.Crash("%s: invalid type code %d", intrinsic, x.type().raw());
 }
 
 // SUM()
@@ -343,6 +313,7 @@ inline void TypedPartialNumericReduction(Descriptor &result,
 template <typename INTERMEDIATE> class IntegerSumAccumulator {
 public:
   explicit IntegerSumAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { sum_ = 0; }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = static_cast<A>(sum_);
   }
@@ -359,6 +330,7 @@ template <typename INTERMEDIATE> class IntegerSumAccumulator {
 template <typename INTERMEDIATE> class RealSumAccumulator {
 public:
   explicit RealSumAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { positives_ = negatives_ = inOrder_ = 0; }
   template <typename A> A Result() const {
     auto sum{static_cast<A>(positives_ + negatives_)};
     return std::isfinite(sum) ? sum : static_cast<A>(inOrder_);
@@ -390,6 +362,10 @@ template <typename INTERMEDIATE> class RealSumAccumulator {
 template <typename PART> class ComplexSumAccumulator {
 public:
   explicit ComplexSumAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() {
+    reals_.Reinitialize();
+    imaginaries_.Reinitialize();
+  }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     using ResultPart = typename A::value_type;
     *p = {reals_.template Result<ResultPart>(),
@@ -505,6 +481,7 @@ template <typename INTERMEDIATE> class NonComplexProductAccumulator {
 public:
   explicit NonComplexProductAccumulator(const Descriptor &array)
       : array_{array} {}
+  void Reinitialize() { product_ = 1; }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = static_cast<A>(product_);
   }
@@ -521,6 +498,7 @@ template <typename INTERMEDIATE> class NonComplexProductAccumulator {
 template <typename PART> class ComplexProductAccumulator {
 public:
   explicit ComplexProductAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { product_ = std::complex<PART>{1, 0}; }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     using ResultPart = typename A::value_type;
     *p = {static_cast<ResultPart>(product_.real()),
@@ -645,14 +623,77 @@ void RTNAME(ProductDim)(Descriptor &result, const Descriptor &x, int dim,
 }
 } // extern "C"
 
-// MAXLOC and MINLOC
+// IPARITY()
+
+template <typename INTERMEDIATE> class IntegerXorAccumulator {
+public:
+  explicit IntegerXorAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { xor_ = 0; }
+  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+    *p = static_cast<A>(xor_);
+  }
+  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+    xor_ ^= *array_.Element<A>(at);
+    return true;
+  }
+
+private:
+  const Descriptor &array_;
+  INTERMEDIATE xor_{0};
+};
+
+extern "C" {
+CppTypeFor<TypeCategory::Integer, 1> RTNAME(IParity1)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
+      IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "IPARITY");
+}
+CppTypeFor<TypeCategory::Integer, 2> RTNAME(IParity2)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
+      IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "IPARITY");
+}
+CppTypeFor<TypeCategory::Integer, 4> RTNAME(IParity4)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
+      IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
+      "IPARITY");
+}
+CppTypeFor<TypeCategory::Integer, 8> RTNAME(IParity8)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
+      IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x},
+      "IPARITY");
+}
+#ifdef __SIZEOF_INT128__
+CppTypeFor<TypeCategory::Integer, 16> RTNAME(IParity16)(const Descriptor &x,
+    const char *source, int line, int dim, const Descriptor *mask) {
+  return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
+      mask, IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
+      "IPARITY");
+}
+#endif
+void RTNAME(IParityDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line, const Descriptor *mask) {
+  Terminator terminator{source, line};
+  auto catKind{x.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator,
+      catKind.has_value() && catKind->first == TypeCategory::Integer);
+  PartialIntegerReduction<IntegerXorAccumulator>(
+      result, x, dim, catKind->second, mask, "IPARITY", terminator);
+}
+}
+
+// MAXLOC & MINLOC
 
 template <typename T, bool IS_MAX, bool BACK> struct NumericCompare {
   using Type = T;
   explicit NumericCompare(std::size_t /*elemLen; ignored*/) {}
-  bool operator()(const T &value, const T &previous) {
-    if (BACK && value == previous) {
-      return true;
+  bool operator()(const T &value, const T &previous) const {
+    if (value == previous) {
+      return BACK;
     } else if constexpr (IS_MAX) {
       return value > previous;
     } else {
@@ -666,10 +707,10 @@ template <typename T, bool IS_MAX, bool BACK> class CharacterCompare {
   using Type = T;
   explicit CharacterCompare(std::size_t elemLen)
       : chars_{elemLen / sizeof(T)} {}
-  bool operator()(const T &value, const T &previous) {
+  bool operator()(const T &value, const T &previous) const {
     int cmp{CharacterScalarCompare<T>(&value, &previous, chars_, chars_)};
-    if (BACK && cmp == 0) {
-      return true;
+    if (cmp == 0) {
+      return BACK;
     } else if constexpr (IS_MAX) {
       return cmp > 0;
     } else {
@@ -686,10 +727,14 @@ template <typename COMPARE> class ExtremumLocAccumulator {
   using Type = typename COMPARE::Type;
   ExtremumLocAccumulator(const Descriptor &array, std::size_t chars = 0)
       : array_{array}, argRank_{array.rank()}, compare_{array.ElementBytes()} {
+    Reinitialize();
+  }
+  void Reinitialize() {
     // per standard: result indices are all zero if no data
     for (int j{0}; j < argRank_; ++j) {
       extremumLoc_[j] = 0;
     }
+    previous_ = nullptr;
   }
   int argRank() const { return argRank_; }
   template <typename A> void GetResult(A *p, int zeroBasedDim = -1) {
@@ -720,38 +765,23 @@ template <typename COMPARE> class ExtremumLocAccumulator {
   COMPARE compare_;
 };
 
-template <typename COMPARE, typename CPPTYPE>
-static void DoMaxOrMinLocHelper(const char *intrinsic, Descriptor &result,
+template <typename ACCUMULATOR> struct LocationResultHelper {
+  template <int KIND> struct Functor {
+    void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
+      accumulator.GetResult(
+          result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
+    }
+  };
+};
+
+template <typename ACCUMULATOR, typename CPPTYPE>
+static void LocationHelper(const char *intrinsic, Descriptor &result,
     const Descriptor &x, int kind, const Descriptor *mask,
     Terminator &terminator) {
-  ExtremumLocAccumulator<COMPARE> accumulator{x};
+  ACCUMULATOR accumulator{x};
   DoTotalReduction<CPPTYPE>(x, 0, mask, accumulator, intrinsic, terminator);
-  switch (kind) {
-  case 1:
-    accumulator.GetResult(
-        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 1>>());
-    break;
-  case 2:
-    accumulator.GetResult(
-        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 2>>());
-    break;
-  case 4:
-    accumulator.GetResult(
-        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 4>>());
-    break;
-  case 8:
-    accumulator.GetResult(
-        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 8>>());
-    break;
-#ifdef __SIZEOF_INT128__
-  case 16:
-    accumulator.GetResult(
-        result.OffsetElement<CppTypeFor<TypeCategory::Integer, 16>>());
-    break;
-#endif
-  default:
-    terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
-  }
+  ApplyIntegerKind<LocationResultHelper<ACCUMULATOR>::template Functor, void>(
+      kind, terminator, accumulator, result);
 }
 
 template <TypeCategory CAT, int KIND, bool IS_MAX,
@@ -762,14 +792,25 @@ inline void DoMaxOrMinLoc(const char *intrinsic, Descriptor &result,
   using CppType = CppTypeFor<CAT, KIND>;
   Terminator terminator{source, line};
   if (back) {
-    DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, true>, CppType>(
-        intrinsic, result, x, kind, mask, terminator);
+    LocationHelper<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, true>>,
+        CppType>(intrinsic, result, x, kind, mask, terminator);
   } else {
-    DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, false>, CppType>(
-        intrinsic, result, x, kind, mask, terminator);
+    LocationHelper<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, false>>,
+        CppType>(intrinsic, result, x, kind, mask, terminator);
   }
 }
 
+template <TypeCategory CAT, bool IS_MAX> struct TypedMaxOrMinLocHelper {
+  template <int KIND> struct Functor {
+    void operator()(const char *intrinsic, Descriptor &result,
+        const Descriptor &x, int kind, const char *source, int line,
+        const Descriptor *mask, bool back) const {
+      DoMaxOrMinLoc<TypeCategory::Integer, KIND, IS_MAX, NumericCompare>(
+          intrinsic, result, x, kind, source, line, mask, back);
+    }
+  };
+};
+
 template <bool IS_MAX>
 inline void TypedMaxOrMinLoc(const char *intrinsic, Descriptor &result,
     const Descriptor &x, int kind, const char *source, int line,
@@ -789,76 +830,27 @@ inline void TypedMaxOrMinLoc(const char *intrinsic, Descriptor &result,
   RUNTIME_CHECK(terminator, catKind.has_value());
   switch (catKind->first) {
   case TypeCategory::Integer:
-    switch (catKind->second) {
-    case 1:
-      DoMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    case 2:
-      DoMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    case 4:
-      DoMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    case 8:
-      DoMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-#ifdef __SIZEOF_INT128__
-    case 16:
-      DoMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-#endif
-    }
+    ApplyIntegerKind<
+        TypedMaxOrMinLocHelper<TypeCategory::Integer, IS_MAX>::template Functor,
+        void>(catKind->second, terminator, intrinsic, result, x, kind, source,
+        line, mask, back);
     break;
   case TypeCategory::Real:
-    switch (catKind->second) {
-    // TODO: REAL(2 & 3)
-    case 4:
-      DoMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    case 8:
-      DoMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-#if LONG_DOUBLE == 80
-    case 10:
-      DoMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-#elif LONG_DOUBLE == 128
-    case 16:
-      DoMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-#endif
-    }
+    ApplyFloatingPointKind<
+        TypedMaxOrMinLocHelper<TypeCategory::Real, IS_MAX>::template Functor,
+        void>(catKind->second, terminator, intrinsic, result, x, kind, source,
+        line, mask, back);
     break;
   case TypeCategory::Character:
-    switch (catKind->second) {
-    case 1:
-      DoMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX, CharacterCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    case 2:
-      DoMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX, CharacterCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    case 4:
-      DoMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX, CharacterCompare>(
-          intrinsic, result, x, kind, source, line, mask, back);
-      return;
-    }
+    ApplyCharacterKind<TypedMaxOrMinLocHelper<TypeCategory::Character,
+                           IS_MAX>::template Functor,
+        void>(catKind->second, terminator, intrinsic, result, x, kind, source,
+        line, mask, back);
     break;
   default:
-    break;
+    terminator.Crash(
+        "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
   }
-  terminator.Crash(
-      "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
 }
 
 extern "C" {
@@ -874,38 +866,28 @@ void RTNAME(Minloc)(Descriptor &result, const Descriptor &x, int kind,
 
 // MAXLOC/MINLOC with DIM=
 
+template <typename ACCUMULATOR> struct PartialLocationHelper {
+  template <int KIND> struct Functor {
+    void operator()(Descriptor &result, const Descriptor &x, int dim,
+        const Descriptor *mask, Terminator &terminator, const char *intrinsic,
+        ACCUMULATOR &accumulator) const {
+      PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(
+          result, x, dim, mask, terminator, intrinsic, accumulator);
+    }
+  };
+};
+
 template <TypeCategory CAT, int KIND, bool IS_MAX,
     template <typename, bool, bool> class COMPARE, bool BACK>
 static void DoPartialMaxOrMinLocDirection(const char *intrinsic,
     Descriptor &result, const Descriptor &x, int kind, int dim,
     const Descriptor *mask, Terminator &terminator) {
   using CppType = CppTypeFor<CAT, KIND>;
-  switch (kind) {
-  case 1:
-    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
-        TypeCategory::Integer, 1>(result, x, dim, mask, terminator, intrinsic);
-    break;
-  case 2:
-    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
-        TypeCategory::Integer, 2>(result, x, dim, mask, terminator, intrinsic);
-    break;
-  case 4:
-    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
-        TypeCategory::Integer, 4>(result, x, dim, mask, terminator, intrinsic);
-    break;
-  case 8:
-    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
-        TypeCategory::Integer, 8>(result, x, dim, mask, terminator, intrinsic);
-    break;
-#ifdef __SIZEOF_INT128__
-  case 16:
-    PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
-        TypeCategory::Integer, 16>(result, x, dim, mask, terminator, intrinsic);
-    break;
-#endif
-  default:
-    terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
-  }
+  using Accumulator = ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>;
+  Accumulator accumulator{x};
+  ApplyIntegerKind<PartialLocationHelper<Accumulator>::template Functor, void>(
+      kind, terminator, result, x, dim, mask, terminator, intrinsic,
+      accumulator);
 }
 
 template <TypeCategory CAT, int KIND, bool IS_MAX,
@@ -922,6 +904,19 @@ inline void DoPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
   }
 }
 
+template <TypeCategory CAT, bool IS_MAX,
+    template <typename, bool, bool> class COMPARE>
+struct DoPartialMaxOrMinLocHelper {
+  template <int KIND> struct Functor {
+    void operator()(const char *intrinsic, Descriptor &result,
+        const Descriptor &x, int kind, int dim, const Descriptor *mask,
+        bool back, Terminator &terminator) const {
+      DoPartialMaxOrMinLoc<CAT, KIND, IS_MAX, COMPARE>(
+          intrinsic, result, x, kind, dim, mask, back, terminator);
+    }
+  };
+};
+
 template <bool IS_MAX>
 inline void TypedPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
     const Descriptor &x, int kind, int dim, const char *source, int line,
@@ -932,79 +927,27 @@ inline void TypedPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
   RUNTIME_CHECK(terminator, catKind.has_value());
   switch (catKind->first) {
   case TypeCategory::Integer:
-    switch (catKind->second) {
-    case 1:
-      DoPartialMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    case 2:
-      DoPartialMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    case 4:
-      DoPartialMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    case 8:
-      DoPartialMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-#ifdef __SIZEOF_INT128__
-    case 16:
-      DoPartialMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-#endif
-    }
+    ApplyIntegerKind<DoPartialMaxOrMinLocHelper<TypeCategory::Integer, IS_MAX,
+                         NumericCompare>::template Functor,
+        void>(catKind->second, terminator, intrinsic, result, x, kind, dim,
+        mask, back, terminator);
     break;
   case TypeCategory::Real:
-    switch (catKind->second) {
-    // TODO: REAL(2 & 3)
-    case 4:
-      DoPartialMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    case 8:
-      DoPartialMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-#if LONG_DOUBLE == 80
-    case 10:
-      DoPartialMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-#elif LONG_DOUBLE == 128
-    case 16:
-      DoPartialMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-#endif
-    }
+    ApplyFloatingPointKind<DoPartialMaxOrMinLocHelper<TypeCategory::Real,
+                               IS_MAX, NumericCompare>::template Functor,
+        void>(catKind->second, terminator, intrinsic, result, x, kind, dim,
+        mask, back, terminator);
     break;
   case TypeCategory::Character:
-    switch (catKind->second) {
-    case 1:
-      DoPartialMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX,
-          CharacterCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    case 2:
-      DoPartialMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX,
-          CharacterCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    case 4:
-      DoPartialMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX,
-          CharacterCompare>(
-          intrinsic, result, x, kind, dim, mask, back, terminator);
-      return;
-    }
+    ApplyCharacterKind<DoPartialMaxOrMinLocHelper<TypeCategory::Character,
+                           IS_MAX, CharacterCompare>::template Functor,
+        void>(catKind->second, terminator, intrinsic, result, x, kind, dim,
+        mask, back, terminator);
     break;
   default:
-    break;
+    terminator.Crash(
+        "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
   }
-  terminator.Crash(
-      "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
 }
 
 extern "C" {
@@ -1020,6 +963,329 @@ void RTNAME(MinlocDim)(Descriptor &result, const Descriptor &x, int kind,
 }
 } // extern "C"
 
+// FINDLOC
+
+template <TypeCategory CAT1, int KIND1, TypeCategory CAT2, int KIND2>
+struct Equality {
+  using Type1 = CppTypeFor<CAT1, KIND1>;
+  using Type2 = CppTypeFor<CAT2, KIND2>;
+  bool operator()(const Descriptor &array, const SubscriptValue at[],
+      const Descriptor &target) const {
+    return *array.Element<Type1>(at) == *target.OffsetElement<Type2>();
+  }
+};
+
+template <int KIND1, int KIND2>
+struct Equality<TypeCategory::Complex, KIND1, TypeCategory::Complex, KIND2> {
+  using Type1 = CppTypeFor<TypeCategory::Complex, KIND1>;
+  using Type2 = CppTypeFor<TypeCategory::Complex, KIND2>;
+  bool operator()(const Descriptor &array, const SubscriptValue at[],
+      const Descriptor &target) const {
+    const Type1 &xz{*array.Element<Type1>(at)};
+    const Type2 &tz{*target.OffsetElement<Type2>()};
+    return xz.real() == tz.real() && xz.imag() == tz.imag();
+  }
+};
+
+template <int KIND1, TypeCategory CAT2, int KIND2>
+struct Equality<TypeCategory::Complex, KIND1, CAT2, KIND2> {
+  using Type1 = CppTypeFor<TypeCategory::Complex, KIND1>;
+  using Type2 = CppTypeFor<CAT2, KIND2>;
+  bool operator()(const Descriptor &array, const SubscriptValue at[],
+      const Descriptor &target) const {
+    const Type1 &z{*array.Element<Type1>(at)};
+    return z.imag() == 0 && z.real() == *target.OffsetElement<Type2>();
+  }
+};
+
+template <TypeCategory CAT1, int KIND1, int KIND2>
+struct Equality<CAT1, KIND1, TypeCategory::Complex, KIND2> {
+  using Type1 = CppTypeFor<CAT1, KIND1>;
+  using Type2 = CppTypeFor<TypeCategory::Complex, KIND2>;
+  bool operator()(const Descriptor &array, const SubscriptValue at[],
+      const Descriptor &target) const {
+    const Type2 &z{*target.OffsetElement<Type2>()};
+    return *array.Element<Type1>(at) == z.real() && z.imag() == 0;
+  }
+};
+
+template <int KIND> struct CharacterEquality {
+  using Type = CppTypeFor<TypeCategory::Character, KIND>;
+  bool operator()(const Descriptor &array, const SubscriptValue at[],
+      const Descriptor &target) const {
+    return CharacterScalarCompare<Type>(array.Element<Type>(at),
+               target.OffsetElement<Type>(),
+               array.ElementBytes() / static_cast<unsigned>(KIND),
+               target.ElementBytes() / static_cast<unsigned>(KIND)) == 0;
+  }
+};
+
+struct LogicalEquivalence {
+  bool operator()(const Descriptor &array, const SubscriptValue at[],
+      const Descriptor &target) const {
+    return IsLogicalElementTrue(array, at) ==
+        IsLogicalElementTrue(target, at /*ignored*/);
+  }
+};
+
+template <typename EQUALITY> class LocationAccumulator {
+public:
+  LocationAccumulator(
+      const Descriptor &array, const Descriptor &target, bool back)
+      : array_{array}, target_{target}, back_{back} {
+    Reinitialize();
+  }
+  void Reinitialize() {
+    // per standard: result indices are all zero if no data
+    for (int j{0}; j < rank_; ++j) {
+      location_[j] = 0;
+    }
+  }
+  template <typename A> void GetResult(A *p, int zeroBasedDim = -1) {
+    if (zeroBasedDim >= 0) {
+      *p = location_[zeroBasedDim];
+    } else {
+      for (int j{0}; j < rank_; ++j) {
+        p[j] = location_[j];
+      }
+    }
+  }
+  template <typename IGNORED> bool AccumulateAt(const SubscriptValue at[]) {
+    if (equality_(array_, at, target_)) {
+      for (int j{0}; j < rank_; ++j) {
+        location_[j] = at[j];
+      }
+      return back_;
+    } else {
+      return true;
+    }
+  }
+
+private:
+  const Descriptor &array_;
+  const Descriptor &target_;
+  const bool back_{false};
+  const int rank_{array_.rank()};
+  SubscriptValue location_[maxRank];
+  const EQUALITY equality_{};
+};
+
+template <TypeCategory XCAT, int XKIND, TypeCategory TARGET_CAT>
+struct TotalNumericFindlocHelper {
+  template <int TARGET_KIND> struct Functor {
+    void operator()(Descriptor &result, const Descriptor &x,
+        const Descriptor &target, int kind, int dim, const Descriptor *mask,
+        bool back, Terminator &terminator) const {
+      using Eq = Equality<XCAT, XKIND, TARGET_CAT, TARGET_KIND>;
+      using Accumulator = LocationAccumulator<Eq>;
+      Accumulator accumulator{x, target, back};
+      DoTotalReduction<void>(x, dim, mask, accumulator, "FINDLOC", terminator);
+      ApplyIntegerKind<LocationResultHelper<Accumulator>::template Functor,
+          void>(kind, terminator, accumulator, result);
+    }
+  };
+};
+
+template <TypeCategory CAT,
+    template <TypeCategory XCAT, int XKIND, TypeCategory TARGET_CAT>
+    class HELPER>
+struct NumericFindlocHelper {
+  template <int KIND> struct Functor {
+    void operator()(TypeCategory targetCat, int targetKind, Descriptor &result,
+        const Descriptor &x, const Descriptor &target, int kind, int dim,
+        const Descriptor *mask, bool back, Terminator &terminator) const {
+      switch (targetCat) {
+      case TypeCategory::Integer:
+        ApplyIntegerKind<
+            HELPER<CAT, KIND, TypeCategory::Integer>::template Functor, void>(
+            targetKind, terminator, result, x, target, kind, dim, mask, back,
+            terminator);
+        break;
+      case TypeCategory::Real:
+        ApplyFloatingPointKind<
+            HELPER<CAT, KIND, TypeCategory::Real>::template Functor, void>(
+            targetKind, terminator, result, x, target, kind, dim, mask, back,
+            terminator);
+        break;
+      case TypeCategory::Complex:
+        ApplyFloatingPointKind<
+            HELPER<CAT, KIND, TypeCategory::Complex>::template Functor, void>(
+            targetKind, terminator, result, x, target, kind, dim, mask, back,
+            terminator);
+        break;
+      default:
+        terminator.Crash(
+            "FINDLOC: bad target category %d for array category %d",
+            static_cast<int>(targetCat), static_cast<int>(CAT));
+      }
+    }
+  };
+};
+
+template <int KIND> struct CharacterFindlocHelper {
+  void operator()(Descriptor &result, const Descriptor &x,
+      const Descriptor &target, int kind, const Descriptor *mask, bool back,
+      Terminator &terminator) {
+    using Accumulator = LocationAccumulator<CharacterEquality<KIND>>;
+    Accumulator accumulator{x, target, back};
+    DoTotalReduction<void>(x, 0, mask, accumulator, "FINDLOC", terminator);
+    ApplyIntegerKind<LocationResultHelper<Accumulator>::template Functor, void>(
+        kind, terminator, accumulator, result);
+  }
+};
+
+static void LogicalFindlocHelper(Descriptor &result, const Descriptor &x,
+    const Descriptor &target, int kind, const Descriptor *mask, bool back,
+    Terminator &terminator) {
+  using Accumulator = LocationAccumulator<LogicalEquivalence>;
+  Accumulator accumulator{x, target, back};
+  DoTotalReduction<void>(x, 0, mask, accumulator, "FINDLOC", terminator);
+  ApplyIntegerKind<LocationResultHelper<Accumulator>::template Functor, void>(
+      kind, terminator, accumulator, result);
+}
+
+extern "C" {
+void RTNAME(Findloc)(Descriptor &result, const Descriptor &x,
+    const Descriptor &target, int kind, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  int rank{x.rank()};
+  SubscriptValue extent[1]{rank};
+  result.Establish(TypeCategory::Integer, kind, nullptr, 1, extent,
+      CFI_attribute_allocatable);
+  result.GetDimension(0).SetBounds(1, extent[0]);
+  Terminator terminator{source, line};
+  if (int stat{result.Allocate()}) {
+    terminator.Crash(
+        "FINDLOC: could not allocate memory for result; STAT=%d", stat);
+  }
+  CheckIntegerKind(terminator, kind, "FINDLOC");
+  auto xType{x.type().GetCategoryAndKind()};
+  auto targetType{target.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, xType.has_value() && targetType.has_value());
+  switch (xType->first) {
+  case TypeCategory::Integer:
+    ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer,
+                         TotalNumericFindlocHelper>::template Functor,
+        void>(xType->second, terminator, targetType->first, targetType->second,
+        result, x, target, kind, 0, mask, back, terminator);
+    break;
+  case TypeCategory::Real:
+    ApplyFloatingPointKind<NumericFindlocHelper<TypeCategory::Real,
+                               TotalNumericFindlocHelper>::template Functor,
+        void>(xType->second, terminator, targetType->first, targetType->second,
+        result, x, target, kind, 0, mask, back, terminator);
+    break;
+  case TypeCategory::Complex:
+    ApplyFloatingPointKind<NumericFindlocHelper<TypeCategory::Complex,
+                               TotalNumericFindlocHelper>::template Functor,
+        void>(xType->second, terminator, targetType->first, targetType->second,
+        result, x, target, kind, 0, mask, back, terminator);
+    break;
+  case TypeCategory::Character:
+    RUNTIME_CHECK(terminator,
+        targetType->first == TypeCategory::Character &&
+            targetType->second == xType->second);
+    ApplyCharacterKind<CharacterFindlocHelper, void>(xType->second, terminator,
+        result, x, target, kind, mask, back, terminator);
+    break;
+  case TypeCategory::Logical:
+    RUNTIME_CHECK(terminator, targetType->first == TypeCategory::Logical);
+    LogicalFindlocHelper(result, x, target, kind, mask, back, terminator);
+    break;
+  default:
+    terminator.Crash(
+        "FINDLOC: Bad data type code (%d) for array", x.type().raw());
+  }
+}
+} // extern "C"
+
+// FINDLOC with DIM=
+
+template <TypeCategory XCAT, int XKIND, TypeCategory TARGET_CAT>
+struct PartialNumericFindlocHelper {
+  template <int TARGET_KIND> struct Functor {
+    void operator()(Descriptor &result, const Descriptor &x,
+        const Descriptor &target, int kind, int dim, const Descriptor *mask,
+        bool back, Terminator &terminator) const {
+      using Eq = Equality<XCAT, XKIND, TARGET_CAT, TARGET_KIND>;
+      using Accumulator = LocationAccumulator<Eq>;
+      Accumulator accumulator{x, target, back};
+      ApplyIntegerKind<PartialLocationHelper<Accumulator>::template Functor,
+          void>(kind, terminator, result, x, dim, mask, terminator, "FINDLOC",
+          accumulator);
+    }
+  };
+};
+
+template <int KIND> struct PartialCharacterFindlocHelper {
+  void operator()(Descriptor &result, const Descriptor &x,
+      const Descriptor &target, int kind, int dim, const Descriptor *mask,
+      bool back, Terminator &terminator) {
+    using Accumulator = LocationAccumulator<CharacterEquality<KIND>>;
+    Accumulator accumulator{x, target, back};
+    ApplyIntegerKind<PartialLocationHelper<Accumulator>::template Functor,
+        void>(kind, terminator, result, x, dim, mask, terminator, "FINDLOC",
+        accumulator);
+  }
+};
+
+static void PartialLogicalFindlocHelper(Descriptor &result, const Descriptor &x,
+    const Descriptor &target, int kind, int dim, const Descriptor *mask,
+    bool back, Terminator &terminator) {
+  using Accumulator = LocationAccumulator<LogicalEquivalence>;
+  Accumulator accumulator{x, target, back};
+  ApplyIntegerKind<PartialLocationHelper<Accumulator>::template Functor, void>(
+      kind, terminator, result, x, dim, mask, terminator, "FINDLOC",
+      accumulator);
+}
+
+extern "C" {
+void RTNAME(FindlocDim)(Descriptor &result, const Descriptor &x,
+    const Descriptor &target, int kind, int dim, const char *source, int line,
+    const Descriptor *mask, bool back) {
+  Terminator terminator{source, line};
+  CheckIntegerKind(terminator, kind, "FINDLOC");
+  auto xType{x.type().GetCategoryAndKind()};
+  auto targetType{target.type().GetCategoryAndKind()};
+  RUNTIME_CHECK(terminator, xType.has_value() && targetType.has_value());
+  switch (xType->first) {
+  case TypeCategory::Integer:
+    ApplyIntegerKind<NumericFindlocHelper<TypeCategory::Integer,
+                         PartialNumericFindlocHelper>::template Functor,
+        void>(xType->second, terminator, targetType->first, targetType->second,
+        result, x, target, kind, dim, mask, back, terminator);
+    break;
+  case TypeCategory::Real:
+    ApplyFloatingPointKind<NumericFindlocHelper<TypeCategory::Real,
+                               PartialNumericFindlocHelper>::template Functor,
+        void>(xType->second, terminator, targetType->first, targetType->second,
+        result, x, target, kind, dim, mask, back, terminator);
+    break;
+  case TypeCategory::Complex:
+    ApplyFloatingPointKind<NumericFindlocHelper<TypeCategory::Complex,
+                               PartialNumericFindlocHelper>::template Functor,
+        void>(xType->second, terminator, targetType->first, targetType->second,
+        result, x, target, kind, dim, mask, back, terminator);
+    break;
+  case TypeCategory::Character:
+    RUNTIME_CHECK(terminator,
+        targetType->first == TypeCategory::Character &&
+            targetType->second == xType->second);
+    ApplyCharacterKind<PartialCharacterFindlocHelper, void>(xType->second,
+        terminator, result, x, target, kind, dim, mask, back, terminator);
+    break;
+  case TypeCategory::Logical:
+    RUNTIME_CHECK(terminator, targetType->first == TypeCategory::Logical);
+    PartialLogicalFindlocHelper(
+        result, x, target, kind, dim, mask, back, terminator);
+    break;
+  default:
+    terminator.Crash(
+        "FINDLOC: Bad data type code (%d) for array", x.type().raw());
+  }
+}
+} // extern "C"
+
 // MAXVAL and MINVAL
 
 template <TypeCategory CAT, int KIND, bool IS_MAXVAL> struct MaxOrMinIdentity {
@@ -1043,7 +1309,11 @@ template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
 class NumericExtremumAccumulator {
 public:
   using Type = CppTypeFor<CAT, KIND>;
-  NumericExtremumAccumulator(const Descriptor &array) : array_{array} {}
+  explicit NumericExtremumAccumulator(const Descriptor &array)
+      : array_{array} {}
+  void Reinitialize() {
+    extremum_ = MaxOrMinIdentity<CAT, KIND, IS_MAXVAL>::Value();
+  }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = extremum_;
   }
@@ -1092,11 +1362,24 @@ static void DoMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
     accumulator.GetResult(result.OffsetElement<Type>());
   } else {
     // Partial reduction
-    PartialReduction<ACCUMULATOR<CAT, KIND, IS_MAXVAL>, CAT, KIND>(
-        result, x, dim, mask, terminator, intrinsic);
+    using Accumulator = ACCUMULATOR<CAT, KIND, IS_MAXVAL>;
+    Accumulator accumulator{x};
+    PartialReduction<Accumulator, CAT, KIND>(
+        result, x, dim, mask, terminator, intrinsic, accumulator);
   }
 }
 
+template <TypeCategory CAT, bool IS_MAXVAL> struct MaxOrMinHelper {
+  template <int KIND> struct Functor {
+    void operator()(Descriptor &result, const Descriptor &x, int dim,
+        const Descriptor *mask, const char *intrinsic,
+        Terminator &terminator) const {
+      DoMaxOrMin<CAT, KIND, IS_MAXVAL, NumericExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+    }
+  };
+};
+
 template <bool IS_MAXVAL>
 inline void NumericMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line, const Descriptor *mask,
@@ -1106,72 +1389,28 @@ inline void NumericMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
   RUNTIME_CHECK(terminator, type);
   switch (type->first) {
   case TypeCategory::Integer:
-    switch (type->second) {
-    case 1:
-      DoMaxOrMin<TypeCategory::Integer, 1, IS_MAXVAL,
-          NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-    case 2:
-      DoMaxOrMin<TypeCategory::Integer, 2, IS_MAXVAL,
-          NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-    case 4:
-      DoMaxOrMin<TypeCategory::Integer, 4, IS_MAXVAL,
-          NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-    case 8:
-      DoMaxOrMin<TypeCategory::Integer, 8, IS_MAXVAL,
-          NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-#ifdef __SIZEOF_INT128__
-    case 16:
-      DoMaxOrMin<TypeCategory::Integer, 16, IS_MAXVAL,
-          NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-#endif
-    }
+    ApplyIntegerKind<
+        MaxOrMinHelper<TypeCategory::Integer, IS_MAXVAL>::template Functor,
+        void>(
+        type->second, terminator, result, x, dim, mask, intrinsic, terminator);
     break;
   case TypeCategory::Real:
-    switch (type->second) {
-    // TODO: REAL(2 & 3)
-    case 4:
-      DoMaxOrMin<TypeCategory::Real, 4, IS_MAXVAL, NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-    case 8:
-      DoMaxOrMin<TypeCategory::Real, 8, IS_MAXVAL, NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-#if LONG_DOUBLE == 80
-    case 10:
-      DoMaxOrMin<TypeCategory::Real, 10, IS_MAXVAL, NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-#elif LONG_DOUBLE == 128
-    case 16:
-      DoMaxOrMin<TypeCategory::Real, 16, IS_MAXVAL, NumericExtremumAccumulator>(
-          result, x, dim, mask, intrinsic, terminator);
-      return;
-#endif
-    }
+    ApplyFloatingPointKind<
+        MaxOrMinHelper<TypeCategory::Real, IS_MAXVAL>::template Functor, void>(
+        type->second, terminator, result, x, dim, mask, intrinsic, terminator);
     break;
   default:
-    break;
+    terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
   }
-  terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
 }
 
 template <TypeCategory, int KIND, bool IS_MAXVAL>
 class CharacterExtremumAccumulator {
 public:
   using Type = CppTypeFor<TypeCategory::Character, KIND>;
-  CharacterExtremumAccumulator(const Descriptor &array)
+  explicit CharacterExtremumAccumulator(const Descriptor &array)
       : array_{array}, charLen_{array_.ElementBytes() / KIND} {}
+  void Reinitialize() { extremum_ = nullptr; }
   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     static_assert(std::is_same_v<A, Type>);
     if (extremum_) {
@@ -1202,6 +1441,18 @@ class CharacterExtremumAccumulator {
   const Type *extremum_{nullptr};
 };
 
+template <bool IS_MAXVAL> struct CharacterMaxOrMinHelper {
+  template <int KIND> struct Functor {
+    void operator()(Descriptor &result, const Descriptor &x, int dim,
+        const Descriptor *mask, const char *intrinsic,
+        Terminator &terminator) const {
+      DoMaxOrMin<TypeCategory::Character, KIND, IS_MAXVAL,
+          CharacterExtremumAccumulator>(
+          result, x, dim, mask, intrinsic, terminator);
+    }
+  };
+};
+
 template <bool IS_MAXVAL>
 inline void CharacterMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line, const Descriptor *mask,
@@ -1209,25 +1460,9 @@ inline void CharacterMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
   Terminator terminator{source, line};
   auto type{x.type().GetCategoryAndKind()};
   RUNTIME_CHECK(terminator, type && type->first == TypeCategory::Character);
-  switch (type->second) {
-  case 1:
-    DoMaxOrMin<TypeCategory::Character, 1, IS_MAXVAL,
-        CharacterExtremumAccumulator>(
-        result, x, dim, mask, intrinsic, terminator);
-    break;
-  case 2:
-    DoMaxOrMin<TypeCategory::Character, 2, IS_MAXVAL,
-        CharacterExtremumAccumulator>(
-        result, x, dim, mask, intrinsic, terminator);
-    break;
-  case 4:
-    DoMaxOrMin<TypeCategory::Character, 4, IS_MAXVAL,
-        CharacterExtremumAccumulator>(
-        result, x, dim, mask, intrinsic, terminator);
-    break;
-  default:
-    terminator.Crash("%s: bad character kind %d", intrinsic, type->second);
-  }
+  ApplyCharacterKind<CharacterMaxOrMinHelper<IS_MAXVAL>::template Functor,
+      void>(
+      type->second, terminator, result, x, dim, mask, intrinsic, terminator);
 }
 
 extern "C" {
@@ -1368,20 +1603,24 @@ void RTNAME(MinvalDim)(Descriptor &result, const Descriptor &x, int dim,
 
 } // extern "C"
 
-// ALL, ANY, & COUNT
+// ALL, ANY, COUNT, & PARITY
 
-template <bool IS_ALL> class LogicalAccumulator {
+enum class LogicalReduction { All, Any, Parity };
+
+template <LogicalReduction REDUCTION> class LogicalAccumulator {
 public:
   using Type = bool;
   explicit LogicalAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { result_ = REDUCTION == LogicalReduction::All; }
   bool Result() const { return result_; }
   bool Accumulate(bool x) {
-    if (x == IS_ALL) {
-      return true;
-    } else {
+    if constexpr (REDUCTION == LogicalReduction::Parity) {
+      result_ = result_ != x;
+    } else if (x != (REDUCTION == LogicalReduction::All)) {
       result_ = x;
       return false;
     }
+    return true;
   }
   template <typename IGNORED = void>
   bool AccumulateAt(const SubscriptValue at[]) {
@@ -1390,7 +1629,7 @@ template <bool IS_ALL> class LogicalAccumulator {
 
 private:
   const Descriptor &array_;
-  bool result_{IS_ALL};
+  bool result_{REDUCTION == LogicalReduction::All};
 };
 
 template <typename ACCUMULATOR>
@@ -1428,43 +1667,33 @@ inline auto ReduceLogicalDimToScalar(const Descriptor &x, int zeroBasedDim,
   return accumulator.Result();
 }
 
-template <bool IS_ALL, int KIND>
-inline void ReduceLogicalDimension(Descriptor &result, const Descriptor &x,
-    int dim, Terminator &terminator, const char *intrinsic) {
-  // Standard requires result to have same LOGICAL kind as argument.
-  CreatePartialReductionResult(result, x, dim, terminator, intrinsic, x.type());
-  SubscriptValue at[maxRank];
-  result.GetLowerBounds(at);
-  INTERNAL_CHECK(at[0] == 1);
-  using CppType = CppTypeFor<TypeCategory::Logical, KIND>;
-  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
-    *result.Element<CppType>(at) =
-        ReduceLogicalDimToScalar<LogicalAccumulator<IS_ALL>>(x, dim - 1, at);
-  }
-}
+template <LogicalReduction REDUCTION> struct LogicalReduceHelper {
+  template <int KIND> struct Functor {
+    void operator()(Descriptor &result, const Descriptor &x, int dim,
+        Terminator &terminator, const char *intrinsic) const {
+      // Standard requires result to have same LOGICAL kind as argument.
+      CreatePartialReductionResult(
+          result, x, dim, terminator, intrinsic, x.type());
+      SubscriptValue at[maxRank];
+      result.GetLowerBounds(at);
+      INTERNAL_CHECK(at[0] == 1);
+      using CppType = CppTypeFor<TypeCategory::Logical, KIND>;
+      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+        *result.Element<CppType>(at) =
+            ReduceLogicalDimToScalar<LogicalAccumulator<REDUCTION>>(
+                x, dim - 1, at);
+      }
+    }
+  };
+};
 
-template <bool IS_ALL>
+template <LogicalReduction REDUCTION>
 inline void DoReduceLogicalDimension(Descriptor &result, const Descriptor &x,
     int dim, Terminator &terminator, const char *intrinsic) {
   auto catKind{x.type().GetCategoryAndKind()};
   RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical);
-  switch (catKind->second) {
-  case 1:
-    ReduceLogicalDimension<IS_ALL, 1>(result, x, dim, terminator, intrinsic);
-    break;
-  case 2:
-    ReduceLogicalDimension<IS_ALL, 2>(result, x, dim, terminator, intrinsic);
-    break;
-  case 4:
-    ReduceLogicalDimension<IS_ALL, 4>(result, x, dim, terminator, intrinsic);
-    break;
-  case 8:
-    ReduceLogicalDimension<IS_ALL, 8>(result, x, dim, terminator, intrinsic);
-    break;
-  default:
-    terminator.Crash(
-        "%s: bad argument type LOGICAL(%d)", intrinsic, catKind->second);
-  }
+  ApplyLogicalKind<LogicalReduceHelper<REDUCTION>::template Functor, void>(
+      catKind->second, terminator, result, x, dim, terminator, intrinsic);
 }
 
 // COUNT
@@ -1473,6 +1702,7 @@ class CountAccumulator {
 public:
   using Type = std::int64_t;
   explicit CountAccumulator(const Descriptor &array) : array_{array} {}
+  void Reinitialize() { result_ = 0; }
   Type Result() const { return result_; }
   template <typename IGNORED = void>
   bool AccumulateAt(const SubscriptValue at[]) {
@@ -1487,41 +1717,44 @@ class CountAccumulator {
   Type result_{0};
 };
 
-template <int KIND>
-inline void CountDimension(
-    Descriptor &result, const Descriptor &x, int dim, Terminator &terminator) {
-  CreatePartialReductionResult(result, x, dim, terminator, "COUNT",
-      TypeCode{TypeCategory::Integer, KIND});
-  SubscriptValue at[maxRank];
-  result.GetLowerBounds(at);
-  INTERNAL_CHECK(at[0] == 1);
-  using CppType = CppTypeFor<TypeCategory::Integer, KIND>;
-  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
-    *result.Element<CppType>(at) =
-        ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at);
+template <int KIND> struct CountDimension {
+  void operator()(Descriptor &result, const Descriptor &x, int dim,
+      Terminator &terminator) const {
+    CreatePartialReductionResult(result, x, dim, terminator, "COUNT",
+        TypeCode{TypeCategory::Integer, KIND});
+    SubscriptValue at[maxRank];
+    result.GetLowerBounds(at);
+    INTERNAL_CHECK(at[0] == 1);
+    using CppType = CppTypeFor<TypeCategory::Integer, KIND>;
+    for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
+      *result.Element<CppType>(at) =
+          ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at);
+    }
   }
-}
+};
 
 extern "C" {
 
 bool RTNAME(All)(const Descriptor &x, const char *source, int line, int dim) {
-  return GetTotalLogicalReduction(
-      x, source, line, dim, LogicalAccumulator<true>{x}, "ALL");
+  return GetTotalLogicalReduction(x, source, line, dim,
+      LogicalAccumulator<LogicalReduction::All>{x}, "ALL");
 }
 void RTNAME(AllDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line) {
   Terminator terminator{source, line};
-  DoReduceLogicalDimension<true>(result, x, dim, terminator, "ALL");
+  DoReduceLogicalDimension<LogicalReduction::All>(
+      result, x, dim, terminator, "ALL");
 }
 
 bool RTNAME(Any)(const Descriptor &x, const char *source, int line, int dim) {
-  return GetTotalLogicalReduction(
-      x, source, line, dim, LogicalAccumulator<false>{x}, "ANY");
+  return GetTotalLogicalReduction(x, source, line, dim,
+      LogicalAccumulator<LogicalReduction::Any>{x}, "ANY");
 }
 void RTNAME(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line) {
   Terminator terminator{source, line};
-  DoReduceLogicalDimension<false>(result, x, dim, terminator, "ANY");
+  DoReduceLogicalDimension<LogicalReduction::Any>(
+      result, x, dim, terminator, "ANY");
 }
 
 std::int64_t RTNAME(Count)(
@@ -1529,30 +1762,24 @@ std::int64_t RTNAME(Count)(
   return GetTotalLogicalReduction(
       x, source, line, dim, CountAccumulator{x}, "COUNT");
 }
+
 void RTNAME(CountDim)(Descriptor &result, const Descriptor &x, int dim,
     int kind, const char *source, int line) {
   Terminator terminator{source, line};
-  switch (kind) {
-  case 1:
-    CountDimension<1>(result, x, dim, terminator);
-    break;
-  case 2:
-    CountDimension<2>(result, x, dim, terminator);
-    break;
-  case 4:
-    CountDimension<4>(result, x, dim, terminator);
-    break;
-  case 8:
-    CountDimension<8>(result, x, dim, terminator);
-    break;
-#ifdef __SIZEOF_INT128__
-  case 16:
-    CountDimension<16>(result, x, dim, terminator);
-    break;
-#endif
-  default:
-    terminator.Crash("COUNT: bad KIND=%d", kind);
-  }
+  ApplyIntegerKind<CountDimension, void>(
+      kind, terminator, result, x, dim, terminator);
+}
+
+bool RTNAME(Parity)(
+    const Descriptor &x, const char *source, int line, int dim) {
+  return GetTotalLogicalReduction(x, source, line, dim,
+      LogicalAccumulator<LogicalReduction::Parity>{x}, "PARITY");
+}
+void RTNAME(ParityDim)(Descriptor &result, const Descriptor &x, int dim,
+    const char *source, int line) {
+  Terminator terminator{source, line};
+  DoReduceLogicalDimension<LogicalReduction::Parity>(
+      result, x, dim, terminator, "PARITY");
 }
 
 } // extern "C"

diff  --git a/flang/runtime/reduction.h b/flang/runtime/reduction.h
index ea9e93adf72b..39604b8517e2 100644
--- a/flang/runtime/reduction.h
+++ b/flang/runtime/reduction.h
@@ -24,8 +24,8 @@ namespace Fortran::runtime {
 extern "C" {
 
 // Reductions that are known to return scalars have per-type entry
-// points.  These cover the casse that either have no DIM=
-// argument, or have an argument rank of 1.  Pass 0 for no DIM=
+// points.  These cover the cases that either have no DIM=
+// argument or have an argument rank of 1.  Pass 0 for no DIM=
 // or the value of the DIM= argument so that it may be checked.
 // The data type in the descriptor is checked against the expected
 // return type.
@@ -144,20 +144,42 @@ void RTNAME(CppProductComplex16)(std::complex<long double> &,
 void RTNAME(ProductDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
-// MAXLOC and MINLOC
+// IPARITY()
+std::int8_t RTNAME(IParity1)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+std::int16_t RTNAME(IParity2)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+std::int32_t RTNAME(IParity4)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+std::int64_t RTNAME(IParity8)(const Descriptor &, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr);
+#ifdef __SIZEOF_INT128__
+common::int128_t RTNAME(IParity16)(const Descriptor &, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr);
+#endif
+void RTNAME(IParityDim)(Descriptor &result, const Descriptor &array, int dim,
+    const char *source, int line, const Descriptor *mask = nullptr);
+
+// FINDLOC, MAXLOC, & MINLOC
 // These return allocated arrays in the supplied descriptor.
 // The default value for KIND= should be the default INTEGER in effect at
 // compilation time.
-void RTNAME(Maxloc)(Descriptor &, const Descriptor &, int kind,
+void RTNAME(Findloc)(Descriptor &, const Descriptor &x,
+    const Descriptor &target, int kind, const char *source, int line,
+    const Descriptor *mask = nullptr, bool back = false);
+void RTNAME(FindlocDim)(Descriptor &, const Descriptor &x,
+    const Descriptor &target, int kind, int dim, const char *source, int line,
+    const Descriptor *mask = nullptr, bool back = false);
+void RTNAME(Maxloc)(Descriptor &, const Descriptor &x, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocDim)(Descriptor &, const Descriptor &, int kind, int dim,
+void RTNAME(MaxlocDim)(Descriptor &, const Descriptor &x, int kind, int dim,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(Minloc)(Descriptor &, const Descriptor &, int kind,
+void RTNAME(Minloc)(Descriptor &, const Descriptor &x, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocDim)(Descriptor &, const Descriptor &, int kind, int dim,
+void RTNAME(MinlocDim)(Descriptor &, const Descriptor &x, int kind, int dim,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
 
@@ -221,7 +243,7 @@ void RTNAME(MaxvalDim)(Descriptor &, const Descriptor &, int dim,
 void RTNAME(MinvalDim)(Descriptor &, const Descriptor &, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
-// ALL, ANY, & COUNT logical reductions
+// ALL, ANY, COUNT, & PARITY logical reductions
 bool RTNAME(All)(const Descriptor &, const char *source, int line, int dim = 0);
 void RTNAME(AllDim)(Descriptor &result, const Descriptor &, int dim,
     const char *source, int line);
@@ -232,6 +254,10 @@ std::int64_t RTNAME(Count)(
     const Descriptor &, const char *source, int line, int dim = 0);
 void RTNAME(CountDim)(Descriptor &result, const Descriptor &, int dim, int kind,
     const char *source, int line);
+bool RTNAME(Parity)(
+    const Descriptor &, const char *source, int line, int dim = 0);
+void RTNAME(ParityDim)(Descriptor &result, const Descriptor &, int dim,
+    const char *source, int line);
 
 } // extern "C"
 } // namespace Fortran::runtime

diff  --git a/flang/runtime/terminator.h b/flang/runtime/terminator.h
index c63f8950df3d..02d97bef9456 100644
--- a/flang/runtime/terminator.h
+++ b/flang/runtime/terminator.h
@@ -24,6 +24,10 @@ class Terminator {
   Terminator(const Terminator &) = default;
   explicit Terminator(const char *sourceFileName, int sourceLine = 0)
       : sourceFileName_{sourceFileName}, sourceLine_{sourceLine} {}
+
+  const char *sourceFileName() const { return sourceFileName_; }
+  int sourceLine() const { return sourceLine_; }
+
   void SetLocation(const char *sourceFileName = nullptr, int sourceLine = 0) {
     sourceFileName_ = sourceFileName;
     sourceLine_ = sourceLine;

diff  --git a/flang/runtime/tools.h b/flang/runtime/tools.h
index 1b988d994eb6..c5996dc3e568 100644
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -13,6 +13,7 @@
 #include "descriptor.h"
 #include "memory.h"
 #include "terminator.h"
+#include "flang/Common/long-double.h"
 #include <functional>
 #include <map>
 #include <type_traits>
@@ -101,5 +102,83 @@ inline bool SetInteger(INT &x, int kind, std::int64_t value) {
   }
 }
 
+// Maps a runtime INTEGER kind value to the appropriate instantiation of
+// a function object template and calls it with the supplied arguments.
+template <template <int KIND> class FUNC, typename RESULT, typename... A>
+inline RESULT ApplyIntegerKind(int kind, Terminator &terminator, A &&...x) {
+  switch (kind) {
+  case 1:
+    return FUNC<1>{}(std::forward<A>(x)...);
+  case 2:
+    return FUNC<2>{}(std::forward<A>(x)...);
+  case 4:
+    return FUNC<4>{}(std::forward<A>(x)...);
+  case 8:
+    return FUNC<8>{}(std::forward<A>(x)...);
+#ifdef __SIZEOF_INT128__
+  case 16:
+    return FUNC<16>{}(std::forward<A>(x)...);
+#endif
+  default:
+    terminator.Crash("unsupported INTEGER(KIND=%d)", kind);
+  }
+}
+
+template <template <int KIND> class FUNC, typename RESULT, typename... A>
+inline RESULT ApplyFloatingPointKind(
+    int kind, Terminator &terminator, A &&...x) {
+  switch (kind) {
+#if 0 // TODO: REAL/COMPLEX (2 & 3)
+  case 2:
+    return FUNC<2>{}(std::forward<A>(x)...);
+  case 3:
+    return FUNC<3>{}(std::forward<A>(x)...);
+#endif
+  case 4:
+    return FUNC<4>{}(std::forward<A>(x)...);
+  case 8:
+    return FUNC<8>{}(std::forward<A>(x)...);
+#if LONG_DOUBLE == 80
+  case 10:
+    return FUNC<10>{}(std::forward<A>(x)...);
+#elif LONG_DOUBLE == 128
+  case 16:
+    return FUNC<16>{}(std::forward<A>(x)...);
+#endif
+  default:
+    terminator.Crash("unsupported REAL/COMPLEX(KIND=%d)", kind);
+  }
+}
+
+template <template <int KIND> class FUNC, typename RESULT, typename... A>
+inline RESULT ApplyCharacterKind(int kind, Terminator &terminator, A &&...x) {
+  switch (kind) {
+  case 1:
+    return FUNC<1>{}(std::forward<A>(x)...);
+  case 2:
+    return FUNC<2>{}(std::forward<A>(x)...);
+  case 4:
+    return FUNC<4>{}(std::forward<A>(x)...);
+  default:
+    terminator.Crash("unsupported CHARACTER(KIND=%d)", kind);
+  }
+}
+
+template <template <int KIND> class FUNC, typename RESULT, typename... A>
+inline RESULT ApplyLogicalKind(int kind, Terminator &terminator, A &&...x) {
+  switch (kind) {
+  case 1:
+    return FUNC<1>{}(std::forward<A>(x)...);
+  case 2:
+    return FUNC<2>{}(std::forward<A>(x)...);
+  case 4:
+    return FUNC<4>{}(std::forward<A>(x)...);
+  case 8:
+    return FUNC<8>{}(std::forward<A>(x)...);
+  default:
+    terminator.Crash("unsupported LOGICAL(KIND=%d)", kind);
+  }
+}
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_TOOLS_H_

diff  --git a/flang/unittests/RuntimeGTest/Reduction.cpp b/flang/unittests/RuntimeGTest/Reduction.cpp
index 111b5674285f..181f44d4207b 100644
--- a/flang/unittests/RuntimeGTest/Reduction.cpp
+++ b/flang/unittests/RuntimeGTest/Reduction.cpp
@@ -138,8 +138,8 @@ TEST(Reductions, Character) {
   std::vector<int> shape{2, 3};
   auto array{MakeArray<TypeCategory::Character, 1>(shape,
       std::vector<std::string>{"abc", "def", "ghi", "jkl", "mno", "abc"}, 3)};
-  StaticDescriptor<1> statDesc;
-  Descriptor &res{statDesc.descriptor()};
+  StaticDescriptor<1> statDesc[2];
+  Descriptor &res{statDesc[0].descriptor()};
   RTNAME(MaxvalCharacter)(res, *array, __FILE__, __LINE__);
   EXPECT_EQ(res.rank(), 0);
   EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Character, 1}.raw()));
@@ -202,6 +202,30 @@ TEST(Reductions, Character) {
   EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
   EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
   res.Destroy();
+  static const char targetChar[]{"abc"};
+  Descriptor &target{statDesc[1].descriptor()};
+  target.Establish(1, std::strlen(targetChar),
+      const_cast<void *>(static_cast<const void *>(&targetChar)), 0, nullptr,
+      CFI_attribute_pointer);
+  RTNAME(Findloc)
+  (res, *array, target, /*KIND=*/4, __FILE__, __LINE__, nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  RTNAME(Findloc)
+  (res, *array, target, /*KIND=*/4, __FILE__, __LINE__, nullptr, /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 3);
+  res.Destroy();
 }
 
 TEST(Reductions, Logical) {
@@ -211,9 +235,10 @@ TEST(Reductions, Logical) {
   ASSERT_EQ(array->ElementBytes(), std::size_t{4});
   EXPECT_EQ(RTNAME(All)(*array, __FILE__, __LINE__), false);
   EXPECT_EQ(RTNAME(Any)(*array, __FILE__, __LINE__), true);
+  EXPECT_EQ(RTNAME(Parity)(*array, __FILE__, __LINE__), false);
   EXPECT_EQ(RTNAME(Count)(*array, __FILE__, __LINE__), 2);
-  StaticDescriptor<2> statDesc;
-  Descriptor &res{statDesc.descriptor()};
+  StaticDescriptor<2> statDesc[2];
+  Descriptor &res{statDesc[0].descriptor()};
   RTNAME(AllDim)(res, *array, /*DIM=*/1, __FILE__, __LINE__);
   EXPECT_EQ(res.rank(), 1);
   EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
@@ -246,6 +271,22 @@ TEST(Reductions, Logical) {
   EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
   EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
   res.Destroy();
+  RTNAME(ParityDim)(res, *array, /*DIM=*/1, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 0);
+  res.Destroy();
+  RTNAME(ParityDim)(res, *array, /*DIM=*/2, __FILE__, __LINE__);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Logical, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
   RTNAME(CountDim)(res, *array, /*DIM=*/1, /*KIND=*/4, __FILE__, __LINE__);
   EXPECT_EQ(res.rank(), 1);
   EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
@@ -262,4 +303,123 @@ TEST(Reductions, Logical) {
   EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int64_t>(0), 1);
   EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int64_t>(1), 1);
   res.Destroy();
+  bool boolValue{false};
+  Descriptor &target{statDesc[1].descriptor()};
+  target.Establish(TypeCategory::Logical, 1, static_cast<void *>(&boolValue), 0,
+      nullptr, CFI_attribute_pointer);
+  RTNAME(Findloc)
+  (res, *array, target, /*KIND=*/4, __FILE__, __LINE__, nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 1);
+  res.Destroy();
+  boolValue = true;
+  RTNAME(Findloc)
+  (res, *array, target, /*KIND=*/4, __FILE__, __LINE__, nullptr, /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 4}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<std::int32_t>(1), 2);
+  res.Destroy();
+}
+
+TEST(Reductions, FindlocNumeric) {
+  std::vector<int> shape{2, 3};
+  auto realArray{MakeArray<TypeCategory::Real, 8>(shape,
+      std::vector<double>{0.0, -0.0, 1.0, 3.14,
+          std::numeric_limits<double>::quiet_NaN(),
+          std::numeric_limits<double>::infinity()})};
+  ASSERT_EQ(realArray->ElementBytes(), sizeof(double));
+  StaticDescriptor<2> statDesc[2];
+  Descriptor &res{statDesc[0].descriptor()};
+  // Find the first zero
+  Descriptor &target{statDesc[1].descriptor()};
+  double value{0.0};
+  target.Establish(TypeCategory::Real, 8, static_cast<void *>(&value), 0,
+      nullptr, CFI_attribute_pointer);
+  RTNAME(Findloc)
+  (res, *realArray, target, 8, __FILE__, __LINE__, nullptr, /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 1);
+  res.Destroy();
+  // Find last zero (even though it's negative)
+  RTNAME(Findloc)
+  (res, *realArray, target, 8, __FILE__, __LINE__, nullptr, /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 1);
+  res.Destroy();
+  // Find the +Inf
+  value = std::numeric_limits<double>::infinity();
+  RTNAME(Findloc)
+  (res, *realArray, target, 8, __FILE__, __LINE__, nullptr, /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 3);
+  res.Destroy();
+  // Ensure that we can't find a NaN
+  value = std::numeric_limits<double>::quiet_NaN();
+  RTNAME(Findloc)
+  (res, *realArray, target, 8, __FILE__, __LINE__, nullptr, /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 0);
+  res.Destroy();
+  // Find a value of a distinct type
+  int intValue{1};
+  target.Establish(TypeCategory::Integer, 4, static_cast<void *>(&intValue), 0,
+      nullptr, CFI_attribute_pointer);
+  RTNAME(Findloc)
+  (res, *realArray, target, 8, __FILE__, __LINE__, nullptr, /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 2);
+  res.Destroy();
+  // Partial reductions
+  value = 1.0;
+  target.Establish(TypeCategory::Real, 8, static_cast<void *>(&value), 0,
+      nullptr, CFI_attribute_pointer);
+  RTNAME(FindlocDim)
+  (res, *realArray, target, 8, /*DIM=*/1, __FILE__, __LINE__, nullptr,
+      /*BACK=*/false);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 3);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 0);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 1);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(2), 0);
+  res.Destroy();
+  RTNAME(FindlocDim)
+  (res, *realArray, target, 8, /*DIM=*/2, __FILE__, __LINE__, nullptr,
+      /*BACK=*/true);
+  EXPECT_EQ(res.rank(), 1);
+  EXPECT_EQ(res.type().raw(), (TypeCode{TypeCategory::Integer, 8}.raw()));
+  EXPECT_EQ(res.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(res.GetDimension(0).UpperBound(), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(0), 2);
+  EXPECT_EQ(*res.ZeroBasedIndexedElement<SubscriptValue>(1), 0);
+  res.Destroy();
 }