[flang-commits] [flang] [flang][runtime] Distinguish VALUE from non-VALUE operations in REDUCE (PR #95297)

Peter Klausler via flang-commits flang-commits at lists.llvm.org
Wed Jun 12 12:43:41 PDT 2024


https://github.com/klausler created https://github.com/llvm/llvm-project/pull/95297

Accommodate operations with VALUE dummy arguments in the runtime support for the REDUCE intrinsic function by splitting most entry points into Reduce...Ref and Reduce...Value variants.

Further work will be needed in lowering to call the ...Value entry points.

From 941ba5cd6f47f39e7f439e3e9ffb0d07aaf795a6 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Wed, 12 Jun 2024 12:40:31 -0700
Subject: [PATCH] [flang][runtime] Distinguish VALUE from non-VALUE operations
 in REDUCE

Accommodate operations with VALUE dummy arguments in the runtime support
for the REDUCE intrinsic function by splitting most entry points into
Reduce...Ref and Reduce...Value variants.

Further work will be needed in lowering to call the ...Value entry points.
---
 .../Optimizer/Builder/Runtime/RTBuilder.h     |  24 +-
 flang/include/flang/Runtime/reduce.h          | 425 ++++++++----
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 130 ++--
 flang/runtime/reduce.cpp                      | 615 ++++++++++++++----
 flang/test/Lower/Intrinsics/reduce.f90        |  66 +-
 flang/unittests/Runtime/Reduction.cpp         |   9 +-
 6 files changed, 906 insertions(+), 363 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
index 99161c57fbe28..809d5b8d569dc 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
@@ -53,10 +53,10 @@ namespace fir::runtime {
 using TypeBuilderFunc = mlir::Type (*)(mlir::MLIRContext *);
 using FuncTypeBuilderFunc = mlir::FunctionType (*)(mlir::MLIRContext *);
 
-#define REDUCTION_OPERATION_MODEL(T)                                           \
+#define REDUCTION_REF_OPERATION_MODEL(T)                                       \
   template <>                                                                  \
   constexpr TypeBuilderFunc                                                    \
-  getModel<Fortran::runtime::ReductionOperation<T>>() {                        \
+  getModel<Fortran::runtime::ReferenceReductionOperation<T>>() {               \
     return [](mlir::MLIRContext *context) -> mlir::Type {                      \
       TypeBuilderFunc f{getModel<T>()};                                        \
       auto refTy = fir::ReferenceType::get(f(context));                        \
@@ -480,18 +480,18 @@ constexpr TypeBuilderFunc getModel<void>() {
   };
 }
 
-REDUCTION_OPERATION_MODEL(std::int8_t)
-REDUCTION_OPERATION_MODEL(std::int16_t)
-REDUCTION_OPERATION_MODEL(std::int32_t)
-REDUCTION_OPERATION_MODEL(std::int64_t)
-REDUCTION_OPERATION_MODEL(Fortran::common::int128_t)
+REDUCTION_REF_OPERATION_MODEL(std::int8_t)
+REDUCTION_REF_OPERATION_MODEL(std::int16_t)
+REDUCTION_REF_OPERATION_MODEL(std::int32_t)
+REDUCTION_REF_OPERATION_MODEL(std::int64_t)
+REDUCTION_REF_OPERATION_MODEL(Fortran::common::int128_t)
 
-REDUCTION_OPERATION_MODEL(float)
-REDUCTION_OPERATION_MODEL(double)
-REDUCTION_OPERATION_MODEL(long double)
+REDUCTION_REF_OPERATION_MODEL(float)
+REDUCTION_REF_OPERATION_MODEL(double)
+REDUCTION_REF_OPERATION_MODEL(long double)
 
-REDUCTION_OPERATION_MODEL(std::complex<float>)
-REDUCTION_OPERATION_MODEL(std::complex<double>)
+REDUCTION_REF_OPERATION_MODEL(std::complex<float>)
+REDUCTION_REF_OPERATION_MODEL(std::complex<double>)
 
 REDUCTION_CHAR_OPERATION_MODEL(char)
 REDUCTION_CHAR_OPERATION_MODEL(char16_t)
diff --git a/flang/include/flang/Runtime/reduce.h b/flang/include/flang/Runtime/reduce.h
index 975aa6dea305f..60f54c393b4bb 100644
--- a/flang/include/flang/Runtime/reduce.h
+++ b/flang/include/flang/Runtime/reduce.h
@@ -28,7 +28,9 @@ namespace Fortran::runtime {
 
 class Descriptor;
 
-template <typename T> using ReductionOperation = T (*)(const T *, const T *);
+template <typename T>
+using ReferenceReductionOperation = T (*)(const T *, const T *);
+template <typename T> using ValueReductionOperation = T (*)(T, T);
 template <typename CHAR>
 using ReductionCharOperation = void (*)(CHAR *hiddenResult,
     std::size_t resultLen, const CHAR *x, const CHAR *y, std::size_t xLen,
@@ -38,185 +40,364 @@ using ReductionDerivedTypeOperation = void (*)(
 
 extern "C" {
 
-std::int8_t RTDECL(ReduceInteger1)(const Descriptor &,
-    ReductionOperation<std::int8_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceInteger1Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int8_t>, const char *source, int line, int dim,
+std::int8_t RTDECL(ReduceInteger1Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int8_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int8_t *identity = nullptr, bool ordered = true);
+std::int8_t RTDECL(ReduceInteger1Value)(const Descriptor &,
+    ValueReductionOperation<std::int8_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int8_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger1DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int8_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int8_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger1DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int8_t>, const char *source, int line, int dim,
     const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr,
     bool ordered = true);
-std::int16_t RTDECL(ReduceInteger2)(const Descriptor &,
-    ReductionOperation<std::int16_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceInteger2Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int16_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
-    bool ordered = true);
-std::int32_t RTDECL(ReduceInteger4)(const Descriptor &,
-    ReductionOperation<std::int32_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceInteger4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int32_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
-    bool ordered = true);
-std::int64_t RTDECL(ReduceInteger8)(const Descriptor &,
-    ReductionOperation<std::int64_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceInteger8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int64_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
-    bool ordered = true);
+std::int16_t RTDECL(ReduceInteger2Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int16_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+std::int16_t RTDECL(ReduceInteger2Value)(const Descriptor &,
+    ValueReductionOperation<std::int16_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger2DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int16_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger2DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int16_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+std::int32_t RTDECL(ReduceInteger4Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int32_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+std::int32_t RTDECL(ReduceInteger4Value)(const Descriptor &,
+    ValueReductionOperation<std::int32_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int32_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger4DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int32_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+std::int64_t RTDECL(ReduceInteger8Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int64_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
+std::int64_t RTDECL(ReduceInteger8Value)(const Descriptor &,
+    ValueReductionOperation<std::int64_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int64_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger8DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int64_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTDECL(ReduceInteger16)(const Descriptor &,
-    ReductionOperation<common::int128_t>, const char *source, int line,
+common::int128_t RTDECL(ReduceInteger16Ref)(const Descriptor &,
+    ReferenceReductionOperation<common::int128_t>, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr,
     const common::int128_t *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<common::int128_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr,
+common::int128_t RTDECL(ReduceInteger16Value)(const Descriptor &,
+    ValueReductionOperation<common::int128_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const common::int128_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger16DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<common::int128_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const common::int128_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger16DimValue)(Descriptor &result,
+    const Descriptor &array, ValueReductionOperation<common::int128_t>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
     const common::int128_t *identity = nullptr, bool ordered = true);
 #endif
 
 // REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
-float RTDECL(ReduceReal2)(const Descriptor &, ReductionOperation<float>,
-    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+float RTDECL(ReduceReal2Ref)(const Descriptor &,
+    ReferenceReductionOperation<float>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
     const float *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceReal2Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<float>, const char *source, int line, int dim,
+float RTDECL(ReduceReal2Value)(const Descriptor &,
+    ValueReductionOperation<float>, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr, const float *identity = nullptr,
     bool ordered = true);
-float RTDECL(ReduceReal3)(const Descriptor &, ReductionOperation<float>,
-    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+void RTDECL(ReduceReal2DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<float>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const float *identity = nullptr,
+    bool ordered = true);
+void RTDECL(ReduceReal2DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<float>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const float *identity = nullptr,
+    bool ordered = true);
+float RTDECL(ReduceReal3Ref)(const Descriptor &,
+    ReferenceReductionOperation<float>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
     const float *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceReal3Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<float>, const char *source, int line, int dim,
+float RTDECL(ReduceReal3Value)(const Descriptor &,
+    ValueReductionOperation<float>, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr, const float *identity = nullptr,
     bool ordered = true);
-float RTDECL(ReduceReal4)(const Descriptor &, ReductionOperation<float>,
-    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+void RTDECL(ReduceReal3DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<float>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const float *identity = nullptr,
+    bool ordered = true);
+void RTDECL(ReduceReal3DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<float>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const float *identity = nullptr,
+    bool ordered = true);
+float RTDECL(ReduceReal4Ref)(const Descriptor &,
+    ReferenceReductionOperation<float>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
     const float *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceReal4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<float>, const char *source, int line, int dim,
+float RTDECL(ReduceReal4Value)(const Descriptor &,
+    ValueReductionOperation<float>, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr, const float *identity = nullptr,
     bool ordered = true);
-double RTDECL(ReduceReal8)(const Descriptor &, ReductionOperation<double>,
-    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+void RTDECL(ReduceReal4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<float>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const float *identity = nullptr,
+    bool ordered = true);
+void RTDECL(ReduceReal4DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<float>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const float *identity = nullptr,
+    bool ordered = true);
+double RTDECL(ReduceReal8Ref)(const Descriptor &,
+    ReferenceReductionOperation<double>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
     const double *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceReal8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<double>, const char *source, int line, int dim,
+double RTDECL(ReduceReal8Value)(const Descriptor &,
+    ValueReductionOperation<double>, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr, const double *identity = nullptr,
     bool ordered = true);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(ReduceReal10)(const Descriptor &,
-    ReductionOperation<long double>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const long double *identity = nullptr,
+void RTDECL(ReduceReal8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<double>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const double *identity = nullptr,
     bool ordered = true);
-void RTDECL(ReduceReal10Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<long double>, const char *source, int line, int dim,
+void RTDECL(ReduceReal8DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<double>, const char *source, int line, int dim,
+    const Descriptor *mask = nullptr, const double *identity = nullptr,
+    bool ordered = true);
+#if LDBL_MANT_DIG == 64
+long double RTDECL(ReduceReal10Ref)(const Descriptor &,
+    ReferenceReductionOperation<long double>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const long double *identity = nullptr, bool ordered = true);
+long double RTDECL(ReduceReal10Value)(const Descriptor &,
+    ValueReductionOperation<long double>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const long double *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal10DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<long double>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const long double *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal10DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<long double>, const char *source, int line, int dim,
     const Descriptor *mask = nullptr, const long double *identity = nullptr,
     bool ordered = true);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTDECL(ReduceReal16)(const Descriptor &,
-    ReductionOperation<CppFloat128Type>, const char *source, int line,
+CppFloat128Type RTDECL(ReduceReal16Ref)(const Descriptor &,
+    ReferenceReductionOperation<CppFloat128Type>, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr,
     const CppFloat128Type *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceReal16Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<CppFloat128Type>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const CppFloat128Type *identity = nullptr,
-    bool ordered = true);
+CppFloat128Type RTDECL(ReduceReal16Value)(const Descriptor &,
+    ValueReductionOperation<CppFloat128Type>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const CppFloat128Type *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal16DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<CppFloat128Type>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const CppFloat128Type *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal16DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<CppFloat128Type>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const CppFloat128Type *identity = nullptr, bool ordered = true);
 #endif
 
-void RTDECL(CppReduceComplex2)(std::complex<float> &, const Descriptor &,
-    ReductionOperation<std::complex<float>>, const char *source, int line,
+void RTDECL(CppReduceComplex2Ref)(std::complex<float> &, const Descriptor &,
+    ReferenceReductionOperation<std::complex<float>>, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr,
+    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex2Value)(std::complex<float> &, const Descriptor &,
+    ValueReductionOperation<std::complex<float>>, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr,
     const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex2Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<float>>, const char *source, int line,
-    int dim, const Descriptor *mask = nullptr,
+void RTDECL(CppReduceComplex2DimRef)(Descriptor &result,
+    const Descriptor &array, ReferenceReductionOperation<std::complex<float>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex2DimValue)(Descriptor &result,
+    const Descriptor &array, ValueReductionOperation<std::complex<float>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
     const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex3)(std::complex<float> &, const Descriptor &,
-    ReductionOperation<std::complex<float>>, const char *source, int line,
+void RTDECL(CppReduceComplex3Ref)(std::complex<float> &, const Descriptor &,
+    ReferenceReductionOperation<std::complex<float>>, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr,
+    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex3Value)(std::complex<float> &, const Descriptor &,
+    ValueReductionOperation<std::complex<float>>, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr,
     const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex3Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<float>>, const char *source, int line,
-    int dim, const Descriptor *mask = nullptr,
+void RTDECL(CppReduceComplex3DimRef)(Descriptor &result,
+    const Descriptor &array, ReferenceReductionOperation<std::complex<float>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex3DimValue)(Descriptor &result,
+    const Descriptor &array, ValueReductionOperation<std::complex<float>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
     const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex4)(std::complex<float> &, const Descriptor &,
-    ReductionOperation<std::complex<float>>, const char *source, int line,
+void RTDECL(CppReduceComplex4Ref)(std::complex<float> &, const Descriptor &,
+    ReferenceReductionOperation<std::complex<float>>, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr,
+    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex4Value)(std::complex<float> &, const Descriptor &,
+    ValueReductionOperation<std::complex<float>>, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr,
     const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<float>>, const char *source, int line,
-    int dim, const Descriptor *mask = nullptr,
+void RTDECL(CppReduceComplex4DimRef)(Descriptor &result,
+    const Descriptor &array, ReferenceReductionOperation<std::complex<float>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
     const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex8)(std::complex<double> &, const Descriptor &,
-    ReductionOperation<std::complex<double>>, const char *source, int line,
+void RTDECL(CppReduceComplex4DimValue)(Descriptor &result,
+    const Descriptor &array, ValueReductionOperation<std::complex<float>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex8Ref)(std::complex<double> &, const Descriptor &,
+    ReferenceReductionOperation<std::complex<double>>, const char *source,
+    int line, int dim = 0, const Descriptor *mask = nullptr,
+    const std::complex<double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex8Value)(std::complex<double> &, const Descriptor &,
+    ValueReductionOperation<std::complex<double>>, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr,
     const std::complex<double> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<double>>, const char *source, int line,
-    int dim, const Descriptor *mask = nullptr,
+void RTDECL(CppReduceComplex8DimRef)(Descriptor &result,
+    const Descriptor &array, ReferenceReductionOperation<std::complex<double>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const std::complex<double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex8DimValue)(Descriptor &result,
+    const Descriptor &array, ValueReductionOperation<std::complex<double>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
     const std::complex<double> *identity = nullptr, bool ordered = true);
 #if LDBL_MANT_DIG == 64
-void RTDECL(CppReduceComplex10)(std::complex<long double> &, const Descriptor &,
-    ReductionOperation<std::complex<long double>>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
+void RTDECL(CppReduceComplex10Ref)(std::complex<long double> &,
+    const Descriptor &, ReferenceReductionOperation<std::complex<long double>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
     const std::complex<long double> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex10Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<long double>>, const char *source, int line,
-    int dim, const Descriptor *mask = nullptr,
+void RTDECL(CppReduceComplex10Value)(std::complex<long double> &,
+    const Descriptor &, ValueReductionOperation<std::complex<long double>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const std::complex<long double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex10DimRef)(Descriptor &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<long double>>, const char *source,
+    int line, int dim, const Descriptor *mask = nullptr,
+    const std::complex<long double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex10DimValue)(Descriptor &result,
+    const Descriptor &array, ValueReductionOperation<std::complex<long double>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
     const std::complex<long double> *identity = nullptr, bool ordered = true);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDECL(CppReduceComplex16)(std::complex<CppFloat128Type> &,
-    const Descriptor &, ReductionOperation<std::complex<CppFloat128Type>>,
+void RTDECL(CppReduceComplex16Ref)(std::complex<CppFloat128Type> &,
+    const Descriptor &,
+    ReferenceReductionOperation<std::complex<CppFloat128Type>>,
     const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
     const std::complex<CppFloat128Type> *identity = nullptr,
     bool ordered = true);
-void RTDECL(CppReduceComplex16Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<CppFloat128Type>>, const char *source,
+void RTDECL(CppReduceComplex16Value)(std::complex<CppFloat128Type> &,
+    const Descriptor &, ValueReductionOperation<std::complex<CppFloat128Type>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const std::complex<CppFloat128Type> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex16DimRef)(Descriptor &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<CppFloat128Type>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const std::complex<CppFloat128Type> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex16DimValue)(Descriptor &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<CppFloat128Type>>, const char *source,
     int line, int dim, const Descriptor *mask = nullptr,
     const std::complex<CppFloat128Type> *identity = nullptr,
     bool ordered = true);
 #endif
 
-bool RTDECL(ReduceLogical1)(const Descriptor &, ReductionOperation<std::int8_t>,
-    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+bool RTDECL(ReduceLogical1Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int8_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int8_t *identity = nullptr, bool ordered = true);
+bool RTDECL(ReduceLogical1Value)(const Descriptor &,
+    ValueReductionOperation<std::int8_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int8_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical1DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int8_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
     const std::int8_t *identity = nullptr, bool ordered = true);
-void RTDECL(ReduceLogical1Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int8_t>, const char *source, int line, int dim,
+void RTDECL(ReduceLogical1DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int8_t>, const char *source, int line, int dim,
     const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr,
     bool ordered = true);
-bool RTDECL(ReduceLogical2)(const Descriptor &,
-    ReductionOperation<std::int16_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceLogical2Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int16_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
-    bool ordered = true);
-bool RTDECL(ReduceLogical4)(const Descriptor &,
-    ReductionOperation<std::int32_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceLogical4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int32_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
-    bool ordered = true);
-bool RTDECL(ReduceLogical8)(const Descriptor &,
-    ReductionOperation<std::int64_t>, const char *source, int line, int dim = 0,
-    const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
-    bool ordered = true);
-void RTDECL(ReduceLogical8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int64_t>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
-    bool ordered = true);
+bool RTDECL(ReduceLogical2Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int16_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+bool RTDECL(ReduceLogical2Value)(const Descriptor &,
+    ValueReductionOperation<std::int16_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical2DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int16_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical2DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int16_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int16_t *identity = nullptr, bool ordered = true);
+bool RTDECL(ReduceLogical4Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int32_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+bool RTDECL(ReduceLogical4Value)(const Descriptor &,
+    ValueReductionOperation<std::int32_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int32_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical4DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int32_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int32_t *identity = nullptr, bool ordered = true);
+bool RTDECL(ReduceLogical8Ref)(const Descriptor &,
+    ReferenceReductionOperation<std::int64_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
+bool RTDECL(ReduceLogical8Value)(const Descriptor &,
+    ValueReductionOperation<std::int64_t>, const char *source, int line,
+    int dim = 0, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int64_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical8DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int64_t>, const char *source, int line,
+    int dim, const Descriptor *mask = nullptr,
+    const std::int64_t *identity = nullptr, bool ordered = true);
 
 void RTDECL(ReduceChar1)(char *result, const Descriptor &array,
     ReductionCharOperation<char>, const char *source, int line, int dim = 0,
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index 4b086a98de47b..c306b50eb5698 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -469,7 +469,8 @@ struct ForcedIParity16 {
 
 /// Placeholder for real*10 version of Reduce Intrinsic
 struct ForcedReduceReal10 {
-  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ReduceReal10));
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceReal10Ref));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::FloatType::getF80(ctx);
@@ -488,7 +489,8 @@ struct ForcedReduceReal10 {
 
 /// Placeholder for real*16 version of Reduce Intrinsic
 struct ForcedReduceReal16 {
-  static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ReduceReal16));
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceReal16Ref));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::FloatType::getF128(ctx);
@@ -508,7 +510,7 @@ struct ForcedReduceReal16 {
 /// Placeholder for DIM real*10 version of Reduce Intrinsic
 struct ForcedReduceReal10Dim {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(ReduceReal10Dim));
+      ExpandAndQuoteKey(RTNAME(ReduceReal10DimRef));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::FloatType::getF80(ctx);
@@ -530,7 +532,7 @@ struct ForcedReduceReal10Dim {
 /// Placeholder for DIM real*16 version of Reduce Intrinsic
 struct ForcedReduceReal16Dim {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(ReduceReal16Dim));
+      ExpandAndQuoteKey(RTNAME(ReduceReal16DimRef));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::FloatType::getF128(ctx);
@@ -552,7 +554,7 @@ struct ForcedReduceReal16Dim {
 /// Placeholder for integer*16 version of Reduce Intrinsic
 struct ForcedReduceInteger16 {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(ReduceInteger16));
+      ExpandAndQuoteKey(RTNAME(ReduceInteger16Ref));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::IntegerType::get(ctx, 128);
@@ -572,7 +574,7 @@ struct ForcedReduceInteger16 {
 /// Placeholder for DIM integer*16 version of Reduce Intrinsic
 struct ForcedReduceInteger16Dim {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim));
+      ExpandAndQuoteKey(RTNAME(ReduceInteger16DimRef));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::IntegerType::get(ctx, 128);
@@ -594,7 +596,7 @@ struct ForcedReduceInteger16Dim {
 /// Placeholder for complex(10) version of Reduce Intrinsic
 struct ForcedReduceComplex10 {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(CppReduceComplex10));
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex10Ref));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
@@ -615,7 +617,7 @@ struct ForcedReduceComplex10 {
 /// Placeholder for Dim complex(10) version of Reduce Intrinsic
 struct ForcedReduceComplex10Dim {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(CppReduceComplex10Dim));
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex10DimRef));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
@@ -637,7 +639,7 @@ struct ForcedReduceComplex10Dim {
 /// Placeholder for complex(16) version of Reduce Intrinsic
 struct ForcedReduceComplex16 {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(CppReduceComplex16));
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex16Ref));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
@@ -658,7 +660,7 @@ struct ForcedReduceComplex16 {
 /// Placeholder for Dim complex(16) version of Reduce Intrinsic
 struct ForcedReduceComplex16Dim {
   static constexpr const char *name =
-      ExpandAndQuoteKey(RTNAME(CppReduceComplex16Dim));
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex16DimRef));
   static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
     return [](mlir::MLIRContext *ctx) {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
@@ -1471,17 +1473,17 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
   fir::factory::CharacterExprHelper charHelper{builder, loc};
 
   if (eleTy == fir::ComplexType::get(ctx, 2))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2Ref)>(loc,
+                                                                       builder);
   else if (eleTy == fir::ComplexType::get(ctx, 3))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3Ref)>(loc,
+                                                                       builder);
   else if (eleTy == fir::ComplexType::get(ctx, 4))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4Ref)>(loc,
+                                                                       builder);
   else if (eleTy == fir::ComplexType::get(ctx, 8))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8Ref)>(loc,
+                                                                       builder);
   else if (eleTy == fir::ComplexType::get(ctx, 10))
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex10>(loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 16))
@@ -1529,35 +1531,43 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder,
          "expect real, interger or logical");
 
   if (eleTy.isF16())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2Ref)>(loc, builder);
   else if (eleTy.isBF16())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3Ref)>(loc, builder);
   else if (eleTy.isF32())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4Ref)>(loc, builder);
   else if (eleTy.isF64())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8Ref)>(loc, builder);
   else if (eleTy.isF80())
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal10>(loc, builder);
   else if (eleTy.isF128())
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal16>(loc, builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1Ref)>(loc, builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2Ref)>(loc, builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4Ref)>(loc, builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8Ref)>(loc, builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)))
     func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16>(loc, builder);
   else if (eleTy == fir::LogicalType::get(ctx, 1))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1Ref)>(loc, builder);
   else if (eleTy == fir::LogicalType::get(ctx, 2))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2Ref)>(loc, builder);
   else if (eleTy == fir::LogicalType::get(ctx, 4))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4Ref)>(loc, builder);
   else if (eleTy == fir::LogicalType::get(ctx, 8))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8Ref)>(loc, builder);
   else
     fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE");
 
@@ -1586,59 +1596,63 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc,
   fir::factory::CharacterExprHelper charHelper{builder, loc};
 
   if (eleTy.isF16())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2Dim)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2DimRef)>(loc, builder);
   else if (eleTy.isBF16())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3Dim)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3DimRef)>(loc, builder);
   else if (eleTy.isF32())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4Dim)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4DimRef)>(loc, builder);
   else if (eleTy.isF64())
-    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8Dim)>(loc, builder);
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8DimRef)>(loc, builder);
   else if (eleTy.isF80())
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal10Dim>(loc, builder);
   else if (eleTy.isF128())
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal16Dim>(loc, builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1DimRef)>(loc,
+                                                                       builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2DimRef)>(loc,
+                                                                       builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4DimRef)>(loc,
+                                                                       builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8DimRef)>(loc,
+                                                                       builder);
   else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)))
     func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16Dim>(loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 2))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2Dim)>(loc,
-                                                                       builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2DimRef)>(
+        loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 3))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3Dim)>(loc,
-                                                                       builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3DimRef)>(
+        loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 4))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4Dim)>(loc,
-                                                                       builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4DimRef)>(
+        loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 8))
-    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8Dim)>(loc,
-                                                                       builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8DimRef)>(
+        loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 10))
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex10Dim>(loc, builder);
   else if (eleTy == fir::ComplexType::get(ctx, 16))
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex16Dim>(loc, builder);
   else if (eleTy == fir::LogicalType::get(ctx, 1))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1DimRef)>(loc,
+                                                                       builder);
   else if (eleTy == fir::LogicalType::get(ctx, 2))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2DimRef)>(loc,
+                                                                       builder);
   else if (eleTy == fir::LogicalType::get(ctx, 4))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4DimRef)>(loc,
+                                                                       builder);
   else if (eleTy == fir::LogicalType::get(ctx, 8))
-    func =
-        fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8Dim)>(loc, builder);
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8DimRef)>(loc,
+                                                                       builder);
   else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceCharacter1Dim)>(loc,
                                                                       builder);
diff --git a/flang/runtime/reduce.cpp b/flang/runtime/reduce.cpp
index f8a5221a1ebf7..2f4bb6ea159cf 100644
--- a/flang/runtime/reduce.cpp
+++ b/flang/runtime/reduce.cpp
@@ -16,11 +16,12 @@
 
 namespace Fortran::runtime {
 
-template <typename T> class ReduceAccumulator {
+template <typename T, bool isByValue> class ReduceAccumulator {
 public:
-  RT_API_ATTRS ReduceAccumulator(const Descriptor &array,
-      ReductionOperation<T> operation, const T *identity,
-      Terminator &terminator)
+  using Operation = std::conditional_t<isByValue, ValueReductionOperation<T>,
+      ReferenceReductionOperation<T>>;
+  RT_API_ATTRS ReduceAccumulator(const Descriptor &array, Operation operation,
+      const T *identity, Terminator &terminator)
       : array_{array}, operation_{operation}, identity_{identity},
         terminator_{terminator} {}
   RT_API_ATTRS void Reinitialize() { result_.reset(); }
@@ -28,7 +29,11 @@ template <typename T> class ReduceAccumulator {
   RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     const auto *operand{array_.Element<A>(at)};
     if (result_) {
-      result_ = operation_(&*result_, operand);
+      if constexpr (isByValue) {
+        result_ = operation_(*result_, *operand);
+      } else {
+        result_ = operation_(&*result_, operand);
+      }
     } else {
       result_ = *operand;
     }
@@ -48,7 +53,7 @@ template <typename T> class ReduceAccumulator {
 private:
   const Descriptor &array_;
   common::optional<T> result_;
-  ReductionOperation<T> operation_;
+  Operation operation_;
   const T *identity_{nullptr};
   Terminator &terminator_;
 };
@@ -104,104 +109,213 @@ class BufferedReduceAccumulator {
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
-std::int8_t RTDEF(ReduceInteger1)(const Descriptor &array,
-    ReductionOperation<std::int8_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int8_t *identity,
+std::int8_t RTDEF(ReduceInteger1Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Integer, 1>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<std::int8_t, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+std::int8_t RTDEF(ReduceInteger1Value)(const Descriptor &array,
+    ValueReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Integer, 1>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::int8_t>{array, operation, identity, terminator},
+      ReduceAccumulator<std::int8_t, true>{
+          array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceInteger1Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int8_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int8_t *identity,
+void RTDEF(ReduceInteger1DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::int8_t, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Integer, 1>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceInteger1DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::int8_t>;
+  using Accumulator = ReduceAccumulator<std::int8_t, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Integer, 1>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-std::int16_t RTDEF(ReduceInteger2)(const Descriptor &array,
-    ReductionOperation<std::int16_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int16_t *identity,
+std::int16_t RTDEF(ReduceInteger2Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Integer, 2>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<std::int16_t, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+std::int16_t RTDEF(ReduceInteger2Value)(const Descriptor &array,
+    ValueReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Integer, 2>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::int16_t>{array, operation, identity, terminator},
+      ReduceAccumulator<std::int16_t, true>{
+          array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceInteger2Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int16_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int16_t *identity,
+void RTDEF(ReduceInteger2DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::int16_t, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Integer, 2>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceInteger2DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::int16_t>;
+  using Accumulator = ReduceAccumulator<std::int16_t, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Integer, 2>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-std::int32_t RTDEF(ReduceInteger4)(const Descriptor &array,
-    ReductionOperation<std::int32_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int32_t *identity,
+std::int32_t RTDEF(ReduceInteger4Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Integer, 4>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<std::int32_t, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+std::int32_t RTDEF(ReduceInteger4Value)(const Descriptor &array,
+    ValueReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Integer, 4>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::int32_t>{array, operation, identity, terminator},
+      ReduceAccumulator<std::int32_t, true>{
+          array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceInteger4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int32_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int32_t *identity,
+void RTDEF(ReduceInteger4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::int32_t, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Integer, 4>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceInteger4DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::int32_t>;
+  using Accumulator = ReduceAccumulator<std::int32_t, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Integer, 4>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-std::int64_t RTDEF(ReduceInteger8)(const Descriptor &array,
-    ReductionOperation<std::int64_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int64_t *identity,
+std::int64_t RTDEF(ReduceInteger8Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Integer, 8>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<std::int64_t, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+std::int64_t RTDEF(ReduceInteger8Value)(const Descriptor &array,
+    ValueReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Integer, 8>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::int64_t>{array, operation, identity, terminator},
+      ReduceAccumulator<std::int64_t, true>{
+          array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceInteger8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int64_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int64_t *identity,
+void RTDEF(ReduceInteger8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::int64_t, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Integer, 8>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceInteger8DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::int64_t>;
+  using Accumulator = ReduceAccumulator<std::int64_t, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Integer, 8>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #ifdef __SIZEOF_INT128__
-common::int128_t RTDEF(ReduceInteger16)(const Descriptor &array,
-    ReductionOperation<common::int128_t> operation, const char *source,
+common::int128_t RTDEF(ReduceInteger16Ref)(const Descriptor &array,
+    ReferenceReductionOperation<common::int128_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const common::int128_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Integer, 16>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<common::int128_t, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+common::int128_t RTDEF(ReduceInteger16Value)(const Descriptor &array,
+    ValueReductionOperation<common::int128_t> operation, const char *source,
     int line, int dim, const Descriptor *mask, const common::int128_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Integer, 16>(array, source, line, dim,
       mask,
-      ReduceAccumulator<common::int128_t>{
+      ReduceAccumulator<common::int128_t, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<common::int128_t> operation, const char *source,
+void RTDEF(ReduceInteger16DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<common::int128_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const common::int128_t *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<common::int128_t, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Integer, 16>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceInteger16DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<common::int128_t> operation, const char *source,
     int line, int dim, const Descriptor *mask, const common::int128_t *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<common::int128_t>;
+  using Accumulator = ReduceAccumulator<common::int128_t, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Integer, 16>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
@@ -209,231 +323,464 @@ void RTDEF(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array,
 #endif
 
 // TODO: real/complex(2 & 3)
-float RTDEF(ReduceReal4)(const Descriptor &array,
-    ReductionOperation<float> operation, const char *source, int line, int dim,
-    const Descriptor *mask, const float *identity, bool ordered) {
+float RTDEF(ReduceReal4Ref)(const Descriptor &array,
+    ReferenceReductionOperation<float> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const float *identity, bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Real, 4>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<float, false>{array, operation, identity, terminator},
+      "REDUCE");
+}
+float RTDEF(ReduceReal4Value)(const Descriptor &array,
+    ValueReductionOperation<float> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const float *identity, bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Real, 4>(array, source, line, dim,
-      mask, ReduceAccumulator<float>{array, operation, identity, terminator},
+      mask,
+      ReduceAccumulator<float, true>{array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceReal4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<float> operation, const char *source, int line, int dim,
-    const Descriptor *mask, const float *identity, bool ordered) {
+void RTDEF(ReduceReal4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<float> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const float *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<float, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Real, 4>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceReal4DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<float> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const float *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<float>;
+  using Accumulator = ReduceAccumulator<float, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Real, 4>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-double RTDEF(ReduceReal8)(const Descriptor &array,
-    ReductionOperation<double> operation, const char *source, int line, int dim,
-    const Descriptor *mask, const double *identity, bool ordered) {
+double RTDEF(ReduceReal8Ref)(const Descriptor &array,
+    ReferenceReductionOperation<double> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const double *identity, bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Real, 8>(array, source, line, dim,
-      mask, ReduceAccumulator<double>{array, operation, identity, terminator},
+      mask,
+      ReduceAccumulator<double, false>{array, operation, identity, terminator},
+      "REDUCE");
+}
+double RTDEF(ReduceReal8Value)(const Descriptor &array,
+    ValueReductionOperation<double> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const double *identity, bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Real, 8>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<double, true>{array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceReal8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<double> operation, const char *source, int line, int dim,
-    const Descriptor *mask, const double *identity, bool ordered) {
+void RTDEF(ReduceReal8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<double> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const double *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<double>;
+  using Accumulator = ReduceAccumulator<double, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Real, 8>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceReal8DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<double> operation, const char *source, int line,
+    int dim, const Descriptor *mask, const double *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<double, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Real, 8>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #if LDBL_MANT_DIG == 64
-long double RTDEF(ReduceReal10)(const Descriptor &array,
-    ReductionOperation<long double> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const long double *identity,
+long double RTDEF(ReduceReal10Ref)(const Descriptor &array,
+    ReferenceReductionOperation<long double> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const long double *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  return GetTotalReduction<TypeCategory::Real, 10>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<long double, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+long double RTDEF(ReduceReal10Value)(const Descriptor &array,
+    ValueReductionOperation<long double> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const long double *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Real, 10>(array, source, line, dim,
       mask,
-      ReduceAccumulator<long double>{array, operation, identity, terminator},
+      ReduceAccumulator<long double, true>{
+          array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceReal10Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<long double> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const long double *identity,
+void RTDEF(ReduceReal10DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<long double> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const long double *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<long double, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Real, 10>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceReal10DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<long double> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const long double *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<long double>;
+  using Accumulator = ReduceAccumulator<long double, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Real, 10>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTDEF(ReduceReal16)(const Descriptor &array,
-    ReductionOperation<CppFloat128Type> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const CppFloat128Type *identity,
+CppFloat128Type RTDEF(ReduceReal16Ref)(const Descriptor &array,
+    ReferenceReductionOperation<CppFloat128Type> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const CppFloat128Type *identity,
     bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Real, 16>(array, source, line, dim,
       mask,
-      ReduceAccumulator<CppFloat128Type>{
+      ReduceAccumulator<CppFloat128Type, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(ReduceReal16Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<CppFloat128Type> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const CppFloat128Type *identity,
+CppFloat128Type RTDEF(ReduceReal16Value)(const Descriptor &array,
+    ValueReductionOperation<CppFloat128Type> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const CppFloat128Type *identity,
     bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<CppFloat128Type>;
+  return GetTotalReduction<TypeCategory::Real, 16>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<CppFloat128Type, true>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+void RTDEF(ReduceReal16DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<CppFloat128Type> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const CppFloat128Type *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<CppFloat128Type, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Real, 16>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceReal16DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<CppFloat128Type> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const CppFloat128Type *identity,
+    bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<CppFloat128Type, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Real, 16>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #endif
 
-void RTDEF(CppReduceComplex4)(std::complex<float> &result,
-    const Descriptor &array, ReductionOperation<std::complex<float>> operation,
+void RTDEF(CppReduceComplex4Ref)(std::complex<float> &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<float>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
     const std::complex<float> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 4>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::complex<float>>{
+      ReduceAccumulator<std::complex<float>, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+void RTDEF(CppReduceComplex4Value)(std::complex<float> &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<float>> operation, const char *source,
+    int line, int dim, const Descriptor *mask,
+    const std::complex<float> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  result = GetTotalReduction<TypeCategory::Complex, 4>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<std::complex<float>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<float>> operation, const char *source,
+void RTDEF(CppReduceComplex4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::complex<float>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const std::complex<float> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::complex<float>, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Complex, 4>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(CppReduceComplex4DimValue)(Descriptor &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<float>> operation, const char *source,
     int line, int dim, const Descriptor *mask,
     const std::complex<float> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<float>>;
+  using Accumulator = ReduceAccumulator<std::complex<float>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 4>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-void RTDEF(CppReduceComplex8)(std::complex<double> &result,
-    const Descriptor &array, ReductionOperation<std::complex<double>> operation,
+void RTDEF(CppReduceComplex8Ref)(std::complex<double> &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<double>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
     const std::complex<double> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 8>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::complex<double>>{
+      ReduceAccumulator<std::complex<double>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<double>> operation, const char *source,
+void RTDEF(CppReduceComplex8Value)(std::complex<double> &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<double>> operation, const char *source,
     int line, int dim, const Descriptor *mask,
     const std::complex<double> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<double>>;
+  result = GetTotalReduction<TypeCategory::Complex, 8>(array, source, line, dim,
+      mask,
+      ReduceAccumulator<std::complex<double>, true>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+void RTDEF(CppReduceComplex8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::complex<double>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const std::complex<double> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::complex<double>, false>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 8>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-#if LDBL_MANT_DIG == 64
-void RTDEF(CppReduceComplex10)(std::complex<long double> &result,
+void RTDEF(CppReduceComplex8DimValue)(Descriptor &result,
     const Descriptor &array,
-    ReductionOperation<std::complex<long double>> operation, const char *source,
+    ValueReductionOperation<std::complex<double>> operation, const char *source,
     int line, int dim, const Descriptor *mask,
+    const std::complex<double> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::complex<double>, true>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Complex, 8>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#if LDBL_MANT_DIG == 64
+void RTDEF(CppReduceComplex10Ref)(std::complex<long double> &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<long double>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
     const std::complex<long double> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 10>(array, source, line,
       dim, mask,
-      ReduceAccumulator<std::complex<long double>>{
+      ReduceAccumulator<std::complex<long double>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex10Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<long double>> operation, const char *source,
-    int line, int dim, const Descriptor *mask,
+void RTDEF(CppReduceComplex10Value)(std::complex<long double> &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<long double>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
     const std::complex<long double> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<long double>>;
+  result = GetTotalReduction<TypeCategory::Complex, 10>(array, source, line,
+      dim, mask,
+      ReduceAccumulator<std::complex<long double>, true>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+void RTDEF(CppReduceComplex10DimRef)(Descriptor &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<long double>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const std::complex<long double> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::complex<long double>, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Complex, 10>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(CppReduceComplex10DimValue)(Descriptor &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<long double>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const std::complex<long double> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::complex<long double>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 10>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDEF(CppReduceComplex16)(std::complex<CppFloat128Type> &result,
+void RTDEF(CppReduceComplex16Ref)(std::complex<CppFloat128Type> &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<CppFloat128Type>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const std::complex<CppFloat128Type> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  result = GetTotalReduction<TypeCategory::Complex, 16>(array, source, line,
+      dim, mask,
+      ReduceAccumulator<std::complex<CppFloat128Type>, false>{
+          array, operation, identity, terminator},
+      "REDUCE");
+}
+void RTDEF(CppReduceComplex16Value)(std::complex<CppFloat128Type> &result,
     const Descriptor &array,
-    ReductionOperation<std::complex<CppFloat128Type>> operation,
+    ValueReductionOperation<std::complex<CppFloat128Type>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
     const std::complex<CppFloat128Type> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 16>(array, source, line,
       dim, mask,
-      ReduceAccumulator<std::complex<CppFloat128Type>>{
+      ReduceAccumulator<std::complex<CppFloat128Type>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex16Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::complex<CppFloat128Type>> operation,
+void RTDEF(CppReduceComplex16DimRef)(Descriptor &result,
+    const Descriptor &array,
+    ReferenceReductionOperation<std::complex<CppFloat128Type>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
     const std::complex<CppFloat128Type> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<CppFloat128Type>>;
+  using Accumulator = ReduceAccumulator<std::complex<CppFloat128Type>, false>;
+  Accumulator accumulator{array, operation, identity, terminator};
+  PartialReduction<Accumulator, TypeCategory::Complex, 16>(result, array,
+      array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(CppReduceComplex16DimValue)(Descriptor &result,
+    const Descriptor &array,
+    ValueReductionOperation<std::complex<CppFloat128Type>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const std::complex<CppFloat128Type> *identity, bool ordered) {
+  Terminator terminator{source, line};
+  using Accumulator = ReduceAccumulator<std::complex<CppFloat128Type>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 16>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #endif
 
-bool RTDEF(ReduceLogical1)(const Descriptor &array,
-    ReductionOperation<std::int8_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int8_t *identity,
+bool RTDEF(ReduceLogical1Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
     bool ordered) {
-  return RTNAME(ReduceInteger1)(
+  return RTNAME(ReduceInteger1Ref)(
              array, operation, source, line, dim, mask, identity, ordered) != 0;
 }
-void RTDEF(ReduceLogical1Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int8_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int8_t *identity,
+bool RTDEF(ReduceLogical1Value)(const Descriptor &array,
+    ValueReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
+    bool ordered) {
+  return RTNAME(ReduceInteger1Value)(
+             array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical1DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
+    bool ordered) {
+  RTNAME(ReduceInteger1DimRef)
+  (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+void RTDEF(ReduceLogical1DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int8_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int8_t *identity,
     bool ordered) {
-  RTNAME(ReduceInteger1Dim)
+  RTNAME(ReduceInteger1DimValue)
   (result, array, operation, source, line, dim, mask, identity, ordered);
 }
-bool RTDEF(ReduceLogical2)(const Descriptor &array,
-    ReductionOperation<std::int16_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int16_t *identity,
+bool RTDEF(ReduceLogical2Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
     bool ordered) {
-  return RTNAME(ReduceInteger2)(
+  return RTNAME(ReduceInteger2Ref)(
              array, operation, source, line, dim, mask, identity, ordered) != 0;
 }
-void RTDEF(ReduceLogical2Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int16_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int16_t *identity,
+bool RTDEF(ReduceLogical2Value)(const Descriptor &array,
+    ValueReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
     bool ordered) {
-  RTNAME(ReduceInteger2Dim)
+  return RTNAME(ReduceInteger2Value)(
+             array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical2DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
+    bool ordered) {
+  RTNAME(ReduceInteger2DimRef)
   (result, array, operation, source, line, dim, mask, identity, ordered);
 }
-bool RTDEF(ReduceLogical4)(const Descriptor &array,
-    ReductionOperation<std::int32_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int32_t *identity,
+void RTDEF(ReduceLogical2DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int16_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int16_t *identity,
     bool ordered) {
-  return RTNAME(ReduceInteger4)(
+  RTNAME(ReduceInteger2DimValue)
+  (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+bool RTDEF(ReduceLogical4Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
+    bool ordered) {
+  return RTNAME(ReduceInteger4Ref)(
+             array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+bool RTDEF(ReduceLogical4Value)(const Descriptor &array,
+    ValueReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
+    bool ordered) {
+  return RTNAME(ReduceInteger4Value)(
              array, operation, source, line, dim, mask, identity, ordered) != 0;
 }
-void RTDEF(ReduceLogical4Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int32_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int32_t *identity,
+void RTDEF(ReduceLogical4DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
+    bool ordered) {
+  RTNAME(ReduceInteger4DimRef)
+  (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+void RTDEF(ReduceLogical4DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int32_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int32_t *identity,
     bool ordered) {
-  RTNAME(ReduceInteger4Dim)
+  RTNAME(ReduceInteger4DimValue)
   (result, array, operation, source, line, dim, mask, identity, ordered);
 }
-bool RTDEF(ReduceLogical8)(const Descriptor &array,
-    ReductionOperation<std::int64_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int64_t *identity,
+bool RTDEF(ReduceLogical8Ref)(const Descriptor &array,
+    ReferenceReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
     bool ordered) {
-  return RTNAME(ReduceInteger8)(
+  return RTNAME(ReduceInteger8Ref)(
              array, operation, source, line, dim, mask, identity, ordered) != 0;
 }
-void RTDEF(ReduceLogical8Dim)(Descriptor &result, const Descriptor &array,
-    ReductionOperation<std::int64_t> operation, const char *source, int line,
-    int dim, const Descriptor *mask, const std::int64_t *identity,
+bool RTDEF(ReduceLogical8Value)(const Descriptor &array,
+    ValueReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
+    bool ordered) {
+  return RTNAME(ReduceInteger8Value)(
+             array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical8DimRef)(Descriptor &result, const Descriptor &array,
+    ReferenceReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
+    bool ordered) {
+  RTNAME(ReduceInteger8DimRef)
+  (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+void RTDEF(ReduceLogical8DimValue)(Descriptor &result, const Descriptor &array,
+    ValueReductionOperation<std::int64_t> operation, const char *source,
+    int line, int dim, const Descriptor *mask, const std::int64_t *identity,
     bool ordered) {
-  RTNAME(ReduceInteger8Dim)
+  RTNAME(ReduceInteger8DimValue)
   (result, array, operation, source, line, dim, mask, identity, ordered);
 }
 
diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90
index 8d7b7798a94c5..7619edffd529e 100644
--- a/flang/test/Lower/Intrinsics/reduce.f90
+++ b/flang/test/Lower/Intrinsics/reduce.f90
@@ -40,7 +40,7 @@ subroutine integer1(a, id)
 ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX_PROC]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>)
 ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A]]#1 : (!fir.box<!fir.array<?xi8>>) -> !fir.box<none>
 ! CHECK: %[[MASK_NONE:.*]] = fir.convert %[[MASK]] : (!fir.box<i1>) -> !fir.box<none>
-! CHECK: %[[REDUCE_RES:.*]] = fir.call @_FortranAReduceInteger1(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[IDENTITY]], %false) fastmath<contract> : (!fir.box<none>, (!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>, !fir.ref<i8>, i32, i32, !fir.box<none>, !fir.ref<i8>, i1) -> i8
+! CHECK: %[[REDUCE_RES:.*]] = fir.call @_FortranAReduceInteger1Ref(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[IDENTITY]], %false) fastmath<contract> : (!fir.box<none>, (!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>, !fir.ref<i8>, i32, i32, !fir.box<none>, !fir.ref<i8>, i1) -> i8
 ! CHECK: hlfir.assign %[[REDUCE_RES]] to %[[RES]]#0 : i8, !fir.ref<i8>
 ! CHECK: %[[ADDR_OP:.*]] = fir.address_of(@_QMreduce_modPred_int1) : (!fir.ref<i8>, !fir.ref<i8>) -> i8
 ! CHECK: %[[BOX_PROC:.*]] = fir.emboxproc %[[ADDR_OP]] : ((!fir.ref<i8>, !fir.ref<i8>) -> i8) -> !fir.boxproc<() -> ()>
@@ -48,13 +48,13 @@ subroutine integer1(a, id)
 ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX_PROC]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>)
 ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A]]#1 : (!fir.box<!fir.array<?xi8>>) -> !fir.box<none>
 ! CHECK: %[[MASK_NONE:.*]] = fir.convert %[[MASK]] : (!fir.box<i1>) -> !fir.box<none>
-! CHECK: %{{.*}} = fir.call @_FortranAReduceInteger1(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[ID]]#1, %false{{.*}}) fastmath<contract> : (!fir.box<none>, (!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>, !fir.ref<i8>, i32, i32, !fir.box<none>, !fir.ref<i8>, i1) -> i8
-! CHECK: fir.call @_FortranAReduceInteger1(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}#1, %true)
+! CHECK: %{{.*}} = fir.call @_FortranAReduceInteger1Ref(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[ID]]#1, %false{{.*}}) fastmath<contract> : (!fir.box<none>, (!fir.ref<i8>, !fir.ref<i8>) -> !fir.ref<i8>, !fir.ref<i8>, i32, i32, !fir.box<none>, !fir.ref<i8>, i1) -> i8
+! CHECK: fir.call @_FortranAReduceInteger1Ref(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}#1, %true)
 ! CHECK: %[[MASK:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QQro.3xl4.0"} : (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.shape<1>) -> (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.ref<!fir.array<3x!fir.logical<4>>>)
 ! CHECK: %[[SHAPE_C3:.*]] = fir.shape %c3{{.*}} : (index) -> !fir.shape<1>
 ! CHECK: %[[BOXED_MASK:.*]] = fir.embox %[[MASK]]#1(%[[SHAPE_C3]]) : (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<3x!fir.logical<4>>>
 ! CHECK: %[[CONV_MASK:.*]] = fir.convert %[[BOXED_MASK]] : (!fir.box<!fir.array<3x!fir.logical<4>>>) -> !fir.box<none>
-! CHECK: fir.call @_FortranAReduceInteger1(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}})
+! CHECK: fir.call @_FortranAReduceInteger1Ref(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}})
 
 pure function red_int2(a,b)
   integer(2), intent(in) :: a, b
@@ -68,7 +68,7 @@ subroutine integer2(a)
   res = reduce(a, red_int2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger2
+! CHECK: fir.call @_FortranAReduceInteger2Ref
 
 pure function red_int4(a,b)
   integer(4), intent(in) :: a, b
@@ -82,7 +82,7 @@ subroutine integer4(a)
   res = reduce(a, red_int4)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger4
+! CHECK: fir.call @_FortranAReduceInteger4Ref
 
 pure function red_int8(a,b)
   integer(8), intent(in) :: a, b
@@ -96,7 +96,7 @@ subroutine integer8(a)
   res = reduce(a, red_int8)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger8
+! CHECK: fir.call @_FortranAReduceInteger8Ref
 
 pure function red_int16(a,b)
   integer(16), intent(in) :: a, b
@@ -110,7 +110,7 @@ subroutine integer16(a)
   res = reduce(a, red_int16)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger16
+! CHECK: fir.call @_FortranAReduceInteger16Ref
 
 pure function red_real2(a,b)
   real(2), intent(in) :: a, b
@@ -124,7 +124,7 @@ subroutine real2(a)
   res = reduce(a, red_real2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal2
+! CHECK: fir.call @_FortranAReduceReal2Ref
 
 pure function red_real3(a,b)
   real(3), intent(in) :: a, b
@@ -138,7 +138,7 @@ subroutine real3(a)
   res = reduce(a, red_real3)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal3
+! CHECK: fir.call @_FortranAReduceReal3Ref
 
 pure function red_real4(a,b)
   real(4), intent(in) :: a, b
@@ -152,7 +152,7 @@ subroutine real4(a)
   res = reduce(a, red_real4)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal4
+! CHECK: fir.call @_FortranAReduceReal4Ref
 
 pure function red_real8(a,b)
   real(8), intent(in) :: a, b
@@ -166,7 +166,7 @@ subroutine real8(a)
   res = reduce(a, red_real8)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal8
+! CHECK: fir.call @_FortranAReduceReal8Ref
 
 pure function red_real10(a,b)
   real(10), intent(in) :: a, b
@@ -180,7 +180,7 @@ subroutine real10(a)
   res = reduce(a, red_real10)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal10
+! CHECK: fir.call @_FortranAReduceReal10Ref
 
 pure function red_real16(a,b)
   real(16), intent(in) :: a, b
@@ -194,7 +194,7 @@ subroutine real16(a)
   res = reduce(a, red_real16)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal16
+! CHECK: fir.call @_FortranAReduceReal16Ref
 
 pure function red_complex2(a,b)
   complex(2), intent(in) :: a, b
@@ -292,7 +292,7 @@ subroutine log1(a)
   res = reduce(a, red_log1)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical1
+! CHECK: fir.call @_FortranAReduceLogical1Ref
 
 pure function red_log2(a,b)
   logical(2), intent(in) :: a, b
@@ -306,7 +306,7 @@ subroutine log2(a)
   res = reduce(a, red_log2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical2
+! CHECK: fir.call @_FortranAReduceLogical2Ref
 
 pure function red_log4(a,b)
   logical(4), intent(in) :: a, b
@@ -320,7 +320,7 @@ subroutine log4(a)
   res = reduce(a, red_log4)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical4
+! CHECK: fir.call @_FortranAReduceLogical4Ref
 
 pure function red_log8(a,b)
   logical(8), intent(in) :: a, b
@@ -334,7 +334,7 @@ subroutine log8(a)
   res = reduce(a, red_log8)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical8
+! CHECK: fir.call @_FortranAReduceLogical8Ref
 
 pure function red_char1(a,b)
   character(1), intent(in) :: a, b
@@ -403,7 +403,7 @@ subroutine integer1dim(a, id)
   res = reduce(a, red_int1, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger1Dim
+! CHECK: fir.call @_FortranAReduceInteger1DimRef
 
 subroutine integer2dim(a, id)
   integer(2), intent(in) :: a(:,:)
@@ -412,7 +412,7 @@ subroutine integer2dim(a, id)
   res = reduce(a, red_int2, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger2Dim
+! CHECK: fir.call @_FortranAReduceInteger2DimRef
 
 subroutine integer4dim(a, id)
   integer(4), intent(in) :: a(:,:)
@@ -421,7 +421,7 @@ subroutine integer4dim(a, id)
   res = reduce(a, red_int4, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger4Dim
+! CHECK: fir.call @_FortranAReduceInteger4DimRef
 
 subroutine integer8dim(a, id)
   integer(8), intent(in) :: a(:,:)
@@ -430,7 +430,7 @@ subroutine integer8dim(a, id)
   res = reduce(a, red_int8, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger8Dim
+! CHECK: fir.call @_FortranAReduceInteger8DimRef
 
 subroutine integer16dim(a, id)
   integer(16), intent(in) :: a(:,:)
@@ -439,7 +439,7 @@ subroutine integer16dim(a, id)
   res = reduce(a, red_int16, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceInteger16Dim
+! CHECK: fir.call @_FortranAReduceInteger16DimRef
 
 subroutine real2dim(a, id)
   real(2), intent(in) :: a(:,:)
@@ -448,7 +448,7 @@ subroutine real2dim(a, id)
   res = reduce(a, red_real2, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal2Dim
+! CHECK: fir.call @_FortranAReduceReal2DimRef
 
 subroutine real3dim(a, id)
   real(3), intent(in) :: a(:,:)
@@ -457,7 +457,7 @@ subroutine real3dim(a, id)
   res = reduce(a, red_real3, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal3Dim
+! CHECK: fir.call @_FortranAReduceReal3DimRef
 
 subroutine real4dim(a, id)
   real(4), intent(in) :: a(:,:)
@@ -466,7 +466,7 @@ subroutine real4dim(a, id)
   res = reduce(a, red_real4, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal4Dim
+! CHECK: fir.call @_FortranAReduceReal4DimRef
 
 subroutine real8dim(a, id)
   real(8), intent(in) :: a(:,:)
@@ -475,7 +475,7 @@ subroutine real8dim(a, id)
   res = reduce(a, red_real8, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal8Dim
+! CHECK: fir.call @_FortranAReduceReal8DimRef
 
 subroutine real10dim(a, id)
   real(10), intent(in) :: a(:,:)
@@ -484,7 +484,7 @@ subroutine real10dim(a, id)
   res = reduce(a, red_real10, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal10Dim
+! CHECK: fir.call @_FortranAReduceReal10DimRef
 
 subroutine real16dim(a, id)
   real(16), intent(in) :: a(:,:)
@@ -493,7 +493,7 @@ subroutine real16dim(a, id)
   res = reduce(a, red_real16, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceReal16Dim
+! CHECK: fir.call @_FortranAReduceReal16DimRef
 
 subroutine complex2dim(a, id)
   complex(2), intent(in) :: a(:,:)
@@ -556,7 +556,7 @@ subroutine logical1dim(a, id)
   res = reduce(a, red_log1, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical1Dim
+! CHECK: fir.call @_FortranAReduceLogical1DimRef
 
 subroutine logical2dim(a, id)
   logical(2), intent(in) :: a(:,:)
@@ -565,7 +565,7 @@ subroutine logical2dim(a, id)
   res = reduce(a, red_log2, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical2Dim
+! CHECK: fir.call @_FortranAReduceLogical2DimRef
 
 subroutine logical4dim(a, id)
   logical(4), intent(in) :: a(:,:)
@@ -574,7 +574,7 @@ subroutine logical4dim(a, id)
   res = reduce(a, red_log4, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical4Dim
+! CHECK: fir.call @_FortranAReduceLogical4DimRef
 
 subroutine logical8dim(a, id)
   logical(8), intent(in) :: a(:,:)
@@ -583,7 +583,7 @@ subroutine logical8dim(a, id)
   res = reduce(a, red_log8, 2)
 end subroutine
 
-! CHECK: fir.call @_FortranAReduceLogical8Dim
+! CHECK: fir.call @_FortranAReduceLogical8DimRef
 
 subroutine testtypeDim(a)
   type(t1), intent(in) :: a(:,:)
diff --git a/flang/unittests/Runtime/Reduction.cpp b/flang/unittests/Runtime/Reduction.cpp
index b2661e78abdf5..41c8d86c35b76 100644
--- a/flang/unittests/Runtime/Reduction.cpp
+++ b/flang/unittests/Runtime/Reduction.cpp
@@ -647,23 +647,24 @@ static std::int32_t IMultiply(const std::int32_t *x, const std::int32_t *y) {
 TEST(Reductions, ReduceInt4) {
   auto intVector{MakeArray<TypeCategory::Integer, 4>(
       std::vector<int>{4}, std::vector<std::int32_t>{1, 2, 3, 4})};
-  EXPECT_EQ(RTNAME(ReduceInteger4)(*intVector, IAdd, __FILE__, __LINE__), 10);
   EXPECT_EQ(
-      RTNAME(ReduceInteger4)(*intVector, IMultiply, __FILE__, __LINE__), 24);
+      RTNAME(ReduceInteger4Ref)(*intVector, IAdd, __FILE__, __LINE__), 10);
+  EXPECT_EQ(
+      RTNAME(ReduceInteger4Ref)(*intVector, IMultiply, __FILE__, __LINE__), 24);
 }
 TEST(Reductions, ReduceInt4Dim) {
   auto intMatrix{MakeArray<TypeCategory::Integer, 4>(
       std::vector<int>{2, 2}, std::vector<std::int32_t>{1, 2, 3, 4})};
   StaticDescriptor<1, true> statDesc;
   Descriptor &sums{statDesc.descriptor()};
-  RTNAME(ReduceInteger4Dim)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 1);
+  RTNAME(ReduceInteger4DimRef)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 1);
   EXPECT_EQ(sums.rank(), 1);
   EXPECT_EQ(sums.GetDimension(0).LowerBound(), 1);
   EXPECT_EQ(sums.GetDimension(0).Extent(), 2);
   EXPECT_EQ(*sums.ZeroBasedIndexedElement<std::int32_t>(0), 3);
   EXPECT_EQ(*sums.ZeroBasedIndexedElement<std::int32_t>(1), 7);
   sums.Destroy();
-  RTNAME(ReduceInteger4Dim)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 2);
+  RTNAME(ReduceInteger4DimRef)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 2);
   EXPECT_EQ(sums.rank(), 1);
   EXPECT_EQ(sums.GetDimension(0).LowerBound(), 1);
   EXPECT_EQ(sums.GetDimension(0).Extent(), 2);



More information about the flang-commits mailing list