[llvm-branch-commits] [flang] [flang] Lower REDUCE intrinsic for reduction op with args by value (PR #95353)

Valentin Clement バレンタイン クレメン via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jun 12 22:05:31 PDT 2024


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/95353

#95297 updates the runtime entry points to distinguish between reduction operations whose arguments are passed by value and those whose arguments are passed by reference. This patch adds lowering to support arguments passed by value.

>From defadc4f18b0b4b369a3657a0f6e4c9f79ffd793 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Wed, 12 Jun 2024 15:28:31 -0700
Subject: [PATCH] [flang] Update lowering of REDUCE intrinsic for reduction
 operation with args by value

---
 .../Optimizer/Builder/Runtime/RTBuilder.h     |  22 +
 .../Optimizer/Builder/Runtime/Reduction.h     |   8 +-
 flang/lib/Optimizer/Builder/IntrinsicCall.cpp |  16 +-
 .../Optimizer/Builder/Runtime/Reduction.cpp   | 468 ++++++++++++++++--
 flang/test/Lower/Intrinsics/reduce.f90        | 235 ++++++++-
 5 files changed, 674 insertions(+), 75 deletions(-)

diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
index 809d5b8d569dc..845ba385918d0 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h
@@ -64,6 +64,18 @@ using FuncTypeBuilderFunc = mlir::FunctionType (*)(mlir::MLIRContext *);
     };                                                                         \
   }
 
+#define REDUCTION_VALUE_OPERATION_MODEL(T)                                     \
+  template <>                                                                  \
+  constexpr TypeBuilderFunc                                                    \
+  getModel<Fortran::runtime::ValueReductionOperation<T>>() {                   \
+    return [](mlir::MLIRContext *context) -> mlir::Type {                      \
+      TypeBuilderFunc f{getModel<T>()};                                        \
+      auto refTy = fir::ReferenceType::get(f(context));                        \
+      return mlir::FunctionType::get(context, {f(context), f(context)},        \
+                                     refTy);                                   \
+    };                                                                         \
+  }
+
 #define REDUCTION_CHAR_OPERATION_MODEL(T)                                      \
   template <>                                                                  \
   constexpr TypeBuilderFunc                                                    \
@@ -481,17 +493,27 @@ constexpr TypeBuilderFunc getModel<void>() {
 }
 
 REDUCTION_REF_OPERATION_MODEL(std::int8_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int8_t)
 REDUCTION_REF_OPERATION_MODEL(std::int16_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int16_t)
 REDUCTION_REF_OPERATION_MODEL(std::int32_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int32_t)
 REDUCTION_REF_OPERATION_MODEL(std::int64_t)
+REDUCTION_VALUE_OPERATION_MODEL(std::int64_t)
 REDUCTION_REF_OPERATION_MODEL(Fortran::common::int128_t)
+REDUCTION_VALUE_OPERATION_MODEL(Fortran::common::int128_t)
 
 REDUCTION_REF_OPERATION_MODEL(float)
+REDUCTION_VALUE_OPERATION_MODEL(float)
 REDUCTION_REF_OPERATION_MODEL(double)
+REDUCTION_VALUE_OPERATION_MODEL(double)
 REDUCTION_REF_OPERATION_MODEL(long double)
+REDUCTION_VALUE_OPERATION_MODEL(long double)
 
 REDUCTION_REF_OPERATION_MODEL(std::complex<float>)
+REDUCTION_VALUE_OPERATION_MODEL(std::complex<float>)
 REDUCTION_REF_OPERATION_MODEL(std::complex<double>)
+REDUCTION_VALUE_OPERATION_MODEL(std::complex<double>)
 
 REDUCTION_CHAR_OPERATION_MODEL(char)
 REDUCTION_CHAR_OPERATION_MODEL(char16_t)
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
index fedf453a6dc8d..2a40cddc0cc2c 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h
@@ -229,8 +229,8 @@ void genIParityDim(fir::FirOpBuilder &builder, mlir::Location loc,
 /// result value. This is used for COMPLEX, CHARACTER and DERIVED TYPES.
 void genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
                mlir::Value arrayBox, mlir::Value operation, mlir::Value maskBox,
-               mlir::Value identity, mlir::Value ordered,
-               mlir::Value resultBox);
+               mlir::Value identity, mlir::Value ordered, mlir::Value resultBox,
+               bool argByRef);
 
 /// Generate call to `Reduce` intrinsic runtime routine. This is the version
 /// that does not take a dim argument and return a scalare result. This is used
@@ -238,14 +238,14 @@ void genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Value genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
                       mlir::Value arrayBox, mlir::Value operation,
                       mlir::Value maskBox, mlir::Value identity,
-                      mlir::Value ordered);
+                      mlir::Value ordered, bool argByRef);
 
 /// Generate call to `Reduce` intrinsic runtime routine. This is the version
 /// that takes arrays of any rank with a dim argument specified.
 void genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc,
                   mlir::Value arrayBox, mlir::Value operation, mlir::Value dim,
                   mlir::Value maskBox, mlir::Value identity,
-                  mlir::Value ordered, mlir::Value resultBox);
+                  mlir::Value ordered, mlir::Value resultBox, bool argByRef);
 
 } // namespace fir::runtime
 
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index c438ae1250e45..bc8a1889da74e 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -5745,6 +5745,14 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
   int rank = arrayTmp.rank();
   assert(rank >= 1);
 
+  // Are the reduction operation arguments passed by reference or by value?
+  bool argByRef = true;
+  if (auto embox =
+          mlir::dyn_cast_or_null<fir::EmboxProcOp>(operation.getDefiningOp())) {
+    auto fctTy = mlir::dyn_cast<mlir::FunctionType>(embox.getFunc().getType());
+    argByRef = mlir::isa<fir::ReferenceType>(fctTy.getInput(0));
+  }
+
   mlir::Type ty = array.getType();
   mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
   mlir::Type eleTy = mlir::cast<fir::SequenceType>(arrTy).getEleTy();
@@ -5772,7 +5780,7 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
     if (fir::isa_complex(eleTy) || fir::isa_derived(eleTy)) {
       mlir::Value result = builder.createTemporary(loc, eleTy);
       fir::runtime::genReduce(builder, loc, array, operation, mask, identity,
-                              ordered, result);
+                              ordered, result, argByRef);
       if (fir::isa_derived(eleTy))
         return result;
       return builder.create<fir::LoadOp>(loc, result);
@@ -5789,11 +5797,11 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
                                             charTy.getLen());
       fir::CharBoxValue temp = charHelper.createCharacterTemp(eleTy, len);
       fir::runtime::genReduce(builder, loc, array, operation, mask, identity,
-                              ordered, temp.getBuffer());
+                              ordered, temp.getBuffer(), argByRef);
       return temp;
     }
     return fir::runtime::genReduce(builder, loc, array, operation, mask,
-                                   identity, ordered);
+                                   identity, ordered, argByRef);
   }
   // Handle cases that have an array result.
   // Create mutable fir.box to be passed to the runtime for the result.
@@ -5804,7 +5812,7 @@ IntrinsicLibrary::genReduce(mlir::Type resultType,
       fir::factory::getMutableIRBox(builder, loc, resultMutableBox);
   mlir::Value dim = fir::getBase(args[2]);
   fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask,
-                             identity, ordered, resultIrBox);
+                             identity, ordered, resultIrBox, argByRef);
   return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE");
 }
 
diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
index c306b50eb5698..18eff93727856 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp
@@ -476,10 +476,30 @@ struct ForcedReduceReal10 {
       auto ty = mlir::FloatType::getF80(ctx);
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty});
+    };
+  }
+};
+
+/// Placeholder for real*10 with value version of Reduce Intrinsic
+struct ForcedReduceReal10Value {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceReal10Value));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::FloatType::getF80(ctx);
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
           ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty});
@@ -496,10 +516,30 @@ struct ForcedReduceReal16 {
       auto ty = mlir::FloatType::getF128(ctx);
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty});
+    };
+  }
+};
+
+/// Placeholder for real*16 with value version of Reduce Intrinsic
+struct ForcedReduceReal16Value {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceReal16Value));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::FloatType::getF128(ctx);
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
           ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty});
@@ -516,10 +556,32 @@ struct ForcedReduceReal10Dim {
       auto ty = mlir::FloatType::getF80(ctx);
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto refBoxTy = fir::ReferenceType::get(boxTy);
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for DIM real*10 with value version of Reduce Intrinsic
+struct ForcedReduceReal10DimValue {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceReal10DimValue));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::FloatType::getF80(ctx);
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto refBoxTy = fir::ReferenceType::get(boxTy);
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
@@ -538,10 +600,32 @@ struct ForcedReduceReal16Dim {
       auto ty = mlir::FloatType::getF128(ctx);
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto refBoxTy = fir::ReferenceType::get(boxTy);
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for DIM real*16 with value version of Reduce Intrinsic
+struct ForcedReduceReal16DimValue {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceReal16DimValue));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::FloatType::getF128(ctx);
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto refBoxTy = fir::ReferenceType::get(boxTy);
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
@@ -560,10 +644,30 @@ struct ForcedReduceInteger16 {
       auto ty = mlir::IntegerType::get(ctx, 128);
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty});
+    };
+  }
+};
+
+/// Placeholder for integer*16 with value version of Reduce Intrinsic
+struct ForcedReduceInteger16Value {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceInteger16Value));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::IntegerType::get(ctx, 128);
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
           ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty});
@@ -580,10 +684,32 @@ struct ForcedReduceInteger16Dim {
       auto ty = mlir::IntegerType::get(ctx, 128);
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto refBoxTy = fir::ReferenceType::get(boxTy);
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for DIM integer*16 with value version of Reduce Intrinsic
+struct ForcedReduceInteger16DimValue {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(ReduceInteger16DimValue));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::IntegerType::get(ctx, 128);
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto refBoxTy = fir::ReferenceType::get(boxTy);
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
@@ -602,10 +728,31 @@ struct ForcedReduceComplex10 {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for complex(10) with value version of Reduce Intrinsic
+struct ForcedReduceComplex10Value {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex10Value));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
           ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
@@ -623,10 +770,32 @@ struct ForcedReduceComplex10Dim {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto refBoxTy = fir::ReferenceType::get(boxTy);
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for Dim complex(10) with value version of Reduce Intrinsic
+struct ForcedReduceComplex10DimValue {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex10DimValue));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx));
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto refBoxTy = fir::ReferenceType::get(boxTy);
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
@@ -645,10 +814,31 @@ struct ForcedReduceComplex16 {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for complex(16) with value version of Reduce Intrinsic
+struct ForcedReduceComplex16Value {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex16Value));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
           ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
@@ -666,10 +856,32 @@ struct ForcedReduceComplex16Dim {
       auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
       auto boxTy =
           fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
-      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty);
+      auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy);
       auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
       auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
+      auto refBoxTy = fir::ReferenceType::get(boxTy);
+      auto i1Ty = mlir::IntegerType::get(ctx, 1);
+      return mlir::FunctionType::get(
+          ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty},
+          {});
+    };
+  }
+};
+
+/// Placeholder for Dim complex(16) with value version of Reduce Intrinsic
+struct ForcedReduceComplex16DimValue {
+  static constexpr const char *name =
+      ExpandAndQuoteKey(RTNAME(CppReduceComplex16DimValue));
+  static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() {
+    return [](mlir::MLIRContext *ctx) {
+      auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx));
+      auto boxTy =
+          fir::runtime::getModel<const Fortran::runtime::Descriptor &>()(ctx);
       auto refTy = fir::ReferenceType::get(ty);
+      auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy);
+      auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8));
+      auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int));
       auto refBoxTy = fir::ReferenceType::get(boxTy);
       auto i1Ty = mlir::IntegerType::get(ctx, 1);
       return mlir::FunctionType::get(
@@ -1457,7 +1669,8 @@ GEN_IALL_IANY_IPARITY(IParity)
 void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
                              mlir::Value arrayBox, mlir::Value operation,
                              mlir::Value maskBox, mlir::Value identity,
-                             mlir::Value ordered, mlir::Value resultBox) {
+                             mlir::Value ordered, mlir::Value resultBox,
+                             bool argByRef) {
   mlir::func::FuncOp func;
   auto ty = arrayBox.getType();
   auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
@@ -1472,22 +1685,40 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
   mlir::MLIRContext *ctx = builder.getContext();
   fir::factory::CharacterExprHelper charHelper{builder, loc};
 
-  if (eleTy == fir::ComplexType::get(ctx, 2))
+  if (eleTy == fir::ComplexType::get(ctx, 2) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2Ref)>(loc,
                                                                        builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 3))
+  else if (eleTy == fir::ComplexType::get(ctx, 2) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2Value)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 3) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3Ref)>(loc,
                                                                        builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 4))
+  else if (eleTy == fir::ComplexType::get(ctx, 3) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3Value)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 4) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4Ref)>(loc,
                                                                        builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 8))
+  else if (eleTy == fir::ComplexType::get(ctx, 4) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4Value)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 8) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8Ref)>(loc,
                                                                        builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 10))
+  else if (eleTy == fir::ComplexType::get(ctx, 8) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8Value)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 10) && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex10>(loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 16))
+  else if (eleTy == fir::ComplexType::get(ctx, 10) && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<ForcedReduceComplex10Value>(loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 16) && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex16>(loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 16) && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<ForcedReduceComplex16Value>(loc, builder);
   else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceChar1)>(loc, builder);
   else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 2)
@@ -1516,7 +1747,8 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc,
 mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder,
                                     mlir::Location loc, mlir::Value arrayBox,
                                     mlir::Value operation, mlir::Value maskBox,
-                                    mlir::Value identity, mlir::Value ordered) {
+                                    mlir::Value identity, mlir::Value ordered,
+                                    bool argByRef) {
   mlir::func::FuncOp func;
   auto ty = arrayBox.getType();
   auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
@@ -1530,44 +1762,97 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder,
           mlir::isa<fir::LogicalType>(eleTy)) &&
          "expect real, interger or logical");
 
-  if (eleTy.isF16())
+  if (eleTy.isF16() && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2Ref)>(loc, builder);
-  else if (eleTy.isBF16())
+  else if (eleTy.isF16() && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2Value)>(loc, builder);
+  else if (eleTy.isBF16() && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3Ref)>(loc, builder);
-  else if (eleTy.isF32())
+  else if (eleTy.isBF16() && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3Value)>(loc, builder);
+  else if (eleTy.isF32() && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4Ref)>(loc, builder);
-  else if (eleTy.isF64())
+  else if (eleTy.isF32() && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4Value)>(loc, builder);
+  else if (eleTy.isF64() && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8Ref)>(loc, builder);
-  else if (eleTy.isF80())
+  else if (eleTy.isF64() && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8Value)>(loc, builder);
+  else if (eleTy.isF80() && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal10>(loc, builder);
-  else if (eleTy.isF128())
+  else if (eleTy.isF80() && !argByRef)
+    func = fir::runtime::getRuntimeFunc<ForcedReduceReal10Value>(loc, builder);
+  else if (eleTy.isF128() && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal16>(loc, builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)))
+  else if (eleTy.isF128() && !argByRef)
+    func = fir::runtime::getRuntimeFunc<ForcedReduceReal16Value>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) &&
+           argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1Ref)>(loc, builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1Value)>(loc,
+                                                                      builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) &&
+           argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2Ref)>(loc, builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2Value)>(loc,
+                                                                      builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) &&
+           argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4Ref)>(loc, builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4Value)>(loc,
+                                                                      builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) &&
+           argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8Ref)>(loc, builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8Value)>(loc,
+                                                                      builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) &&
+           argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16>(loc, builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 1))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) &&
+           !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<ForcedReduceInteger16Value>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 1) && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1Ref)>(loc, builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 2))
+  else if (eleTy == fir::LogicalType::get(ctx, 1) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1Value)>(loc,
+                                                                      builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 2) && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2Ref)>(loc, builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 4))
+  else if (eleTy == fir::LogicalType::get(ctx, 2) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2Value)>(loc,
+                                                                      builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 4) && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4Ref)>(loc, builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 8))
+  else if (eleTy == fir::LogicalType::get(ctx, 4) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4Value)>(loc,
+                                                                      builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 8) && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8Ref)>(loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 8) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8Value)>(loc,
+                                                                      builder);
   else
     fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE");
 
@@ -1586,7 +1871,7 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc,
                                 mlir::Value arrayBox, mlir::Value operation,
                                 mlir::Value dim, mlir::Value maskBox,
                                 mlir::Value identity, mlir::Value ordered,
-                                mlir::Value resultBox) {
+                                mlir::Value resultBox, bool argByRef) {
   mlir::func::FuncOp func;
   auto ty = arrayBox.getType();
   auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty);
@@ -1595,64 +1880,137 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc,
   mlir::MLIRContext *ctx = builder.getContext();
   fir::factory::CharacterExprHelper charHelper{builder, loc};
 
-  if (eleTy.isF16())
+  if (eleTy.isF16() && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2DimRef)>(loc, builder);
-  else if (eleTy.isBF16())
+  else if (eleTy.isF16() && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal2DimValue)>(loc,
+                                                                      builder);
+  else if (eleTy.isBF16() && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3DimRef)>(loc, builder);
-  else if (eleTy.isF32())
+  else if (eleTy.isBF16() && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal3DimValue)>(loc,
+                                                                      builder);
+  else if (eleTy.isF32() && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4DimRef)>(loc, builder);
-  else if (eleTy.isF64())
+  else if (eleTy.isF32() && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal4DimValue)>(loc,
+                                                                      builder);
+  else if (eleTy.isF64() && argByRef)
     func =
         fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8DimRef)>(loc, builder);
-  else if (eleTy.isF80())
+  else if (eleTy.isF64() && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceReal8DimValue)>(loc,
+                                                                      builder);
+  else if (eleTy.isF80() && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal10Dim>(loc, builder);
-  else if (eleTy.isF128())
+  else if (eleTy.isF80() && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<ForcedReduceReal10DimValue>(loc, builder);
+  else if (eleTy.isF128() && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceReal16Dim>(loc, builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)))
+  else if (eleTy.isF128() && !argByRef)
+    func =
+        fir::runtime::getRuntimeFunc<ForcedReduceReal16DimValue>(loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) &&
+           argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1DimRef)>(loc,
                                                                        builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger1DimValue)>(
+        loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) &&
+           argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2DimRef)>(loc,
                                                                        builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger2DimValue)>(
+        loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) &&
+           argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4DimRef)>(loc,
                                                                        builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger4DimValue)>(
+        loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) &&
+           argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8DimRef)>(loc,
                                                                        builder);
-  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceInteger8DimValue)>(
+        loc, builder);
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) &&
+           argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16Dim>(loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 2))
+  else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) &&
+           !argByRef)
+    func = fir::runtime::getRuntimeFunc<ForcedReduceInteger16DimValue>(loc,
+                                                                       builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 2) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2DimRef)>(
         loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 3))
+  else if (eleTy == fir::ComplexType::get(ctx, 2) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex2DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 3) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3DimRef)>(
         loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 4))
+  else if (eleTy == fir::ComplexType::get(ctx, 3) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex3DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 4) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4DimRef)>(
         loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 8))
+  else if (eleTy == fir::ComplexType::get(ctx, 4) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex4DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 8) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8DimRef)>(
         loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 10))
+  else if (eleTy == fir::ComplexType::get(ctx, 8) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(CppReduceComplex8DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 10) && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex10Dim>(loc, builder);
-  else if (eleTy == fir::ComplexType::get(ctx, 16))
+  else if (eleTy == fir::ComplexType::get(ctx, 10) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<ForcedReduceComplex10DimValue>(loc,
+                                                                       builder);
+  else if (eleTy == fir::ComplexType::get(ctx, 16) && argByRef)
     func = fir::runtime::getRuntimeFunc<ForcedReduceComplex16Dim>(loc, builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 1))
+  else if (eleTy == fir::ComplexType::get(ctx, 16) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<ForcedReduceComplex16DimValue>(loc,
+                                                                       builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 1) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1DimRef)>(loc,
                                                                        builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 2))
+  else if (eleTy == fir::LogicalType::get(ctx, 1) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical1DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 2) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2DimRef)>(loc,
                                                                        builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 4))
+  else if (eleTy == fir::LogicalType::get(ctx, 2) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical2DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 4) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4DimRef)>(loc,
                                                                        builder);
-  else if (eleTy == fir::LogicalType::get(ctx, 8))
+  else if (eleTy == fir::LogicalType::get(ctx, 4) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical4DimValue)>(
+        loc, builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 8) && argByRef)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8DimRef)>(loc,
                                                                        builder);
+  else if (eleTy == fir::LogicalType::get(ctx, 8) && !argByRef)
+    func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceLogical8DimValue)>(
+        loc, builder);
   else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1)
     func = fir::runtime::getRuntimeFunc<mkRTKey(ReduceCharacter1Dim)>(loc,
                                                                       builder);
diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90
index 7619edffd529e..358897b05adce 100644
--- a/flang/test/Lower/Intrinsics/reduce.f90
+++ b/flang/test/Lower/Intrinsics/reduce.f90
@@ -14,6 +14,12 @@ pure function red_int1(a,b)
   red_int1 = a + b
 end function
 
+pure function red_int1_value(a,b) result(total)  ! sum of two by-value operands
+  integer(1), intent(in), value :: a, b
+  integer(1) :: total
+  total = a + b
+end function red_int1_value
+
 subroutine integer1(a, id)
   integer(1), intent(in) :: a(:)
   integer(1) :: res, id
@@ -25,6 +31,8 @@ subroutine integer1(a, id)
   res = reduce(a, red_int1, identity=id, ordered = .true.)
 
   res = reduce(a, red_int1, [.true., .true., .false.])
+  
+  res = reduce(a, red_int1_value)
 end subroutine
 
 ! CHECK-LABEL: func.func @_QMreduce_modPinteger1(
@@ -55,6 +63,7 @@ subroutine integer1(a, id)
 ! CHECK: %[[BOXED_MASK:.*]] = fir.embox %[[MASK]]#1(%[[SHAPE_C3]]) : (!fir.ref<!fir.array<3x!fir.logical<4>>>, !fir.shape<1>) -> !fir.box<!fir.array<3x!fir.logical<4>>>
 ! CHECK: %[[CONV_MASK:.*]] = fir.convert %[[BOXED_MASK]] : (!fir.box<!fir.array<3x!fir.logical<4>>>) -> !fir.box<none>
 ! CHECK: fir.call @_FortranAReduceInteger1Ref(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}})
+! CHECK: fir.call @_FortranAReduceInteger1Value
 
 pure function red_int2(a,b)
   integer(2), intent(in) :: a, b
@@ -62,13 +71,21 @@ pure function red_int2(a,b)
   red_int2 = a + b
 end function
 
+pure function red_int2_value(a,b) result(total)  ! sum of two by-value operands
+  integer(2), intent(in), value :: a, b
+  integer(2) :: total
+  total = a + b
+end function red_int2_value
+
 subroutine integer2(a)
   integer(2), intent(in) :: a(:)
   integer(2) :: res
   res = reduce(a, red_int2)
+  res = reduce(a, red_int2_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger2Ref
+! CHECK: fir.call @_FortranAReduceInteger2Value
 
 pure function red_int4(a,b)
   integer(4), intent(in) :: a, b
@@ -76,13 +93,21 @@ pure function red_int4(a,b)
   red_int4 = a + b
 end function
 
+pure function red_int4_value(a,b) result(total)  ! sum of two by-value operands
+  integer(4), intent(in), value :: a, b
+  integer(4) :: total
+  total = a + b
+end function red_int4_value
+
 subroutine integer4(a)
   integer(4), intent(in) :: a(:)
   integer(4) :: res
   res = reduce(a, red_int4)
+  res = reduce(a, red_int4_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger4Ref
+! CHECK: fir.call @_FortranAReduceInteger4Value
 
 pure function red_int8(a,b)
   integer(8), intent(in) :: a, b
@@ -90,13 +115,21 @@ pure function red_int8(a,b)
   red_int8 = a + b
 end function
 
+pure function red_int8_value(a,b) result(total)  ! sum of two by-value operands
+  integer(8), intent(in), value :: a, b
+  integer(8) :: total
+  total = a + b
+end function red_int8_value
+
 subroutine integer8(a)
   integer(8), intent(in) :: a(:)
   integer(8) :: res
   res = reduce(a, red_int8)
+  res = reduce(a, red_int8_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger8Ref
+! CHECK: fir.call @_FortranAReduceInteger8Value
 
 pure function red_int16(a,b)
   integer(16), intent(in) :: a, b
@@ -104,13 +137,21 @@ pure function red_int16(a,b)
   red_int16 = a + b
 end function
 
+pure function red_int16_value(a,b) result(total)  ! sum of two by-value operands
+  integer(16), intent(in), value :: a, b
+  integer(16) :: total
+  total = a + b
+end function red_int16_value
+
 subroutine integer16(a)
   integer(16), intent(in) :: a(:)
   integer(16) :: res
   res = reduce(a, red_int16)
+  res = reduce(a, red_int16_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger16Ref
+! CHECK: fir.call @_FortranAReduceInteger16Value
 
 pure function red_real2(a,b)
   real(2), intent(in) :: a, b
@@ -118,13 +159,21 @@ pure function red_real2(a,b)
   red_real2 = a + b
 end function
 
+pure function red_real2_value(a,b) result(total)  ! sum of two by-value operands
+  real(2), intent(in), value :: a, b
+  real(2) :: total
+  total = a + b
+end function red_real2_value
+
 subroutine real2(a)
   real(2), intent(in) :: a(:)
   real(2) :: res
   res = reduce(a, red_real2)
+  res = reduce(a, red_real2_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal2Ref
+! CHECK: fir.call @_FortranAReduceReal2Value
 
 pure function red_real3(a,b)
   real(3), intent(in) :: a, b
@@ -132,13 +181,21 @@ pure function red_real3(a,b)
   red_real3 = a + b
 end function
 
+pure function red_real3_value(a,b) result(total)  ! sum of two by-value operands
+  real(3), intent(in), value :: a, b
+  real(3) :: total
+  total = a + b
+end function red_real3_value
+
 subroutine real3(a)
   real(3), intent(in) :: a(:)
   real(3) :: res
   res = reduce(a, red_real3)
+  res = reduce(a, red_real3_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal3Ref
+! CHECK: fir.call @_FortranAReduceReal3Value
 
 pure function red_real4(a,b)
   real(4), intent(in) :: a, b
@@ -146,13 +203,21 @@ pure function red_real4(a,b)
   red_real4 = a + b
 end function
 
+pure function red_real4_value(a,b) result(total)  ! sum of two by-value operands
+  real(4), intent(in), value :: a, b
+  real(4) :: total
+  total = a + b
+end function red_real4_value
+
 subroutine real4(a)
   real(4), intent(in) :: a(:)
   real(4) :: res
   res = reduce(a, red_real4)
+  res = reduce(a, red_real4_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal4Ref
+! CHECK: fir.call @_FortranAReduceReal4Value
 
 pure function red_real8(a,b)
   real(8), intent(in) :: a, b
@@ -160,13 +225,21 @@ pure function red_real8(a,b)
   red_real8 = a + b
 end function
 
+pure function red_real8_value(a,b) result(total)  ! sum of two by-value operands
+  real(8), intent(in), value :: a, b
+  real(8) :: total
+  total = a + b
+end function red_real8_value
+
 subroutine real8(a)
   real(8), intent(in) :: a(:)
   real(8) :: res
   res = reduce(a, red_real8)
+  res = reduce(a, red_real8_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal8Ref
+! CHECK: fir.call @_FortranAReduceReal8Value
 
 pure function red_real10(a,b)
   real(10), intent(in) :: a, b
@@ -174,13 +247,21 @@ pure function red_real10(a,b)
   red_real10 = a + b
 end function
 
+pure function red_real10_value(a,b) result(total)  ! sum of two by-value operands
+  real(10), intent(in), value :: a, b
+  real(10) :: total
+  total = a + b
+end function red_real10_value
+
 subroutine real10(a)
   real(10), intent(in) :: a(:)
   real(10) :: res
   res = reduce(a, red_real10)
+  res = reduce(a, red_real10_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal10Ref
+! CHECK: fir.call @_FortranAReduceReal10Value
 
 pure function red_real16(a,b)
   real(16), intent(in) :: a, b
@@ -188,13 +269,21 @@ pure function red_real16(a,b)
   red_real16 = a + b
 end function
 
+pure function red_real16_value(a,b) result(total)  ! sum of two by-value operands
+  real(16), intent(in), value :: a, b
+  real(16) :: total
+  total = a + b
+end function red_real16_value
+
 subroutine real16(a)
   real(16), intent(in) :: a(:)
   real(16) :: res
   res = reduce(a, red_real16)
+  res = reduce(a, red_real16_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal16Ref
+! CHECK: fir.call @_FortranAReduceReal16Value
 
 pure function red_complex2(a,b)
   complex(2), intent(in) :: a, b
@@ -202,13 +291,21 @@ pure function red_complex2(a,b)
   red_complex2 = a + b
 end function
 
+pure function red_complex2_value(a,b) result(total)  ! sum of two by-value operands
+  complex(2), intent(in), value :: a, b
+  complex(2) :: total
+  total = a + b
+end function red_complex2_value
+
 subroutine complex2(a)
   complex(2), intent(in) :: a(:)
   complex(2) :: res
   res = reduce(a, red_complex2)
+  res = reduce(a, red_complex2_value)  ! repeat with the by-VALUE operation
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex2
+! CHECK: fir.call @_FortranACppReduceComplex2Ref
+! CHECK: fir.call @_FortranACppReduceComplex2Value
 
 pure function red_complex3(a,b)
   complex(3), intent(in) :: a, b
@@ -216,13 +313,21 @@ pure function red_complex3(a,b)
   red_complex3 = a + b
 end function
 
+pure function red_complex3_value(a,b) result(total)  ! sum of two by-value operands
+  complex(3), intent(in), value :: a, b
+  complex(3) :: total
+  total = a + b
+end function red_complex3_value
+
 subroutine complex3(a)
   complex(3), intent(in) :: a(:)
   complex(3) :: res
   res = reduce(a, red_complex3)
+  res = reduce(a, red_complex3_value)  ! repeat with the by-VALUE operation
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex3
+! CHECK: fir.call @_FortranACppReduceComplex3Ref
+! CHECK: fir.call @_FortranACppReduceComplex3Value
 
 pure function red_complex4(a,b)
   complex(4), intent(in) :: a, b
@@ -230,13 +335,21 @@ pure function red_complex4(a,b)
   red_complex4 = a + b
 end function
 
+pure function red_complex4_value(a,b) result(total)  ! sum of two by-value operands
+  complex(4), intent(in), value :: a, b
+  complex(4) :: total
+  total = a + b
+end function red_complex4_value
+
 subroutine complex4(a)
   complex(4), intent(in) :: a(:)
   complex(4) :: res
   res = reduce(a, red_complex4)
+  res = reduce(a, red_complex4_value)  ! repeat with the by-VALUE operation
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex4
+! CHECK: fir.call @_FortranACppReduceComplex4Ref
+! CHECK: fir.call @_FortranACppReduceComplex4Value
 
 pure function red_complex8(a,b)
   complex(8), intent(in) :: a, b
@@ -244,13 +357,21 @@ pure function red_complex8(a,b)
   red_complex8 = a + b
 end function
 
+pure function red_complex8_value(a,b) result(total)  ! sum of two by-value operands
+  complex(8), intent(in), value :: a, b
+  complex(8) :: total
+  total = a + b
+end function red_complex8_value
+
 subroutine complex8(a)
   complex(8), intent(in) :: a(:)
   complex(8) :: res
   res = reduce(a, red_complex8)
+  res = reduce(a, red_complex8_value)  ! repeat with the by-VALUE operation
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex8
+! CHECK: fir.call @_FortranACppReduceComplex8Ref
+! CHECK: fir.call @_FortranACppReduceComplex8Value
 
 pure function red_complex10(a,b)
   complex(10), intent(in) :: a, b
@@ -258,13 +379,21 @@ pure function red_complex10(a,b)
   red_complex10 = a + b
 end function
 
+pure function red_complex10_value(a,b) result(total)  ! sum of two by-value operands
+  complex(10), intent(in), value :: a, b
+  complex(10) :: total
+  total = a + b
+end function red_complex10_value
+
 subroutine complex10(a)
   complex(10), intent(in) :: a(:)
   complex(10) :: res
   res = reduce(a, red_complex10)
+  res = reduce(a, red_complex10_value)  ! repeat with the by-VALUE operation
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex10
+! CHECK: fir.call @_FortranACppReduceComplex10Ref
+! CHECK: fir.call @_FortranACppReduceComplex10Value
 
 pure function red_complex16(a,b)
   complex(16), intent(in) :: a, b
@@ -272,13 +401,21 @@ pure function red_complex16(a,b)
   red_complex16 = a + b
 end function
 
+pure function red_complex16_value(a,b) result(total)  ! sum of two by-value operands
+  complex(16), intent(in), value :: a, b
+  complex(16) :: total
+  total = a + b
+end function red_complex16_value
+
 subroutine complex16(a)
   complex(16), intent(in) :: a(:)
   complex(16) :: res
   res = reduce(a, red_complex16)
+  res = reduce(a, red_complex16_value)  ! repeat with the by-VALUE operation
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex16
+! CHECK: fir.call @_FortranACppReduceComplex16Ref
+! CHECK: fir.call @_FortranACppReduceComplex16Value
 
 pure function red_log1(a,b)
   logical(1), intent(in) :: a, b
@@ -286,13 +423,21 @@ pure function red_log1(a,b)
   red_log1 = a .and. b
 end function
 
+pure function red_log1_value(a,b) result(both)  ! logical AND of two by-value operands
+  logical(1), intent(in), value :: a, b
+  logical(1) :: both
+  both = a .and. b
+end function red_log1_value
+
 subroutine log1(a)
   logical(1), intent(in) :: a(:)
   logical(1) :: res
   res = reduce(a, red_log1)
+  res = reduce(a, red_log1_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical1Ref
+! CHECK: fir.call @_FortranAReduceLogical1Value
 
 pure function red_log2(a,b)
   logical(2), intent(in) :: a, b
@@ -300,13 +445,21 @@ pure function red_log2(a,b)
   red_log2 = a .and. b
 end function
 
+pure function red_log2_value(a,b) result(both)  ! logical AND of two by-value operands
+  logical(2), intent(in), value :: a, b
+  logical(2) :: both
+  both = a .and. b
+end function red_log2_value
+
 subroutine log2(a)
   logical(2), intent(in) :: a(:)
   logical(2) :: res
   res = reduce(a, red_log2)
+  res = reduce(a, red_log2_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical2Ref
+! CHECK: fir.call @_FortranAReduceLogical2Value
 
 pure function red_log4(a,b)
   logical(4), intent(in) :: a, b
@@ -314,13 +467,21 @@ pure function red_log4(a,b)
   red_log4 = a .and. b
 end function
 
+pure function red_log4_value(a,b) result(both)  ! logical AND of two by-value operands
+  logical(4), intent(in), value :: a, b
+  logical(4) :: both
+  both = a .and. b
+end function red_log4_value
+
 subroutine log4(a)
   logical(4), intent(in) :: a(:)
   logical(4) :: res
   res = reduce(a, red_log4)
+  res = reduce(a, red_log4_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical4Ref
+! CHECK: fir.call @_FortranAReduceLogical4Value
 
 pure function red_log8(a,b)
   logical(8), intent(in) :: a, b
@@ -328,13 +489,21 @@ pure function red_log8(a,b)
   red_log8 = a .and. b
 end function
 
+pure function red_log8_value(a,b) result(both)  ! logical AND of two by-value operands
+  logical(8), intent(in), value :: a, b
+  logical(8) :: both
+  both = a .and. b
+end function red_log8_value
+
 subroutine log8(a)
   logical(8), intent(in) :: a(:)
   logical(8) :: res
   res = reduce(a, red_log8)
+  res = reduce(a, red_log8_value)  ! repeat with the by-VALUE operation
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical8Ref
+! CHECK: fir.call @_FortranAReduceLogical8Value
 
 pure function red_char1(a,b)
   character(1), intent(in) :: a, b
@@ -401,189 +570,231 @@ subroutine integer1dim(a, id)
   integer(1), allocatable :: res(:)
 
   res = reduce(a, red_int1, 2)
+  res = reduce(a, red_int1_value, 2)
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger1DimRef
+! CHECK: fir.call @_FortranAReduceInteger1DimValue
 
 subroutine integer2dim(a, id)
   integer(2), intent(in) :: a(:,:)
   integer(2), allocatable :: res(:)
 
   res = reduce(a, red_int2, 2)
+  res = reduce(a, red_int2_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger2DimRef
+! CHECK: fir.call @_FortranAReduceInteger2DimValue
 
 subroutine integer4dim(a, id)
   integer(4), intent(in) :: a(:,:)
   integer(4), allocatable :: res(:)
 
   res = reduce(a, red_int4, 2)
+  res = reduce(a, red_int4_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger4DimRef
+! CHECK: fir.call @_FortranAReduceInteger4DimValue
 
 subroutine integer8dim(a, id)
   integer(8), intent(in) :: a(:,:)
   integer(8), allocatable :: res(:)
 
   res = reduce(a, red_int8, 2)
+  res = reduce(a, red_int8_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger8DimRef
+! CHECK: fir.call @_FortranAReduceInteger8DimValue
 
 subroutine integer16dim(a, id)
   integer(16), intent(in) :: a(:,:)
   integer(16), allocatable :: res(:)
 
   res = reduce(a, red_int16, 2)
+  res = reduce(a, red_int16_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceInteger16DimRef
+! CHECK: fir.call @_FortranAReduceInteger16DimValue
 
 subroutine real2dim(a, id)
   real(2), intent(in) :: a(:,:)
   real(2), allocatable :: res(:)
 
   res = reduce(a, red_real2, 2)
+  res = reduce(a, red_real2_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal2DimRef
+! CHECK: fir.call @_FortranAReduceReal2DimValue
 
 subroutine real3dim(a, id)
   real(3), intent(in) :: a(:,:)
   real(3), allocatable :: res(:)
 
   res = reduce(a, red_real3, 2)
+  res = reduce(a, red_real3_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal3DimRef
+! CHECK: fir.call @_FortranAReduceReal3DimValue
 
 subroutine real4dim(a, id)
   real(4), intent(in) :: a(:,:)
   real(4), allocatable :: res(:)
 
   res = reduce(a, red_real4, 2)
+  res = reduce(a, red_real4_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal4DimRef
+! CHECK: fir.call @_FortranAReduceReal4DimValue
 
 subroutine real8dim(a, id)
   real(8), intent(in) :: a(:,:)
   real(8), allocatable :: res(:)
 
   res = reduce(a, red_real8, 2)
+  res = reduce(a, red_real8_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal8DimRef
+! CHECK: fir.call @_FortranAReduceReal8DimValue
 
 subroutine real10dim(a, id)
   real(10), intent(in) :: a(:,:)
   real(10), allocatable :: res(:)
 
   res = reduce(a, red_real10, 2)
+  res = reduce(a, red_real10_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal10DimRef
+! CHECK: fir.call @_FortranAReduceReal10DimValue
 
 subroutine real16dim(a, id)
   real(16), intent(in) :: a(:,:)
   real(16), allocatable :: res(:)
 
   res = reduce(a, red_real16, 2)
+  res = reduce(a, red_real16_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceReal16DimRef
+! CHECK: fir.call @_FortranAReduceReal16DimValue
 
 subroutine complex2dim(a, id)
   complex(2), intent(in) :: a(:,:)
   complex(2), allocatable :: res(:)
 
   res = reduce(a, red_complex2, 2)
+  res = reduce(a, red_complex2_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex2Dim
+! CHECK: fir.call @_FortranACppReduceComplex2DimRef
+! CHECK: fir.call @_FortranACppReduceComplex2DimValue
 
 subroutine complex3dim(a, id)
   complex(3), intent(in) :: a(:,:)
   complex(3), allocatable :: res(:)
 
   res = reduce(a, red_complex3, 2)
+  res = reduce(a, red_complex3_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex3Dim
+! CHECK: fir.call @_FortranACppReduceComplex3DimRef
+! CHECK: fir.call @_FortranACppReduceComplex3DimValue
 
 subroutine complex4dim(a, id)
   complex(4), intent(in) :: a(:,:)
   complex(4), allocatable :: res(:)
 
   res = reduce(a, red_complex4, 2)
+  res = reduce(a, red_complex4_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex4Dim
+! CHECK: fir.call @_FortranACppReduceComplex4DimRef
+! CHECK: fir.call @_FortranACppReduceComplex4DimValue
 
 subroutine complex8dim(a, id)
   complex(8), intent(in) :: a(:,:)
   complex(8), allocatable :: res(:)
 
   res = reduce(a, red_complex8, 2)
+  res = reduce(a, red_complex8_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex8Dim
+! CHECK: fir.call @_FortranACppReduceComplex8DimRef
+! CHECK: fir.call @_FortranACppReduceComplex8DimValue
 
 subroutine complex10dim(a, id)
   complex(10), intent(in) :: a(:,:)
   complex(10), allocatable :: res(:)
 
   res = reduce(a, red_complex10, 2)
+  res = reduce(a, red_complex10_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex10Dim
+! CHECK: fir.call @_FortranACppReduceComplex10DimRef
+! CHECK: fir.call @_FortranACppReduceComplex10DimValue
 
 subroutine complex16dim(a, id)
   complex(16), intent(in) :: a(:,:)
   complex(16), allocatable :: res(:)
 
   res = reduce(a, red_complex16, 2)
+  res = reduce(a, red_complex16_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
-! CHECK: fir.call @_FortranACppReduceComplex16Dim
+! CHECK: fir.call @_FortranACppReduceComplex16DimRef
+! CHECK: fir.call @_FortranACppReduceComplex16DimValue
 
 subroutine logical1dim(a, id)
   logical(1), intent(in) :: a(:,:)
   logical(1), allocatable :: res(:)
 
   res = reduce(a, red_log1, 2)
+  res = reduce(a, red_log1_value, 2) ! dim=2 reduction; operation presumably takes VALUE dummies
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical1DimRef
+! CHECK: fir.call @_FortranAReduceLogical1DimValue
 
 subroutine logical2dim(a, id)
   logical(2), intent(in) :: a(:,:)
   logical(2), allocatable :: res(:)
 
   res = reduce(a, red_log2, 2)
+  res = reduce(a, red_log2_value, 2) ! dim=2 reduction; operation has VALUE dummy arguments
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical2DimRef
+! CHECK: fir.call @_FortranAReduceLogical2DimValue
 
 subroutine logical4dim(a, id)
   logical(4), intent(in) :: a(:,:)
   logical(4), allocatable :: res(:)
 
   res = reduce(a, red_log4, 2)
+  res = reduce(a, red_log4_value, 2) ! dim=2 reduction; operation has VALUE dummy arguments
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical4DimRef
+! CHECK: fir.call @_FortranAReduceLogical4DimValue
 
 subroutine logical8dim(a, id)
   logical(8), intent(in) :: a(:,:)
   logical(8), allocatable :: res(:)
 
   res = reduce(a, red_log8, 2)
+  res = reduce(a, red_log8_value, 2) ! dim=2 reduction; operation has VALUE dummy arguments
 end subroutine
 
 ! CHECK: fir.call @_FortranAReduceLogical8DimRef
+! CHECK: fir.call @_FortranAReduceLogical8DimValue
 
 subroutine testtypeDim(a)
   type(t1), intent(in) :: a(:,:)



More information about the llvm-branch-commits mailing list