[flang-commits] [flang] [flang][runtime] Runtime support for REDUCE() (PR #86214)
Peter Klausler via flang-commits
flang-commits at lists.llvm.org
Thu Mar 21 16:20:39 PDT 2024
https://github.com/klausler created https://github.com/llvm/llvm-project/pull/86214
Supports the REDUCE() transformational intrinsic function of Fortran (see F'2023 16.9.173) in a manner similar to the existing support for SUM(), PRODUCT(), &c. There are APIs for total reductions to scalar results, and APIs for partial reductions that reduce the rank of the argument by one.
This implementation requires more functions than other reductions because the various possible types of the user-supplied OPERATION= function need to be elaborated.
Once the basic API in reduce.h has been approved, later patches will implement lowering.
REDUCE() is primarily for completeness, not portability; only one other Fortran compiler implements this F'2018 feature today, and only some types work correctly with it.
>From c38bea42cf81b0fdf0ff25f62d588b2913c4491b Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Wed, 20 Mar 2024 16:15:47 -0700
Subject: [PATCH] [flang][runtime] Runtime support for REDUCE()
Supports the REDUCE() transformational intrinsic function of Fortran
(see F'2023 16.9.173) in a manner similar to the existing support for
SUM(), PRODUCT(), &c. There are APIs for total reductions to scalar
results, and APIs for partial reductions that reduce the rank of the
argument by one.
This implementation requires more functions than other reductions
because the various possible types of the user-supplied OPERATION=
function need to be elaborated.
Once the basic API in reduce.h has been approved, later patches will
implement lowering.
REDUCE() is primarily for completeness, not portability; only one other
Fortran compiler implements this F'2018 feature today, and only some
types work correctly with it.
---
flang/include/flang/Runtime/reduce.h | 254 +++++++++++++
flang/lib/Semantics/check-call.cpp | 3 +
flang/runtime/CMakeLists.txt | 1 +
flang/runtime/complex-reduction.c | 22 ++
flang/runtime/complex-reduction.h | 45 +++
flang/runtime/reduce.cpp | 526 ++++++++++++++++++++++++++
flang/runtime/reduction-templates.h | 46 +--
flang/runtime/tools.cpp | 29 ++
flang/runtime/tools.h | 4 +
flang/unittests/Runtime/Reduction.cpp | 37 ++
10 files changed, 934 insertions(+), 33 deletions(-)
create mode 100644 flang/include/flang/Runtime/reduce.h
create mode 100644 flang/runtime/reduce.cpp
diff --git a/flang/include/flang/Runtime/reduce.h b/flang/include/flang/Runtime/reduce.h
new file mode 100644
index 00000000000000..f314129f6e9351
--- /dev/null
+++ b/flang/include/flang/Runtime/reduce.h
@@ -0,0 +1,254 @@
+//===-- include/flang/Runtime/reduce.h --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Defines the API for implementations of the transformational intrinsic
+// functionREDUCE(); see F'2023 16.9.173.
+//
+// Similar to the definition of the APIs for SUM(), &c., in reduction.h,
+// there are typed functions here like ReduceInteger4() for total reductions
+// to scalars and void functions like ReduceInteger4Dim() for partial
+// reductions to smaller arrays.
+
+#ifndef FORTRAN_RUNTIME_REDUCE_H_
+#define FORTRAN_RUNTIME_REDUCE_H_
+
+#include "flang/Common/float128.h"
+#include "flang/Common/uint128.h"
+#include "flang/Runtime/cpp-type.h"
+#include "flang/Runtime/entry-names.h"
+#include <complex>
+#include <cstdint>
+
+namespace Fortran::runtime {
+
+class Descriptor;
+
+template <typename T> using ReductionOperation = T (*)(const T *, const T *);
+template <typename CHAR>
+using ReductionCharOperation = void (*)(CHAR *hiddenResult, const CHAR *x,
+ const CHAR *y, std::size_t resultLen, std::size_t xLen, std::size_t yLen);
+using ReductionDerivedTypeOperation = void (*)(
+ void *hiddenResult, const void *x, const void *y);
+
+extern "C" {
+
+std::int8_t RTDECL(ReduceInteger1)(const Descriptor &,
+ ReductionOperation<std::int8_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceInteger1Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int8_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr,
+ bool ordered = true);
+std::int16_t RTDECL(ReduceInteger2)(const Descriptor &,
+ ReductionOperation<std::int16_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceInteger2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int16_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
+ bool ordered = true);
+std::int32_t RTDECL(ReduceInteger4)(const Descriptor &,
+ ReductionOperation<std::int32_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceInteger4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int32_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
+ bool ordered = true);
+std::int64_t RTDECL(ReduceInteger8)(const Descriptor &,
+ ReductionOperation<std::int64_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceInteger8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int64_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
+ bool ordered = true);
+#ifdef __SIZEOF_INT128__
+common::int128_t RTDECL(ReduceInteger16)(const Descriptor &,
+ ReductionOperation<common::int128_t>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const common::int128_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<common::int128_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr,
+ const common::int128_t *identity = nullptr, bool ordered = true);
+#endif
+
+// REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
+float RTDECL(ReduceReal2)(const Descriptor &, ReductionOperation<float>,
+ const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+ const float *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<float>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const float *identity = nullptr,
+ bool ordered = true);
+float RTDECL(ReduceReal3)(const Descriptor &, ReductionOperation<float>,
+ const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+ const float *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal3Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<float>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const float *identity = nullptr,
+ bool ordered = true);
+float RTDECL(ReduceReal4)(const Descriptor &, ReductionOperation<float>,
+ const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+ const float *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<float>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const float *identity = nullptr,
+ bool ordered = true);
+double RTDECL(ReduceReal8)(const Descriptor &, ReductionOperation<double>,
+ const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+ const double *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<double>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const double *identity = nullptr,
+ bool ordered = true);
+#if LDBL_MANT_DIG == 64
+long double RTDECL(ReduceReal10)(const Descriptor &,
+ ReductionOperation<long double>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const long double *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceReal10Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<long double>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const long double *identity = nullptr,
+ bool ordered = true);
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CppFloat128Type RTDECL(ReduceReal16)(const Descriptor &,
+ ReductionOperation<CppFloat128Type>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const CppFloat128Type *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceReal16Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<CppFloat128Type>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const CppFloat128Type *identity = nullptr,
+ bool ordered = true);
+#endif
+
+void RTDECL(CppReduceComplex2)(std::complex<float> &, const Descriptor &,
+ ReductionOperation<std::complex<float>>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<float>>, const char *source, int line,
+ int dim, const Descriptor *mask = nullptr,
+ const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex3)(std::complex<float> &, const Descriptor &,
+ ReductionOperation<std::complex<float>>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex3Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<float>>, const char *source, int line,
+ int dim, const Descriptor *mask = nullptr,
+ const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex4)(std::complex<float> &, const Descriptor &,
+ ReductionOperation<std::complex<float>>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<float>>, const char *source, int line,
+ int dim, const Descriptor *mask = nullptr,
+ const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex8)(std::complex<double> &, const Descriptor &,
+ ReductionOperation<std::complex<double>>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const std::complex<double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<double>>, const char *source, int line,
+ int dim, const Descriptor *mask = nullptr,
+ const std::complex<double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex10)(std::complex<long double> &, const Descriptor &,
+ ReductionOperation<std::complex<long double>>, const char *source, int line,
+ int dim = 0, const Descriptor *mask = nullptr,
+ const std::complex<long double> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex10Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<long double>>, const char *source, int line,
+ int dim, const Descriptor *mask = nullptr,
+ const std::complex<long double> *identity = nullptr, bool ordered = true);
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+void RTDECL(CppReduceComplex16)(std::complex<CppFloat128Type> &,
+ const Descriptor &, ReductionOperation<std::complex<CppFloat128Type>>,
+ const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+ const std::complex<CppFloat128Type> *identity = nullptr,
+ bool ordered = true);
+void RTDECL(CppReduceComplex16Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<CppFloat128Type>>, const char *source,
+ int line, int dim, const Descriptor *mask = nullptr,
+ const std::complex<CppFloat128Type> *identity = nullptr,
+ bool ordered = true);
+#endif
+
+bool RTDECL(ReduceLogical1)(const Descriptor &, ReductionOperation<std::int8_t>,
+ const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+ const std::int8_t *identity = nullptr, bool ordered = true);
+void RTDECL(ReduceLogical1Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int8_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr,
+ bool ordered = true);
+bool RTDECL(ReduceLogical2)(const Descriptor &,
+ ReductionOperation<std::int16_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceLogical2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int16_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr,
+ bool ordered = true);
+bool RTDECL(ReduceLogical4)(const Descriptor &,
+ ReductionOperation<std::int32_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceLogical4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int32_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr,
+ bool ordered = true);
+bool RTDECL(ReduceLogical8)(const Descriptor &,
+ ReductionOperation<std::int64_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceLogical8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int64_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr,
+ bool ordered = true);
+
+void RTDECL(ReduceChar1)(char *result, const Descriptor &array,
+ ReductionCharOperation<char>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const char *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceCharacter1Dim)(Descriptor &result, const Descriptor &array,
+ ReductionCharOperation<char>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const char *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceChar2)(char16_t *result, const Descriptor &array,
+ ReductionCharOperation<char16_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const char16_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceCharacter2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionCharOperation<char16_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const char16_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceChar4)(char32_t *result, const Descriptor &array,
+ ReductionCharOperation<char32_t>, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const char32_t *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceCharacter4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionCharOperation<char32_t>, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const char32_t *identity = nullptr,
+ bool ordered = true);
+
+void RTDECL(ReduceDerivedType)(char *result, const Descriptor &array,
+ ReductionDerivedTypeOperation, const char *source, int line, int dim = 0,
+ const Descriptor *mask = nullptr, const char *identity = nullptr,
+ bool ordered = true);
+void RTDECL(ReduceDerivedTypeDim)(Descriptor &result, const Descriptor &array,
+ ReductionDerivedTypeOperation, const char *source, int line, int dim,
+ const Descriptor *mask = nullptr, const char *identity = nullptr,
+ bool ordered = true);
+
+} // extern "C"
+} // namespace Fortran::runtime
+#endif // FORTRAN_RUNTIME_REDUCE_H_
diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp
index d625f8c2f7fc11..51a16ee155fabb 100644
--- a/flang/lib/Semantics/check-call.cpp
+++ b/flang/lib/Semantics/check-call.cpp
@@ -1588,6 +1588,9 @@ static void CheckReduce(
procChars->dummyArguments.size() != 2 || !procChars->functionResult) {
messages.Say(
"OPERATION= argument of REDUCE() must be a pure function of two data arguments"_err_en_US);
+ } else if (procChars->attrs.test(characteristics::Procedure::Attr::BindC)) {
+ messages.Say(
+ "A BIND(C) OPERATION= argument of REDUCE() is not supported"_err_en_US);
} else if (!result || result->Rank() != 0) {
messages.Say(
"OPERATION= argument of REDUCE() must be a scalar function"_err_en_US);
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 7dd60b5edcd5fb..55986e190ac1b7 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -151,6 +151,7 @@ set(sources
product.cpp
ragged.cpp
random.cpp
+ reduce.cpp
reduction.cpp
stat.cpp
stop.cpp
diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c
index c91d1253991176..7654de8080a152 100644
--- a/flang/runtime/complex-reduction.c
+++ b/flang/runtime/complex-reduction.c
@@ -155,3 +155,25 @@ ADAPT_REDUCTION(DotProductComplex10, long_double_Complex_t,
ADAPT_REDUCTION(DotProductComplex16, CFloat128ComplexType, CppComplexFloat128,
CMPLXF128, DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
#endif
+
+/* REDUCE() */
+#define RARGS REDUCE_ARGS(float_Complex_t)
+ADAPT_REDUCTION(ReduceComplex4, float_Complex_t, CppComplexFloat, CMPLXF, RARGS,
+ REDUCE_ARG_NAMES)
+#undef RARGS
+#define RARGS REDUCE_ARGS(double_Complex_t)
+ADAPT_REDUCTION(ReduceComplex8, double_Complex_t, CppComplexDouble, CMPLX,
+ RARGS, REDUCE_ARG_NAMES)
+#undef RARGS
+#if LDBL_MANT_DIG == 64
+#define RARGS REDUCE_ARGS(long_double_Complex_t)
+ADAPT_REDUCTION(ReduceComplex10, long_double_Complex_t, CppComplexLongDouble,
+ CMPLXL, RARGS, REDUCE_ARG_NAMES)
+#undef RARGS
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+#define RARGS REDUCE_ARGS(CFloat128ComplexType)
+ADAPT_REDUCTION(ReduceComplex16, CFloat128ComplexType, CppComplexFloat128,
+ CMPLXF128, RARGS, REDUCE_ARG_NAMES)
+#undef RARGS
+#endif
diff --git a/flang/runtime/complex-reduction.h b/flang/runtime/complex-reduction.h
index 1d37b235d5194b..98b20d1e592be8 100644
--- a/flang/runtime/complex-reduction.h
+++ b/flang/runtime/complex-reduction.h
@@ -69,4 +69,49 @@ long_double_Complex_t RTNAME(DotProductComplex10)(DOT_PRODUCT_ARGS);
CFloat128ComplexType RTNAME(DotProductComplex16)(DOT_PRODUCT_ARGS);
#endif
+#define REDUCE_ARGS(T) \
+ T##_op operation, const struct CppDescriptor *x, \
+ const struct CppDescriptor *y, const char *source, int line, \
+ int dim /*=0*/, const struct CppDescriptor *mask /*=NULL*/, \
+ const T *identity /*=NULL*/, _Bool ordered /*=true*/
+#define REDUCE_ARG_NAMES \
+ operation, x, y, source, line, dim, mask, identity, ordered
+
+typedef float_Complex_t (*float_Complex_t_op)(
+ const float_Complex_t *, const float_Complex_t *);
+typedef double_Complex_t (*double_Complex_t_op)(
+ const double_Complex_t *, const double_Complex_t *);
+typedef long_double_Complex_t (*long_double_Complex_t_op)(
+ const long_double_Complex_t *, const long_double_Complex_t *);
+
+float_Complex_t RTNAME(ReduceComplex2)(REDUCE_ARGS(float_Complex_t));
+float_Complex_t RTNAME(ReduceComplex3)(REDUCE_ARGS(float_Complex_t));
+float_Complex_t RTNAME(ReduceComplex4)(REDUCE_ARGS(float_Complex_t));
+double_Complex_t RTNAME(ReduceComplex8)(REDUCE_ARGS(double_Complex_t));
+long_double_Complex_t RTNAME(ReduceComplex10)(
+ REDUCE_ARGS(long_double_Complex_t));
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+typedef CFloat128ComplexType (*CFloat128ComplexType_op)(
+ const CFloat128ComplexType *, const CFloat128ComplexType *);
+CFloat128ComplexType RTNAME(ReduceComplex16)(REDUCE_ARGS(CFloat128ComplexType));
+#endif
+
+#define REDUCE_DIM_ARGS(T) \
+ struct CppDescriptor *result, T##_op operation, \
+ const struct CppDescriptor *x, const struct CppDescriptor *y, \
+ const char *source, int line, int dim, \
+ const struct CppDescriptor *mask /*=NULL*/, const T *identity /*=NULL*/, \
+ _Bool ordered /*=true*/
+#define REDUCE_DIM_ARG_NAMES \
+ result, operation, x, y, source, line, dim, mask, identity, ordered
+
+void RTNAME(ReduceComplex2Dim)(REDUCE_DIM_ARGS(float_Complex_t));
+void RTNAME(ReduceComplex3Dim)(REDUCE_DIM_ARGS(float_Complex_t));
+void RTNAME(ReduceComplex4Dim)(REDUCE_DIM_ARGS(float_Complex_t));
+void RTNAME(ReduceComplex8Dim)(REDUCE_DIM_ARGS(double_Complex_t));
+void RTNAME(ReduceComplex10Dim)(REDUCE_DIM_ARGS(long_double_Complex_t));
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+void RTNAME(ReduceComplex16Dim)(REDUCE_DIM_ARGS(CFloat128ComplexType));
+#endif
+
#endif // FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_
diff --git a/flang/runtime/reduce.cpp b/flang/runtime/reduce.cpp
new file mode 100644
index 00000000000000..da2dcd5a193a9b
--- /dev/null
+++ b/flang/runtime/reduce.cpp
@@ -0,0 +1,526 @@
+//===-- runtime/reduce.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REDUCE() implementation
+
+#include "flang/Runtime/reduce.h"
+#include "reduction-templates.h"
+#include "terminator.h"
+#include "tools.h"
+#include "flang/Runtime/descriptor.h"
+
+namespace Fortran::runtime {
+
+template <typename T> class ReduceAccumulator {
+public:
+ RT_API_ATTRS ReduceAccumulator(const Descriptor &array,
+ ReductionOperation<T> operation, const T *identity,
+ Terminator &terminator)
+ : array_{array}, operation_{operation}, identity_{identity},
+ terminator_{terminator} {}
+ RT_API_ATTRS void Reinitialize() { result_.reset(); }
+ template <typename A>
+ RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
+ const auto *operand{array_.Element<A>(at)};
+ if (result_) {
+ result_ = operation_(&*result_, operand);
+ } else {
+ result_ = *operand;
+ }
+ return true;
+ }
+ template <typename A>
+ RT_API_ATTRS void GetResult(A *to, int /*zeroBasedDim*/ = -1) {
+ if (result_) {
+ *to = *result_;
+ } else if (identity_) {
+ *to = *identity_;
+ } else {
+ terminator_.Crash("REDUCE() without IDENTITY= has no result");
+ }
+ }
+
+private:
+ const Descriptor &array_;
+ std::optional<T> result_;
+ ReductionOperation<T> operation_;
+ const T *identity_{nullptr};
+ Terminator &terminator_;
+};
+
+template <typename T, typename OP, bool hasLength>
+class BufferedReduceAccumulator {
+public:
+ RT_API_ATTRS BufferedReduceAccumulator(const Descriptor &array, OP operation,
+ const T *identity, Terminator &terminator)
+ : array_{array}, operation_{operation}, identity_{identity},
+ terminator_{terminator} {}
+ RT_API_ATTRS void Reinitialize() { activeTemp_ = -1; }
+ template <typename A>
+ RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
+ const auto *operand{array_.Element<A>(at)};
+ if (activeTemp_ >= 0) {
+ if constexpr (hasLength) {
+ operation_(&*temp_[1 - activeTemp_], &*temp_[activeTemp_], operand,
+ length_, length_, length_);
+ } else {
+ operation_(&*temp_[1 - activeTemp_], &*temp_[activeTemp_], operand);
+ }
+ activeTemp_ = 1 - activeTemp_;
+ } else {
+ activeTemp_ = 0;
+ std::memcpy(&*temp_[activeTemp_], operand, elementBytes_);
+ }
+ return true;
+ }
+ template <typename A>
+ RT_API_ATTRS void GetResult(A *to, int /*zeroBasedDim*/ = -1) {
+ if (activeTemp_ >= 0) {
+ std::memcpy(to, &*temp_[activeTemp_], elementBytes_);
+ } else if (identity_) {
+ std::memcpy(to, identity_, elementBytes_);
+ } else {
+ terminator_.Crash("REDUCE() without IDENTITY= has no result");
+ }
+ }
+
+private:
+ const Descriptor &array_;
+ OP operation_;
+ const T *identity_{nullptr};
+ Terminator &terminator_;
+ std::size_t elementBytes_{array_.ElementBytes()};
+ OwningPtr<T> temp_[2]{SizedNew<T>{terminator_}(elementBytes_),
+ SizedNew<T>{terminator_}(elementBytes_)};
+ int activeTemp_{-1};
+ std::size_t length_{elementBytes_ / sizeof(T)};
+};
+
+extern "C" {
+RT_EXT_API_GROUP_BEGIN
+
+std::int8_t RTDEF(ReduceInteger1)(const Descriptor &array,
+ ReductionOperation<std::int8_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int8_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Integer, 1>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<std::int8_t>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceInteger1Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int8_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int8_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::int8_t>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Integer, 1>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+std::int16_t RTDEF(ReduceInteger2)(const Descriptor &array,
+ ReductionOperation<std::int16_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int16_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Integer, 2>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<std::int16_t>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceInteger2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int16_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int16_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::int16_t>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Integer, 2>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+std::int32_t RTDEF(ReduceInteger4)(const Descriptor &array,
+ ReductionOperation<std::int32_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int32_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Integer, 4>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<std::int32_t>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceInteger4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int32_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int32_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::int32_t>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Integer, 4>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+std::int64_t RTDEF(ReduceInteger8)(const Descriptor &array,
+ ReductionOperation<std::int64_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int64_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Integer, 8>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<std::int64_t>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceInteger8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int64_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int64_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::int64_t>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Integer, 8>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#ifdef __SIZEOF_INT128__
+common::int128_t RTDEF(ReduceInteger16)(const Descriptor &array,
+ ReductionOperation<common::int128_t> operation, const char *source,
+ int line, int dim, const Descriptor *mask, const common::int128_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Integer, 16>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<common::int128_t>{
+ array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<common::int128_t> operation, const char *source,
+ int line, int dim, const Descriptor *mask, const common::int128_t *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<common::int128_t>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Integer, 16>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#endif
+
+// TODO: real/complex(2 & 3)
+float RTDEF(ReduceReal4)(const Descriptor &array,
+ ReductionOperation<float> operation, const char *source, int line, int dim,
+ const Descriptor *mask, const float *identity, bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Real, 4>(array, source, line, dim,
+ mask, ReduceAccumulator<float>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceReal4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<float> operation, const char *source, int line, int dim,
+ const Descriptor *mask, const float *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<float>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Real, 4>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+double RTDEF(ReduceReal8)(const Descriptor &array,
+ ReductionOperation<double> operation, const char *source, int line, int dim,
+ const Descriptor *mask, const double *identity, bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Real, 8>(array, source, line, dim,
+ mask, ReduceAccumulator<double>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceReal8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<double> operation, const char *source, int line, int dim,
+ const Descriptor *mask, const double *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<double>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Real, 8>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#if LDBL_MANT_DIG == 64
+long double RTDEF(ReduceReal10)(const Descriptor &array,
+ ReductionOperation<long double> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const long double *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Real, 10>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<long double>{array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceReal10Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<long double> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const long double *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<long double>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Real, 10>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+CppFloat128Type RTDEF(ReduceReal16)(const Descriptor &array,
+ ReductionOperation<CppFloat128Type> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const CppFloat128Type *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ return GetTotalReduction<TypeCategory::Real, 16>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<CppFloat128Type>{
+ array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(ReduceReal16Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<CppFloat128Type> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const CppFloat128Type *identity,
+ bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<CppFloat128Type>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Real, 16>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#endif
+
+void RTDEF(CppReduceComplex4)(std::complex<float> &result,
+ const Descriptor &array, ReductionOperation<std::complex<float>> operation,
+ const char *source, int line, int dim, const Descriptor *mask,
+ const std::complex<float> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ result = GetTotalReduction<TypeCategory::Complex, 4>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<std::complex<float>>{
+ array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(CppReduceComplex4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<float>> operation, const char *source,
+ int line, int dim, const Descriptor *mask,
+ const std::complex<float> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::complex<float>>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Complex, 4>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(CppReduceComplex8)(std::complex<double> &result,
+ const Descriptor &array, ReductionOperation<std::complex<double>> operation,
+ const char *source, int line, int dim, const Descriptor *mask,
+ const std::complex<double> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ result = GetTotalReduction<TypeCategory::Complex, 8>(array, source, line, dim,
+ mask,
+ ReduceAccumulator<std::complex<double>>{
+ array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(CppReduceComplex8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<double>> operation, const char *source,
+ int line, int dim, const Descriptor *mask,
+ const std::complex<double> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::complex<double>>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Complex, 8>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#if LDBL_MANT_DIG == 64
+void RTDEF(CppReduceComplex10)(std::complex<long double> &result,
+ const Descriptor &array,
+ ReductionOperation<std::complex<long double>> operation, const char *source,
+ int line, int dim, const Descriptor *mask,
+ const std::complex<long double> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ result = GetTotalReduction<TypeCategory::Complex, 10>(array, source, line,
+ dim, mask,
+ ReduceAccumulator<std::complex<long double>>{
+ array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(CppReduceComplex10Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<long double>> operation, const char *source,
+ int line, int dim, const Descriptor *mask,
+ const std::complex<long double> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::complex<long double>>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Complex, 10>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#endif
+#if LDBL_MANT_DIG == 113 || HAS_FLOAT128
+void RTDEF(CppReduceComplex16)(std::complex<CppFloat128Type> &result,
+ const Descriptor &array,
+ ReductionOperation<std::complex<CppFloat128Type>> operation,
+ const char *source, int line, int dim, const Descriptor *mask,
+ const std::complex<CppFloat128Type> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ result = GetTotalReduction<TypeCategory::Complex, 16>(array, source, line,
+ dim, mask,
+ ReduceAccumulator<std::complex<CppFloat128Type>>{
+ array, operation, identity, terminator},
+ "REDUCE");
+}
+void RTDEF(CppReduceComplex16Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::complex<CppFloat128Type>> operation,
+ const char *source, int line, int dim, const Descriptor *mask,
+ const std::complex<CppFloat128Type> *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = ReduceAccumulator<std::complex<CppFloat128Type>>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Complex, 16>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+#endif
+
+bool RTDEF(ReduceLogical1)(const Descriptor &array,
+ ReductionOperation<std::int8_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int8_t *identity,
+ bool ordered) {
+ return RTNAME(ReduceInteger1)(
+ array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical1Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int8_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int8_t *identity,
+ bool ordered) {
+ RTNAME(ReduceInteger1Dim)
+ (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+bool RTDEF(ReduceLogical2)(const Descriptor &array,
+ ReductionOperation<std::int16_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int16_t *identity,
+ bool ordered) {
+ return RTNAME(ReduceInteger2)(
+ array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int16_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int16_t *identity,
+ bool ordered) {
+ RTNAME(ReduceInteger2Dim)
+ (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+bool RTDEF(ReduceLogical4)(const Descriptor &array,
+ ReductionOperation<std::int32_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int32_t *identity,
+ bool ordered) {
+ return RTNAME(ReduceInteger4)(
+ array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int32_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int32_t *identity,
+ bool ordered) {
+ RTNAME(ReduceInteger4Dim)
+ (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+bool RTDEF(ReduceLogical8)(const Descriptor &array,
+ ReductionOperation<std::int64_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int64_t *identity,
+ bool ordered) {
+ return RTNAME(ReduceInteger8)(
+ array, operation, source, line, dim, mask, identity, ordered) != 0;
+}
+void RTDEF(ReduceLogical8Dim)(Descriptor &result, const Descriptor &array,
+ ReductionOperation<std::int64_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const std::int64_t *identity,
+ bool ordered) {
+ RTNAME(ReduceInteger8Dim)
+ (result, array, operation, source, line, dim, mask, identity, ordered);
+}
+
+void RTDEF(ReduceChar1)(char *result, const Descriptor &array,
+ ReductionCharOperation<char> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char *identity, bool ordered) {
+ Terminator terminator{source, line};
+ BufferedReduceAccumulator<char, ReductionCharOperation<char>,
+ /*hasLength=*/true>
+ accumulator{array, operation, identity, terminator};
+ DoTotalReduction<char>(array, dim, mask, accumulator, "REDUCE", terminator);
+ accumulator.GetResult(result);
+}
+void RTDEF(ReduceCharacter1Dim)(Descriptor &result, const Descriptor &array,
+ ReductionCharOperation<char> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = BufferedReduceAccumulator<char,
+ ReductionCharOperation<char>, /*hasLength=*/true>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Character, 1>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceChar2)(char16_t *result, const Descriptor &array,
+ ReductionCharOperation<char16_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char16_t *identity, bool ordered) {
+ Terminator terminator{source, line};
+ BufferedReduceAccumulator<char16_t, ReductionCharOperation<char16_t>,
+ /*hasLength=*/true>
+ accumulator{array, operation, identity, terminator};
+ DoTotalReduction<char16_t>(
+ array, dim, mask, accumulator, "REDUCE", terminator);
+ accumulator.GetResult(result);
+}
+void RTDEF(ReduceCharacter2Dim)(Descriptor &result, const Descriptor &array,
+ ReductionCharOperation<char16_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char16_t *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = BufferedReduceAccumulator<char16_t,
+ ReductionCharOperation<char16_t>, /*hasLength=*/true>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Character, 2>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+void RTDEF(ReduceChar4)(char32_t *result, const Descriptor &array,
+ ReductionCharOperation<char32_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char32_t *identity, bool ordered) {
+ Terminator terminator{source, line};
+ BufferedReduceAccumulator<char32_t, ReductionCharOperation<char32_t>,
+ /*hasLength=*/true>
+ accumulator{array, operation, identity, terminator};
+ DoTotalReduction<char32_t>(
+ array, dim, mask, accumulator, "REDUCE", terminator);
+ accumulator.GetResult(result);
+}
+void RTDEF(ReduceCharacter4Dim)(Descriptor &result, const Descriptor &array,
+ ReductionCharOperation<char32_t> operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char32_t *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = BufferedReduceAccumulator<char32_t,
+ ReductionCharOperation<char32_t>, /*hasLength=*/true>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Character, 4>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+
+void RTDEF(ReduceDerivedType)(char *result, const Descriptor &array,
+ ReductionDerivedTypeOperation operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char *identity, bool ordered) {
+ Terminator terminator{source, line};
+ BufferedReduceAccumulator<char, ReductionDerivedTypeOperation,
+ /*hasLength=*/false>
+ accumulator{array, operation, identity, terminator};
+ DoTotalReduction<char>(array, dim, mask, accumulator, "REDUCE", terminator);
+ accumulator.GetResult(result);
+}
+void RTDEF(ReduceDerivedTypeDim)(Descriptor &result, const Descriptor &array,
+ ReductionDerivedTypeOperation operation, const char *source, int line,
+ int dim, const Descriptor *mask, const char *identity, bool ordered) {
+ Terminator terminator{source, line};
+ using Accumulator = BufferedReduceAccumulator<char,
+ ReductionDerivedTypeOperation, /*hasLength=*/false>;
+ Accumulator accumulator{array, operation, identity, terminator};
+ PartialReduction<Accumulator, TypeCategory::Derived, 0>(result, array,
+ array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
+}
+
+RT_EXT_API_GROUP_END
+} // extern "C"
+} // namespace Fortran::runtime
diff --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h
index 5b793deb2a123d..0db3b9b6acbe64 100644
--- a/flang/runtime/reduction-templates.h
+++ b/flang/runtime/reduction-templates.h
@@ -86,13 +86,22 @@ inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
using CppType = CppTypeFor<CAT, KIND>;
DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
- CppType result;
+ if constexpr (std::is_void_v<CppType>) {
+ // Result is returned from accumulator, as in REDUCE() for derived type
#ifdef _MSC_VER // work around MSVC spurious error
- accumulator.GetResult(&result);
+ accumulator.GetResult();
#else
- accumulator.template GetResult(&result);
+ accumulator.template GetResult();
#endif
- return result;
+ } else {
+ CppType result;
+#ifdef _MSC_VER // work around MSVC spurious error
+ accumulator.GetResult(&result);
+#else
+ accumulator.template GetResult(&result);
+#endif
+ return result;
+ }
}
// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
@@ -164,35 +173,6 @@ inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
#endif
}
-// Utility: establishes & allocates the result array for a partial
-// reduction (i.e., one with DIM=).
-static RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
- const Descriptor &x, std::size_t resultElementSize, int dim,
- Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
- int xRank{x.rank()};
- if (dim < 1 || dim > xRank) {
- terminator.Crash(
- "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
- }
- int zeroBasedDim{dim - 1};
- SubscriptValue resultExtent[maxRank];
- for (int j{0}; j < zeroBasedDim; ++j) {
- resultExtent[j] = x.GetDimension(j).Extent();
- }
- for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
- resultExtent[j - 1] = x.GetDimension(j).Extent();
- }
- result.Establish(typeCode, resultElementSize, nullptr, xRank - 1,
- resultExtent, CFI_attribute_allocatable);
- for (int j{0}; j + 1 < xRank; ++j) {
- result.GetDimension(j).SetBounds(1, resultExtent[j]);
- }
- if (int stat{result.Allocate()}) {
- terminator.Crash(
- "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
- }
-}
-
// Partial reductions with DIM=
template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
diff --git a/flang/runtime/tools.cpp b/flang/runtime/tools.cpp
index e653323ed1de03..bd1913bfe42fe3 100644
--- a/flang/runtime/tools.cpp
+++ b/flang/runtime/tools.cpp
@@ -238,5 +238,34 @@ template <int KIND> struct FitsInIntegerKind {
}
};
+// Utility: establishes & allocates the result array for a partial
+// reduction (i.e., one with DIM=).
+RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
+ const Descriptor &x, std::size_t resultElementSize, int dim,
+ Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
+ int xRank{x.rank()};
+ if (dim < 1 || dim > xRank) {
+ terminator.Crash(
+ "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
+ }
+ int zeroBasedDim{dim - 1};
+ SubscriptValue resultExtent[maxRank];
+ for (int j{0}; j < zeroBasedDim; ++j) {
+ resultExtent[j] = x.GetDimension(j).Extent();
+ }
+ for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
+ resultExtent[j - 1] = x.GetDimension(j).Extent();
+ }
+ result.Establish(typeCode, resultElementSize, nullptr, xRank - 1,
+ resultExtent, CFI_attribute_allocatable);
+ for (int j{0}; j + 1 < xRank; ++j) {
+ result.GetDimension(j).SetBounds(1, resultExtent[j]);
+ }
+ if (int stat{result.Allocate()}) {
+ terminator.Crash(
+ "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
+ }
+}
+
RT_OFFLOAD_API_GROUP_END
} // namespace Fortran::runtime
diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h
index 392e3fc6c89136..7551045296b4b7 100644
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -490,5 +490,9 @@ RT_API_ATTRS void CopyAndPad(
}
}
+RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
+ const Descriptor &x, std::size_t resultElementSize, int dim, Terminator &,
+ const char *intrinsic, TypeCode);
+
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_TOOLS_H_
diff --git a/flang/unittests/Runtime/Reduction.cpp b/flang/unittests/Runtime/Reduction.cpp
index b17988bc17699d..b2661e78abdf58 100644
--- a/flang/unittests/Runtime/Reduction.cpp
+++ b/flang/unittests/Runtime/Reduction.cpp
@@ -13,6 +13,7 @@
#include "flang/Runtime/allocatable.h"
#include "flang/Runtime/cpp-type.h"
#include "flang/Runtime/descriptor.h"
+#include "flang/Runtime/reduce.h"
#include "flang/Runtime/type-code.h"
#include <cstdint>
#include <cstring>
@@ -634,3 +635,39 @@ TEST(Reductions, ExtremaReal16) {
EXPECT_EQ(RTNAME(MaxvalReal16)(*maxArray, __FILE__, __LINE__), -1.0);
}
#endif // LDBL_MANT_DIG == 113 || HAS_FLOAT128
+
+static std::int32_t IAdd(const std::int32_t *x, const std::int32_t *y) {
+ return *x + *y;
+}
+
+static std::int32_t IMultiply(const std::int32_t *x, const std::int32_t *y) {
+ return *x * *y;
+}
+
+TEST(Reductions, ReduceInt4) {
+ auto intVector{MakeArray<TypeCategory::Integer, 4>(
+ std::vector<int>{4}, std::vector<std::int32_t>{1, 2, 3, 4})};
+ EXPECT_EQ(RTNAME(ReduceInteger4)(*intVector, IAdd, __FILE__, __LINE__), 10);
+ EXPECT_EQ(
+ RTNAME(ReduceInteger4)(*intVector, IMultiply, __FILE__, __LINE__), 24);
+}
+TEST(Reductions, ReduceInt4Dim) {
+ auto intMatrix{MakeArray<TypeCategory::Integer, 4>(
+ std::vector<int>{2, 2}, std::vector<std::int32_t>{1, 2, 3, 4})};
+ StaticDescriptor<1, true> statDesc;
+ Descriptor &sums{statDesc.descriptor()};
+ RTNAME(ReduceInteger4Dim)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 1);
+ EXPECT_EQ(sums.rank(), 1);
+ EXPECT_EQ(sums.GetDimension(0).LowerBound(), 1);
+ EXPECT_EQ(sums.GetDimension(0).Extent(), 2);
+ EXPECT_EQ(*sums.ZeroBasedIndexedElement<std::int32_t>(0), 3);
+ EXPECT_EQ(*sums.ZeroBasedIndexedElement<std::int32_t>(1), 7);
+ sums.Destroy();
+ RTNAME(ReduceInteger4Dim)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 2);
+ EXPECT_EQ(sums.rank(), 1);
+ EXPECT_EQ(sums.GetDimension(0).LowerBound(), 1);
+ EXPECT_EQ(sums.GetDimension(0).Extent(), 2);
+ EXPECT_EQ(*sums.ZeroBasedIndexedElement<std::int32_t>(0), 4);
+ EXPECT_EQ(*sums.ZeroBasedIndexedElement<std::int32_t>(1), 6);
+ sums.Destroy();
+}
More information about the flang-commits
mailing list