[flang-commits] [flang] [flang][runtime] Use cuda::std::complex in F18 runtime CUDA build. (PR #109078)

Tue Sep 17 19:45:43 PDT 2024

https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/109078

`std::complex` operators do not work in the CUDA device compilation
of the F18 runtime. This change makes use of `cuda::std::complex` from
`libcudacxx`. `cuda::std::complex` does not have specializations for
`long double`, so the change is accompanied by a clean-up of `long double` usage.


>From 75d05da697805c9cc994c0990adcf7bac80aedc1 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 17 Sep 2024 18:51:53 -0700
Subject: [PATCH] [flang][runtime] Use cuda::std::complex in F18 runtime CUDA
 build.

`std::complex` operators do not work in the CUDA device compilation
of the F18 runtime. This change makes use of `cuda::std::complex` from
`libcudacxx`. `cuda::std::complex` does not have specializations for
`long double`, so the change is accompanied by a clean-up of `long double` usage.
---
 flang/include/flang/Common/float80.h          |  43 ++++
 flang/include/flang/Runtime/complex.h         |  31 +++
 flang/include/flang/Runtime/cpp-type.h        |   9 +-
 .../flang/Runtime/matmul-instances.inc        |   6 +-
 flang/include/flang/Runtime/numeric.h         |  32 +--
 flang/include/flang/Runtime/reduce.h          | 214 +++++++++++-------
 flang/include/flang/Runtime/reduction.h       | 112 ++++-----
 .../include/flang/Runtime/transformational.h  |  20 +-
 flang/runtime/complex-powi.cpp                |  39 ++--
 flang/runtime/complex-reduction.c             |   8 +-
 flang/runtime/dot-product.cpp                 |  21 +-
 flang/runtime/extrema.cpp                     |  10 +-
 flang/runtime/matmul-transpose.cpp            |  17 --
 flang/runtime/matmul.cpp                      |  34 +--
 flang/runtime/numeric.cpp                     |  36 +--
 flang/runtime/product.cpp                     |  15 +-
 flang/runtime/random.cpp                      |   2 +-
 flang/runtime/reduce.cpp                      | 180 ++++++++-------
 flang/runtime/reduction-templates.h           |   4 +-
 flang/runtime/sum.cpp                         |  22 +-
 flang/runtime/transformational.cpp            |   8 +-
 flang/unittests/Runtime/Numeric.cpp           |   4 +-
 flang/unittests/Runtime/Transformational.cpp  |  10 +-
 23 files changed, 489 insertions(+), 388 deletions(-)
 create mode 100644 flang/include/flang/Common/float80.h
 create mode 100644 flang/include/flang/Runtime/complex.h

diff --git a/flang/include/flang/Common/float80.h b/flang/include/flang/Common/float80.h
new file mode 100644
index 00000000000000..1838f7b13c8bb2
--- /dev/null
+++ b/flang/include/flang/Common/float80.h
@@ -0,0 +1,43 @@
+/*===-- flang/Common/float80.h --------------------------------------*- C -*-===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===*/
+
+/* This header is usable in both C and C++ code.
+ * Isolates build compiler checks to determine if the 80-bit
+ * floating point format is supported via a particular C type.
+ * It defines CFloat80Type and CppFloat80Type aliases for this
+ * C type.
+ */
+
+#ifndef FORTRAN_COMMON_FLOAT80_H_
+#define FORTRAN_COMMON_FLOAT80_H_
+
+#include "api-attrs.h"
+#include <float.h>
+
+#if LDBL_MANT_DIG == 64
+#undef HAS_FLOAT80
+#define HAS_FLOAT80 1
+#endif
+
+#if defined(RT_DEVICE_COMPILATION) && defined(__CUDACC__)
+/*
+ * 'long double' is treated as 'double' in the CUDA device code,
+ * and there is no support for 80-bit floating point format.
+ * This is probably true for most offload devices, so RT_DEVICE_COMPILATION
+ * check should be enough. For the time being, guard it with __CUDACC__
+ * as well.
+ */
+#undef HAS_FLOAT80
+#endif
+
+#if HAS_FLOAT80
+typedef long double CFloat80Type;
+typedef long double CppFloat80Type;
+#endif
+
+#endif /* FORTRAN_COMMON_FLOAT80_H_ */
diff --git a/flang/include/flang/Runtime/complex.h b/flang/include/flang/Runtime/complex.h
new file mode 100644
index 00000000000000..b7ad1376bffbf1
--- /dev/null
+++ b/flang/include/flang/Runtime/complex.h
@@ -0,0 +1,31 @@
+//===-- include/flang/Runtime/complex.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// A single way to expose the C++ complex class in files that can be used
+// in the F18 runtime build. Including this file makes complex and conj
+// available in the Fortran::runtime::rtcmplx namespace; they refer to the
+// std:: or the cuda::std:: definitions depending on RT_USE_LIBCUDACXX.
+
+#ifndef FORTRAN_RUNTIME_COMPLEX_H
+#define FORTRAN_RUNTIME_COMPLEX_H
+
+#if RT_USE_LIBCUDACXX
+#include <cuda/std/complex>
+namespace Fortran::runtime::rtcmplx {
+using cuda::std::complex;
+using cuda::std::conj;
+} // namespace Fortran::runtime::rtcmplx
+#else // !RT_USE_LIBCUDACXX
+#include <complex>
+namespace Fortran::runtime::rtcmplx {
+using std::complex;
+using std::conj;
+} // namespace Fortran::runtime::rtcmplx
+#endif // !RT_USE_LIBCUDACXX
+
+#endif // FORTRAN_RUNTIME_COMPLEX_H
diff --git a/flang/include/flang/Runtime/cpp-type.h b/flang/include/flang/Runtime/cpp-type.h
index fe21dd544cf7d8..aef0fbd7ede586 100644
--- a/flang/include/flang/Runtime/cpp-type.h
+++ b/flang/include/flang/Runtime/cpp-type.h
@@ -13,8 +13,9 @@
 
 #include "flang/Common/Fortran.h"
 #include "flang/Common/float128.h"
+#include "flang/Common/float80.h"
 #include "flang/Common/uint128.h"
-#include <complex>
+#include "flang/Runtime/complex.h"
 #include <cstdint>
 #if __cplusplus >= 202302
 #include <stdfloat>
@@ -70,9 +71,9 @@ template <> struct CppTypeForHelper<TypeCategory::Real, 8> {
   using type = double;
 #endif
 };
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 template <> struct CppTypeForHelper<TypeCategory::Real, 10> {
-  using type = long double;
+  using type = CppFloat80Type;
 };
 #endif
 #if __STDCPP_FLOAT128_T__
@@ -89,7 +90,7 @@ template <> struct CppTypeForHelper<TypeCategory::Real, 16> {
 #endif
 
 template <int KIND> struct CppTypeForHelper<TypeCategory::Complex, KIND> {
-  using type = std::complex<CppTypeFor<TypeCategory::Real, KIND>>;
+  using type = rtcmplx::complex<CppTypeFor<TypeCategory::Real, KIND>>;
 };
 
 template <> struct CppTypeForHelper<TypeCategory::Character, 1> {
diff --git a/flang/include/flang/Runtime/matmul-instances.inc b/flang/include/flang/Runtime/matmul-instances.inc
index 32c6ab06d25219..88e3067ca029d4 100644
--- a/flang/include/flang/Runtime/matmul-instances.inc
+++ b/flang/include/flang/Runtime/matmul-instances.inc
@@ -111,7 +111,7 @@ FOREACH_MATMUL_TYPE_PAIR(MATMUL_DIRECT_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR_WITH_INT16(MATMUL_INSTANCE)
 FOREACH_MATMUL_TYPE_PAIR_WITH_INT16(MATMUL_DIRECT_INSTANCE)
 
-#if MATMUL_FORCE_ALL_TYPES || LDBL_MANT_DIG == 64
+#if MATMUL_FORCE_ALL_TYPES || HAS_FLOAT80
 MATMUL_INSTANCE(Integer, 16, Real, 10)
 MATMUL_INSTANCE(Integer, 16, Complex, 10)
 MATMUL_INSTANCE(Real, 10, Integer, 16)
@@ -133,7 +133,7 @@ MATMUL_DIRECT_INSTANCE(Complex, 16, Integer, 16)
 #endif
 #endif // MATMUL_FORCE_ALL_TYPES || (defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T)
 
-#if MATMUL_FORCE_ALL_TYPES || LDBL_MANT_DIG == 64
+#if MATMUL_FORCE_ALL_TYPES || HAS_FLOAT80
 #define FOREACH_MATMUL_TYPE_PAIR_WITH_REAL10(macro)         \
   macro(Integer, 1, Real, 10)                               \
   macro(Integer, 1, Complex, 10)                            \
@@ -193,7 +193,7 @@ MATMUL_DIRECT_INSTANCE(Complex, 10, Complex, 16)
 MATMUL_DIRECT_INSTANCE(Complex, 16, Real, 10)
 MATMUL_DIRECT_INSTANCE(Complex, 16, Complex, 10)
 #endif
-#endif // MATMUL_FORCE_ALL_TYPES || LDBL_MANT_DIG == 64
+#endif // MATMUL_FORCE_ALL_TYPES || HAS_FLOAT80
 
 #if MATMUL_FORCE_ALL_TYPES || (LDBL_MANT_DIG == 113 || HAS_FLOAT128)
 #define FOREACH_MATMUL_TYPE_PAIR_WITH_REAL16(macro)         \
diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h
index 84a5a7cd7a361c..c3923ee2e0d889 100644
--- a/flang/include/flang/Runtime/numeric.h
+++ b/flang/include/flang/Runtime/numeric.h
@@ -44,7 +44,7 @@ CppTypeFor<TypeCategory::Integer, 8> RTDECL(Ceiling8_8)(
 CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling8_16)(
     CppTypeFor<TypeCategory::Real, 8>);
 #endif
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 1> RTDECL(Ceiling10_1)(
     CppTypeFor<TypeCategory::Real, 10>);
 CppTypeFor<TypeCategory::Integer, 2> RTDECL(Ceiling10_2)(
@@ -78,7 +78,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(ErfcScaled4)(
     CppTypeFor<TypeCategory::Real, 4>);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(ErfcScaled8)(
     CppTypeFor<TypeCategory::Real, 8>);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(ErfcScaled10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
@@ -96,7 +96,7 @@ CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent8_4)(
     CppTypeFor<TypeCategory::Real, 8>);
 CppTypeFor<TypeCategory::Integer, 8> RTDECL(Exponent8_8)(
     CppTypeFor<TypeCategory::Real, 8>);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent10_4)(
     CppTypeFor<TypeCategory::Real, 10>);
 CppTypeFor<TypeCategory::Integer, 8> RTDECL(Exponent10_8)(
@@ -134,7 +134,7 @@ CppTypeFor<TypeCategory::Integer, 8> RTDECL(Floor8_8)(
 CppTypeFor<TypeCategory::Integer, 16> RTDECL(Floor8_16)(
     CppTypeFor<TypeCategory::Real, 8>);
 #endif
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 1> RTDECL(Floor10_1)(
     CppTypeFor<TypeCategory::Real, 10>);
 CppTypeFor<TypeCategory::Integer, 2> RTDECL(Floor10_2)(
@@ -168,7 +168,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(Fraction4)(
     CppTypeFor<TypeCategory::Real, 4>);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(Fraction8)(
     CppTypeFor<TypeCategory::Real, 8>);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(Fraction10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
@@ -180,7 +180,7 @@ CppTypeFor<TypeCategory::Real, 16> RTDECL(Fraction16)(
 // ISNAN / IEEE_IS_NAN
 bool RTDECL(IsNaN4)(CppTypeFor<TypeCategory::Real, 4>);
 bool RTDECL(IsNaN8)(CppTypeFor<TypeCategory::Real, 8>);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 bool RTDECL(IsNaN10)(CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -212,7 +212,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(ModReal4)(
 CppTypeFor<TypeCategory::Real, 8> RTDECL(ModReal8)(
     CppTypeFor<TypeCategory::Real, 8>, CppTypeFor<TypeCategory::Real, 8>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(ModReal10)(
     CppTypeFor<TypeCategory::Real, 10>, CppTypeFor<TypeCategory::Real, 10>,
     const char *sourceFile = nullptr, int sourceLine = 0);
@@ -247,7 +247,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(ModuloReal4)(
 CppTypeFor<TypeCategory::Real, 8> RTDECL(ModuloReal8)(
     CppTypeFor<TypeCategory::Real, 8>, CppTypeFor<TypeCategory::Real, 8>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(ModuloReal10)(
     CppTypeFor<TypeCategory::Real, 10>, CppTypeFor<TypeCategory::Real, 10>,
     const char *sourceFile = nullptr, int sourceLine = 0);
@@ -283,7 +283,7 @@ CppTypeFor<TypeCategory::Integer, 8> RTDECL(Nint8_8)(
 CppTypeFor<TypeCategory::Integer, 16> RTDECL(Nint8_16)(
     CppTypeFor<TypeCategory::Real, 8>);
 #endif
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 1> RTDECL(Nint10_1)(
     CppTypeFor<TypeCategory::Real, 10>);
 CppTypeFor<TypeCategory::Integer, 2> RTDECL(Nint10_2)(
@@ -319,7 +319,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(Nearest4)(
     CppTypeFor<TypeCategory::Real, 4>, bool positive);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(Nearest8)(
     CppTypeFor<TypeCategory::Real, 8>, bool positive);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(Nearest10)(
     CppTypeFor<TypeCategory::Real, 10>, bool positive);
 #endif
@@ -333,7 +333,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(RRSpacing4)(
     CppTypeFor<TypeCategory::Real, 4>);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(RRSpacing8)(
     CppTypeFor<TypeCategory::Real, 8>);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(RRSpacing10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
@@ -347,7 +347,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(SetExponent4)(
     CppTypeFor<TypeCategory::Real, 4>, std::int64_t);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(SetExponent8)(
     CppTypeFor<TypeCategory::Real, 8>, std::int64_t);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(SetExponent10)(
     CppTypeFor<TypeCategory::Real, 10>, std::int64_t);
 #endif
@@ -361,7 +361,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(Scale4)(
     CppTypeFor<TypeCategory::Real, 4>, std::int64_t);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(Scale8)(
     CppTypeFor<TypeCategory::Real, 8>, std::int64_t);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(Scale10)(
     CppTypeFor<TypeCategory::Real, 10>, std::int64_t);
 #endif
@@ -410,7 +410,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(Spacing4)(
     CppTypeFor<TypeCategory::Real, 4>);
 CppTypeFor<TypeCategory::Real, 8> RTDECL(Spacing8)(
     CppTypeFor<TypeCategory::Real, 8>);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(Spacing10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
@@ -425,7 +425,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(FPow4i)(
 CppTypeFor<TypeCategory::Real, 8> RTDECL(FPow8i)(
     CppTypeFor<TypeCategory::Real, 8> b,
     CppTypeFor<TypeCategory::Integer, 4> e);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(FPow10i)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 4> e);
@@ -442,7 +442,7 @@ CppTypeFor<TypeCategory::Real, 4> RTDECL(FPow4k)(
 CppTypeFor<TypeCategory::Real, 8> RTDECL(FPow8k)(
     CppTypeFor<TypeCategory::Real, 8> b,
     CppTypeFor<TypeCategory::Integer, 8> e);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDECL(FPow10k)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 8> e);
diff --git a/flang/include/flang/Runtime/reduce.h b/flang/include/flang/Runtime/reduce.h
index 60f54c393b4bbd..c016b37f9592a1 100644
--- a/flang/include/flang/Runtime/reduce.h
+++ b/flang/include/flang/Runtime/reduce.h
@@ -188,22 +188,26 @@ void RTDECL(ReduceReal8DimValue)(Descriptor &result, const Descriptor &array,
     ValueReductionOperation<double>, const char *source, int line, int dim,
     const Descriptor *mask = nullptr, const double *identity = nullptr,
     bool ordered = true);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(ReduceReal10Ref)(const Descriptor &,
-    ReferenceReductionOperation<long double>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
-    const long double *identity = nullptr, bool ordered = true);
-long double RTDECL(ReduceReal10Value)(const Descriptor &,
-    ValueReductionOperation<long double>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
-    const long double *identity = nullptr, bool ordered = true);
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ReduceReal10Ref)(const Descriptor &,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Real, 10>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Real, 10> *identity = nullptr,
+    bool ordered = true);
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ReduceReal10Value)(const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Real, 10>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Real, 10> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(ReduceReal10DimRef)(Descriptor &result, const Descriptor &array,
-    ReferenceReductionOperation<long double>, const char *source, int line,
-    int dim, const Descriptor *mask = nullptr,
-    const long double *identity = nullptr, bool ordered = true);
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Real, 10>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Real, 10> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(ReduceReal10DimValue)(Descriptor &result, const Descriptor &array,
-    ValueReductionOperation<long double>, const char *source, int line, int dim,
-    const Descriptor *mask = nullptr, const long double *identity = nullptr,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Real, 10>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Real, 10> *identity = nullptr,
     bool ordered = true);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -225,112 +229,152 @@ void RTDECL(ReduceReal16DimValue)(Descriptor &result, const Descriptor &array,
     const CppFloat128Type *identity = nullptr, bool ordered = true);
 #endif
 
-void RTDECL(CppReduceComplex2Ref)(std::complex<float> &, const Descriptor &,
-    ReferenceReductionOperation<std::complex<float>>, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex2Value)(std::complex<float> &, const Descriptor &,
-    ValueReductionOperation<std::complex<float>>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
+void RTDECL(CppReduceComplex2Ref)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex2Value)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex2DimRef)(Descriptor &result,
-    const Descriptor &array, ReferenceReductionOperation<std::complex<float>>,
+    const Descriptor &array,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex2DimValue)(Descriptor &result,
-    const Descriptor &array, ValueReductionOperation<std::complex<float>>,
+    const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex3Ref)(std::complex<float> &, const Descriptor &,
-    ReferenceReductionOperation<std::complex<float>>, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex3Value)(std::complex<float> &, const Descriptor &,
-    ValueReductionOperation<std::complex<float>>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex3Ref)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex3Value)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex3DimRef)(Descriptor &result,
-    const Descriptor &array, ReferenceReductionOperation<std::complex<float>>,
+    const Descriptor &array,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex3DimValue)(Descriptor &result,
-    const Descriptor &array, ValueReductionOperation<std::complex<float>>,
+    const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex4Ref)(std::complex<float> &, const Descriptor &,
-    ReferenceReductionOperation<std::complex<float>>, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex4Value)(std::complex<float> &, const Descriptor &,
-    ValueReductionOperation<std::complex<float>>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex4Ref)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex4Value)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex4DimRef)(Descriptor &result,
-    const Descriptor &array, ReferenceReductionOperation<std::complex<float>>,
+    const Descriptor &array,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex4DimValue)(Descriptor &result,
-    const Descriptor &array, ValueReductionOperation<std::complex<float>>,
+    const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<float> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex8Ref)(std::complex<double> &, const Descriptor &,
-    ReferenceReductionOperation<std::complex<double>>, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<double> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex8Value)(std::complex<double> &, const Descriptor &,
-    ValueReductionOperation<std::complex<double>>, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<double> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 4> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex8Ref)(CppTypeFor<TypeCategory::Complex, 8> &,
+    const Descriptor &,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 8>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 8> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex8Value)(CppTypeFor<TypeCategory::Complex, 8> &,
+    const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 8>>,
+    const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 8> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex8DimRef)(Descriptor &result,
-    const Descriptor &array, ReferenceReductionOperation<std::complex<double>>,
+    const Descriptor &array,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 8>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<double> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 8> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex8DimValue)(Descriptor &result,
-    const Descriptor &array, ValueReductionOperation<std::complex<double>>,
+    const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 8>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<double> *identity = nullptr, bool ordered = true);
-#if LDBL_MANT_DIG == 64
-void RTDECL(CppReduceComplex10Ref)(std::complex<long double> &,
-    const Descriptor &, ReferenceReductionOperation<std::complex<long double>>,
+    const CppTypeFor<TypeCategory::Complex, 8> *identity = nullptr,
+    bool ordered = true);
+#if HAS_FLOAT80
+void RTDECL(CppReduceComplex10Ref)(CppTypeFor<TypeCategory::Complex, 10> &,
+    const Descriptor &,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 10>>,
     const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<long double> *identity = nullptr, bool ordered = true);
-void RTDECL(CppReduceComplex10Value)(std::complex<long double> &,
-    const Descriptor &, ValueReductionOperation<std::complex<long double>>,
+    const CppTypeFor<TypeCategory::Complex, 10> *identity = nullptr,
+    bool ordered = true);
+void RTDECL(CppReduceComplex10Value)(CppTypeFor<TypeCategory::Complex, 10> &,
+    const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 10>>,
     const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<long double> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 10> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex10DimRef)(Descriptor &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<long double>>, const char *source,
-    int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<long double> *identity = nullptr, bool ordered = true);
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 10>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 10> *identity = nullptr,
+    bool ordered = true);
 void RTDECL(CppReduceComplex10DimValue)(Descriptor &result,
-    const Descriptor &array, ValueReductionOperation<std::complex<long double>>,
+    const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 10>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<long double> *identity = nullptr, bool ordered = true);
+    const CppTypeFor<TypeCategory::Complex, 10> *identity = nullptr,
+    bool ordered = true);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDECL(CppReduceComplex16Ref)(std::complex<CppFloat128Type> &,
+void RTDECL(CppReduceComplex16Ref)(CppTypeFor<TypeCategory::Complex, 16> &,
     const Descriptor &,
-    ReferenceReductionOperation<std::complex<CppFloat128Type>>,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 16>>,
     const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<CppFloat128Type> *identity = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 16> *identity = nullptr,
     bool ordered = true);
-void RTDECL(CppReduceComplex16Value)(std::complex<CppFloat128Type> &,
-    const Descriptor &, ValueReductionOperation<std::complex<CppFloat128Type>>,
+void RTDECL(CppReduceComplex16Value)(CppTypeFor<TypeCategory::Complex, 16> &,
+    const Descriptor &,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 16>>,
     const char *source, int line, int dim = 0, const Descriptor *mask = nullptr,
-    const std::complex<CppFloat128Type> *identity = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 16> *identity = nullptr,
     bool ordered = true);
 void RTDECL(CppReduceComplex16DimRef)(Descriptor &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<CppFloat128Type>>,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 16>>,
     const char *source, int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<CppFloat128Type> *identity = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 16> *identity = nullptr,
     bool ordered = true);
 void RTDECL(CppReduceComplex16DimValue)(Descriptor &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<CppFloat128Type>>, const char *source,
-    int line, int dim, const Descriptor *mask = nullptr,
-    const std::complex<CppFloat128Type> *identity = nullptr,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 16>>,
+    const char *source, int line, int dim, const Descriptor *mask = nullptr,
+    const CppTypeFor<TypeCategory::Complex, 16> *identity = nullptr,
     bool ordered = true);
 #endif
 
diff --git a/flang/include/flang/Runtime/reduction.h b/flang/include/flang/Runtime/reduction.h
index 97986c12e8a10e..7eafacee69d034 100644
--- a/flang/include/flang/Runtime/reduction.h
+++ b/flang/include/flang/Runtime/reduction.h
@@ -68,34 +68,35 @@ float RTDECL(SumReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 double RTDECL(SumReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(SumReal10)(const Descriptor &, const char *source, int line,
-    int dim = 0, const Descriptor *mask = nullptr);
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(SumReal10)(const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppFloat128Type RTDECL(SumReal16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 
-void RTDECL(CppSumComplex2)(std::complex<float> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppSumComplex2)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppSumComplex3)(std::complex<float> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppSumComplex3)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppSumComplex4)(std::complex<float> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppSumComplex4)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppSumComplex8)(std::complex<double> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-#if LDBL_MANT_DIG == 64
-void RTDECL(CppSumComplex10)(std::complex<long double> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+#if HAS_FLOAT80
+void RTDECL(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDECL(CppSumComplex16)(std::complex<CppFloat128Type> &,
+void RTDECL(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 #endif
@@ -128,34 +129,35 @@ float RTDECL(ProductReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 double RTDECL(ProductReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(ProductReal10)(const Descriptor &, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr);
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ProductReal10)(const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppFloat128Type RTDECL(ProductReal16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 
-void RTDECL(CppProductComplex2)(std::complex<float> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppProductComplex2)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppProductComplex3)(std::complex<float> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppProductComplex3)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppProductComplex4)(std::complex<float> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppProductComplex4)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTDECL(CppProductComplex8)(std::complex<double> &, const Descriptor &,
-    const char *source, int line, int dim = 0,
+void RTDECL(CppProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &,
+    const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-#if LDBL_MANT_DIG == 64
-void RTDECL(CppProductComplex10)(std::complex<long double> &,
+#if HAS_FLOAT80
+void RTDECL(CppProductComplex10)(CppTypeFor<TypeCategory::Complex, 10> &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDECL(CppProductComplex16)(std::complex<CppFloat128Type> &,
+void RTDECL(CppProductComplex16)(CppTypeFor<TypeCategory::Complex, 16> &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 #endif
@@ -307,9 +309,10 @@ float RTDECL(MaxvalReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 double RTDECL(MaxvalReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(MaxvalReal10)(const Descriptor &, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr);
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(MaxvalReal10)(const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppFloat128Type RTDECL(MaxvalReal16)(const Descriptor &, const char *source,
@@ -338,9 +341,10 @@ float RTDECL(MinvalReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 double RTDECL(MinvalReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(MinvalReal10)(const Descriptor &, const char *source,
-    int line, int dim = 0, const Descriptor *mask = nullptr);
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(MinvalReal10)(const Descriptor &,
+    const char *source, int line, int dim = 0,
+    const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppFloat128Type RTDECL(MinvalReal16)(const Descriptor &, const char *source,
@@ -363,12 +367,12 @@ float RTDECL(Norm2_4)(
     const Descriptor &, const char *source, int line, int dim = 0);
 double RTDECL(Norm2_8)(
     const Descriptor &, const char *source, int line, int dim = 0);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(Norm2_10)(
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(Norm2_10)(
     const Descriptor &, const char *source, int line, int dim = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-long double RTDECL(Norm2_16)(
+CppFloat128Type RTDECL(Norm2_16)(
     const Descriptor &, const char *source, int line, int dim = 0);
 void RTDECL(Norm2DimReal16)(
     Descriptor &, const Descriptor &, int dim, const char *source, int line);
@@ -413,29 +417,33 @@ float RTDECL(DotProductReal4)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 double RTDECL(DotProductReal8)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-#if LDBL_MANT_DIG == 64
-long double RTDECL(DotProductReal10)(const Descriptor &, const Descriptor &,
-    const char *source = nullptr, int line = 0);
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDECL(DotProductReal10)(const Descriptor &,
+    const Descriptor &, const char *source = nullptr, int line = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppFloat128Type RTDECL(DotProductReal16)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 #endif
-void RTDECL(CppDotProductComplex2)(std::complex<float> &, const Descriptor &,
-    const Descriptor &, const char *source = nullptr, int line = 0);
-void RTDECL(CppDotProductComplex3)(std::complex<float> &, const Descriptor &,
-    const Descriptor &, const char *source = nullptr, int line = 0);
-void RTDECL(CppDotProductComplex4)(std::complex<float> &, const Descriptor &,
-    const Descriptor &, const char *source = nullptr, int line = 0);
-void RTDECL(CppDotProductComplex8)(std::complex<double> &, const Descriptor &,
-    const Descriptor &, const char *source = nullptr, int line = 0);
-#if LDBL_MANT_DIG == 64
-void RTDECL(CppDotProductComplex10)(std::complex<long double> &,
+void RTDECL(CppDotProductComplex2)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const Descriptor &, const char *source = nullptr,
+    int line = 0);
+void RTDECL(CppDotProductComplex3)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const Descriptor &, const char *source = nullptr,
+    int line = 0);
+void RTDECL(CppDotProductComplex4)(CppTypeFor<TypeCategory::Complex, 4> &,
+    const Descriptor &, const Descriptor &, const char *source = nullptr,
+    int line = 0);
+void RTDECL(CppDotProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &,
+    const Descriptor &, const Descriptor &, const char *source = nullptr,
+    int line = 0);
+#if HAS_FLOAT80
+void RTDECL(CppDotProductComplex10)(CppTypeFor<TypeCategory::Complex, 10> &,
     const Descriptor &, const Descriptor &, const char *source = nullptr,
     int line = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDECL(CppDotProductComplex16)(std::complex<CppFloat128Type> &,
+void RTDECL(CppDotProductComplex16)(CppTypeFor<TypeCategory::Complex, 16> &,
     const Descriptor &, const Descriptor &, const char *source = nullptr,
     int line = 0);
 #endif
diff --git a/flang/include/flang/Runtime/transformational.h b/flang/include/flang/Runtime/transformational.h
index a39b872f376a69..faeaa1baa39ae2 100644
--- a/flang/include/flang/Runtime/transformational.h
+++ b/flang/include/flang/Runtime/transformational.h
@@ -45,10 +45,12 @@ void RTDECL(BesselJn_4)(Descriptor &result, int32_t n1, int32_t n2, float x,
 void RTDECL(BesselJn_8)(Descriptor &result, int32_t n1, int32_t n2, double x,
     double bn2, double bn2_1, const char *sourceFile = nullptr, int line = 0);
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDECL(BesselJn_10)(Descriptor &result, int32_t n1, int32_t n2,
-    long double x, long double bn2, long double bn2_1,
-    const char *sourceFile = nullptr, int line = 0);
+    CppTypeFor<TypeCategory::Real, 10> x,
+    CppTypeFor<TypeCategory::Real, 10> bn2,
+    CppTypeFor<TypeCategory::Real, 10> bn2_1, const char *sourceFile = nullptr,
+    int line = 0);
 #endif
 
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -69,7 +71,7 @@ void RTDECL(BesselJnX0_4)(Descriptor &result, int32_t n1, int32_t n2,
 void RTDECL(BesselJnX0_8)(Descriptor &result, int32_t n1, int32_t n2,
     const char *sourceFile = nullptr, int line = 0);
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDECL(BesselJnX0_10)(Descriptor &result, int32_t n1, int32_t n2,
     const char *sourceFile = nullptr, int line = 0);
 #endif
@@ -91,10 +93,12 @@ void RTDECL(BesselYn_4)(Descriptor &result, int32_t n1, int32_t n2, float x,
 void RTDECL(BesselYn_8)(Descriptor &result, int32_t n1, int32_t n2, double x,
     double bn1, double bn1_1, const char *sourceFile = nullptr, int line = 0);
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDECL(BesselYn_10)(Descriptor &result, int32_t n1, int32_t n2,
-    long double x, long double bn1, long double bn1_1,
-    const char *sourceFile = nullptr, int line = 0);
+    CppTypeFor<TypeCategory::Real, 10> x,
+    CppTypeFor<TypeCategory::Real, 10> bn1,
+    CppTypeFor<TypeCategory::Real, 10> bn1_1, const char *sourceFile = nullptr,
+    int line = 0);
 #endif
 
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -115,7 +119,7 @@ void RTDECL(BesselYnX0_4)(Descriptor &result, int32_t n1, int32_t n2,
 void RTDECL(BesselYnX0_8)(Descriptor &result, int32_t n1, int32_t n2,
     const char *sourceFile = nullptr, int line = 0);
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDECL(BesselYnX0_10)(Descriptor &result, int32_t n1, int32_t n2,
     const char *sourceFile = nullptr, int line = 0);
 #endif
diff --git a/flang/runtime/complex-powi.cpp b/flang/runtime/complex-powi.cpp
index 77031e40242791..9e26ec1b8cd324 100644
--- a/flang/runtime/complex-powi.cpp
+++ b/flang/runtime/complex-powi.cpp
@@ -7,11 +7,13 @@
  * ===-----------------------------------------------------------------------===
  */
 #include "flang/Common/float128.h"
+#include "flang/Runtime/cpp-type.h"
 #include "flang/Runtime/entry-names.h"
 #include <cstdint>
 #include <cstdio>
 #include <limits>
 
+namespace Fortran::runtime {
 #ifdef __clang_major__
 #pragma clang diagnostic ignored "-Wc99-extensions"
 #endif
@@ -63,21 +65,23 @@ template <typename C, typename I> C tgpowi(C base, I exp) {
 #ifndef _MSC_VER
 // With most compilers, C complex is implemented as a builtin type that may have
 // specific ABI requirements
-extern "C" float _Complex RTNAME(cpowi)(float _Complex base, std::int32_t exp) {
+extern "C" CppTypeFor<TypeCategory::Real, 4> _Complex RTNAME(cpowi)(
+    CppTypeFor<TypeCategory::Real, 4> _Complex base, std::int32_t exp) {
   return tgpowi(base, exp);
 }
 
-extern "C" double _Complex RTNAME(zpowi)(
-    double _Complex base, std::int32_t exp) {
+extern "C" CppTypeFor<TypeCategory::Real, 8> _Complex RTNAME(zpowi)(
+    CppTypeFor<TypeCategory::Real, 8> _Complex base, std::int32_t exp) {
   return tgpowi(base, exp);
 }
 
-extern "C" float _Complex RTNAME(cpowk)(float _Complex base, std::int64_t exp) {
+extern "C" CppTypeFor<TypeCategory::Real, 4> _Complex RTNAME(cpowk)(
+    CppTypeFor<TypeCategory::Real, 4> _Complex base, std::int64_t exp) {
   return tgpowi(base, exp);
 }
 
-extern "C" double _Complex RTNAME(zpowk)(
-    double _Complex base, std::int64_t exp) {
+extern "C" CppTypeFor<TypeCategory::Real, 8> _Complex RTNAME(zpowk)(
+    CppTypeFor<TypeCategory::Real, 8> _Complex base, std::int64_t exp) {
   return tgpowi(base, exp);
 }
 
@@ -87,7 +91,7 @@ extern "C" double _Complex RTNAME(zpowk)(
 // c99-extension warnings. We decided to disable warnings for this
 // particular file, so we can use _Complex here.
 #if LDBL_MANT_DIG == 113
-typedef long double _Complex Qcomplex;
+typedef CppTypeFor<TypeCategory::Real, 16> _Complex Qcomplex;
 #elif HAS_FLOAT128
 #if !defined(_ARCH_PPC) || defined(__LONG_DOUBLE_IEEE128__)
 typedef _Complex float __attribute__((mode(TC))) Qcomplex;
@@ -114,35 +118,35 @@ extern "C" Qcomplex RTNAME(cqpowk)(Qcomplex base, std::int64_t exp) {
 // MSVC doesn't allow including <ccomplex> or <complex.h> in C++17 mode to get
 // the Windows definitions of these structs so just redefine here.
 struct Fcomplex {
-  float re;
-  float im;
+  CppTypeFor<TypeCategory::Real, 4> re;
+  CppTypeFor<TypeCategory::Real, 4> im;
 };
 
 struct Dcomplex {
-  double re;
-  double im;
+  CppTypeFor<TypeCategory::Real, 8> re;
+  CppTypeFor<TypeCategory::Real, 8> im;
 };
 
 extern "C" Fcomplex RTNAME(cpowi)(Fcomplex base, std::int32_t exp) {
-  auto cppbase = *(std::complex<float> *)(&base);
+  auto cppbase = *(CppTypeFor<TypeCategory::Complex, 4> *)(&base);
   auto cppres = tgpowi(cppbase, exp);
   return *(Fcomplex *)(&cppres);
 }
 
 extern "C" Dcomplex RTNAME(zpowi)(Dcomplex base, std::int32_t exp) {
-  auto cppbase = *(std::complex<double> *)(&base);
+  auto cppbase = *(CppTypeFor<TypeCategory::Complex, 8> *)(&base);
   auto cppres = tgpowi(cppbase, exp);
   return *(Dcomplex *)(&cppres);
 }
 
 extern "C" Fcomplex RTNAME(cpowk)(Fcomplex base, std::int64_t exp) {
-  auto cppbase = *(std::complex<float> *)(&base);
+  auto cppbase = *(CppTypeFor<TypeCategory::Complex, 4> *)(&base);
   auto cppres = tgpowi(cppbase, exp);
   return *(Fcomplex *)(&cppres);
 }
 
 extern "C" Dcomplex RTNAME(zpowk)(Dcomplex base, std::int64_t exp) {
-  auto cppbase = *(std::complex<double> *)(&base);
+  auto cppbase = *(CppTypeFor<TypeCategory::Complex, 8> *)(&base);
   auto cppres = tgpowi(cppbase, exp);
   return *(Dcomplex *)(&cppres);
 }
@@ -154,15 +158,16 @@ struct Qcomplex {
 };
 
 extern "C" Dcomplex RTNAME(cqpowi)(Qcomplex base, std::int32_t exp) {
-  auto cppbase = *(std::complex<CFloat128Type> *)(&base);
+  auto cppbase = *(rtcmplx::complex<CFloat128Type> *)(&base);
   auto cppres = tgpowi(cppbase, exp);
   return *(Qcomplex *)(&cppres);
 }
 
 extern "C" Dcomplex RTNAME(cqpowk)(Qcomplex base, std::int64_t exp) {
-  auto cppbase = *(std::complex<CFloat128Type> *)(&base);
+  auto cppbase = *(rtcmplx::complex<CFloat128Type> *)(&base);
   auto cppres = tgpowi(cppbase, exp);
   return *(Qcomplex *)(&cppres);
 }
 #endif
 #endif
+} // namespace Fortran::runtime
diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c
index 37ce3fa410016b..232c5452488f1a 100644
--- a/flang/runtime/complex-reduction.c
+++ b/flang/runtime/complex-reduction.c
@@ -119,7 +119,7 @@ ADAPT_REDUCTION(SumComplex4, float_Complex_t, CppComplexFloat, CMPLXF,
     REDUCTION_ARGS, REDUCTION_ARG_NAMES)
 ADAPT_REDUCTION(SumComplex8, double_Complex_t, CppComplexDouble, CMPLX,
     REDUCTION_ARGS, REDUCTION_ARG_NAMES)
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 ADAPT_REDUCTION(SumComplex10, long_double_Complex_t, CppComplexLongDouble,
     CMPLXL, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
 #endif
@@ -133,7 +133,7 @@ ADAPT_REDUCTION(ProductComplex4, float_Complex_t, CppComplexFloat, CMPLXF,
     REDUCTION_ARGS, REDUCTION_ARG_NAMES)
 ADAPT_REDUCTION(ProductComplex8, double_Complex_t, CppComplexDouble, CMPLX,
     REDUCTION_ARGS, REDUCTION_ARG_NAMES)
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 ADAPT_REDUCTION(ProductComplex10, long_double_Complex_t, CppComplexLongDouble,
     CMPLXL, REDUCTION_ARGS, REDUCTION_ARG_NAMES)
 #endif
@@ -147,7 +147,7 @@ ADAPT_REDUCTION(DotProductComplex4, float_Complex_t, CppComplexFloat, CMPLXF,
     DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
 ADAPT_REDUCTION(DotProductComplex8, double_Complex_t, CppComplexDouble, CMPLX,
     DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 ADAPT_REDUCTION(DotProductComplex10, long_double_Complex_t,
     CppComplexLongDouble, CMPLXL, DOT_PRODUCT_ARGS, DOT_PRODUCT_ARG_NAMES)
 #endif
@@ -173,7 +173,7 @@ ADAPT_REDUCTION(ReduceComplex8Ref, double_Complex_t, CppComplexDouble, CMPLX,
 ADAPT_REDUCTION(ReduceComplex8Value, double_Complex_t, CppComplexDouble, CMPLX,
     RARGS, REDUCE_ARG_NAMES)
 #undef RARGS
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 #define RARGS REDUCE_ARGS(long_double_Complex_t, long_double_Complex_t_ref_op)
 ADAPT_REDUCTION(ReduceComplex10Ref, long_double_Complex_t, CppComplexLongDouble,
     CMPLXL, RARGS, REDUCE_ARG_NAMES)
diff --git a/flang/runtime/dot-product.cpp b/flang/runtime/dot-product.cpp
index 977698269bcb46..aafef379fad43c 100644
--- a/flang/runtime/dot-product.cpp
+++ b/flang/runtime/dot-product.cpp
@@ -21,11 +21,6 @@ namespace Fortran::runtime {
 // Beware: DOT_PRODUCT of COMPLEX data uses the complex conjugate of the first
 // argument; MATMUL does not.
 
-// Suppress the warnings about calling __host__-only std::complex operators,
-// defined in C++ STD header files, from __device__ code.
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // General accumulator for any type and stride; this is not used for
 // contiguous numeric vectors.
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
@@ -42,7 +37,7 @@ class Accumulator {
       const XT &xElement{*x_.Element<XT>(&xAt)};
       const YT &yElement{*y_.Element<YT>(&yAt)};
       if constexpr (RCAT == TypeCategory::Complex) {
-        sum_ += std::conj(static_cast<Result>(xElement)) *
+        sum_ += rtcmplx::conj(static_cast<Result>(xElement)) *
             static_cast<Result>(yElement);
       } else {
         sum_ += static_cast<Result>(xElement) * static_cast<Result>(yElement);
@@ -77,9 +72,9 @@ static inline RT_API_ATTRS CppTypeFor<RCAT, RKIND> DoDotProduct(
           // TODO: call BLAS-1 SDOT or SDSDOT
         } else if constexpr (std::is_same_v<XT, double>) {
           // TODO: call BLAS-1 DDOT
-        } else if constexpr (std::is_same_v<XT, std::complex<float>>) {
+        } else if constexpr (std::is_same_v<XT, rtcmplx::complex<float>>) {
           // TODO: call BLAS-1 CDOTC
-        } else if constexpr (std::is_same_v<XT, std::complex<double>>) {
+        } else if constexpr (std::is_same_v<XT, rtcmplx::complex<double>>) {
           // TODO: call BLAS-1 ZDOTC
         }
       }
@@ -89,12 +84,12 @@ static inline RT_API_ATTRS CppTypeFor<RCAT, RKIND> DoDotProduct(
       AccumType accum{};
       if constexpr (RCAT == TypeCategory::Complex) {
         for (SubscriptValue j{0}; j < n; ++j) {
-          // std::conj() may instantiate its argument twice,
+          // conj() may evaluate its argument twice,
           // so xp has to be incremented separately.
           // This is a workaround for an alleged bug in clang,
           // that shows up as:
           //   warning: multiple unsequenced modifications to 'xp'
-          accum += std::conj(static_cast<AccumType>(*xp)) *
+          accum += rtcmplx::conj(static_cast<AccumType>(*xp)) *
               static_cast<AccumType>(*yp++);
           xp++;
         }
@@ -117,8 +112,6 @@ static inline RT_API_ATTRS CppTypeFor<RCAT, RKIND> DoDotProduct(
   return static_cast<Result>(accumulator.GetResult());
 }
 
-RT_DIAG_POP
-
 template <TypeCategory RCAT, int RKIND> struct DotProduct {
   using Result = CppTypeFor<RCAT, RKIND>;
   template <TypeCategory XCAT, int XKIND> struct DP1 {
@@ -197,7 +190,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(DotProductReal8)(
     const Descriptor &x, const Descriptor &y, const char *source, int line) {
   return DotProduct<TypeCategory::Real, 8>{}(x, y, source, line);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(DotProductReal10)(
     const Descriptor &x, const Descriptor &y, const char *source, int line) {
   return DotProduct<TypeCategory::Real, 10>{}(x, y, source, line);
@@ -218,7 +211,7 @@ void RTDEF(CppDotProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
     const Descriptor &x, const Descriptor &y, const char *source, int line) {
   result = DotProduct<TypeCategory::Complex, 8>{}(x, y, source, line);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(CppDotProductComplex10)(
     CppTypeFor<TypeCategory::Complex, 10> &result, const Descriptor &x,
     const Descriptor &y, const char *source, int line) {
diff --git a/flang/runtime/extrema.cpp b/flang/runtime/extrema.cpp
index d6e9633372f524..2658709b7de86b 100644
--- a/flang/runtime/extrema.cpp
+++ b/flang/runtime/extrema.cpp
@@ -236,7 +236,7 @@ void RTDEF(MaxlocReal8)(Descriptor &result, const Descriptor &x, int kind,
   TotalNumericMaxOrMinLoc<TypeCategory::Real, 8, true>(
       "MAXLOC", result, x, kind, source, line, mask, back);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(MaxlocReal10)(Descriptor &result, const Descriptor &x, int kind,
     const char *source, int line, const Descriptor *mask, bool back) {
   TotalNumericMaxOrMinLoc<TypeCategory::Real, 10, true>(
@@ -292,7 +292,7 @@ void RTDEF(MinlocReal8)(Descriptor &result, const Descriptor &x, int kind,
   TotalNumericMaxOrMinLoc<TypeCategory::Real, 8, false>(
       "MINLOC", result, x, kind, source, line, mask, back);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(MinlocReal10)(Descriptor &result, const Descriptor &x, int kind,
     const char *source, int line, const Descriptor *mask, bool back) {
   TotalNumericMaxOrMinLoc<TypeCategory::Real, 10, false>(
@@ -614,7 +614,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(MaxvalReal8)(const Descriptor &x,
   return TotalNumericMaxOrMin<TypeCategory::Real, 8, true>(
       x, source, line, dim, mask, "MAXVAL");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(MaxvalReal10)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return TotalNumericMaxOrMin<TypeCategory::Real, 10, true>(
@@ -674,7 +674,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(MinvalReal8)(const Descriptor &x,
   return TotalNumericMaxOrMin<TypeCategory::Real, 8, false>(
       x, source, line, dim, mask, "MINVAL");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(MinvalReal10)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return TotalNumericMaxOrMin<TypeCategory::Real, 10, false>(
@@ -730,7 +730,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(Norm2_8)(
   return GetTotalReduction<TypeCategory::Real, 8>(
       x, source, line, dim, nullptr, Norm2Accumulator<8>{x}, "NORM2");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(Norm2_10)(
     const Descriptor &x, const char *source, int line, int dim) {
   return GetTotalReduction<TypeCategory::Real, 10>(
diff --git a/flang/runtime/matmul-transpose.cpp b/flang/runtime/matmul-transpose.cpp
index 283472650a1c69..bafa05056bebc4 100644
--- a/flang/runtime/matmul-transpose.cpp
+++ b/flang/runtime/matmul-transpose.cpp
@@ -32,11 +32,6 @@
 namespace {
 using namespace Fortran::runtime;
 
-// Suppress the warnings about calling __host__-only std::complex operators,
-// defined in C++ STD header files, from __device__ code.
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // Contiguous numeric TRANSPOSE(matrix)*matrix multiplication
 //   TRANSPOSE(matrix(n, rows)) * matrix(n,cols) ->
 //             matrix(rows, n)  * matrix(n,cols) -> matrix(rows,cols)
@@ -91,8 +86,6 @@ inline static RT_API_ATTRS void MatrixTransposedTimesMatrix(
   }
 }
 
-RT_DIAG_POP
-
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
 inline static RT_API_ATTRS void MatrixTransposedTimesMatrixHelper(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
@@ -118,9 +111,6 @@ inline static RT_API_ATTRS void MatrixTransposedTimesMatrixHelper(
   }
 }
 
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // Contiguous numeric matrix*vector multiplication
 //   matrix(rows,n) * column vector(n) -> column vector(rows)
 // Straightforward algorithm:
@@ -158,8 +148,6 @@ inline static RT_API_ATTRS void MatrixTransposedTimesVector(
   }
 }
 
-RT_DIAG_POP
-
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
 inline static RT_API_ATTRS void MatrixTransposedTimesVectorHelper(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
@@ -174,9 +162,6 @@ inline static RT_API_ATTRS void MatrixTransposedTimesVectorHelper(
   }
 }
 
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // Implements an instance of MATMUL for given argument types.
 template <bool IS_ALLOCATING, TypeCategory RCAT, int RKIND, typename XT,
     typename YT>
@@ -341,8 +326,6 @@ inline static RT_API_ATTRS void DoMatmulTranspose(
   }
 }
 
-RT_DIAG_POP
-
 template <bool IS_ALLOCATING, TypeCategory XCAT, int XKIND, TypeCategory YCAT,
     int YKIND>
 struct MatmulTransposeHelper {
diff --git a/flang/runtime/matmul.cpp b/flang/runtime/matmul.cpp
index 252557e2f9e7ad..a5737a9bc62075 100644
--- a/flang/runtime/matmul.cpp
+++ b/flang/runtime/matmul.cpp
@@ -31,11 +31,6 @@
 namespace {
 using namespace Fortran::runtime;
 
-// Suppress the warnings about calling __host__-only std::complex operators,
-// defined in C++ STD header files, from __device__ code.
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // General accumulator for any type and stride; this is not used for
 // contiguous numeric cases.
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
@@ -112,8 +107,6 @@ inline RT_API_ATTRS void MatrixTimesMatrix(
   }
 }
 
-RT_DIAG_POP
-
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
 inline RT_API_ATTRS void MatrixTimesMatrixHelper(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
@@ -139,9 +132,6 @@ inline RT_API_ATTRS void MatrixTimesMatrixHelper(
   }
 }
 
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // Contiguous numeric matrix*vector multiplication
 //   matrix(rows,n) * column vector(n) -> column vector(rows)
 // Straightforward algorithm:
@@ -179,8 +169,6 @@ inline RT_API_ATTRS void MatrixTimesVector(
   }
 }
 
-RT_DIAG_POP
-
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
 inline RT_API_ATTRS void MatrixTimesVectorHelper(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
@@ -194,9 +182,6 @@ inline RT_API_ATTRS void MatrixTimesVectorHelper(
   }
 }
 
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // Contiguous numeric vector*matrix multiplication
 //   row vector(n) * matrix(n,cols) -> row vector(cols)
 // Straightforward algorithm:
@@ -235,8 +220,6 @@ inline RT_API_ATTRS void VectorTimesMatrix(
   }
 }
 
-RT_DIAG_POP
-
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool SPARSE_COLUMNS = false>
 inline RT_API_ATTRS void VectorTimesMatrixHelper(
@@ -251,9 +234,6 @@ inline RT_API_ATTRS void VectorTimesMatrixHelper(
   }
 }
 
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 // Implements an instance of MATMUL for given argument types.
 template <bool IS_ALLOCATING, TypeCategory RCAT, int RKIND, typename XT,
     typename YT>
@@ -344,9 +324,9 @@ static inline RT_API_ATTRS void DoMatmul(
             // TODO: try using CUTLASS for device.
           } else if constexpr (std::is_same_v<XT, double>) {
             // TODO: call BLAS-3 DGEMM
-          } else if constexpr (std::is_same_v<XT, std::complex<float>>) {
+          } else if constexpr (std::is_same_v<XT, rtcmplx::complex<float>>) {
             // TODO: call BLAS-3 CGEMM
-          } else if constexpr (std::is_same_v<XT, std::complex<double>>) {
+          } else if constexpr (std::is_same_v<XT, rtcmplx::complex<double>>) {
             // TODO: call BLAS-3 ZGEMM
           }
         }
@@ -361,9 +341,9 @@ static inline RT_API_ATTRS void DoMatmul(
             // TODO: call BLAS-2 SGEMV(x,y)
           } else if constexpr (std::is_same_v<XT, double>) {
             // TODO: call BLAS-2 DGEMV(x,y)
-          } else if constexpr (std::is_same_v<XT, std::complex<float>>) {
+          } else if constexpr (std::is_same_v<XT, rtcmplx::complex<float>>) {
             // TODO: call BLAS-2 CGEMV(x,y)
-          } else if constexpr (std::is_same_v<XT, std::complex<double>>) {
+          } else if constexpr (std::is_same_v<XT, rtcmplx::complex<double>>) {
             // TODO: call BLAS-2 ZGEMV(x,y)
           }
         }
@@ -377,9 +357,9 @@ static inline RT_API_ATTRS void DoMatmul(
             // TODO: call BLAS-2 SGEMV(y,x)
           } else if constexpr (std::is_same_v<XT, double>) {
             // TODO: call BLAS-2 DGEMV(y,x)
-          } else if constexpr (std::is_same_v<XT, std::complex<float>>) {
+          } else if constexpr (std::is_same_v<XT, rtcmplx::complex<float>>) {
             // TODO: call BLAS-2 CGEMV(y,x)
-          } else if constexpr (std::is_same_v<XT, std::complex<double>>) {
+          } else if constexpr (std::is_same_v<XT, rtcmplx::complex<double>>) {
             // TODO: call BLAS-2 ZGEMV(y,x)
           }
         }
@@ -441,8 +421,6 @@ static inline RT_API_ATTRS void DoMatmul(
   }
 }
 
-RT_DIAG_POP
-
 template <bool IS_ALLOCATING, TypeCategory XCAT, int XKIND, TypeCategory YCAT,
     int YKIND>
 struct MatmulHelper {
diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp
index 9a8ddc6615564d..23f8da3f81f176 100644
--- a/flang/runtime/numeric.cpp
+++ b/flang/runtime/numeric.cpp
@@ -144,7 +144,7 @@ inline RT_API_ATTRS CppTypeFor<TypeCategory::Integer, 4> SelectedRealKind(
 #ifdef FLANG_RUNTIME_NO_REAL_3
   mask &= ~(1 << 3);
 #endif
-#if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_10
+#if !HAS_FLOAT80 || defined FLANG_RUNTIME_NO_REAL_10
   mask &= ~(1 << 10);
 #endif
 #if LDBL_MANT_DIG < 64 || defined FLANG_RUNTIME_NO_REAL_16
@@ -276,7 +276,7 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling8_16)(
   return Ceiling<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 1> RTDEF(Ceiling10_1)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 1>>(x);
@@ -332,7 +332,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(ErfcScaled8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return ErfcScaled(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(ErfcScaled10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return ErfcScaled(x);
@@ -361,7 +361,7 @@ CppTypeFor<TypeCategory::Integer, 8> RTDEF(Exponent8_8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent10_4)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
@@ -416,7 +416,7 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(Floor8_16)(
   return Floor<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 1> RTDEF(Floor10_1)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 1>>(x);
@@ -472,7 +472,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(Fraction8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Fraction(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(Fraction10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Fraction(x);
@@ -485,7 +485,7 @@ bool RTDEF(IsFinite4)(CppTypeFor<TypeCategory::Real, 4> x) {
 bool RTDEF(IsFinite8)(CppTypeFor<TypeCategory::Real, 8> x) {
   return std::isfinite(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 bool RTDEF(IsFinite10)(CppTypeFor<TypeCategory::Real, 10> x) {
   return std::isfinite(x);
 }
@@ -501,7 +501,7 @@ bool RTDEF(IsNaN4)(CppTypeFor<TypeCategory::Real, 4> x) {
 bool RTDEF(IsNaN8)(CppTypeFor<TypeCategory::Real, 8> x) {
   return std::isnan(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 bool RTDEF(IsNaN10)(CppTypeFor<TypeCategory::Real, 10> x) {
   return std::isnan(x);
 }
@@ -553,7 +553,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(ModReal8)(
     const char *sourceFile, int sourceLine) {
   return RealMod<false>(x, p, sourceFile, sourceLine);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(ModReal10)(
     CppTypeFor<TypeCategory::Real, 10> x, CppTypeFor<TypeCategory::Real, 10> p,
     const char *sourceFile, int sourceLine) {
@@ -603,7 +603,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(ModuloReal8)(
     const char *sourceFile, int sourceLine) {
   return RealMod<true>(x, p, sourceFile, sourceLine);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(ModuloReal10)(
     CppTypeFor<TypeCategory::Real, 10> x, CppTypeFor<TypeCategory::Real, 10> p,
     const char *sourceFile, int sourceLine) {
@@ -619,7 +619,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(Nearest8)(
     CppTypeFor<TypeCategory::Real, 8> x, bool positive) {
   return Nearest<53>(x, positive);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(Nearest10)(
     CppTypeFor<TypeCategory::Real, 10> x, bool positive) {
   return Nearest<64>(x, positive);
@@ -670,7 +670,7 @@ CppTypeFor<TypeCategory::Integer, 16> RTDEF(Nint8_16)(
   return Nint<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Integer, 1> RTDEF(Nint10_1)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 1>>(x);
@@ -726,7 +726,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(RRSpacing8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return RRSpacing<53>(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(RRSpacing10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return RRSpacing<64>(x);
@@ -741,7 +741,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(SetExponent8)(
     CppTypeFor<TypeCategory::Real, 8> x, std::int64_t p) {
   return SetExponent(x, p);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(SetExponent10)(
     CppTypeFor<TypeCategory::Real, 10> x, std::int64_t p) {
   return SetExponent(x, p);
@@ -756,7 +756,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(Scale8)(
     CppTypeFor<TypeCategory::Real, 8> x, std::int64_t p) {
   return Scale(x, p);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(Scale10)(
     CppTypeFor<TypeCategory::Real, 10> x, std::int64_t p) {
   return Scale(x, p);
@@ -876,7 +876,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(Spacing8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Spacing<53>(x);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(Spacing10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Spacing<64>(x);
@@ -893,7 +893,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(FPow8i)(
     CppTypeFor<TypeCategory::Integer, 4> e) {
   return FPowI(b, e);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(FPow10i)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 4> e) {
@@ -918,7 +918,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(FPow8k)(
     CppTypeFor<TypeCategory::Integer, 8> e) {
   return FPowI(b, e);
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(FPow10k)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 8> e) {
diff --git a/flang/runtime/product.cpp b/flang/runtime/product.cpp
index 7fc0fcd3b107de..39b40d82b05401 100644
--- a/flang/runtime/product.cpp
+++ b/flang/runtime/product.cpp
@@ -36,16 +36,11 @@ template <typename INTERMEDIATE> class NonComplexProductAccumulator {
   INTERMEDIATE product_{1};
 };
 
-// Suppress the warnings about calling __host__-only std::complex operators,
-// defined in C++ STD header files, from __device__ code.
-RT_DIAG_PUSH
-RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
-
 template <typename PART> class ComplexProductAccumulator {
 public:
   explicit RT_API_ATTRS ComplexProductAccumulator(const Descriptor &array)
       : array_{array} {}
-  RT_API_ATTRS void Reinitialize() { product_ = std::complex<PART>{1, 0}; }
+  RT_API_ATTRS void Reinitialize() { product_ = rtcmplx::complex<PART>{1, 0}; }
   template <typename A>
   RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     using ResultPart = typename A::value_type;
@@ -60,11 +55,9 @@ template <typename PART> class ComplexProductAccumulator {
 
 private:
   const Descriptor &array_;
-  std::complex<PART> product_{1, 0};
+  rtcmplx::complex<PART> product_{1, 0};
 };
 
-RT_DIAG_POP
-
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
@@ -116,7 +109,7 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(ProductReal8)(const Descriptor &x,
       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
       "PRODUCT");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(ProductReal10)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Real, 10>(x, source, line, dim, mask,
@@ -147,7 +140,7 @@ void RTDEF(CppProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
       mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
       "PRODUCT");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(CppProductComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
diff --git a/flang/runtime/random.cpp b/flang/runtime/random.cpp
index 69de9b8c96fb5d..9ec961fd058745 100644
--- a/flang/runtime/random.cpp
+++ b/flang/runtime/random.cpp
@@ -66,7 +66,7 @@ void RTNAME(RandomNumber)(
     return;
   case 10:
     if constexpr (HasCppTypeFor<TypeCategory::Real, 10>) {
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
       Generate<CppTypeFor<TypeCategory::Real, 10>, 64>(harvest);
       return;
 #endif
diff --git a/flang/runtime/reduce.cpp b/flang/runtime/reduce.cpp
index 2f4bb6ea159cf4..6b62e1cf1e76f1 100644
--- a/flang/runtime/reduce.cpp
+++ b/flang/runtime/reduce.cpp
@@ -395,45 +395,49 @@ void RTDEF(ReduceReal8DimValue)(Descriptor &result, const Descriptor &array,
   PartialReduction<Accumulator, TypeCategory::Real, 8>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-#if LDBL_MANT_DIG == 64
-long double RTDEF(ReduceReal10Ref)(const Descriptor &array,
-    ReferenceReductionOperation<long double> operation, const char *source,
-    int line, int dim, const Descriptor *mask, const long double *identity,
-    bool ordered) {
+#if HAS_FLOAT80
+CppTypeFor<TypeCategory::Real, 10> RTDEF(ReduceReal10Ref)(
+    const Descriptor &array,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Real, 10>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Real, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Real, 10>(array, source, line, dim,
       mask,
-      ReduceAccumulator<long double, false>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Real, 10>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-long double RTDEF(ReduceReal10Value)(const Descriptor &array,
-    ValueReductionOperation<long double> operation, const char *source,
-    int line, int dim, const Descriptor *mask, const long double *identity,
-    bool ordered) {
+CppTypeFor<TypeCategory::Real, 10> RTDEF(ReduceReal10Value)(
+    const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Real, 10>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Real, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
   return GetTotalReduction<TypeCategory::Real, 10>(array, source, line, dim,
       mask,
-      ReduceAccumulator<long double, true>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Real, 10>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
 void RTDEF(ReduceReal10DimRef)(Descriptor &result, const Descriptor &array,
-    ReferenceReductionOperation<long double> operation, const char *source,
-    int line, int dim, const Descriptor *mask, const long double *identity,
-    bool ordered) {
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Real, 10>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Real, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<long double, false>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Real, 10>, false>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Real, 10>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 void RTDEF(ReduceReal10DimValue)(Descriptor &result, const Descriptor &array,
-    ValueReductionOperation<long double> operation, const char *source,
-    int line, int dim, const Descriptor *mask, const long double *identity,
-    bool ordered) {
+    ValueReductionOperation<CppTypeFor<TypeCategory::Real, 10>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Real, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<long double, true>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Real, 10>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Real, 10>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
@@ -484,187 +488,199 @@ void RTDEF(ReduceReal16DimValue)(Descriptor &result, const Descriptor &array,
 }
 #endif
 
-void RTDEF(CppReduceComplex4Ref)(std::complex<float> &result,
+void RTDEF(CppReduceComplex4Ref)(CppTypeFor<TypeCategory::Complex, 4> &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<float>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<float> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 4> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 4>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::complex<float>, false>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 4>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex4Value)(std::complex<float> &result,
+void RTDEF(CppReduceComplex4Value)(CppTypeFor<TypeCategory::Complex, 4> &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<float>> operation, const char *source,
-    int line, int dim, const Descriptor *mask,
-    const std::complex<float> *identity, bool ordered) {
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 4>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::complex<float>, true>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 4>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
 void RTDEF(CppReduceComplex4DimRef)(Descriptor &result, const Descriptor &array,
-    ReferenceReductionOperation<std::complex<float>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 4>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<float> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 4> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<float>, false>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 4>, false>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 4>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 void RTDEF(CppReduceComplex4DimValue)(Descriptor &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<float>> operation, const char *source,
-    int line, int dim, const Descriptor *mask,
-    const std::complex<float> *identity, bool ordered) {
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 4>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Complex, 4> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<float>, true>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 4>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 4>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-void RTDEF(CppReduceComplex8Ref)(std::complex<double> &result,
+void RTDEF(CppReduceComplex8Ref)(CppTypeFor<TypeCategory::Complex, 8> &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<double>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 8>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<double> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 8> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 8>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::complex<double>, false>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 8>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex8Value)(std::complex<double> &result,
+void RTDEF(CppReduceComplex8Value)(CppTypeFor<TypeCategory::Complex, 8> &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<double>> operation, const char *source,
-    int line, int dim, const Descriptor *mask,
-    const std::complex<double> *identity, bool ordered) {
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 8>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Complex, 8> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 8>(array, source, line, dim,
       mask,
-      ReduceAccumulator<std::complex<double>, true>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 8>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
 void RTDEF(CppReduceComplex8DimRef)(Descriptor &result, const Descriptor &array,
-    ReferenceReductionOperation<std::complex<double>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 8>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<double> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 8> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<double>, false>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 8>, false>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 8>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 void RTDEF(CppReduceComplex8DimValue)(Descriptor &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<double>> operation, const char *source,
-    int line, int dim, const Descriptor *mask,
-    const std::complex<double> *identity, bool ordered) {
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 8>> operation,
+    const char *source, int line, int dim, const Descriptor *mask,
+    const CppTypeFor<TypeCategory::Complex, 8> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<double>, true>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 8>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 8>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
-#if LDBL_MANT_DIG == 64
-void RTDEF(CppReduceComplex10Ref)(std::complex<long double> &result,
+#if HAS_FLOAT80
+void RTDEF(CppReduceComplex10Ref)(CppTypeFor<TypeCategory::Complex, 10> &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<long double>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 10>>
+        operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<long double> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 10>(array, source, line,
       dim, mask,
-      ReduceAccumulator<std::complex<long double>, false>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 10>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex10Value)(std::complex<long double> &result,
-    const Descriptor &array,
-    ValueReductionOperation<std::complex<long double>> operation,
+void RTDEF(CppReduceComplex10Value)(
+    CppTypeFor<TypeCategory::Complex, 10> &result, const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 10>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<long double> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 10>(array, source, line,
       dim, mask,
-      ReduceAccumulator<std::complex<long double>, true>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 10>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
 void RTDEF(CppReduceComplex10DimRef)(Descriptor &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<long double>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 10>>
+        operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<long double> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<long double>, false>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 10>, false>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 10>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 void RTDEF(CppReduceComplex10DimValue)(Descriptor &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<long double>> operation,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 10>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<long double> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 10> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<long double>, true>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 10>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 10>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTDEF(CppReduceComplex16Ref)(std::complex<CppFloat128Type> &result,
+void RTDEF(CppReduceComplex16Ref)(CppTypeFor<TypeCategory::Complex, 16> &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<CppFloat128Type>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 16>>
+        operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<CppFloat128Type> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 16> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 16>(array, source, line,
       dim, mask,
-      ReduceAccumulator<std::complex<CppFloat128Type>, false>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 16>, false>{
           array, operation, identity, terminator},
       "REDUCE");
 }
-void RTDEF(CppReduceComplex16Value)(std::complex<CppFloat128Type> &result,
-    const Descriptor &array,
-    ValueReductionOperation<std::complex<CppFloat128Type>> operation,
+void RTDEF(CppReduceComplex16Value)(
+    CppTypeFor<TypeCategory::Complex, 16> &result, const Descriptor &array,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 16>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<CppFloat128Type> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 16> *identity, bool ordered) {
   Terminator terminator{source, line};
   result = GetTotalReduction<TypeCategory::Complex, 16>(array, source, line,
       dim, mask,
-      ReduceAccumulator<std::complex<CppFloat128Type>, true>{
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 16>, true>{
           array, operation, identity, terminator},
       "REDUCE");
 }
 void RTDEF(CppReduceComplex16DimRef)(Descriptor &result,
     const Descriptor &array,
-    ReferenceReductionOperation<std::complex<CppFloat128Type>> operation,
+    ReferenceReductionOperation<CppTypeFor<TypeCategory::Complex, 16>>
+        operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<CppFloat128Type> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 16> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<CppFloat128Type>, false>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 16>, false>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 16>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
 }
 void RTDEF(CppReduceComplex16DimValue)(Descriptor &result,
     const Descriptor &array,
-    ValueReductionOperation<std::complex<CppFloat128Type>> operation,
+    ValueReductionOperation<CppTypeFor<TypeCategory::Complex, 16>> operation,
     const char *source, int line, int dim, const Descriptor *mask,
-    const std::complex<CppFloat128Type> *identity, bool ordered) {
+    const CppTypeFor<TypeCategory::Complex, 16> *identity, bool ordered) {
   Terminator terminator{source, line};
-  using Accumulator = ReduceAccumulator<std::complex<CppFloat128Type>, true>;
+  using Accumulator =
+      ReduceAccumulator<CppTypeFor<TypeCategory::Complex, 16>, true>;
   Accumulator accumulator{array, operation, identity, terminator};
   PartialReduction<Accumulator, TypeCategory::Complex, 16>(result, array,
       array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator);
diff --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h
index a51404c9637620..6b7d57f98384ae 100644
--- a/flang/runtime/reduction-templates.h
+++ b/flang/runtime/reduction-templates.h
@@ -321,8 +321,8 @@ RT_VAR_GROUP_BEGIN
 static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind {
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
   16
-#elif LDBL_MANT_DIG == 64
-  10
+#elif HAS_FLOAT80
+    10
 #else
   8
 #endif
diff --git a/flang/runtime/sum.cpp b/flang/runtime/sum.cpp
index 63d8c9029a0ef5..88c6c914e1e243 100644
--- a/flang/runtime/sum.cpp
+++ b/flang/runtime/sum.cpp
@@ -141,18 +141,18 @@ CppTypeFor<TypeCategory::Real, 8> RTDEF(SumReal8)(const Descriptor &x,
   return GetTotalReduction<TypeCategory::Real, 8>(
       x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 CppTypeFor<TypeCategory::Real, 10> RTDEF(SumReal10)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
-  return GetTotalReduction<TypeCategory::Real, 10>(
-      x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
+  return GetTotalReduction<TypeCategory::Real, 10>(x, source, line, dim, mask,
+      RealSumAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x}, "SUM");
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 CppTypeFor<TypeCategory::Real, 16> RTDEF(SumReal16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
-  return GetTotalReduction<TypeCategory::Real, 16>(
-      x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
+  return GetTotalReduction<TypeCategory::Real, 16>(x, source, line, dim, mask,
+      RealSumAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x}, "SUM");
 }
 #endif
 
@@ -168,20 +168,22 @@ void RTDEF(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
   result = GetTotalReduction<TypeCategory::Complex, 8>(
       x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
 }
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
-  result = GetTotalReduction<TypeCategory::Complex, 10>(
-      x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
+  result =
+      GetTotalReduction<TypeCategory::Complex, 10>(x, source, line, dim, mask,
+          ComplexSumAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x}, "SUM");
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
 void RTDEF(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
-  result = GetTotalReduction<TypeCategory::Complex, 16>(
-      x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
+  result =
+      GetTotalReduction<TypeCategory::Complex, 16>(x, source, line, dim, mask,
+          ComplexSumAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x}, "SUM");
 }
 #endif
 
diff --git a/flang/runtime/transformational.cpp b/flang/runtime/transformational.cpp
index b6b204be4418c9..0ce18171274e42 100644
--- a/flang/runtime/transformational.cpp
+++ b/flang/runtime/transformational.cpp
@@ -342,7 +342,7 @@ void RTDEF(BesselJn_8)(Descriptor &result, int32_t n1, int32_t n2,
       result, n1, n2, x, bn2, bn2_1, sourceFile, line);
 }
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(BesselJn_10)(Descriptor &result, int32_t n1, int32_t n2,
     CppTypeFor<TypeCategory::Real, 10> x,
     CppTypeFor<TypeCategory::Real, 10> bn2,
@@ -375,7 +375,7 @@ void RTDEF(BesselJnX0_8)(Descriptor &result, int32_t n1, int32_t n2,
   DoBesselJnX0<TypeCategory::Real, 8>(result, n1, n2, sourceFile, line);
 }
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(BesselJnX0_10)(Descriptor &result, int32_t n1, int32_t n2,
     const char *sourceFile, int line) {
   DoBesselJnX0<TypeCategory::Real, 10>(result, n1, n2, sourceFile, line);
@@ -405,7 +405,7 @@ void RTDEF(BesselYn_8)(Descriptor &result, int32_t n1, int32_t n2,
       result, n1, n2, x, bn1, bn1_1, sourceFile, line);
 }
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(BesselYn_10)(Descriptor &result, int32_t n1, int32_t n2,
     CppTypeFor<TypeCategory::Real, 10> x,
     CppTypeFor<TypeCategory::Real, 10> bn1,
@@ -438,7 +438,7 @@ void RTDEF(BesselYnX0_8)(Descriptor &result, int32_t n1, int32_t n2,
   DoBesselYnX0<TypeCategory::Real, 8>(result, n1, n2, sourceFile, line);
 }
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 void RTDEF(BesselYnX0_10)(Descriptor &result, int32_t n1, int32_t n2,
     const char *sourceFile, int line) {
   DoBesselYnX0<TypeCategory::Real, 10>(result, n1, n2, sourceFile, line);
diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp
index 799756aab3839a..3e574c06b091e8 100644
--- a/flang/unittests/Runtime/Numeric.cpp
+++ b/flang/unittests/Runtime/Numeric.cpp
@@ -34,7 +34,7 @@ TEST(Numeric, Floor) {
 TEST(Numeric, Erfc_scaled) {
   EXPECT_NEAR(RTNAME(ErfcScaled4)(Real<4>{20.0}), 0.02817434874, 1.0e-8);
   EXPECT_NEAR(RTNAME(ErfcScaled8)(Real<8>{20.0}), 0.02817434874, 1.0e-11);
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
   EXPECT_NEAR(RTNAME(ErfcScaled10)(Real<10>{20.0}), 0.02817434874, 1.0e-8);
 #endif
 }
@@ -295,7 +295,7 @@ TEST(Numeric, FPowI) {
   EXPECT_EQ(RTNAME(FPow8k)(Real<8>{-3}, Int<8>{3}), Real<8>{-27});
   EXPECT_EQ(RTNAME(FPow8k)(Real<8>{-2}, Int<8>{-3}), Real<8>{-0.125});
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
   EXPECT_EQ(RTNAME(FPow10i)(Real<10>{0}, Int<4>{0}), Real<10>{1});
   EXPECT_EQ(RTNAME(FPow10i)(Real<10>{0.3}, Int<4>{0}), Real<10>{1});
   EXPECT_EQ(RTNAME(FPow10i)(Real<10>{2}, Int<4>{-1}), Real<10>{0.5});
diff --git a/flang/unittests/Runtime/Transformational.cpp b/flang/unittests/Runtime/Transformational.cpp
index 5836e70c740f9a..b36ea0a60c670c 100644
--- a/flang/unittests/Runtime/Transformational.cpp
+++ b/flang/unittests/Runtime/Transformational.cpp
@@ -108,7 +108,7 @@ template <int KIND> static void testBesselJnX0(BesselX0FuncType<KIND> rtFunc) {
 static void testBesselJn() {
   testBesselJn<4>(RTNAME(BesselJn_4));
   testBesselJn<8>(RTNAME(BesselJn_8));
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
   testBesselJn<10>(RTNAME(BesselJn_10));
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -117,7 +117,7 @@ static void testBesselJn() {
 
   testBesselJnX0<4>(RTNAME(BesselJnX0_4));
   testBesselJnX0<8>(RTNAME(BesselJnX0_8));
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
   testBesselJnX0<10>(RTNAME(BesselJnX0_10));
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -201,7 +201,7 @@ template <int KIND> static void testBesselYnX0(BesselX0FuncType<KIND> rtFunc) {
 static void testBesselYn() {
   testBesselYn<4>(RTNAME(BesselYn_4));
   testBesselYn<8>(RTNAME(BesselYn_8));
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
   testBesselYn<10>(RTNAME(BesselYn_10));
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -210,7 +210,7 @@ static void testBesselYn() {
 
   testBesselYnX0<4>(RTNAME(BesselYnX0_4));
   testBesselYnX0<8>(RTNAME(BesselYnX0_8));
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
   testBesselYnX0<10>(RTNAME(BesselYnX0_10));
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
@@ -523,7 +523,7 @@ TEST(Transformational, Unpack) {
   result.Destroy();
 }
 
-#if LDBL_MANT_DIG == 64
+#if HAS_FLOAT80
 // Make sure the destination descriptor is created by the runtime
 // with proper element size, when REAL*10 maps to 'long double'.
 #define Real10CppType long double