[flang-commits] [flang] [flang][runtime] Enable more APIs in the offload build. (PR #75996)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Tue Dec 19 18:42:52 PST 2023


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/75996

This patch enables more numeric (mod, sum, matmul, etc.) APIs,
and some others.

I added new macros to disable warnings about using C++ STD methods
like operators of std::complex, which do not have __device__ attribute.
This may probably result in unresolved references, if the header files
implementation relies on libstdc++. I will need to follow up on this.


>From 620ea47ba8d5b107b1de1ca727f8e1bb6f0f9be1 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 19 Dec 2023 17:11:04 -0800
Subject: [PATCH] [flang][runtime] Enable more APIs in the offload build.

This patch enables more numeric (mod, sum, matmul, etc.) APIs,
and some others.

I added new macros to disable warnings about using C++ STD methods
like operators of std::complex, which do not have __device__ attribute.
This may probably result in unresolved references, if the header files
implementation relies on libstdc++. I will need to follow up on this.
---
 flang/include/flang/ISO_Fortran_binding.h     |   4 +-
 flang/include/flang/Runtime/allocatable.h     |  36 +-
 flang/include/flang/Runtime/api-attrs.h       |  11 +
 flang/include/flang/Runtime/derived-api.h     |  16 +-
 .../include/flang/Runtime/matmul-transpose.h  |   4 +-
 flang/include/flang/Runtime/matmul.h          |   4 +-
 flang/include/flang/Runtime/numeric.h         | 248 +++++++-------
 flang/include/flang/Runtime/reduction.h       | 274 ++++++++--------
 flang/include/flang/Runtime/support.h         |   2 +-
 flang/runtime/CMakeLists.txt                  |  11 +
 flang/runtime/allocatable.cpp                 |  30 +-
 flang/runtime/derived-api.cpp                 |  26 +-
 flang/runtime/matmul-transpose.cpp            |  43 ++-
 flang/runtime/matmul.cpp                      |  90 +++--
 flang/runtime/numeric.cpp                     | 309 +++++++++---------
 flang/runtime/reduction-templates.h           |  51 +--
 flang/runtime/reduction.cpp                   |  88 ++---
 flang/runtime/sum.cpp                         |  67 ++--
 flang/runtime/support.cpp                     |   2 +-
 19 files changed, 712 insertions(+), 604 deletions(-)

diff --git a/flang/include/flang/ISO_Fortran_binding.h b/flang/include/flang/ISO_Fortran_binding.h
index dd384c516263e5..4a28d3322a38f0 100644
--- a/flang/include/flang/ISO_Fortran_binding.h
+++ b/flang/include/flang/ISO_Fortran_binding.h
@@ -189,8 +189,8 @@ RT_API_ATTRS void *CFI_address(
 RT_API_ATTRS int CFI_allocate(CFI_cdesc_t *, const CFI_index_t lower_bounds[],
     const CFI_index_t upper_bounds[], size_t elem_len);
 RT_API_ATTRS int CFI_deallocate(CFI_cdesc_t *);
-int CFI_establish(CFI_cdesc_t *, void *base_addr, CFI_attribute_t, CFI_type_t,
-    size_t elem_len, CFI_rank_t, const CFI_index_t extents[]);
+RT_API_ATTRS int CFI_establish(CFI_cdesc_t *, void *base_addr, CFI_attribute_t,
+    CFI_type_t, size_t elem_len, CFI_rank_t, const CFI_index_t extents[]);
 RT_API_ATTRS int CFI_is_contiguous(const CFI_cdesc_t *);
 RT_API_ATTRS int CFI_section(CFI_cdesc_t *, const CFI_cdesc_t *source,
     const CFI_index_t lower_bounds[], const CFI_index_t upper_bounds[],
diff --git a/flang/include/flang/Runtime/allocatable.h b/flang/include/flang/Runtime/allocatable.h
index 4169483398f6a7..58061d9862095e 100644
--- a/flang/include/flang/Runtime/allocatable.h
+++ b/flang/include/flang/Runtime/allocatable.h
@@ -26,22 +26,22 @@ extern "C" {
 // A descriptor must be initialized before being used for any purpose,
 // but needs reinitialization in a deallocated state only when there is
 // a change of type, rank, or corank.
-void RTNAME(AllocatableInitIntrinsic)(
+void RTDECL(AllocatableInitIntrinsic)(
     Descriptor &, TypeCategory, int kind, int rank = 0, int corank = 0);
-void RTNAME(AllocatableInitCharacter)(Descriptor &, SubscriptValue length = 0,
+void RTDECL(AllocatableInitCharacter)(Descriptor &, SubscriptValue length = 0,
     int kind = 1, int rank = 0, int corank = 0);
-void RTNAME(AllocatableInitDerived)(
+void RTDECL(AllocatableInitDerived)(
     Descriptor &, const typeInfo::DerivedType &, int rank = 0, int corank = 0);
 
 // Initializes the descriptor for an allocatable of intrinsic or derived type.
 // These functions are meant to be used in the allocate statement lowering. If
 // the descriptor is allocated, the initialization is skiped so the error
 // handling can be done by AllocatableAllocate.
-void RTNAME(AllocatableInitIntrinsicForAllocate)(
+void RTDECL(AllocatableInitIntrinsicForAllocate)(
     Descriptor &, TypeCategory, int kind, int rank = 0, int corank = 0);
-void RTNAME(AllocatableInitCharacterForAllocate)(Descriptor &,
+void RTDECL(AllocatableInitCharacterForAllocate)(Descriptor &,
     SubscriptValue length = 0, int kind = 1, int rank = 0, int corank = 0);
-void RTNAME(AllocatableInitDerivedForAllocate)(
+void RTDECL(AllocatableInitDerivedForAllocate)(
     Descriptor &, const typeInfo::DerivedType &, int rank = 0, int corank = 0);
 
 // Checks that an allocatable is not already allocated in statements
@@ -50,29 +50,29 @@ void RTNAME(AllocatableInitDerivedForAllocate)(
 // (If there's no STAT=, the error will be caught later anyway, but
 // this API allows the error to be caught before descriptor is modified.)
 // Return 0 on success (deallocated state), else the STAT= value.
-int RTNAME(AllocatableCheckAllocated)(Descriptor &,
+int RTDECL(AllocatableCheckAllocated)(Descriptor &,
     const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
     int sourceLine = 0);
 
 // For MOLD= allocation; sets bounds, cobounds, and length type
 // parameters from another descriptor. The destination descriptor must
 // be initialized and deallocated.
-void RTNAME(AllocatableApplyMold)(
+void RTDECL(AllocatableApplyMold)(
     Descriptor &, const Descriptor &mold, int rank = 0);
 
 // Explicitly sets the bounds and length type parameters of an initialized
 // deallocated allocatable.
-void RTNAME(AllocatableSetBounds)(
+void RTDECL(AllocatableSetBounds)(
     Descriptor &, int zeroBasedDim, SubscriptValue lower, SubscriptValue upper);
 
 // The upper cobound is ignored for the last codimension.
-void RTNAME(AllocatableSetCoBounds)(Descriptor &, int zeroBasedCoDim,
+void RTDECL(AllocatableSetCoBounds)(Descriptor &, int zeroBasedCoDim,
     SubscriptValue lower, SubscriptValue upper = 0);
 
 // Length type parameters are indexed in declaration order; i.e., 0 is the
 // first length type parameter in the deepest base type.  (Not for use
 // with CHARACTER; see above.)
-void RTNAME(AllocatableSetDerivedLength)(
+void RTDECL(AllocatableSetDerivedLength)(
     Descriptor &, int which, SubscriptValue);
 
 // When an explicit type-spec appears in an ALLOCATE statement for an
@@ -80,7 +80,7 @@ void RTNAME(AllocatableSetDerivedLength)(
 // a derived type or CHARACTER value, the explicit value has to match
 // the length type parameter's value.  This API checks that requirement.
 // Returns 0 for success, or the STAT= value on failure with hasStat==true.
-int RTNAME(AllocatableCheckLengthParameter)(Descriptor &,
+int RTDECL(AllocatableCheckLengthParameter)(Descriptor &,
     int which /* 0 for CHARACTER length */, SubscriptValue other,
     bool hasStat = false, const Descriptor *errMsg = nullptr,
     const char *sourceFile = nullptr, int sourceLine = 0);
@@ -94,10 +94,10 @@ int RTNAME(AllocatableCheckLengthParameter)(Descriptor &,
 // Successfully allocated memory is initialized if the allocatable has a
 // derived type, and is always initialized by AllocatableAllocateSource().
 // Performs all necessary coarray synchronization and validation actions.
-int RTNAME(AllocatableAllocate)(Descriptor &, bool hasStat = false,
+int RTDECL(AllocatableAllocate)(Descriptor &, bool hasStat = false,
     const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
     int sourceLine = 0);
-int RTNAME(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
+int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
     bool hasStat = false, const Descriptor *errMsg = nullptr,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
@@ -105,7 +105,7 @@ int RTNAME(AllocatableAllocateSource)(Descriptor &, const Descriptor &source,
 // but note the order of first two arguments is reversed for consistency
 // with the other APIs for allocatables.)  The destination descriptor
 // must be initialized.
-std::int32_t RTNAME(MoveAlloc)(Descriptor &to, Descriptor &from,
+std::int32_t RTDECL(MoveAlloc)(Descriptor &to, Descriptor &from,
     const typeInfo::DerivedType *, bool hasStat = false,
     const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
     int sourceLine = 0);
@@ -113,19 +113,19 @@ std::int32_t RTNAME(MoveAlloc)(Descriptor &to, Descriptor &from,
 // Deallocates an allocatable.  Finalizes elements &/or components as needed.
 // The allocatable is left in an initialized state suitable for reallocation
 // with the same bounds, cobounds, and length type parameters.
-int RTNAME(AllocatableDeallocate)(Descriptor &, bool hasStat = false,
+int RTDECL(AllocatableDeallocate)(Descriptor &, bool hasStat = false,
     const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
     int sourceLine = 0);
 
 // Same as AllocatableDeallocate but also set the dynamic type as the declared
 // type as mentioned in 7.3.2.3 note 7.
-int RTNAME(AllocatableDeallocatePolymorphic)(Descriptor &,
+int RTDECL(AllocatableDeallocatePolymorphic)(Descriptor &,
     const typeInfo::DerivedType *, bool hasStat = false,
     const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
     int sourceLine = 0);
 
 // Variant of above that does not finalize; for intermediate results
-void RTNAME(AllocatableDeallocateNoFinal)(
+void RTDECL(AllocatableDeallocateNoFinal)(
     Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/api-attrs.h b/flang/include/flang/Runtime/api-attrs.h
index 61da2c06d3a4dc..9c8a67ffc34a82 100644
--- a/flang/include/flang/Runtime/api-attrs.h
+++ b/flang/include/flang/Runtime/api-attrs.h
@@ -121,4 +121,15 @@
 #undef RT_DEVICE_COMPILATION
 #endif
 
+#if defined(__CUDACC__)
+#define RT_DIAG_PUSH _Pragma("nv_diagnostic push")
+#define RT_DIAG_POP _Pragma("nv_diagnostic pop")
+#define RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN \
+  _Pragma("nv_diag_suppress 20011") _Pragma("nv_diag_suppress 20014")
+#else /* !defined(__CUDACC__) */
+#define RT_DIAG_PUSH
+#define RT_DIAG_POP
+#define RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+#endif /* !defined(__CUDACC__) */
+
 #endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */
diff --git a/flang/include/flang/Runtime/derived-api.h b/flang/include/flang/Runtime/derived-api.h
index decba9f686d920..79aa7d82de8819 100644
--- a/flang/include/flang/Runtime/derived-api.h
+++ b/flang/include/flang/Runtime/derived-api.h
@@ -29,37 +29,37 @@ extern "C" {
 // Initializes and allocates an object's components, if it has a derived type
 // with any default component initialization or automatic components.
 // The descriptor must be initialized and non-null.
-void RTNAME(Initialize)(
+void RTDECL(Initialize)(
     const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
 
 // Finalizes an object and its components.  Deallocates any
 // allocatable/automatic components.  Does not deallocate the descriptor's
 // storage.
-void RTNAME(Destroy)(const Descriptor &);
+void RTDECL(Destroy)(const Descriptor &);
 
 // Finalizes the object and its components.
-void RTNAME(Finalize)(
+void RTDECL(Finalize)(
     const Descriptor &, const char *sourceFile = nullptr, int sourceLine = 0);
 
 /// Deallocates any allocatable/automatic components.
 /// Does not deallocate the descriptor's storage.
 /// Does not perform any finalization.
-void RTNAME(DestroyWithoutFinalization)(const Descriptor &);
+void RTDECL(DestroyWithoutFinalization)(const Descriptor &);
 
 // Intrinsic or defined assignment, with scalar expansion but not type
 // conversion.
-void RTNAME(Assign)(const Descriptor &, const Descriptor &,
+void RTDECL(Assign)(const Descriptor &, const Descriptor &,
     const char *sourceFile = nullptr, int sourceLine = 0);
 
 // Perform the test of the CLASS IS type guard statement of the SELECT TYPE
 // construct.
-bool RTNAME(ClassIs)(const Descriptor &, const typeInfo::DerivedType &);
+bool RTDECL(ClassIs)(const Descriptor &, const typeInfo::DerivedType &);
 
 // Perform the test of the SAME_TYPE_AS intrinsic.
-bool RTNAME(SameTypeAs)(const Descriptor &, const Descriptor &);
+bool RTDECL(SameTypeAs)(const Descriptor &, const Descriptor &);
 
 // Perform the test of the EXTENDS_TYPE_OF intrinsic.
-bool RTNAME(ExtendsTypeOf)(const Descriptor &, const Descriptor &);
+bool RTDECL(ExtendsTypeOf)(const Descriptor &, const Descriptor &);
 
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/matmul-transpose.h b/flang/include/flang/Runtime/matmul-transpose.h
index 7cfb189863df82..5eb5896972e0fd 100644
--- a/flang/include/flang/Runtime/matmul-transpose.h
+++ b/flang/include/flang/Runtime/matmul-transpose.h
@@ -18,12 +18,12 @@ extern "C" {
 // The most general MATMUL(TRANSPOSE()).  All type and shape information is
 // taken from the arguments' descriptors, and the result is dynamically
 // allocated.
-void RTNAME(MatmulTranspose)(Descriptor &, const Descriptor &,
+void RTDECL(MatmulTranspose)(Descriptor &, const Descriptor &,
     const Descriptor &, const char *sourceFile = nullptr, int line = 0);
 
 // A non-allocating variant; the result's descriptor must be established
 // and have a valid base address.
-void RTNAME(MatmulTransposeDirect)(const Descriptor &, const Descriptor &,
+void RTDECL(MatmulTransposeDirect)(const Descriptor &, const Descriptor &,
     const Descriptor &, const char *sourceFile = nullptr, int line = 0);
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/matmul.h b/flang/include/flang/Runtime/matmul.h
index 4598c487a12ca1..40581d44de9e2c 100644
--- a/flang/include/flang/Runtime/matmul.h
+++ b/flang/include/flang/Runtime/matmul.h
@@ -17,12 +17,12 @@ extern "C" {
 
 // The most general MATMUL.  All type and shape information is taken from the
 // arguments' descriptors, and the result is dynamically allocated.
-void RTNAME(Matmul)(Descriptor &, const Descriptor &, const Descriptor &,
+void RTDECL(Matmul)(Descriptor &, const Descriptor &, const Descriptor &,
     const char *sourceFile = nullptr, int line = 0);
 
 // A non-allocating variant; the result's descriptor must be established
 // and have a valid base address.
-void RTNAME(MatmulDirect)(const Descriptor &, const Descriptor &,
+void RTDECL(MatmulDirect)(const Descriptor &, const Descriptor &,
     const Descriptor &, const char *sourceFile = nullptr, int line = 0);
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/numeric.h b/flang/include/flang/Runtime/numeric.h
index e4e11a61731a61..3d9cb8b5b0acdc 100644
--- a/flang/include/flang/Runtime/numeric.h
+++ b/flang/include/flang/Runtime/numeric.h
@@ -20,280 +20,280 @@ namespace Fortran::runtime {
 extern "C" {
 
 // CEILING
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling4_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Ceiling4_1)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling4_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Ceiling4_2)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Ceiling4_4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Ceiling4_8)(
     CppTypeFor<TypeCategory::Real, 4>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling4_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling4_16)(
     CppTypeFor<TypeCategory::Real, 4>);
 #endif
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling8_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Ceiling8_1)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling8_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Ceiling8_2)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Ceiling8_4)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Ceiling8_8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling8_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling8_16)(
     CppTypeFor<TypeCategory::Real, 8>);
 #endif
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling10_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Ceiling10_1)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling10_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Ceiling10_2)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Ceiling10_4)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Ceiling10_8)(
     CppTypeFor<TypeCategory::Real, 10>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling10_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling10_16)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling16_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Ceiling16_1)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling16_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Ceiling16_2)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Ceiling16_4)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Ceiling16_8)(
     CppTypeFor<TypeCategory::Real, 16>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling16_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Ceiling16_16)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 #endif
 
 // EXPONENT is defined to return default INTEGER; support INTEGER(4 & 8)
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent4_4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Exponent4_8)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent8_4)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Exponent8_8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent10_4)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Exponent10_8)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Exponent16_4)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Exponent16_8)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 
 // FLOOR
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor4_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Floor4_1)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor4_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Floor4_2)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Floor4_4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Floor4_8)(
     CppTypeFor<TypeCategory::Real, 4>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor4_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Floor4_16)(
     CppTypeFor<TypeCategory::Real, 4>);
 #endif
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor8_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Floor8_1)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor8_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Floor8_2)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Floor8_4)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Floor8_8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor8_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Floor8_16)(
     CppTypeFor<TypeCategory::Real, 8>);
 #endif
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor10_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Floor10_1)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor10_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Floor10_2)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Floor10_4)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Floor10_8)(
     CppTypeFor<TypeCategory::Real, 10>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor10_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Floor10_16)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor16_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Floor16_1)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor16_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Floor16_2)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Floor16_4)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Floor16_8)(
     CppTypeFor<TypeCategory::Real, 16>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor16_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Floor16_16)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 #endif
 
 // FRACTION
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Fraction4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(Fraction4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Fraction8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(Fraction8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Fraction10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(Fraction10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Fraction16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(Fraction16)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 
 // ISNAN / IEEE_IS_NAN
-bool RTNAME(IsNaN4)(CppTypeFor<TypeCategory::Real, 4>);
-bool RTNAME(IsNaN8)(CppTypeFor<TypeCategory::Real, 8>);
+bool RTDECL(IsNaN4)(CppTypeFor<TypeCategory::Real, 4>);
+bool RTDECL(IsNaN8)(CppTypeFor<TypeCategory::Real, 8>);
 #if LDBL_MANT_DIG == 64
-bool RTNAME(IsNaN10)(CppTypeFor<TypeCategory::Real, 10>);
+bool RTDECL(IsNaN10)(CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-bool RTNAME(IsNaN16)(CppTypeFor<TypeCategory::Real, 16>);
+bool RTDECL(IsNaN16)(CppTypeFor<TypeCategory::Real, 16>);
 #endif
 
 // MOD & MODULO
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(ModInteger1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(ModInteger1)(
     CppTypeFor<TypeCategory::Integer, 1>, CppTypeFor<TypeCategory::Integer, 1>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(ModInteger2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(ModInteger2)(
     CppTypeFor<TypeCategory::Integer, 2>, CppTypeFor<TypeCategory::Integer, 2>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(ModInteger4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(ModInteger4)(
     CppTypeFor<TypeCategory::Integer, 4>, CppTypeFor<TypeCategory::Integer, 4>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(ModInteger8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(ModInteger8)(
     CppTypeFor<TypeCategory::Integer, 8>, CppTypeFor<TypeCategory::Integer, 8>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(ModInteger16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(ModInteger16)(
     CppTypeFor<TypeCategory::Integer, 16>,
     CppTypeFor<TypeCategory::Integer, 16>, const char *sourceFile = nullptr,
     int sourceLine = 0);
 #endif
-CppTypeFor<TypeCategory::Real, 4> RTNAME(ModReal4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(ModReal4)(
     CppTypeFor<TypeCategory::Real, 4>, CppTypeFor<TypeCategory::Real, 4>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(ModReal8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(ModReal8)(
     CppTypeFor<TypeCategory::Real, 8>, CppTypeFor<TypeCategory::Real, 8>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(ModReal10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ModReal10)(
     CppTypeFor<TypeCategory::Real, 10>, CppTypeFor<TypeCategory::Real, 10>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(ModReal16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(ModReal16)(
     CppTypeFor<TypeCategory::Real, 16>, CppTypeFor<TypeCategory::Real, 16>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #endif
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(ModuloInteger1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(ModuloInteger1)(
     CppTypeFor<TypeCategory::Integer, 1>, CppTypeFor<TypeCategory::Integer, 1>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(ModuloInteger2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(ModuloInteger2)(
     CppTypeFor<TypeCategory::Integer, 2>, CppTypeFor<TypeCategory::Integer, 2>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(ModuloInteger4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(ModuloInteger4)(
     CppTypeFor<TypeCategory::Integer, 4>, CppTypeFor<TypeCategory::Integer, 4>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(ModuloInteger8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(ModuloInteger8)(
     CppTypeFor<TypeCategory::Integer, 8>, CppTypeFor<TypeCategory::Integer, 8>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(ModuloInteger16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(ModuloInteger16)(
     CppTypeFor<TypeCategory::Integer, 16>,
     CppTypeFor<TypeCategory::Integer, 16>, const char *sourceFile = nullptr,
     int sourceLine = 0);
 #endif
-CppTypeFor<TypeCategory::Real, 4> RTNAME(ModuloReal4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(ModuloReal4)(
     CppTypeFor<TypeCategory::Real, 4>, CppTypeFor<TypeCategory::Real, 4>,
     const char *sourceFile = nullptr, int sourceLine = 0);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(ModuloReal8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(ModuloReal8)(
     CppTypeFor<TypeCategory::Real, 8>, CppTypeFor<TypeCategory::Real, 8>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(ModuloReal10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(ModuloReal10)(
     CppTypeFor<TypeCategory::Real, 10>, CppTypeFor<TypeCategory::Real, 10>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(ModuloReal16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(ModuloReal16)(
     CppTypeFor<TypeCategory::Real, 16>, CppTypeFor<TypeCategory::Real, 16>,
     const char *sourceFile = nullptr, int sourceLine = 0);
 #endif
 
 // NINT
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint4_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Nint4_1)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint4_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Nint4_2)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Nint4_4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Nint4_8)(
     CppTypeFor<TypeCategory::Real, 4>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint4_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Nint4_16)(
     CppTypeFor<TypeCategory::Real, 4>);
 #endif
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint8_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Nint8_1)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint8_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Nint8_2)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Nint8_4)(
     CppTypeFor<TypeCategory::Real, 8>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Nint8_8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint8_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Nint8_16)(
     CppTypeFor<TypeCategory::Real, 8>);
 #endif
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint10_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Nint10_1)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint10_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Nint10_2)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Nint10_4)(
     CppTypeFor<TypeCategory::Real, 10>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Nint10_8)(
     CppTypeFor<TypeCategory::Real, 10>);
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint10_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Nint10_16)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint16_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDECL(Nint16_1)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint16_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDECL(Nint16_2)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(Nint16_4)(
     CppTypeFor<TypeCategory::Real, 16>);
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDECL(Nint16_8)(
     CppTypeFor<TypeCategory::Real, 16>);
 #if defined __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint16_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDECL(Nint16_16)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 #endif
@@ -301,113 +301,113 @@ CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint16_16)(
 // NEAREST
 // The second argument to NEAREST is the result of a comparison
 // to zero (i.e., S > 0)
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Nearest4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(Nearest4)(
     CppTypeFor<TypeCategory::Real, 4>, bool positive);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Nearest8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(Nearest8)(
     CppTypeFor<TypeCategory::Real, 8>, bool positive);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Nearest10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(Nearest10)(
     CppTypeFor<TypeCategory::Real, 10>, bool positive);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Nearest16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(Nearest16)(
     CppTypeFor<TypeCategory::Real, 16>, bool positive);
 #endif
 
 // RRSPACING
-CppTypeFor<TypeCategory::Real, 4> RTNAME(RRSpacing4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(RRSpacing4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(RRSpacing8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(RRSpacing8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(RRSpacing10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(RRSpacing10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(RRSpacing16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(RRSpacing16)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 
 // SET_EXPONENT's I= argument can be any INTEGER kind; upcast it to 64-bit
-CppTypeFor<TypeCategory::Real, 4> RTNAME(SetExponent4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(SetExponent4)(
     CppTypeFor<TypeCategory::Real, 4>, std::int64_t);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(SetExponent8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(SetExponent8)(
     CppTypeFor<TypeCategory::Real, 8>, std::int64_t);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(SetExponent10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(SetExponent10)(
     CppTypeFor<TypeCategory::Real, 10>, std::int64_t);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(SetExponent16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(SetExponent16)(
     CppTypeFor<TypeCategory::Real, 16>, std::int64_t);
 #endif
 
 // SCALE
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Scale4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(Scale4)(
     CppTypeFor<TypeCategory::Real, 4>, std::int64_t);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Scale8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(Scale8)(
     CppTypeFor<TypeCategory::Real, 8>, std::int64_t);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Scale10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(Scale10)(
     CppTypeFor<TypeCategory::Real, 10>, std::int64_t);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Scale16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(Scale16)(
     CppTypeFor<TypeCategory::Real, 16>, std::int64_t);
 #endif
 
 // SELECTED_INT_KIND
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(SelectedIntKind)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(SelectedIntKind)(
     const char *, int, void *, int);
 
 // SELECTED_REAL_KIND
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(SelectedRealKind)(
+CppTypeFor<TypeCategory::Integer, 4> RTDECL(SelectedRealKind)(
     const char *, int, void *, int, void *, int, void *, int);
 
 // SPACING
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Spacing4)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(Spacing4)(
     CppTypeFor<TypeCategory::Real, 4>);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Spacing8)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(Spacing8)(
     CppTypeFor<TypeCategory::Real, 8>);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Spacing10)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(Spacing10)(
     CppTypeFor<TypeCategory::Real, 10>);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Spacing16)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(Spacing16)(
     CppTypeFor<TypeCategory::Real, 16>);
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(FPow4i)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(FPow4i)(
     CppTypeFor<TypeCategory::Real, 4> b,
     CppTypeFor<TypeCategory::Integer, 4> e);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(FPow8i)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(FPow8i)(
     CppTypeFor<TypeCategory::Real, 8> b,
     CppTypeFor<TypeCategory::Integer, 4> e);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(FPow10i)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(FPow10i)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 4> e);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(FPow16i)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(FPow16i)(
     CppTypeFor<TypeCategory::Real, 16> b,
     CppTypeFor<TypeCategory::Integer, 4> e);
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(FPow4k)(
+CppTypeFor<TypeCategory::Real, 4> RTDECL(FPow4k)(
     CppTypeFor<TypeCategory::Real, 4> b,
     CppTypeFor<TypeCategory::Integer, 8> e);
-CppTypeFor<TypeCategory::Real, 8> RTNAME(FPow8k)(
+CppTypeFor<TypeCategory::Real, 8> RTDECL(FPow8k)(
     CppTypeFor<TypeCategory::Real, 8> b,
     CppTypeFor<TypeCategory::Integer, 8> e);
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(FPow10k)(
+CppTypeFor<TypeCategory::Real, 10> RTDECL(FPow10k)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 8> e);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(FPow16k)(
+CppTypeFor<TypeCategory::Real, 16> RTDECL(FPow16k)(
     CppTypeFor<TypeCategory::Real, 16> b,
     CppTypeFor<TypeCategory::Integer, 8> e);
 #endif
diff --git a/flang/include/flang/Runtime/reduction.h b/flang/include/flang/Runtime/reduction.h
index a8469cb9dac872..b91fec0cd26b51 100644
--- a/flang/include/flang/Runtime/reduction.h
+++ b/flang/include/flang/Runtime/reduction.h
@@ -46,389 +46,389 @@ extern "C" {
 
 // SUM()
 
-std::int8_t RTNAME(SumInteger1)(const Descriptor &, const char *source,
+std::int8_t RTDECL(SumInteger1)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(SumInteger2)(const Descriptor &, const char *source,
+std::int16_t RTDECL(SumInteger2)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(SumInteger4)(const Descriptor &, const char *source,
+std::int32_t RTDECL(SumInteger4)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(SumInteger8)(const Descriptor &, const char *source,
+std::int64_t RTDECL(SumInteger8)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(SumInteger16)(const Descriptor &, const char *source,
+common::int128_t RTDECL(SumInteger16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 
 // REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
-float RTNAME(SumReal2)(const Descriptor &, const char *source, int line,
+float RTDECL(SumReal2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(SumReal3)(const Descriptor &, const char *source, int line,
+float RTDECL(SumReal3)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(SumReal4)(const Descriptor &, const char *source, int line,
+float RTDECL(SumReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-double RTNAME(SumReal8)(const Descriptor &, const char *source, int line,
+double RTDECL(SumReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #if LDBL_MANT_DIG == 64
-long double RTNAME(SumReal10)(const Descriptor &, const char *source, int line,
+long double RTDECL(SumReal10)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTNAME(SumReal16)(const Descriptor &, const char *source,
+CppFloat128Type RTDECL(SumReal16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 
-void RTNAME(CppSumComplex2)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppSumComplex2)(std::complex<float> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppSumComplex3)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppSumComplex3)(std::complex<float> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppSumComplex4)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppSumComplex4)(std::complex<float> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppSumComplex8)(std::complex<double> &, const Descriptor &,
+void RTDECL(CppSumComplex8)(std::complex<double> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppSumComplex10)(std::complex<long double> &, const Descriptor &,
+void RTDECL(CppSumComplex10)(std::complex<long double> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppSumComplex16)(std::complex<long double> &, const Descriptor &,
+void RTDECL(CppSumComplex16)(std::complex<long double> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 
-void RTNAME(SumDim)(Descriptor &result, const Descriptor &array, int dim,
+void RTDECL(SumDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
 // PRODUCT()
 
-std::int8_t RTNAME(ProductInteger1)(const Descriptor &, const char *source,
+std::int8_t RTDECL(ProductInteger1)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(ProductInteger2)(const Descriptor &, const char *source,
+std::int16_t RTDECL(ProductInteger2)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(ProductInteger4)(const Descriptor &, const char *source,
+std::int32_t RTDECL(ProductInteger4)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(ProductInteger8)(const Descriptor &, const char *source,
+std::int64_t RTDECL(ProductInteger8)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(ProductInteger16)(const Descriptor &,
+common::int128_t RTDECL(ProductInteger16)(const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 #endif
 
 // REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert
-float RTNAME(ProductReal2)(const Descriptor &, const char *source, int line,
+float RTDECL(ProductReal2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(ProductReal3)(const Descriptor &, const char *source, int line,
+float RTDECL(ProductReal3)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(ProductReal4)(const Descriptor &, const char *source, int line,
+float RTDECL(ProductReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-double RTNAME(ProductReal8)(const Descriptor &, const char *source, int line,
+double RTDECL(ProductReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #if LDBL_MANT_DIG == 64
-long double RTNAME(ProductReal10)(const Descriptor &, const char *source,
+long double RTDECL(ProductReal10)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTNAME(ProductReal16)(const Descriptor &, const char *source,
+CppFloat128Type RTDECL(ProductReal16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 
-void RTNAME(CppProductComplex2)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppProductComplex2)(std::complex<float> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppProductComplex3)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppProductComplex3)(std::complex<float> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppProductComplex4)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppProductComplex4)(std::complex<float> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppProductComplex8)(std::complex<double> &, const Descriptor &,
+void RTDECL(CppProductComplex8)(std::complex<double> &, const Descriptor &,
     const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppProductComplex10)(std::complex<long double> &,
+void RTDECL(CppProductComplex10)(std::complex<long double> &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
-void RTNAME(CppProductComplex16)(std::complex<long double> &,
+void RTDECL(CppProductComplex16)(std::complex<long double> &,
     const Descriptor &, const char *source, int line, int dim = 0,
     const Descriptor *mask = nullptr);
 
-void RTNAME(ProductDim)(Descriptor &result, const Descriptor &array, int dim,
+void RTDECL(ProductDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
 // IALL, IANY, IPARITY
-std::int8_t RTNAME(IAll1)(const Descriptor &, const char *source, int line,
+std::int8_t RTDECL(IAll1)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(IAll2)(const Descriptor &, const char *source, int line,
+std::int16_t RTDECL(IAll2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(IAll4)(const Descriptor &, const char *source, int line,
+std::int32_t RTDECL(IAll4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(IAll8)(const Descriptor &, const char *source, int line,
+std::int64_t RTDECL(IAll8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(IAll16)(const Descriptor &, const char *source,
+common::int128_t RTDECL(IAll16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-void RTNAME(IAllDim)(Descriptor &result, const Descriptor &array, int dim,
+void RTDECL(IAllDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
-std::int8_t RTNAME(IAny1)(const Descriptor &, const char *source, int line,
+std::int8_t RTDECL(IAny1)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(IAny2)(const Descriptor &, const char *source, int line,
+std::int16_t RTDECL(IAny2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(IAny4)(const Descriptor &, const char *source, int line,
+std::int32_t RTDECL(IAny4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(IAny8)(const Descriptor &, const char *source, int line,
+std::int64_t RTDECL(IAny8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(IAny16)(const Descriptor &, const char *source,
+common::int128_t RTDECL(IAny16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-void RTNAME(IAnyDim)(Descriptor &result, const Descriptor &array, int dim,
+void RTDECL(IAnyDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
-std::int8_t RTNAME(IParity1)(const Descriptor &, const char *source, int line,
+std::int8_t RTDECL(IParity1)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(IParity2)(const Descriptor &, const char *source, int line,
+std::int16_t RTDECL(IParity2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(IParity4)(const Descriptor &, const char *source, int line,
+std::int32_t RTDECL(IParity4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(IParity8)(const Descriptor &, const char *source, int line,
+std::int64_t RTDECL(IParity8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(IParity16)(const Descriptor &, const char *source,
+common::int128_t RTDECL(IParity16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-void RTNAME(IParityDim)(Descriptor &result, const Descriptor &array, int dim,
+void RTDECL(IParityDim)(Descriptor &result, const Descriptor &array, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
 // FINDLOC, MAXLOC, & MINLOC
 // These return allocated arrays in the supplied descriptor.
 // The default value for KIND= should be the default INTEGER in effect at
 // compilation time.
-void RTNAME(Findloc)(Descriptor &, const Descriptor &x,
+void RTDECL(Findloc)(Descriptor &, const Descriptor &x,
     const Descriptor &target, int kind, const char *source, int line,
     const Descriptor *mask = nullptr, bool back = false);
-void RTNAME(FindlocDim)(Descriptor &, const Descriptor &x,
+void RTDECL(FindlocDim)(Descriptor &, const Descriptor &x,
     const Descriptor &target, int kind, int dim, const char *source, int line,
     const Descriptor *mask = nullptr, bool back = false);
-void RTNAME(MaxlocCharacter)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocCharacter)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocInteger1)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocInteger1)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocInteger2)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocInteger2)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocInteger4)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocInteger4)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocInteger8)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocInteger8)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocInteger16)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocInteger16)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocReal4)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocReal4)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocReal8)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocReal8)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocReal10)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocReal10)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocReal16)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MaxlocReal16)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MaxlocDim)(Descriptor &, const Descriptor &x, int kind, int dim,
+void RTDECL(MaxlocDim)(Descriptor &, const Descriptor &x, int kind, int dim,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocCharacter)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocCharacter)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocInteger1)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocInteger1)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocInteger2)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocInteger2)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocInteger4)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocInteger4)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocInteger8)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocInteger8)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocInteger16)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocInteger16)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocReal4)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocReal4)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocReal8)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocReal8)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocReal10)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocReal10)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocReal16)(Descriptor &, const Descriptor &, int kind,
+void RTDECL(MinlocReal16)(Descriptor &, const Descriptor &, int kind,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
-void RTNAME(MinlocDim)(Descriptor &, const Descriptor &x, int kind, int dim,
+void RTDECL(MinlocDim)(Descriptor &, const Descriptor &x, int kind, int dim,
     const char *source, int line, const Descriptor *mask = nullptr,
     bool back = false);
 
 // MAXVAL and MINVAL
-std::int8_t RTNAME(MaxvalInteger1)(const Descriptor &, const char *source,
+std::int8_t RTDECL(MaxvalInteger1)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(MaxvalInteger2)(const Descriptor &, const char *source,
+std::int16_t RTDECL(MaxvalInteger2)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(MaxvalInteger4)(const Descriptor &, const char *source,
+std::int32_t RTDECL(MaxvalInteger4)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(MaxvalInteger8)(const Descriptor &, const char *source,
+std::int64_t RTDECL(MaxvalInteger8)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(MaxvalInteger16)(const Descriptor &, const char *source,
+common::int128_t RTDECL(MaxvalInteger16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-float RTNAME(MaxvalReal2)(const Descriptor &, const char *source, int line,
+float RTDECL(MaxvalReal2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(MaxvalReal3)(const Descriptor &, const char *source, int line,
+float RTDECL(MaxvalReal3)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(MaxvalReal4)(const Descriptor &, const char *source, int line,
+float RTDECL(MaxvalReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-double RTNAME(MaxvalReal8)(const Descriptor &, const char *source, int line,
+double RTDECL(MaxvalReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #if LDBL_MANT_DIG == 64
-long double RTNAME(MaxvalReal10)(const Descriptor &, const char *source,
+long double RTDECL(MaxvalReal10)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTNAME(MaxvalReal16)(const Descriptor &, const char *source,
+CppFloat128Type RTDECL(MaxvalReal16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-void RTNAME(MaxvalCharacter)(Descriptor &, const Descriptor &,
+void RTDECL(MaxvalCharacter)(Descriptor &, const Descriptor &,
     const char *source, int line, const Descriptor *mask = nullptr);
 
-std::int8_t RTNAME(MinvalInteger1)(const Descriptor &, const char *source,
+std::int8_t RTDECL(MinvalInteger1)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int16_t RTNAME(MinvalInteger2)(const Descriptor &, const char *source,
+std::int16_t RTDECL(MinvalInteger2)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int32_t RTNAME(MinvalInteger4)(const Descriptor &, const char *source,
+std::int32_t RTDECL(MinvalInteger4)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
-std::int64_t RTNAME(MinvalInteger8)(const Descriptor &, const char *source,
+std::int64_t RTDECL(MinvalInteger8)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(MinvalInteger16)(const Descriptor &, const char *source,
+common::int128_t RTDECL(MinvalInteger16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-float RTNAME(MinvalReal2)(const Descriptor &, const char *source, int line,
+float RTDECL(MinvalReal2)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(MinvalReal3)(const Descriptor &, const char *source, int line,
+float RTDECL(MinvalReal3)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-float RTNAME(MinvalReal4)(const Descriptor &, const char *source, int line,
+float RTDECL(MinvalReal4)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
-double RTNAME(MinvalReal8)(const Descriptor &, const char *source, int line,
+double RTDECL(MinvalReal8)(const Descriptor &, const char *source, int line,
     int dim = 0, const Descriptor *mask = nullptr);
 #if LDBL_MANT_DIG == 64
-long double RTNAME(MinvalReal10)(const Descriptor &, const char *source,
+long double RTDECL(MinvalReal10)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTNAME(MinvalReal16)(const Descriptor &, const char *source,
+CppFloat128Type RTDECL(MinvalReal16)(const Descriptor &, const char *source,
     int line, int dim = 0, const Descriptor *mask = nullptr);
 #endif
-void RTNAME(MinvalCharacter)(Descriptor &, const Descriptor &,
+void RTDECL(MinvalCharacter)(Descriptor &, const Descriptor &,
     const char *source, int line, const Descriptor *mask = nullptr);
 
-void RTNAME(MaxvalDim)(Descriptor &, const Descriptor &, int dim,
+void RTDECL(MaxvalDim)(Descriptor &, const Descriptor &, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
-void RTNAME(MinvalDim)(Descriptor &, const Descriptor &, int dim,
+void RTDECL(MinvalDim)(Descriptor &, const Descriptor &, int dim,
     const char *source, int line, const Descriptor *mask = nullptr);
 
 // NORM2
-float RTNAME(Norm2_2)(
+float RTDECL(Norm2_2)(
     const Descriptor &, const char *source, int line, int dim = 0);
-float RTNAME(Norm2_3)(
+float RTDECL(Norm2_3)(
     const Descriptor &, const char *source, int line, int dim = 0);
-float RTNAME(Norm2_4)(
+float RTDECL(Norm2_4)(
     const Descriptor &, const char *source, int line, int dim = 0);
-double RTNAME(Norm2_8)(
+double RTDECL(Norm2_8)(
     const Descriptor &, const char *source, int line, int dim = 0);
 #if LDBL_MANT_DIG == 64
-long double RTNAME(Norm2_10)(
+long double RTDECL(Norm2_10)(
     const Descriptor &, const char *source, int line, int dim = 0);
 #elif LDBL_MANT_DIG == 113
-long double RTNAME(Norm2_16)(
+long double RTDECL(Norm2_16)(
     const Descriptor &, const char *source, int line, int dim = 0);
 #endif
-void RTNAME(Norm2Dim)(
+void RTDECL(Norm2Dim)(
     Descriptor &, const Descriptor &, int dim, const char *source, int line);
 
 // ALL, ANY, COUNT, & PARITY logical reductions
-bool RTNAME(All)(const Descriptor &, const char *source, int line, int dim = 0);
-void RTNAME(AllDim)(Descriptor &result, const Descriptor &, int dim,
+bool RTDECL(All)(const Descriptor &, const char *source, int line, int dim = 0);
+void RTDECL(AllDim)(Descriptor &result, const Descriptor &, int dim,
     const char *source, int line);
-bool RTNAME(Any)(const Descriptor &, const char *source, int line, int dim = 0);
-void RTNAME(AnyDim)(Descriptor &result, const Descriptor &, int dim,
+bool RTDECL(Any)(const Descriptor &, const char *source, int line, int dim = 0);
+void RTDECL(AnyDim)(Descriptor &result, const Descriptor &, int dim,
     const char *source, int line);
-std::int64_t RTNAME(Count)(
+std::int64_t RTDECL(Count)(
     const Descriptor &, const char *source, int line, int dim = 0);
-void RTNAME(CountDim)(Descriptor &result, const Descriptor &, int dim, int kind,
+void RTDECL(CountDim)(Descriptor &result, const Descriptor &, int dim, int kind,
     const char *source, int line);
-bool RTNAME(Parity)(
+bool RTDECL(Parity)(
     const Descriptor &, const char *source, int line, int dim = 0);
-void RTNAME(ParityDim)(Descriptor &result, const Descriptor &, int dim,
+void RTDECL(ParityDim)(Descriptor &result, const Descriptor &, int dim,
     const char *source, int line);
 
 // DOT_PRODUCT
-std::int8_t RTNAME(DotProductInteger1)(const Descriptor &, const Descriptor &,
+std::int8_t RTDECL(DotProductInteger1)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-std::int16_t RTNAME(DotProductInteger2)(const Descriptor &, const Descriptor &,
+std::int16_t RTDECL(DotProductInteger2)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-std::int32_t RTNAME(DotProductInteger4)(const Descriptor &, const Descriptor &,
+std::int32_t RTDECL(DotProductInteger4)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-std::int64_t RTNAME(DotProductInteger8)(const Descriptor &, const Descriptor &,
+std::int64_t RTDECL(DotProductInteger8)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 #ifdef __SIZEOF_INT128__
-common::int128_t RTNAME(DotProductInteger16)(const Descriptor &,
+common::int128_t RTDECL(DotProductInteger16)(const Descriptor &,
     const Descriptor &, const char *source = nullptr, int line = 0);
 #endif
-float RTNAME(DotProductReal2)(const Descriptor &, const Descriptor &,
+float RTDECL(DotProductReal2)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-float RTNAME(DotProductReal3)(const Descriptor &, const Descriptor &,
+float RTDECL(DotProductReal3)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-float RTNAME(DotProductReal4)(const Descriptor &, const Descriptor &,
+float RTDECL(DotProductReal4)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
-double RTNAME(DotProductReal8)(const Descriptor &, const Descriptor &,
+double RTDECL(DotProductReal8)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 #if LDBL_MANT_DIG == 64
-long double RTNAME(DotProductReal10)(const Descriptor &, const Descriptor &,
+long double RTDECL(DotProductReal10)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppFloat128Type RTNAME(DotProductReal16)(const Descriptor &, const Descriptor &,
+CppFloat128Type RTDECL(DotProductReal16)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 #endif
-void RTNAME(CppDotProductComplex2)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppDotProductComplex2)(std::complex<float> &, const Descriptor &,
     const Descriptor &, const char *source = nullptr, int line = 0);
-void RTNAME(CppDotProductComplex3)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppDotProductComplex3)(std::complex<float> &, const Descriptor &,
     const Descriptor &, const char *source = nullptr, int line = 0);
-void RTNAME(CppDotProductComplex4)(std::complex<float> &, const Descriptor &,
+void RTDECL(CppDotProductComplex4)(std::complex<float> &, const Descriptor &,
     const Descriptor &, const char *source = nullptr, int line = 0);
-void RTNAME(CppDotProductComplex8)(std::complex<double> &, const Descriptor &,
+void RTDECL(CppDotProductComplex8)(std::complex<double> &, const Descriptor &,
     const Descriptor &, const char *source = nullptr, int line = 0);
 #if LDBL_MANT_DIG == 64
-void RTNAME(CppDotProductComplex10)(std::complex<long double> &,
+void RTDECL(CppDotProductComplex10)(std::complex<long double> &,
     const Descriptor &, const Descriptor &, const char *source = nullptr,
     int line = 0);
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-void RTNAME(CppDotProductComplex16)(std::complex<CppFloat128Type> &,
+void RTDECL(CppDotProductComplex16)(std::complex<CppFloat128Type> &,
     const Descriptor &, const Descriptor &, const char *source = nullptr,
     int line = 0);
 #endif
-bool RTNAME(DotProductLogical)(const Descriptor &, const Descriptor &,
+bool RTDECL(DotProductLogical)(const Descriptor &, const Descriptor &,
     const char *source = nullptr, int line = 0);
 
 } // extern "C"
diff --git a/flang/include/flang/Runtime/support.h b/flang/include/flang/Runtime/support.h
index 1262a04b5cd4fc..e7ae2154b2a72b 100644
--- a/flang/include/flang/Runtime/support.h
+++ b/flang/include/flang/Runtime/support.h
@@ -21,7 +21,7 @@ class Descriptor;
 extern "C" {
 
 // Predicate: is the storage described by a Descriptor contiguous in memory?
-bool RTNAME(IsContiguous)(const Descriptor &);
+bool RTDECL(IsContiguous)(const Descriptor &);
 
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index bf3aa5af3c88e3..5df94491b06fb8 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -152,10 +152,17 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
 # List of files that are buildable for all devices.
 set(supported_files
   ISO_Fortran_binding.cpp
+  allocatable.cpp
   assign.cpp
   derived.cpp
+  derived-api.cpp
   descriptor.cpp
+  matmul-transpose.cpp
+  matmul.cpp
+  numeric.cpp
   stat.cpp
+  sum.cpp
+  support.cpp
   terminator.cpp
   tools.cpp
   transformational.cpp
@@ -188,6 +195,10 @@ if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
   if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
     set(CUDA_COMPILE_OPTIONS
       --expt-relaxed-constexpr
+      # Disable these warnings:
+      #   'long double' is treated as 'double' in device code
+      -Xcudafe --diag_suppress=20208
+      -Xcudafe --display_error_number
       )
   endif()
   set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
diff --git a/flang/runtime/allocatable.cpp b/flang/runtime/allocatable.cpp
index 409255aaa214de..143518d8e99e13 100644
--- a/flang/runtime/allocatable.cpp
+++ b/flang/runtime/allocatable.cpp
@@ -19,7 +19,7 @@
 namespace Fortran::runtime {
 extern "C" {
 
-void RTNAME(AllocatableInitIntrinsic)(Descriptor &descriptor,
+void RTDEF(AllocatableInitIntrinsic)(Descriptor &descriptor,
     TypeCategory category, int kind, int rank, int corank) {
   INTERNAL_CHECK(corank == 0);
   descriptor.Establish(TypeCode{category, kind},
@@ -27,21 +27,21 @@ void RTNAME(AllocatableInitIntrinsic)(Descriptor &descriptor,
       CFI_attribute_allocatable);
 }
 
-void RTNAME(AllocatableInitCharacter)(Descriptor &descriptor,
+void RTDEF(AllocatableInitCharacter)(Descriptor &descriptor,
     SubscriptValue length, int kind, int rank, int corank) {
   INTERNAL_CHECK(corank == 0);
   descriptor.Establish(
       kind, length, nullptr, rank, nullptr, CFI_attribute_allocatable);
 }
 
-void RTNAME(AllocatableInitDerived)(Descriptor &descriptor,
+void RTDEF(AllocatableInitDerived)(Descriptor &descriptor,
     const typeInfo::DerivedType &derivedType, int rank, int corank) {
   INTERNAL_CHECK(corank == 0);
   descriptor.Establish(
       derivedType, nullptr, rank, nullptr, CFI_attribute_allocatable);
 }
 
-void RTNAME(AllocatableInitIntrinsicForAllocate)(Descriptor &descriptor,
+void RTDEF(AllocatableInitIntrinsicForAllocate)(Descriptor &descriptor,
     TypeCategory category, int kind, int rank, int corank) {
   if (descriptor.IsAllocated()) {
     return;
@@ -49,7 +49,7 @@ void RTNAME(AllocatableInitIntrinsicForAllocate)(Descriptor &descriptor,
   RTNAME(AllocatableInitIntrinsic)(descriptor, category, kind, rank, corank);
 }
 
-void RTNAME(AllocatableInitCharacterForAllocate)(Descriptor &descriptor,
+void RTDEF(AllocatableInitCharacterForAllocate)(Descriptor &descriptor,
     SubscriptValue length, int kind, int rank, int corank) {
   if (descriptor.IsAllocated()) {
     return;
@@ -57,7 +57,7 @@ void RTNAME(AllocatableInitCharacterForAllocate)(Descriptor &descriptor,
   RTNAME(AllocatableInitCharacter)(descriptor, length, kind, rank, corank);
 }
 
-void RTNAME(AllocatableInitDerivedForAllocate)(Descriptor &descriptor,
+void RTDEF(AllocatableInitDerivedForAllocate)(Descriptor &descriptor,
     const typeInfo::DerivedType &derivedType, int rank, int corank) {
   if (descriptor.IsAllocated()) {
     return;
@@ -65,7 +65,7 @@ void RTNAME(AllocatableInitDerivedForAllocate)(Descriptor &descriptor,
   RTNAME(AllocatableInitDerived)(descriptor, derivedType, rank, corank);
 }
 
-std::int32_t RTNAME(MoveAlloc)(Descriptor &to, Descriptor &from,
+std::int32_t RTDEF(MoveAlloc)(Descriptor &to, Descriptor &from,
     const typeInfo::DerivedType *derivedType, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
@@ -110,21 +110,21 @@ std::int32_t RTNAME(MoveAlloc)(Descriptor &to, Descriptor &from,
   return StatOk;
 }
 
-void RTNAME(AllocatableSetBounds)(Descriptor &descriptor, int zeroBasedDim,
+void RTDEF(AllocatableSetBounds)(Descriptor &descriptor, int zeroBasedDim,
     SubscriptValue lower, SubscriptValue upper) {
   INTERNAL_CHECK(zeroBasedDim >= 0 && zeroBasedDim < descriptor.rank());
   descriptor.GetDimension(zeroBasedDim).SetBounds(lower, upper);
   // The byte strides are computed when the object is allocated.
 }
 
-void RTNAME(AllocatableSetDerivedLength)(
+void RTDEF(AllocatableSetDerivedLength)(
     Descriptor &descriptor, int which, SubscriptValue x) {
   DescriptorAddendum *addendum{descriptor.Addendum()};
   INTERNAL_CHECK(addendum != nullptr);
   addendum->SetLenParameterValue(which, x);
 }
 
-void RTNAME(AllocatableApplyMold)(
+void RTDEF(AllocatableApplyMold)(
     Descriptor &descriptor, const Descriptor &mold, int rank) {
   if (descriptor.IsAllocated()) {
     // 9.7.1.3 Return so the error can be emitted by AllocatableAllocate.
@@ -133,7 +133,7 @@ void RTNAME(AllocatableApplyMold)(
   descriptor.ApplyMold(mold, rank);
 }
 
-int RTNAME(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
+int RTDEF(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   if (!descriptor.IsAllocatable()) {
@@ -155,7 +155,7 @@ int RTNAME(AllocatableAllocate)(Descriptor &descriptor, bool hasStat,
   return stat;
 }
 
-int RTNAME(AllocatableAllocateSource)(Descriptor &alloc,
+int RTDEF(AllocatableAllocateSource)(Descriptor &alloc,
     const Descriptor &source, bool hasStat, const Descriptor *errMsg,
     const char *sourceFile, int sourceLine) {
   int stat{RTNAME(AllocatableAllocate)(
@@ -167,7 +167,7 @@ int RTNAME(AllocatableAllocateSource)(Descriptor &alloc,
   return stat;
 }
 
-int RTNAME(AllocatableDeallocate)(Descriptor &descriptor, bool hasStat,
+int RTDEF(AllocatableDeallocate)(Descriptor &descriptor, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   if (!descriptor.IsAllocatable()) {
@@ -182,7 +182,7 @@ int RTNAME(AllocatableDeallocate)(Descriptor &descriptor, bool hasStat,
       errMsg, hasStat);
 }
 
-int RTNAME(AllocatableDeallocatePolymorphic)(Descriptor &descriptor,
+int RTDEF(AllocatableDeallocatePolymorphic)(Descriptor &descriptor,
     const typeInfo::DerivedType *derivedType, bool hasStat,
     const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
   int stat{RTNAME(AllocatableDeallocate)(
@@ -202,7 +202,7 @@ int RTNAME(AllocatableDeallocatePolymorphic)(Descriptor &descriptor,
   return stat;
 }
 
-void RTNAME(AllocatableDeallocateNoFinal)(
+void RTDEF(AllocatableDeallocateNoFinal)(
     Descriptor &descriptor, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   if (!descriptor.IsAllocatable()) {
diff --git a/flang/runtime/derived-api.cpp b/flang/runtime/derived-api.cpp
index 39bf0521e73b16..0b2df206938916 100644
--- a/flang/runtime/derived-api.cpp
+++ b/flang/runtime/derived-api.cpp
@@ -10,6 +10,7 @@
 #include "flang/Runtime/derived-api.h"
 #include "derived.h"
 #include "terminator.h"
+#include "tools.h"
 #include "type-info.h"
 #include "flang/Runtime/descriptor.h"
 
@@ -17,7 +18,7 @@ namespace Fortran::runtime {
 
 extern "C" {
 
-void RTNAME(Initialize)(
+void RTDEF(Initialize)(
     const Descriptor &descriptor, const char *sourceFile, int sourceLine) {
   if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
     if (const auto *derived{addendum->derivedType()}) {
@@ -29,7 +30,7 @@ void RTNAME(Initialize)(
   }
 }
 
-void RTNAME(Destroy)(const Descriptor &descriptor) {
+void RTDEF(Destroy)(const Descriptor &descriptor) {
   if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
     if (const auto *derived{addendum->derivedType()}) {
       if (!derived->noDestructionNeeded()) {
@@ -41,7 +42,7 @@ void RTNAME(Destroy)(const Descriptor &descriptor) {
   }
 }
 
-void RTNAME(Finalize)(
+void RTDEF(Finalize)(
     const Descriptor &descriptor, const char *sourceFile, int sourceLine) {
   if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
     if (const auto *derived{addendum->derivedType()}) {
@@ -53,7 +54,7 @@ void RTNAME(Finalize)(
   }
 }
 
-bool RTNAME(ClassIs)(
+bool RTDEF(ClassIs)(
     const Descriptor &descriptor, const typeInfo::DerivedType &derivedType) {
   if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
     if (const auto *derived{addendum->derivedType()}) {
@@ -72,7 +73,8 @@ bool RTNAME(ClassIs)(
   return false;
 }
 
-static bool CompareDerivedTypeNames(const Descriptor &a, const Descriptor &b) {
+static RT_API_ATTRS bool CompareDerivedTypeNames(
+    const Descriptor &a, const Descriptor &b) {
   if (a.raw().version == CFI_VERSION &&
       a.type() == TypeCode{TypeCategory::Character, 1} &&
       a.ElementBytes() > 0 && a.rank() == 0 && a.OffsetElement() != nullptr &&
@@ -80,18 +82,20 @@ static bool CompareDerivedTypeNames(const Descriptor &a, const Descriptor &b) {
       b.type() == TypeCode{TypeCategory::Character, 1} &&
       b.ElementBytes() > 0 && b.rank() == 0 && b.OffsetElement() != nullptr &&
       a.ElementBytes() == b.ElementBytes() &&
-      memcmp(a.OffsetElement(), b.OffsetElement(), a.ElementBytes()) == 0) {
+      Fortran::runtime::memcmp(
+          a.OffsetElement(), b.OffsetElement(), a.ElementBytes()) == 0) {
     return true;
   }
   return false;
 }
 
-inline bool CompareDerivedType(
+inline RT_API_ATTRS bool CompareDerivedType(
     const typeInfo::DerivedType *a, const typeInfo::DerivedType *b) {
   return a == b || CompareDerivedTypeNames(a->name(), b->name());
 }
 
-static const typeInfo::DerivedType *GetDerivedType(const Descriptor &desc) {
+static const RT_API_ATTRS typeInfo::DerivedType *GetDerivedType(
+    const Descriptor &desc) {
   if (const DescriptorAddendum * addendum{desc.Addendum()}) {
     if (const auto *derived{addendum->derivedType()}) {
       return derived;
@@ -100,7 +104,7 @@ static const typeInfo::DerivedType *GetDerivedType(const Descriptor &desc) {
   return nullptr;
 }
 
-bool RTNAME(SameTypeAs)(const Descriptor &a, const Descriptor &b) {
+bool RTDEF(SameTypeAs)(const Descriptor &a, const Descriptor &b) {
   auto aType{a.raw().type};
   auto bType{b.raw().type};
   if ((aType != CFI_type_struct && aType != CFI_type_other) ||
@@ -125,7 +129,7 @@ bool RTNAME(SameTypeAs)(const Descriptor &a, const Descriptor &b) {
   }
 }
 
-bool RTNAME(ExtendsTypeOf)(const Descriptor &a, const Descriptor &mold) {
+bool RTDEF(ExtendsTypeOf)(const Descriptor &a, const Descriptor &mold) {
   auto aType{a.raw().type};
   auto moldType{mold.raw().type};
   if ((aType != CFI_type_struct && aType != CFI_type_other) ||
@@ -152,7 +156,7 @@ bool RTNAME(ExtendsTypeOf)(const Descriptor &a, const Descriptor &mold) {
   }
 }
 
-void RTNAME(DestroyWithoutFinalization)(const Descriptor &descriptor) {
+void RTDEF(DestroyWithoutFinalization)(const Descriptor &descriptor) {
   if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
     if (const auto *derived{addendum->derivedType()}) {
       if (!derived->noDestructionNeeded()) {
diff --git a/flang/runtime/matmul-transpose.cpp b/flang/runtime/matmul-transpose.cpp
index 43fcf7c0849069..3d745575f25872 100644
--- a/flang/runtime/matmul-transpose.cpp
+++ b/flang/runtime/matmul-transpose.cpp
@@ -31,6 +31,11 @@
 namespace {
 using namespace Fortran::runtime;
 
+// Suppress the warnings about calling __host__-only std::complex operators,
+// defined in C++ STD header files, from __device__ code.
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // Contiguous numeric TRANSPOSE(matrix)*matrix multiplication
 //   TRANSPOSE(matrix(n, rows)) * matrix(n,cols) ->
 //             matrix(rows, n)  * matrix(n,cols) -> matrix(rows,cols)
@@ -54,7 +59,7 @@ using namespace Fortran::runtime;
 //   2  RES(I,J) = RES(I,J) + X(K,I)*Y(K,J) ! loop-invariant last term
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool X_HAS_STRIDED_COLUMNS, bool Y_HAS_STRIDED_COLUMNS>
-inline static void MatrixTransposedTimesMatrix(
+inline static RT_API_ATTRS void MatrixTransposedTimesMatrix(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
     SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
     SubscriptValue n, std::size_t xColumnByteStride = 0,
@@ -85,8 +90,10 @@ inline static void MatrixTransposedTimesMatrix(
   }
 }
 
+RT_DIAG_POP
+
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
-inline static void MatrixTransposedTimesMatrixHelper(
+inline static RT_API_ATTRS void MatrixTransposedTimesMatrixHelper(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
     SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
     SubscriptValue n, std::optional<std::size_t> xColumnByteStride,
@@ -110,6 +117,9 @@ inline static void MatrixTransposedTimesMatrixHelper(
   }
 }
 
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // Contiguous numeric matrix*vector multiplication
 //   matrix(rows,n) * column vector(n) -> column vector(rows)
 // Straightforward algorithm:
@@ -126,7 +136,7 @@ inline static void MatrixTransposedTimesMatrixHelper(
 //   2 RES(I) = RES(I) + X(K,I)*Y(K)
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool X_HAS_STRIDED_COLUMNS>
-inline static void MatrixTransposedTimesVector(
+inline static RT_API_ATTRS void MatrixTransposedTimesVector(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
     SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y,
     std::size_t xColumnByteStride = 0) {
@@ -147,8 +157,10 @@ inline static void MatrixTransposedTimesVector(
   }
 }
 
+RT_DIAG_POP
+
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
-inline static void MatrixTransposedTimesVectorHelper(
+inline static RT_API_ATTRS void MatrixTransposedTimesVectorHelper(
     CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
     SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y,
     std::optional<std::size_t> xColumnByteStride) {
@@ -161,10 +173,13 @@ inline static void MatrixTransposedTimesVectorHelper(
   }
 }
 
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // Implements an instance of MATMUL for given argument types.
 template <bool IS_ALLOCATING, TypeCategory RCAT, int RKIND, typename XT,
     typename YT>
-inline static void DoMatmulTranspose(
+inline static RT_API_ATTRS void DoMatmulTranspose(
     std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor> &result,
     const Descriptor &x, const Descriptor &y, Terminator &terminator) {
   int xRank{x.rank()};
@@ -325,6 +340,8 @@ inline static void DoMatmulTranspose(
   }
 }
 
+RT_DIAG_POP
+
 // Maps the dynamic type information from the arguments' descriptors
 // to the right instantiation of DoMatmul() for valid combinations of
 // types.
@@ -333,8 +350,9 @@ template <bool IS_ALLOCATING> struct MatmulTranspose {
       std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor>;
   template <TypeCategory XCAT, int XKIND> struct MM1 {
     template <TypeCategory YCAT, int YKIND> struct MM2 {
-      void operator()(ResultDescriptor &result, const Descriptor &x,
-          const Descriptor &y, Terminator &terminator) const {
+      RT_API_ATTRS void operator()(ResultDescriptor &result,
+          const Descriptor &x, const Descriptor &y,
+          Terminator &terminator) const {
         if constexpr (constexpr auto resultType{
                           GetResultType(XCAT, XKIND, YCAT, YKIND)}) {
           if constexpr (Fortran::common::IsNumericTypeCategory(
@@ -349,13 +367,13 @@ template <bool IS_ALLOCATING> struct MatmulTranspose {
             static_cast<int>(XCAT), XKIND, static_cast<int>(YCAT), YKIND);
       }
     };
-    void operator()(ResultDescriptor &result, const Descriptor &x,
+    RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
         const Descriptor &y, Terminator &terminator, TypeCategory yCat,
         int yKind) const {
       ApplyType<MM2, void>(yCat, yKind, terminator, result, x, y, terminator);
     }
   };
-  void operator()(ResultDescriptor &result, const Descriptor &x,
+  RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
       const Descriptor &y, const char *sourceFile, int line) const {
     Terminator terminator{sourceFile, line};
     auto xCatKind{x.type().GetCategoryAndKind()};
@@ -369,13 +387,12 @@ template <bool IS_ALLOCATING> struct MatmulTranspose {
 
 namespace Fortran::runtime {
 extern "C" {
-void RTNAME(MatmulTranspose)(Descriptor &result, const Descriptor &x,
+void RTDEF(MatmulTranspose)(Descriptor &result, const Descriptor &x,
     const Descriptor &y, const char *sourceFile, int line) {
   MatmulTranspose<true>{}(result, x, y, sourceFile, line);
 }
-void RTNAME(MatmulTransposeDirect)(const Descriptor &result,
-    const Descriptor &x, const Descriptor &y, const char *sourceFile,
-    int line) {
+void RTDEF(MatmulTransposeDirect)(const Descriptor &result, const Descriptor &x,
+    const Descriptor &y, const char *sourceFile, int line) {
   MatmulTranspose<false>{}(result, x, y, sourceFile, line);
 }
 } // extern "C"
diff --git a/flang/runtime/matmul.cpp b/flang/runtime/matmul.cpp
index b46a94de01ceda..f06740a24b2f9a 100644
--- a/flang/runtime/matmul.cpp
+++ b/flang/runtime/matmul.cpp
@@ -29,14 +29,21 @@
 
 namespace Fortran::runtime {
 
+// Suppress the warnings about calling __host__-only std::complex operators,
+// defined in C++ STD header files, from __device__ code.
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // General accumulator for any type and stride; this is not used for
 // contiguous numeric cases.
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
 class Accumulator {
 public:
   using Result = AccumulationType<RCAT, RKIND>;
-  Accumulator(const Descriptor &x, const Descriptor &y) : x_{x}, y_{y} {}
-  void Accumulate(const SubscriptValue xAt[], const SubscriptValue yAt[]) {
+  RT_API_ATTRS Accumulator(const Descriptor &x, const Descriptor &y)
+      : x_{x}, y_{y} {}
+  RT_API_ATTRS void Accumulate(
+      const SubscriptValue xAt[], const SubscriptValue yAt[]) {
     if constexpr (RCAT == TypeCategory::Logical) {
       sum_ = sum_ ||
           (IsLogicalElementTrue(x_, xAt) && IsLogicalElementTrue(y_, yAt));
@@ -45,7 +52,7 @@ class Accumulator {
           static_cast<Result>(*y_.Element<YT>(yAt));
     }
   }
-  Result GetResult() const { return sum_; }
+  RT_API_ATTRS Result GetResult() const { return sum_; }
 
 private:
   const Descriptor &x_, &y_;
@@ -71,9 +78,10 @@ class Accumulator {
 //   2  RES(I,J) = RES(I,J) + X(I,K)*Y(K,J) ! loop-invariant last term
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool X_HAS_STRIDED_COLUMNS, bool Y_HAS_STRIDED_COLUMNS>
-inline void MatrixTimesMatrix(CppTypeFor<RCAT, RKIND> *RESTRICT product,
-    SubscriptValue rows, SubscriptValue cols, const XT *RESTRICT x,
-    const YT *RESTRICT y, SubscriptValue n, std::size_t xColumnByteStride = 0,
+inline RT_API_ATTRS void MatrixTimesMatrix(
+    CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
+    SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
+    SubscriptValue n, std::size_t xColumnByteStride = 0,
     std::size_t yColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
   std::memset(product, 0, rows * cols * sizeof *product);
@@ -102,11 +110,13 @@ inline void MatrixTimesMatrix(CppTypeFor<RCAT, RKIND> *RESTRICT product,
   }
 }
 
+RT_DIAG_POP
+
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
-inline void MatrixTimesMatrixHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
-    SubscriptValue rows, SubscriptValue cols, const XT *RESTRICT x,
-    const YT *RESTRICT y, SubscriptValue n,
-    std::optional<std::size_t> xColumnByteStride,
+inline RT_API_ATTRS void MatrixTimesMatrixHelper(
+    CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
+    SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
+    SubscriptValue n, std::optional<std::size_t> xColumnByteStride,
     std::optional<std::size_t> yColumnByteStride) {
   if (!xColumnByteStride) {
     if (!yColumnByteStride) {
@@ -127,6 +137,9 @@ inline void MatrixTimesMatrixHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
   }
 }
 
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // Contiguous numeric matrix*vector multiplication
 //   matrix(rows,n) * column vector(n) -> column vector(rows)
 // Straightforward algorithm:
@@ -143,9 +156,10 @@ inline void MatrixTimesMatrixHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
 //   2 RES(J) = RES(J) + X(J,K)*Y(K)
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool X_HAS_STRIDED_COLUMNS>
-inline void MatrixTimesVector(CppTypeFor<RCAT, RKIND> *RESTRICT product,
-    SubscriptValue rows, SubscriptValue n, const XT *RESTRICT x,
-    const YT *RESTRICT y, std::size_t xColumnByteStride = 0) {
+inline RT_API_ATTRS void MatrixTimesVector(
+    CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
+    SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y,
+    std::size_t xColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
   std::memset(product, 0, rows * sizeof *product);
   [[maybe_unused]] const XT *RESTRICT xp0{x};
@@ -163,10 +177,13 @@ inline void MatrixTimesVector(CppTypeFor<RCAT, RKIND> *RESTRICT product,
   }
 }
 
+RT_DIAG_POP
+
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT>
-inline void MatrixTimesVectorHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
-    SubscriptValue rows, SubscriptValue n, const XT *RESTRICT x,
-    const YT *RESTRICT y, std::optional<std::size_t> xColumnByteStride) {
+inline RT_API_ATTRS void MatrixTimesVectorHelper(
+    CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue rows,
+    SubscriptValue n, const XT *RESTRICT x, const YT *RESTRICT y,
+    std::optional<std::size_t> xColumnByteStride) {
   if (!xColumnByteStride) {
     MatrixTimesVector<RCAT, RKIND, XT, YT, false>(product, rows, n, x, y);
   } else {
@@ -175,6 +192,9 @@ inline void MatrixTimesVectorHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
   }
 }
 
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // Contiguous numeric vector*matrix multiplication
 //   row vector(n) * matrix(n,cols) -> row vector(cols)
 // Straightforward algorithm:
@@ -191,9 +211,10 @@ inline void MatrixTimesVectorHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
 //   2 RES(J) = RES(J) + X(K)*Y(K,J)
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool Y_HAS_STRIDED_COLUMNS>
-inline void VectorTimesMatrix(CppTypeFor<RCAT, RKIND> *RESTRICT product,
-    SubscriptValue n, SubscriptValue cols, const XT *RESTRICT x,
-    const YT *RESTRICT y, std::size_t yColumnByteStride = 0) {
+inline RT_API_ATTRS void VectorTimesMatrix(
+    CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue n,
+    SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
+    std::size_t yColumnByteStride = 0) {
   using ResultType = CppTypeFor<RCAT, RKIND>;
   std::memset(product, 0, cols * sizeof *product);
   for (SubscriptValue k{0}; k < n; ++k) {
@@ -212,11 +233,14 @@ inline void VectorTimesMatrix(CppTypeFor<RCAT, RKIND> *RESTRICT product,
   }
 }
 
+RT_DIAG_POP
+
 template <TypeCategory RCAT, int RKIND, typename XT, typename YT,
     bool SPARSE_COLUMNS = false>
-inline void VectorTimesMatrixHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
-    SubscriptValue n, SubscriptValue cols, const XT *RESTRICT x,
-    const YT *RESTRICT y, std::optional<std::size_t> yColumnByteStride) {
+inline RT_API_ATTRS void VectorTimesMatrixHelper(
+    CppTypeFor<RCAT, RKIND> *RESTRICT product, SubscriptValue n,
+    SubscriptValue cols, const XT *RESTRICT x, const YT *RESTRICT y,
+    std::optional<std::size_t> yColumnByteStride) {
   if (!yColumnByteStride) {
     VectorTimesMatrix<RCAT, RKIND, XT, YT, false>(product, n, cols, x, y);
   } else {
@@ -225,10 +249,13 @@ inline void VectorTimesMatrixHelper(CppTypeFor<RCAT, RKIND> *RESTRICT product,
   }
 }
 
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // Implements an instance of MATMUL for given argument types.
 template <bool IS_ALLOCATING, TypeCategory RCAT, int RKIND, typename XT,
     typename YT>
-static inline void DoMatmul(
+static inline RT_API_ATTRS void DoMatmul(
     std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor> &result,
     const Descriptor &x, const Descriptor &y, Terminator &terminator) {
   int xRank{x.rank()};
@@ -398,6 +425,8 @@ static inline void DoMatmul(
   }
 }
 
+RT_DIAG_POP
+
 // Maps the dynamic type information from the arguments' descriptors
 // to the right instantiation of DoMatmul() for valid combinations of
 // types.
@@ -406,8 +435,9 @@ template <bool IS_ALLOCATING> struct Matmul {
       std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor>;
   template <TypeCategory XCAT, int XKIND> struct MM1 {
     template <TypeCategory YCAT, int YKIND> struct MM2 {
-      void operator()(ResultDescriptor &result, const Descriptor &x,
-          const Descriptor &y, Terminator &terminator) const {
+      RT_API_ATTRS void operator()(ResultDescriptor &result,
+          const Descriptor &x, const Descriptor &y,
+          Terminator &terminator) const {
         if constexpr (constexpr auto resultType{
                           GetResultType(XCAT, XKIND, YCAT, YKIND)}) {
           if constexpr (common::IsNumericTypeCategory(resultType->first) ||
@@ -421,13 +451,13 @@ template <bool IS_ALLOCATING> struct Matmul {
             static_cast<int>(XCAT), XKIND, static_cast<int>(YCAT), YKIND);
       }
     };
-    void operator()(ResultDescriptor &result, const Descriptor &x,
+    RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
         const Descriptor &y, Terminator &terminator, TypeCategory yCat,
         int yKind) const {
       ApplyType<MM2, void>(yCat, yKind, terminator, result, x, y, terminator);
     }
   };
-  void operator()(ResultDescriptor &result, const Descriptor &x,
+  RT_API_ATTRS void operator()(ResultDescriptor &result, const Descriptor &x,
       const Descriptor &y, const char *sourceFile, int line) const {
     Terminator terminator{sourceFile, line};
     auto xCatKind{x.type().GetCategoryAndKind()};
@@ -439,11 +469,11 @@ template <bool IS_ALLOCATING> struct Matmul {
 };
 
 extern "C" {
-void RTNAME(Matmul)(Descriptor &result, const Descriptor &x,
-    const Descriptor &y, const char *sourceFile, int line) {
+void RTDEF(Matmul)(Descriptor &result, const Descriptor &x, const Descriptor &y,
+    const char *sourceFile, int line) {
   Matmul<true>{}(result, x, y, sourceFile, line);
 }
-void RTNAME(MatmulDirect)(const Descriptor &result, const Descriptor &x,
+void RTDEF(MatmulDirect)(const Descriptor &result, const Descriptor &x,
     const Descriptor &y, const char *sourceFile, int line) {
   Matmul<false>{}(result, x, y, sourceFile, line);
 }
diff --git a/flang/runtime/numeric.cpp b/flang/runtime/numeric.cpp
index cd54e6b54a2e53..25e58e79dbba0e 100644
--- a/flang/runtime/numeric.cpp
+++ b/flang/runtime/numeric.cpp
@@ -17,8 +17,8 @@
 namespace Fortran::runtime {
 
 template <typename RES>
-inline RES getIntArgValue(const char *source, int line, void *arg, int kind,
-    std::int64_t defaultValue, int resKind) {
+inline RT_API_ATTRS RES getIntArgValue(const char *source, int line, void *arg,
+    int kind, std::int64_t defaultValue, int resKind) {
   RES res;
   if (!arg) {
     res = static_cast<RES>(defaultValue);
@@ -49,7 +49,8 @@ inline RES getIntArgValue(const char *source, int line, void *arg, int kind,
 }
 
 // NINT (16.9.141)
-template <typename RESULT, typename ARG> inline RESULT Nint(ARG x) {
+template <typename RESULT, typename ARG>
+inline RT_API_ATTRS RESULT Nint(ARG x) {
   if (x >= 0) {
     return std::trunc(x + ARG{0.5});
   } else {
@@ -58,15 +59,18 @@ template <typename RESULT, typename ARG> inline RESULT Nint(ARG x) {
 }
 
 // CEILING & FLOOR (16.9.43, .79)
-template <typename RESULT, typename ARG> inline RESULT Ceiling(ARG x) {
+template <typename RESULT, typename ARG>
+inline RT_API_ATTRS RESULT Ceiling(ARG x) {
   return std::ceil(x);
 }
-template <typename RESULT, typename ARG> inline RESULT Floor(ARG x) {
+template <typename RESULT, typename ARG>
+inline RT_API_ATTRS RESULT Floor(ARG x) {
   return std::floor(x);
 }
 
 // EXPONENT (16.9.75)
-template <typename RESULT, typename ARG> inline RESULT Exponent(ARG x) {
+template <typename RESULT, typename ARG>
+inline RT_API_ATTRS RESULT Exponent(ARG x) {
   if (std::isinf(x) || std::isnan(x)) {
     return std::numeric_limits<RESULT>::max(); // +/-Inf, NaN -> HUGE(0)
   } else if (x == 0) {
@@ -76,8 +80,13 @@ template <typename RESULT, typename ARG> inline RESULT Exponent(ARG x) {
   }
 }
 
+// Suppress the warnings about calling __host__-only std::frexp,
+// defined in C++ STD header files, from __device__ code.
+RT_DIAG_PUSH
+RT_DIAG_DISABLE_CALL_HOST_FROM_DEVICE_WARN
+
 // FRACTION (16.9.80)
-template <typename T> inline T Fraction(T x) {
+template <typename T> inline RT_API_ATTRS T Fraction(T x) {
   if (std::isnan(x)) {
     return x; // NaN -> same NaN
   } else if (std::isinf(x)) {
@@ -90,9 +99,11 @@ template <typename T> inline T Fraction(T x) {
   }
 }
 
+RT_DIAG_POP
+
 // MOD & MODULO (16.9.135, .136)
 template <bool IS_MODULO, typename T>
-inline T IntMod(T x, T p, const char *sourceFile, int sourceLine) {
+inline RT_API_ATTRS T IntMod(T x, T p, const char *sourceFile, int sourceLine) {
   if (p == 0) {
     Terminator{sourceFile, sourceLine}.Crash(
         IS_MODULO ? "MODULO with P==0" : "MOD with P==0");
@@ -104,7 +115,8 @@ inline T IntMod(T x, T p, const char *sourceFile, int sourceLine) {
   return mod;
 }
 template <bool IS_MODULO, typename T>
-inline T RealMod(T a, T p, const char *sourceFile, int sourceLine) {
+inline RT_API_ATTRS T RealMod(
+    T a, T p, const char *sourceFile, int sourceLine) {
   if (p == 0) {
     Terminator{sourceFile, sourceLine}.Crash(
         IS_MODULO ? "MODULO with P==0" : "MOD with P==0");
@@ -120,7 +132,7 @@ inline T RealMod(T a, T p, const char *sourceFile, int sourceLine) {
 }
 
 // RRSPACING (16.9.164)
-template <int PREC, typename T> inline T RRSpacing(T x) {
+template <int PREC, typename T> inline RT_API_ATTRS T RRSpacing(T x) {
   if (std::isnan(x)) {
     return x; // NaN -> same NaN
   } else if (std::isinf(x)) {
@@ -133,7 +145,7 @@ template <int PREC, typename T> inline T RRSpacing(T x) {
 }
 
 // SCALE (16.9.166)
-template <typename T> inline T Scale(T x, std::int64_t p) {
+template <typename T> inline RT_API_ATTRS T Scale(T x, std::int64_t p) {
   auto ip{static_cast<int>(p)};
   if (ip != p) {
     ip = p < 0 ? std::numeric_limits<int>::min()
@@ -144,7 +156,7 @@ template <typename T> inline T Scale(T x, std::int64_t p) {
 
 // SELECTED_INT_KIND (16.9.169)
 template <typename T>
-inline CppTypeFor<TypeCategory::Integer, 4> SelectedIntKind(T x) {
+inline RT_API_ATTRS CppTypeFor<TypeCategory::Integer, 4> SelectedIntKind(T x) {
   if (x <= 2) {
     return 1;
   } else if (x <= 4) {
@@ -163,7 +175,8 @@ inline CppTypeFor<TypeCategory::Integer, 4> SelectedIntKind(T x) {
 
 // SELECTED_REAL_KIND (16.9.170)
 template <typename P, typename R, typename D>
-inline CppTypeFor<TypeCategory::Integer, 4> SelectedRealKind(P p, R r, D d) {
+inline RT_API_ATTRS CppTypeFor<TypeCategory::Integer, 4> SelectedRealKind(
+    P p, R r, D d) {
   if (d != 2) {
     return -5;
   }
@@ -210,7 +223,7 @@ inline CppTypeFor<TypeCategory::Integer, 4> SelectedRealKind(P p, R r, D d) {
 }
 
 // SET_EXPONENT (16.9.171)
-template <typename T> inline T SetExponent(T x, std::int64_t p) {
+template <typename T> inline RT_API_ATTRS T SetExponent(T x, std::int64_t p) {
   if (std::isnan(x)) {
     return x; // NaN -> same NaN
   } else if (std::isinf(x)) {
@@ -229,7 +242,7 @@ template <typename T> inline T SetExponent(T x, std::int64_t p) {
 }
 
 // SPACING (16.9.180)
-template <int PREC, typename T> inline T Spacing(T x) {
+template <int PREC, typename T> inline RT_API_ATTRS T Spacing(T x) {
   if (std::isnan(x)) {
     return x; // NaN -> same NaN
   } else if (std::isinf(x)) {
@@ -246,7 +259,8 @@ template <int PREC, typename T> inline T Spacing(T x) {
 }
 
 // NEAREST (16.9.139)
-template <int PREC, typename T> inline T Nearest(T x, bool positive) {
+template <int PREC, typename T>
+inline RT_API_ATTRS T Nearest(T x, bool positive) {
   auto spacing{Spacing<PREC>(x)};
   if (x == 0) {
     auto least{std::numeric_limits<T>::denorm_min()};
@@ -257,7 +271,8 @@ template <int PREC, typename T> inline T Nearest(T x, bool positive) {
 }
 
 // Exponentiation operator for (Real ** Integer) cases (10.1.5.2.1).
-template <typename BTy, typename ETy> BTy FPowI(BTy base, ETy exp) {
+template <typename BTy, typename ETy>
+RT_API_ATTRS BTy FPowI(BTy base, ETy exp) {
   if (exp == ETy{0})
     return BTy{1};
   bool isNegativePower{exp < ETy{0}};
@@ -290,564 +305,564 @@ template <typename BTy, typename ETy> BTy FPowI(BTy base, ETy exp) {
 
 extern "C" {
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling4_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Ceiling4_1)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling4_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Ceiling4_2)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Ceiling4_4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Ceiling4_8)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling4_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling4_16)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling8_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Ceiling8_1)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling8_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Ceiling8_2)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Ceiling8_4)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Ceiling8_8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling8_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling8_16)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling10_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Ceiling10_1)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling10_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Ceiling10_2)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Ceiling10_4)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Ceiling10_8)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling10_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling10_16)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Ceiling16_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Ceiling16_1)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Ceiling16_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Ceiling16_2)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Ceiling16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Ceiling16_4)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Ceiling16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Ceiling16_8)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Ceiling16_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Ceiling16_16)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Ceiling<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #endif
 
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent4_4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Exponent4_8)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent8_4)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Exponent8_8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent10_4)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Exponent10_8)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Exponent16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Exponent16_4)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Exponent16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Exponent16_8)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Exponent<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #endif
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor4_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Floor4_1)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor4_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Floor4_2)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Floor4_4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Floor4_8)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor4_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Floor4_16)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor8_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Floor8_1)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor8_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Floor8_2)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Floor8_4)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Floor8_8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor8_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Floor8_16)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor10_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Floor10_1)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor10_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Floor10_2)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Floor10_4)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Floor10_8)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor10_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Floor10_16)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Floor16_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Floor16_1)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Floor16_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Floor16_2)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Floor16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Floor16_4)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Floor16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Floor16_8)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Floor16_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Floor16_16)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Floor<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Fraction4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(Fraction4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Fraction(x);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Fraction8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(Fraction8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Fraction(x);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Fraction10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(Fraction10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Fraction(x);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Fraction16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(Fraction16)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Fraction(x);
 }
 #endif
 
-bool RTNAME(IsFinite4)(CppTypeFor<TypeCategory::Real, 4> x) {
+bool RTDEF(IsFinite4)(CppTypeFor<TypeCategory::Real, 4> x) {
   return std::isfinite(x);
 }
-bool RTNAME(IsFinite8)(CppTypeFor<TypeCategory::Real, 8> x) {
+bool RTDEF(IsFinite8)(CppTypeFor<TypeCategory::Real, 8> x) {
   return std::isfinite(x);
 }
 #if LDBL_MANT_DIG == 64
-bool RTNAME(IsFinite10)(CppTypeFor<TypeCategory::Real, 10> x) {
+bool RTDEF(IsFinite10)(CppTypeFor<TypeCategory::Real, 10> x) {
   return std::isfinite(x);
 }
 #elif LDBL_MANT_DIG == 113
-bool RTNAME(IsFinite16)(CppTypeFor<TypeCategory::Real, 16> x) {
+bool RTDEF(IsFinite16)(CppTypeFor<TypeCategory::Real, 16> x) {
   return std::isfinite(x);
 }
 #endif
 
-bool RTNAME(IsNaN4)(CppTypeFor<TypeCategory::Real, 4> x) {
+bool RTDEF(IsNaN4)(CppTypeFor<TypeCategory::Real, 4> x) {
   return std::isnan(x);
 }
-bool RTNAME(IsNaN8)(CppTypeFor<TypeCategory::Real, 8> x) {
+bool RTDEF(IsNaN8)(CppTypeFor<TypeCategory::Real, 8> x) {
   return std::isnan(x);
 }
 #if LDBL_MANT_DIG == 64
-bool RTNAME(IsNaN10)(CppTypeFor<TypeCategory::Real, 10> x) {
+bool RTDEF(IsNaN10)(CppTypeFor<TypeCategory::Real, 10> x) {
   return std::isnan(x);
 }
 #elif LDBL_MANT_DIG == 113
-bool RTNAME(IsNaN16)(CppTypeFor<TypeCategory::Real, 16> x) {
+bool RTDEF(IsNaN16)(CppTypeFor<TypeCategory::Real, 16> x) {
   return std::isnan(x);
 }
 #endif
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(ModInteger1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(ModInteger1)(
     CppTypeFor<TypeCategory::Integer, 1> x,
     CppTypeFor<TypeCategory::Integer, 1> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<false>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(ModInteger2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(ModInteger2)(
     CppTypeFor<TypeCategory::Integer, 2> x,
     CppTypeFor<TypeCategory::Integer, 2> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<false>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(ModInteger4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(ModInteger4)(
     CppTypeFor<TypeCategory::Integer, 4> x,
     CppTypeFor<TypeCategory::Integer, 4> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<false>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(ModInteger8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(ModInteger8)(
     CppTypeFor<TypeCategory::Integer, 8> x,
     CppTypeFor<TypeCategory::Integer, 8> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<false>(x, p, sourceFile, sourceLine);
 }
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(ModInteger16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(ModInteger16)(
     CppTypeFor<TypeCategory::Integer, 16> x,
     CppTypeFor<TypeCategory::Integer, 16> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<false>(x, p, sourceFile, sourceLine);
 }
 #endif
-CppTypeFor<TypeCategory::Real, 4> RTNAME(ModReal4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(ModReal4)(
     CppTypeFor<TypeCategory::Real, 4> x, CppTypeFor<TypeCategory::Real, 4> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<false>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(ModReal8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(ModReal8)(
     CppTypeFor<TypeCategory::Real, 8> x, CppTypeFor<TypeCategory::Real, 8> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<false>(x, p, sourceFile, sourceLine);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(ModReal10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(ModReal10)(
     CppTypeFor<TypeCategory::Real, 10> x, CppTypeFor<TypeCategory::Real, 10> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<false>(x, p, sourceFile, sourceLine);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(ModReal16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(ModReal16)(
     CppTypeFor<TypeCategory::Real, 16> x, CppTypeFor<TypeCategory::Real, 16> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<false>(x, p, sourceFile, sourceLine);
 }
 #endif
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(ModuloInteger1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(ModuloInteger1)(
     CppTypeFor<TypeCategory::Integer, 1> x,
     CppTypeFor<TypeCategory::Integer, 1> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<true>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(ModuloInteger2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(ModuloInteger2)(
     CppTypeFor<TypeCategory::Integer, 2> x,
     CppTypeFor<TypeCategory::Integer, 2> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<true>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(ModuloInteger4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(ModuloInteger4)(
     CppTypeFor<TypeCategory::Integer, 4> x,
     CppTypeFor<TypeCategory::Integer, 4> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<true>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(ModuloInteger8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(ModuloInteger8)(
     CppTypeFor<TypeCategory::Integer, 8> x,
     CppTypeFor<TypeCategory::Integer, 8> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<true>(x, p, sourceFile, sourceLine);
 }
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(ModuloInteger16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(ModuloInteger16)(
     CppTypeFor<TypeCategory::Integer, 16> x,
     CppTypeFor<TypeCategory::Integer, 16> p, const char *sourceFile,
     int sourceLine) {
   return IntMod<true>(x, p, sourceFile, sourceLine);
 }
 #endif
-CppTypeFor<TypeCategory::Real, 4> RTNAME(ModuloReal4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(ModuloReal4)(
     CppTypeFor<TypeCategory::Real, 4> x, CppTypeFor<TypeCategory::Real, 4> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<true>(x, p, sourceFile, sourceLine);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(ModuloReal8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(ModuloReal8)(
     CppTypeFor<TypeCategory::Real, 8> x, CppTypeFor<TypeCategory::Real, 8> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<true>(x, p, sourceFile, sourceLine);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(ModuloReal10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(ModuloReal10)(
     CppTypeFor<TypeCategory::Real, 10> x, CppTypeFor<TypeCategory::Real, 10> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<true>(x, p, sourceFile, sourceLine);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(ModuloReal16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(ModuloReal16)(
     CppTypeFor<TypeCategory::Real, 16> x, CppTypeFor<TypeCategory::Real, 16> p,
     const char *sourceFile, int sourceLine) {
   return RealMod<true>(x, p, sourceFile, sourceLine);
 }
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Nearest4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(Nearest4)(
     CppTypeFor<TypeCategory::Real, 4> x, bool positive) {
   return Nearest<24>(x, positive);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Nearest8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(Nearest8)(
     CppTypeFor<TypeCategory::Real, 8> x, bool positive) {
   return Nearest<53>(x, positive);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Nearest10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(Nearest10)(
     CppTypeFor<TypeCategory::Real, 10> x, bool positive) {
   return Nearest<64>(x, positive);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Nearest16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(Nearest16)(
     CppTypeFor<TypeCategory::Real, 16> x, bool positive) {
   return Nearest<113>(x, positive);
 }
 #endif
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint4_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Nint4_1)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint4_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Nint4_2)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint4_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Nint4_4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint4_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Nint4_8)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint4_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Nint4_16)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint8_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Nint8_1)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint8_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Nint8_2)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint8_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Nint8_4)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint8_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Nint8_8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint8_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Nint8_16)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint10_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Nint10_1)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint10_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Nint10_2)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint10_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Nint10_4)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint10_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Nint10_8)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint10_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Nint10_16)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(Nint16_1)(
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(Nint16_1)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 1>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(Nint16_2)(
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(Nint16_2)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 2>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(Nint16_4)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(Nint16_4)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 4>>(x);
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(Nint16_8)(
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(Nint16_8)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 8>>(x);
 }
 #if defined __SIZEOF_INT128__ && !AVOID_NATIVE_UINT128_T
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(Nint16_16)(
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(Nint16_16)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Nint<CppTypeFor<TypeCategory::Integer, 16>>(x);
 }
 #endif
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(RRSpacing4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(RRSpacing4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return RRSpacing<24>(x);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(RRSpacing8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(RRSpacing8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return RRSpacing<53>(x);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(RRSpacing10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(RRSpacing10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return RRSpacing<64>(x);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(RRSpacing16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(RRSpacing16)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return RRSpacing<113>(x);
 }
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(SetExponent4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(SetExponent4)(
     CppTypeFor<TypeCategory::Real, 4> x, std::int64_t p) {
   return SetExponent(x, p);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(SetExponent8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(SetExponent8)(
     CppTypeFor<TypeCategory::Real, 8> x, std::int64_t p) {
   return SetExponent(x, p);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(SetExponent10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(SetExponent10)(
     CppTypeFor<TypeCategory::Real, 10> x, std::int64_t p) {
   return SetExponent(x, p);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(SetExponent16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(SetExponent16)(
     CppTypeFor<TypeCategory::Real, 16> x, std::int64_t p) {
   return SetExponent(x, p);
 }
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Scale4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(Scale4)(
     CppTypeFor<TypeCategory::Real, 4> x, std::int64_t p) {
   return Scale(x, p);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Scale8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(Scale8)(
     CppTypeFor<TypeCategory::Real, 8> x, std::int64_t p) {
   return Scale(x, p);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Scale10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(Scale10)(
     CppTypeFor<TypeCategory::Real, 10> x, std::int64_t p) {
   return Scale(x, p);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Scale16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(Scale16)(
     CppTypeFor<TypeCategory::Real, 16> x, std::int64_t p) {
   return Scale(x, p);
 }
 #endif
 
 // SELECTED_INT_KIND
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(SelectedIntKind)(
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(SelectedIntKind)(
     const char *source, int line, void *x, int xKind) {
 #ifdef __SIZEOF_INT128__
   CppTypeFor<TypeCategory::Integer, 16> r =
@@ -861,9 +876,9 @@ CppTypeFor<TypeCategory::Integer, 4> RTNAME(SelectedIntKind)(
 }
 
 // SELECTED_REAL_KIND
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(SelectedRealKind)(
-    const char *source, int line, void *precision, int pKind, void *range,
-    int rKind, void *radix, int dKind) {
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(SelectedRealKind)(const char *source,
+    int line, void *precision, int pKind, void *range, int rKind, void *radix,
+    int dKind) {
 #ifdef __SIZEOF_INT128__
   CppTypeFor<TypeCategory::Integer, 16> p =
       getIntArgValue<CppTypeFor<TypeCategory::Integer, 16>>(
@@ -885,70 +900,70 @@ CppTypeFor<TypeCategory::Integer, 4> RTNAME(SelectedRealKind)(
   return SelectedRealKind(p, r, d);
 }
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(Spacing4)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(Spacing4)(
     CppTypeFor<TypeCategory::Real, 4> x) {
   return Spacing<24>(x);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(Spacing8)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(Spacing8)(
     CppTypeFor<TypeCategory::Real, 8> x) {
   return Spacing<53>(x);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(Spacing10)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(Spacing10)(
     CppTypeFor<TypeCategory::Real, 10> x) {
   return Spacing<64>(x);
 }
 #elif LDBL_MANT_DIG == 113
-CppTypeFor<TypeCategory::Real, 16> RTNAME(Spacing16)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(Spacing16)(
     CppTypeFor<TypeCategory::Real, 16> x) {
   return Spacing<113>(x);
 }
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(FPow4i)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(FPow4i)(
     CppTypeFor<TypeCategory::Real, 4> b,
     CppTypeFor<TypeCategory::Integer, 4> e) {
   return FPowI(b, e);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(FPow8i)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(FPow8i)(
     CppTypeFor<TypeCategory::Real, 8> b,
     CppTypeFor<TypeCategory::Integer, 4> e) {
   return FPowI(b, e);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(FPow10i)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(FPow10i)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 4> e) {
   return FPowI(b, e);
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(FPow16i)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(FPow16i)(
     CppTypeFor<TypeCategory::Real, 16> b,
     CppTypeFor<TypeCategory::Integer, 4> e) {
   return FPowI(b, e);
 }
 #endif
 
-CppTypeFor<TypeCategory::Real, 4> RTNAME(FPow4k)(
+CppTypeFor<TypeCategory::Real, 4> RTDEF(FPow4k)(
     CppTypeFor<TypeCategory::Real, 4> b,
     CppTypeFor<TypeCategory::Integer, 8> e) {
   return FPowI(b, e);
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(FPow8k)(
+CppTypeFor<TypeCategory::Real, 8> RTDEF(FPow8k)(
     CppTypeFor<TypeCategory::Real, 8> b,
     CppTypeFor<TypeCategory::Integer, 8> e) {
   return FPowI(b, e);
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(FPow10k)(
+CppTypeFor<TypeCategory::Real, 10> RTDEF(FPow10k)(
     CppTypeFor<TypeCategory::Real, 10> b,
     CppTypeFor<TypeCategory::Integer, 8> e) {
   return FPowI(b, e);
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(FPow16k)(
+CppTypeFor<TypeCategory::Real, 16> RTDEF(FPow16k)(
     CppTypeFor<TypeCategory::Real, 16> b,
     CppTypeFor<TypeCategory::Integer, 8> e) {
   return FPowI(b, e);
diff --git a/flang/runtime/reduction-templates.h b/flang/runtime/reduction-templates.h
index 2aaf5c102a9ca9..cf1ee8a967750d 100644
--- a/flang/runtime/reduction-templates.h
+++ b/flang/runtime/reduction-templates.h
@@ -40,7 +40,7 @@ namespace Fortran::runtime {
 // cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
 // cases where the argument has rank 1 and DIM=, if present, must be 1.
 template <typename TYPE, typename ACCUMULATOR>
-inline void DoTotalReduction(const Descriptor &x, int dim,
+inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
     Terminator &terminator) {
   if (dim < 0 || dim > 1) {
@@ -76,7 +76,7 @@ inline void DoTotalReduction(const Descriptor &x, int dim,
 }
 
 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
-inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
+inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask,
     ACCUMULATOR &&accumulator, const char *intrinsic) {
   Terminator terminator{source, line};
@@ -98,7 +98,7 @@ inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
 // lower bounds other than one.  This utility subroutine creates an
 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
 // elements of array(j,:,k) can be reduced.
-inline void GetExpandedSubscripts(SubscriptValue at[],
+inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
     const Descriptor &descriptor, int zeroBasedDim,
     const SubscriptValue from[]) {
   descriptor.GetLowerBounds(at);
@@ -113,8 +113,9 @@ inline void GetExpandedSubscripts(SubscriptValue at[],
 }
 
 template <typename TYPE, typename ACCUMULATOR>
-inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
-    SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
+inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
+    int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
+    ACCUMULATOR &accumulator) {
   SubscriptValue xAt[maxRank];
   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
   const auto &dim{x.GetDimension(zeroBasedDim)};
@@ -133,9 +134,9 @@ inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
 }
 
 template <typename TYPE, typename ACCUMULATOR>
-inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
-    SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
-    ACCUMULATOR &accumulator) {
+inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
+    int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
+    TYPE *result, ACCUMULATOR &accumulator) {
   SubscriptValue xAt[maxRank], maskAt[maxRank];
   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
@@ -162,7 +163,7 @@ inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
 
 // Utility: establishes & allocates the result array for a partial
 // reduction (i.e., one with DIM=).
-static void CreatePartialReductionResult(Descriptor &result,
+static RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result,
     const Descriptor &x, std::size_t resultElementSize, int dim,
     Terminator &terminator, const char *intrinsic, TypeCode typeCode) {
   int xRank{x.rank()};
@@ -192,9 +193,10 @@ static void CreatePartialReductionResult(Descriptor &result,
 // Partial reductions with DIM=
 
 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
-inline void PartialReduction(Descriptor &result, const Descriptor &x,
-    std::size_t resultElementSize, int dim, const Descriptor *mask,
-    Terminator &terminator, const char *intrinsic, ACCUMULATOR &accumulator) {
+inline RT_API_ATTRS void PartialReduction(Descriptor &result,
+    const Descriptor &x, std::size_t resultElementSize, int dim,
+    const Descriptor *mask, Terminator &terminator, const char *intrinsic,
+    ACCUMULATOR &accumulator) {
   CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
       intrinsic, TypeCode{CAT, KIND});
   SubscriptValue at[maxRank];
@@ -233,8 +235,8 @@ struct PartialIntegerReductionHelper {
   template <int KIND> struct Functor {
     static constexpr int Intermediate{
         std::max(KIND, 4)}; // use at least "int" for intermediate results
-    void operator()(Descriptor &result, const Descriptor &x, int dim,
-        const Descriptor *mask, Terminator &terminator,
+    RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
+        int dim, const Descriptor *mask, Terminator &terminator,
         const char *intrinsic) const {
       using Accumulator =
           ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
@@ -248,9 +250,9 @@ struct PartialIntegerReductionHelper {
 };
 
 template <template <typename> class INTEGER_ACCUM>
-inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
-    int dim, int kind, const Descriptor *mask, const char *intrinsic,
-    Terminator &terminator) {
+inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
+    const Descriptor &x, int dim, int kind, const Descriptor *mask,
+    const char *intrinsic, Terminator &terminator) {
   ApplyIntegerKind<
       PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
       kind, terminator, result, x, dim, mask, terminator, intrinsic);
@@ -261,8 +263,8 @@ struct PartialFloatingReductionHelper {
   template <int KIND> struct Functor {
     static constexpr int Intermediate{
         std::max(KIND, 8)}; // use at least "double" for intermediate results
-    void operator()(Descriptor &result, const Descriptor &x, int dim,
-        const Descriptor *mask, Terminator &terminator,
+    RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
+        int dim, const Descriptor *mask, Terminator &terminator,
         const char *intrinsic) const {
       using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
       Accumulator accumulator{x};
@@ -277,7 +279,7 @@ struct PartialFloatingReductionHelper {
 template <template <typename> class INTEGER_ACCUM,
     template <typename> class REAL_ACCUM,
     template <typename> class COMPLEX_ACCUM>
-inline void TypedPartialNumericReduction(Descriptor &result,
+inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
     const Descriptor &x, int dim, const char *source, int line,
     const Descriptor *mask, const char *intrinsic) {
   Terminator terminator{source, line};
@@ -307,7 +309,8 @@ inline void TypedPartialNumericReduction(Descriptor &result,
 
 template <typename ACCUMULATOR> struct LocationResultHelper {
   template <int KIND> struct Functor {
-    void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
+    RT_API_ATTRS void operator()(
+        ACCUMULATOR &accumulator, const Descriptor &result) const {
       accumulator.GetResult(
           result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
     }
@@ -316,9 +319,9 @@ template <typename ACCUMULATOR> struct LocationResultHelper {
 
 template <typename ACCUMULATOR> struct PartialLocationHelper {
   template <int KIND> struct Functor {
-    void operator()(Descriptor &result, const Descriptor &x, int dim,
-        const Descriptor *mask, Terminator &terminator, const char *intrinsic,
-        ACCUMULATOR &accumulator) const {
+    RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
+        int dim, const Descriptor *mask, Terminator &terminator,
+        const char *intrinsic, ACCUMULATOR &accumulator) const {
       // Element size of the destination descriptor is the size
       // of {TypeCategory::Integer, KIND}.
       PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
diff --git a/flang/runtime/reduction.cpp b/flang/runtime/reduction.cpp
index e8c2bd3e77e274..63e309ef86ac45 100644
--- a/flang/runtime/reduction.cpp
+++ b/flang/runtime/reduction.cpp
@@ -24,12 +24,15 @@ namespace Fortran::runtime {
 
 template <typename INTERMEDIATE> class IntegerAndAccumulator {
 public:
-  explicit IntegerAndAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() { and_ = ~INTERMEDIATE{0}; }
-  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+  explicit RT_API_ATTRS IntegerAndAccumulator(const Descriptor &array)
+      : array_{array} {}
+  RT_API_ATTRS void Reinitialize() { and_ = ~INTERMEDIATE{0}; }
+  template <typename A>
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = static_cast<A>(and_);
   }
-  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+  template <typename A>
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     and_ &= *array_.Element<A>(at);
     return true;
   }
@@ -41,12 +44,15 @@ template <typename INTERMEDIATE> class IntegerAndAccumulator {
 
 template <typename INTERMEDIATE> class IntegerOrAccumulator {
 public:
-  explicit IntegerOrAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() { or_ = 0; }
-  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+  explicit RT_API_ATTRS IntegerOrAccumulator(const Descriptor &array)
+      : array_{array} {}
+  RT_API_ATTRS void Reinitialize() { or_ = 0; }
+  template <typename A>
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = static_cast<A>(or_);
   }
-  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+  template <typename A>
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     or_ |= *array_.Element<A>(at);
     return true;
   }
@@ -58,12 +64,15 @@ template <typename INTERMEDIATE> class IntegerOrAccumulator {
 
 template <typename INTERMEDIATE> class IntegerXorAccumulator {
 public:
-  explicit IntegerXorAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() { xor_ = 0; }
-  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+  explicit RT_API_ATTRS IntegerXorAccumulator(const Descriptor &array)
+      : array_{array} {}
+  RT_API_ATTRS void Reinitialize() { xor_ = 0; }
+  template <typename A>
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = static_cast<A>(xor_);
   }
-  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+  template <typename A>
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     xor_ ^= *array_.Element<A>(at);
     return true;
   }
@@ -74,35 +83,35 @@ template <typename INTERMEDIATE> class IntegerXorAccumulator {
 };
 
 extern "C" {
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(IAll1)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAll1)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
       IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL");
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(IAll2)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAll2)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
       IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL");
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(IAll4)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAll4)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
       IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IALL");
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(IAll8)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAll8)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
       IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IALL");
 }
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(IAll16)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAll16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
       mask, IntegerAndAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
       "IALL");
 }
 #endif
-void RTNAME(IAllDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(IAllDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line, const Descriptor *mask) {
   Terminator terminator{source, line};
   auto catKind{x.type().GetCategoryAndKind()};
@@ -112,35 +121,35 @@ void RTNAME(IAllDim)(Descriptor &result, const Descriptor &x, int dim,
       result, x, dim, catKind->second, mask, "IALL", terminator);
 }
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(IAny1)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(IAny1)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
       IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY");
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(IAny2)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(IAny2)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
       IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY");
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(IAny4)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(IAny4)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
       IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "IANY");
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(IAny8)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(IAny8)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
       IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "IANY");
 }
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(IAny16)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(IAny16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
       mask, IntegerOrAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
       "IANY");
 }
 #endif
-void RTNAME(IAnyDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(IAnyDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line, const Descriptor *mask) {
   Terminator terminator{source, line};
   auto catKind{x.type().GetCategoryAndKind()};
@@ -150,39 +159,39 @@ void RTNAME(IAnyDim)(Descriptor &result, const Descriptor &x, int dim,
       result, x, dim, catKind->second, mask, "IANY", terminator);
 }
 
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(IParity1)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(IParity1)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
       IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
       "IPARITY");
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(IParity2)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(IParity2)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
       IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
       "IPARITY");
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(IParity4)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(IParity4)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
       IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
       "IPARITY");
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(IParity8)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(IParity8)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
       IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x},
       "IPARITY");
 }
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(IParity16)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(IParity16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
       mask, IntegerXorAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
       "IPARITY");
 }
 #endif
-void RTNAME(IParityDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(IParityDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line, const Descriptor *mask) {
   Terminator terminator{source, line};
   auto catKind{x.type().GetCategoryAndKind()};
@@ -328,47 +337,46 @@ template <int KIND> struct CountDimension {
 
 extern "C" {
 
-bool RTNAME(All)(const Descriptor &x, const char *source, int line, int dim) {
+bool RTDEF(All)(const Descriptor &x, const char *source, int line, int dim) {
   return GetTotalLogicalReduction(x, source, line, dim,
       LogicalAccumulator<LogicalReduction::All>{x}, "ALL");
 }
-void RTNAME(AllDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(AllDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line) {
   Terminator terminator{source, line};
   DoReduceLogicalDimension<LogicalReduction::All>(
       result, x, dim, terminator, "ALL");
 }
 
-bool RTNAME(Any)(const Descriptor &x, const char *source, int line, int dim) {
+bool RTDEF(Any)(const Descriptor &x, const char *source, int line, int dim) {
   return GetTotalLogicalReduction(x, source, line, dim,
       LogicalAccumulator<LogicalReduction::Any>{x}, "ANY");
 }
-void RTNAME(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line) {
   Terminator terminator{source, line};
   DoReduceLogicalDimension<LogicalReduction::Any>(
       result, x, dim, terminator, "ANY");
 }
 
-std::int64_t RTNAME(Count)(
+std::int64_t RTDEF(Count)(
     const Descriptor &x, const char *source, int line, int dim) {
   return GetTotalLogicalReduction(
       x, source, line, dim, CountAccumulator{x}, "COUNT");
 }
 
-void RTNAME(CountDim)(Descriptor &result, const Descriptor &x, int dim,
-    int kind, const char *source, int line) {
+void RTDEF(CountDim)(Descriptor &result, const Descriptor &x, int dim, int kind,
+    const char *source, int line) {
   Terminator terminator{source, line};
   ApplyIntegerKind<CountDimension, void>(
       kind, terminator, result, x, dim, terminator);
 }
 
-bool RTNAME(Parity)(
-    const Descriptor &x, const char *source, int line, int dim) {
+bool RTDEF(Parity)(const Descriptor &x, const char *source, int line, int dim) {
   return GetTotalLogicalReduction(x, source, line, dim,
       LogicalAccumulator<LogicalReduction::Parity>{x}, "PARITY");
 }
-void RTNAME(ParityDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(ParityDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line) {
   Terminator terminator{source, line};
   DoReduceLogicalDimension<LogicalReduction::Parity>(
diff --git a/flang/runtime/sum.cpp b/flang/runtime/sum.cpp
index c3c14829638462..5d025a4b33d6f7 100644
--- a/flang/runtime/sum.cpp
+++ b/flang/runtime/sum.cpp
@@ -23,12 +23,15 @@ namespace Fortran::runtime {
 
 template <typename INTERMEDIATE> class IntegerSumAccumulator {
 public:
-  explicit IntegerSumAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() { sum_ = 0; }
-  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+  explicit RT_API_ATTRS IntegerSumAccumulator(const Descriptor &array)
+      : array_{array} {}
+  void RT_API_ATTRS Reinitialize() { sum_ = 0; }
+  template <typename A>
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = static_cast<A>(sum_);
   }
-  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+  template <typename A>
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     sum_ += *array_.Element<A>(at);
     return true;
   }
@@ -40,13 +43,15 @@ template <typename INTERMEDIATE> class IntegerSumAccumulator {
 
 template <typename INTERMEDIATE> class RealSumAccumulator {
 public:
-  explicit RealSumAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() { sum_ = correction_ = 0; }
-  template <typename A> A Result() const { return sum_; }
-  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+  explicit RT_API_ATTRS RealSumAccumulator(const Descriptor &array)
+      : array_{array} {}
+  void RT_API_ATTRS Reinitialize() { sum_ = correction_ = 0; }
+  template <typename A> RT_API_ATTRS A Result() const { return sum_; }
+  template <typename A>
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     *p = Result<A>();
   }
-  template <typename A> bool Accumulate(A x) {
+  template <typename A> RT_API_ATTRS bool Accumulate(A x) {
     // Kahan summation
     auto next{x + correction_};
     auto oldSum{sum_};
@@ -54,7 +59,8 @@ template <typename INTERMEDIATE> class RealSumAccumulator {
     correction_ = (sum_ - oldSum) - next; // algebraically zero
     return true;
   }
-  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+  template <typename A>
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     return Accumulate(*array_.Element<A>(at));
   }
 
@@ -65,22 +71,25 @@ template <typename INTERMEDIATE> class RealSumAccumulator {
 
 template <typename PART> class ComplexSumAccumulator {
 public:
-  explicit ComplexSumAccumulator(const Descriptor &array) : array_{array} {}
-  void Reinitialize() {
+  explicit RT_API_ATTRS ComplexSumAccumulator(const Descriptor &array)
+      : array_{array} {}
+  void RT_API_ATTRS Reinitialize() {
     reals_.Reinitialize();
     imaginaries_.Reinitialize();
   }
-  template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
+  template <typename A>
+  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
     using ResultPart = typename A::value_type;
     *p = {reals_.template Result<ResultPart>(),
         imaginaries_.template Result<ResultPart>()};
   }
-  template <typename A> bool Accumulate(const A &z) {
+  template <typename A> RT_API_ATTRS bool Accumulate(const A &z) {
     reals_.Accumulate(z.real());
     imaginaries_.Accumulate(z.imag());
     return true;
   }
-  template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
+  template <typename A>
+  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
     return Accumulate(*array_.Element<A>(at));
   }
 
@@ -90,28 +99,28 @@ template <typename PART> class ComplexSumAccumulator {
 };
 
 extern "C" {
-CppTypeFor<TypeCategory::Integer, 1> RTNAME(SumInteger1)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 1> RTDEF(SumInteger1)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
 }
-CppTypeFor<TypeCategory::Integer, 2> RTNAME(SumInteger2)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 2> RTDEF(SumInteger2)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
 }
-CppTypeFor<TypeCategory::Integer, 4> RTNAME(SumInteger4)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 4> RTDEF(SumInteger4)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
 }
-CppTypeFor<TypeCategory::Integer, 8> RTNAME(SumInteger8)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 8> RTDEF(SumInteger8)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "SUM");
 }
 #ifdef __SIZEOF_INT128__
-CppTypeFor<TypeCategory::Integer, 16> RTNAME(SumInteger16)(const Descriptor &x,
+CppTypeFor<TypeCategory::Integer, 16> RTDEF(SumInteger16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
       mask, IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
@@ -120,52 +129,52 @@ CppTypeFor<TypeCategory::Integer, 16> RTNAME(SumInteger16)(const Descriptor &x,
 #endif
 
 // TODO: real/complex(2 & 3)
-CppTypeFor<TypeCategory::Real, 4> RTNAME(SumReal4)(const Descriptor &x,
+CppTypeFor<TypeCategory::Real, 4> RTDEF(SumReal4)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Real, 4>(
       x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
 }
-CppTypeFor<TypeCategory::Real, 8> RTNAME(SumReal8)(const Descriptor &x,
+CppTypeFor<TypeCategory::Real, 8> RTDEF(SumReal8)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Real, 8>(
       x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
 }
 #if LDBL_MANT_DIG == 64
-CppTypeFor<TypeCategory::Real, 10> RTNAME(SumReal10)(const Descriptor &x,
+CppTypeFor<TypeCategory::Real, 10> RTDEF(SumReal10)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Real, 10>(
       x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
 }
 #endif
 #if LDBL_MANT_DIG == 113 || HAS_FLOAT128
-CppTypeFor<TypeCategory::Real, 16> RTNAME(SumReal16)(const Descriptor &x,
+CppTypeFor<TypeCategory::Real, 16> RTDEF(SumReal16)(const Descriptor &x,
     const char *source, int line, int dim, const Descriptor *mask) {
   return GetTotalReduction<TypeCategory::Real, 16>(
       x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
 }
 #endif
 
-void RTNAME(CppSumComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
+void RTDEF(CppSumComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
   result = GetTotalReduction<TypeCategory::Complex, 4>(
       x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
 }
-void RTNAME(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
+void RTDEF(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
   result = GetTotalReduction<TypeCategory::Complex, 8>(
       x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
 }
 #if LDBL_MANT_DIG == 64
-void RTNAME(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
+void RTDEF(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
   result = GetTotalReduction<TypeCategory::Complex, 10>(
       x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
 }
 #elif LDBL_MANT_DIG == 113
-void RTNAME(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
+void RTDEF(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
     const Descriptor &x, const char *source, int line, int dim,
     const Descriptor *mask) {
   result = GetTotalReduction<TypeCategory::Complex, 16>(
@@ -173,7 +182,7 @@ void RTNAME(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
 }
 #endif
 
-void RTNAME(SumDim)(Descriptor &result, const Descriptor &x, int dim,
+void RTDEF(SumDim)(Descriptor &result, const Descriptor &x, int dim,
     const char *source, int line, const Descriptor *mask) {
   TypedPartialNumericReduction<IntegerSumAccumulator, RealSumAccumulator,
       ComplexSumAccumulator>(result, x, dim, source, line, mask, "SUM");
diff --git a/flang/runtime/support.cpp b/flang/runtime/support.cpp
index 88a3e79009f6cd..08aa8014f4f733 100644
--- a/flang/runtime/support.cpp
+++ b/flang/runtime/support.cpp
@@ -12,7 +12,7 @@
 namespace Fortran::runtime {
 extern "C" {
 
-bool RTNAME(IsContiguous)(const Descriptor &descriptor) {
+bool RTDEF(IsContiguous)(const Descriptor &descriptor) {
   return descriptor.IsContiguous();
 }
 



More information about the flang-commits mailing list