[flang-commits] [flang] [flang][runtime] Added Assign runtime to CUDA build closure. (PR #68171)

Slava Zakharin via flang-commits flang-commits at lists.llvm.org
Tue Oct 3 21:25:29 PDT 2023


https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/68171

>From 1a4921efd775cb19aa1a9e8d332de737fe4bb0e1 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 29 Sep 2023 12:05:05 -0700
Subject: [PATCH 1/2] [flang][runtime] Added Assign runtime to CUDA build
 closure.

---
 flang/include/flang/ISO_Fortran_binding.h | 15 ++--
 flang/include/flang/Runtime/api-attrs.h   |  9 +++
 flang/include/flang/Runtime/assign.h      | 10 +--
 flang/include/flang/Runtime/descriptor.h  |  2 +
 flang/runtime/CMakeLists.txt              |  6 ++
 flang/runtime/ISO_Fortran_binding.cpp     | 23 +++---
 flang/runtime/assign-impl.h               |  3 +-
 flang/runtime/assign.cpp                  | 63 +++++++++-------
 flang/runtime/derived.cpp                 | 18 +++--
 flang/runtime/freestanding-tools.h        | 89 +++++++++++++++++++++--
 flang/runtime/stat.cpp                    | 13 +++-
 flang/runtime/stat.h                      |  9 ++-
 flang/runtime/tools.cpp                   | 31 ++++----
 flang/runtime/tools.h                     | 36 ++++-----
 flang/runtime/type-info.cpp               | 27 ++++---
 15 files changed, 246 insertions(+), 108 deletions(-)

diff --git a/flang/include/flang/ISO_Fortran_binding.h b/flang/include/flang/ISO_Fortran_binding.h
index 59e31462ab5aac7..2893fd46c267d9b 100644
--- a/flang/include/flang/ISO_Fortran_binding.h
+++ b/flang/include/flang/ISO_Fortran_binding.h
@@ -127,8 +127,8 @@ namespace cfi_internal {
 template <typename T> struct FlexibleArray : T {
   RT_API_ATTRS T &operator[](int index) { return *(this + index); }
   const RT_API_ATTRS T &operator[](int index) const { return *(this + index); }
-  operator T *() { return this; }
-  operator const T *() const { return this; }
+  RT_API_ATTRS operator T *() { return this; }
+  RT_API_ATTRS operator const T *() const { return this; }
 };
 } // namespace cfi_internal
 #endif
@@ -182,19 +182,20 @@ template <> struct CdescStorage<0> : public CFI_cdesc_t {};
 #ifdef __cplusplus
 extern "C" {
 #endif
-void *CFI_address(const CFI_cdesc_t *, const CFI_index_t subscripts[]);
-int CFI_allocate(CFI_cdesc_t *, const CFI_index_t lower_bounds[],
+RT_API_ATTRS void *CFI_address(
+    const CFI_cdesc_t *, const CFI_index_t subscripts[]);
+RT_API_ATTRS int CFI_allocate(CFI_cdesc_t *, const CFI_index_t lower_bounds[],
     const CFI_index_t upper_bounds[], size_t elem_len);
 RT_API_ATTRS int CFI_deallocate(CFI_cdesc_t *);
 int CFI_establish(CFI_cdesc_t *, void *base_addr, CFI_attribute_t, CFI_type_t,
     size_t elem_len, CFI_rank_t, const CFI_index_t extents[]);
-int CFI_is_contiguous(const CFI_cdesc_t *);
+RT_API_ATTRS int CFI_is_contiguous(const CFI_cdesc_t *);
 RT_API_ATTRS int CFI_section(CFI_cdesc_t *, const CFI_cdesc_t *source,
     const CFI_index_t lower_bounds[], const CFI_index_t upper_bounds[],
     const CFI_index_t strides[]);
-int CFI_select_part(CFI_cdesc_t *, const CFI_cdesc_t *source,
+RT_API_ATTRS int CFI_select_part(CFI_cdesc_t *, const CFI_cdesc_t *source,
     size_t displacement, size_t elem_len);
-int CFI_setpointer(
+RT_API_ATTRS int CFI_setpointer(
     CFI_cdesc_t *, const CFI_cdesc_t *source, const CFI_index_t lower_bounds[]);
 #ifdef __cplusplus
 } // extern "C"
diff --git a/flang/include/flang/Runtime/api-attrs.h b/flang/include/flang/Runtime/api-attrs.h
index 7420472aed670e4..9835adbfb0f8da5 100644
--- a/flang/include/flang/Runtime/api-attrs.h
+++ b/flang/include/flang/Runtime/api-attrs.h
@@ -54,6 +54,15 @@
 #define RT_OFFLOAD_API_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
 #define RT_OFFLOAD_API_GROUP_END RT_EXT_API_GROUP_END
 
+/*
+ * RT_OFFLOAD_VAR_GROUP_BEGIN/END pair is placed around definitions
+ * of variables (e.g. globals or static class members) that can be
+ * referenced in functions marked with RT_OFFLOAD_API_GROUP_BEGIN/END.
+ * For OpenMP offload these variables are made "declare target".
+ */
+#define RT_OFFLOAD_VAR_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
+#define RT_OFFLOAD_VAR_GROUP_END RT_EXT_API_GROUP_END
+
 /*
  * RT_VAR_GROUP_BEGIN/END pair is placed around definitions
  * of module scope variables referenced by Flang runtime (directly
diff --git a/flang/include/flang/Runtime/assign.h b/flang/include/flang/Runtime/assign.h
index 779997dab61868b..b19c02f44c73bce 100644
--- a/flang/include/flang/Runtime/assign.h
+++ b/flang/include/flang/Runtime/assign.h
@@ -30,23 +30,23 @@ class Descriptor;
 
 extern "C" {
 // API for lowering assignment
-void RTNAME(Assign)(Descriptor &to, const Descriptor &from,
+void RTDECL(Assign)(Descriptor &to, const Descriptor &from,
     const char *sourceFile = nullptr, int sourceLine = 0);
 // This variant has no finalization, defined assignment, or allocatable
 // reallocation.
-void RTNAME(AssignTemporary)(Descriptor &to, const Descriptor &from,
+void RTDECL(AssignTemporary)(Descriptor &to, const Descriptor &from,
     const char *sourceFile = nullptr, int sourceLine = 0);
-void RTNAME(CopyOutAssign)(Descriptor &to, const Descriptor &from,
+void RTDECL(CopyOutAssign)(Descriptor &to, const Descriptor &from,
     bool skipToInit, const char *sourceFile = nullptr, int sourceLine = 0);
 // This variant is for assignments to explicit-length CHARACTER left-hand
 // sides that might need to handle truncation or blank-fill, and
 // must maintain the character length even if an allocatable array
 // is reallocated.
-void RTNAME(AssignExplicitLengthCharacter)(Descriptor &to,
+void RTDECL(AssignExplicitLengthCharacter)(Descriptor &to,
     const Descriptor &from, const char *sourceFile = nullptr,
     int sourceLine = 0);
 // This variant is assignments to whole polymorphic allocatables.
-void RTNAME(AssignPolymorphic)(Descriptor &to, const Descriptor &from,
+void RTDECL(AssignPolymorphic)(Descriptor &to, const Descriptor &from,
     const char *sourceFile = nullptr, int sourceLine = 0);
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h
index d26139321227fc2..c9a3b1b0310077e 100644
--- a/flang/include/flang/Runtime/descriptor.h
+++ b/flang/include/flang/Runtime/descriptor.h
@@ -427,11 +427,13 @@ static_assert(sizeof(Descriptor) == sizeof(ISO::CFI_cdesc_t));
 template <int MAX_RANK = maxRank, bool ADDENDUM = false, int MAX_LEN_PARMS = 0>
 class alignas(Descriptor) StaticDescriptor {
 public:
+  RT_OFFLOAD_VAR_GROUP_BEGIN
   static constexpr int maxRank{MAX_RANK};
   static constexpr int maxLengthTypeParameters{MAX_LEN_PARMS};
   static constexpr bool hasAddendum{ADDENDUM || MAX_LEN_PARMS > 0};
   static constexpr std::size_t byteSize{
       Descriptor::SizeInBytes(maxRank, hasAddendum, maxLengthTypeParameters)};
+  RT_OFFLOAD_VAR_GROUP_END
 
   RT_API_ATTRS Descriptor &descriptor() {
     return *reinterpret_cast<Descriptor *>(storage_);
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index e7d416749219ef6..f75daa373705f00 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -150,10 +150,16 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
 
 # List of files that are buildable for all devices.
 set(supported_files
+  ISO_Fortran_binding.cpp
+  assign.cpp
+  derived.cpp
   descriptor.cpp
+  stat.cpp
   terminator.cpp
+  tools.cpp
   transformational.cpp
   type-code.cpp
+  type-info.cpp
   )
 
 if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
diff --git a/flang/runtime/ISO_Fortran_binding.cpp b/flang/runtime/ISO_Fortran_binding.cpp
index 45b4d0ae3f569ab..15743be88d1beb0 100644
--- a/flang/runtime/ISO_Fortran_binding.cpp
+++ b/flang/runtime/ISO_Fortran_binding.cpp
@@ -19,7 +19,9 @@
 namespace Fortran::ISO {
 extern "C" {
 
-void *CFI_address(
+RT_EXT_API_GROUP_BEGIN
+
+RT_API_ATTRS void *CFI_address(
     const CFI_cdesc_t *descriptor, const CFI_index_t subscripts[]) {
   char *p{static_cast<char *>(descriptor->base_addr)};
   const CFI_rank_t rank{descriptor->rank};
@@ -30,8 +32,9 @@ void *CFI_address(
   return p;
 }
 
-int CFI_allocate(CFI_cdesc_t *descriptor, const CFI_index_t lower_bounds[],
-    const CFI_index_t upper_bounds[], std::size_t elem_len) {
+RT_API_ATTRS int CFI_allocate(CFI_cdesc_t *descriptor,
+    const CFI_index_t lower_bounds[], const CFI_index_t upper_bounds[],
+    std::size_t elem_len) {
   if (!descriptor) {
     return CFI_INVALID_DESCRIPTOR;
   }
@@ -81,7 +84,7 @@ int CFI_allocate(CFI_cdesc_t *descriptor, const CFI_index_t lower_bounds[],
   return CFI_SUCCESS;
 }
 
-int CFI_deallocate(CFI_cdesc_t *descriptor) {
+RT_API_ATTRS int CFI_deallocate(CFI_cdesc_t *descriptor) {
   if (!descriptor) {
     return CFI_INVALID_DESCRIPTOR;
   }
@@ -101,7 +104,7 @@ int CFI_deallocate(CFI_cdesc_t *descriptor) {
   return CFI_SUCCESS;
 }
 
-int CFI_establish(CFI_cdesc_t *descriptor, void *base_addr,
+RT_API_ATTRS int CFI_establish(CFI_cdesc_t *descriptor, void *base_addr,
     CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
     CFI_rank_t rank, const CFI_index_t extents[]) {
   int cfiStatus{VerifyEstablishParameters(descriptor, base_addr, attribute,
@@ -121,7 +124,7 @@ int CFI_establish(CFI_cdesc_t *descriptor, void *base_addr,
   return CFI_SUCCESS;
 }
 
-int CFI_is_contiguous(const CFI_cdesc_t *descriptor) {
+RT_API_ATTRS int CFI_is_contiguous(const CFI_cdesc_t *descriptor) {
   CFI_index_t bytes = descriptor->elem_len;
   for (int j{0}; j < descriptor->rank; ++j) {
     if (bytes != descriptor->dim[j].sm) {
@@ -132,7 +135,7 @@ int CFI_is_contiguous(const CFI_cdesc_t *descriptor) {
   return 1;
 }
 
-int CFI_section(CFI_cdesc_t *result, const CFI_cdesc_t *source,
+RT_API_ATTRS int CFI_section(CFI_cdesc_t *result, const CFI_cdesc_t *source,
     const CFI_index_t lower_bounds[], const CFI_index_t upper_bounds[],
     const CFI_index_t strides[]) {
   CFI_index_t extent[CFI_MAX_RANK];
@@ -208,7 +211,7 @@ int CFI_section(CFI_cdesc_t *result, const CFI_cdesc_t *source,
   return CFI_SUCCESS;
 }
 
-int CFI_select_part(CFI_cdesc_t *result, const CFI_cdesc_t *source,
+RT_API_ATTRS int CFI_select_part(CFI_cdesc_t *result, const CFI_cdesc_t *source,
     std::size_t displacement, std::size_t elem_len) {
   if (!result || !source) {
     return CFI_INVALID_DESCRIPTOR;
@@ -243,7 +246,7 @@ int CFI_select_part(CFI_cdesc_t *result, const CFI_cdesc_t *source,
   return CFI_SUCCESS;
 }
 
-int CFI_setpointer(CFI_cdesc_t *result, const CFI_cdesc_t *source,
+RT_API_ATTRS int CFI_setpointer(CFI_cdesc_t *result, const CFI_cdesc_t *source,
     const CFI_index_t lower_bounds[]) {
   if (!result) {
     return CFI_INVALID_DESCRIPTOR;
@@ -285,5 +288,7 @@ int CFI_setpointer(CFI_cdesc_t *result, const CFI_cdesc_t *source,
   }
   return CFI_SUCCESS;
 }
+
+RT_EXT_API_GROUP_END
 } // extern "C"
 } // namespace Fortran::ISO
diff --git a/flang/runtime/assign-impl.h b/flang/runtime/assign-impl.h
index 0cc3aab432fc25c..f07a501d1d12632 100644
--- a/flang/runtime/assign-impl.h
+++ b/flang/runtime/assign-impl.h
@@ -17,7 +17,8 @@ class Terminator;
 // Note that if allocate object and source expression have the same rank, the
 // value of the allocate object becomes the value provided; otherwise the value
 // of each element of allocate object becomes the value provided (9.7.1.2(7)).
-void DoFromSourceAssign(Descriptor &, const Descriptor &, Terminator &);
+RT_API_ATTRS void DoFromSourceAssign(
+    Descriptor &, const Descriptor &, Terminator &);
 
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_ASSIGN_IMPL_H_
diff --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp
index 3a7ade9421ccf29..458a1ba006b690e 100644
--- a/flang/runtime/assign.cpp
+++ b/flang/runtime/assign.cpp
@@ -30,7 +30,7 @@ enum AssignFlags {
 
 // Predicate: is the left-hand side of an assignment an allocated allocatable
 // that must be deallocated?
-static inline bool MustDeallocateLHS(
+static inline RT_API_ATTRS bool MustDeallocateLHS(
     Descriptor &to, const Descriptor &from, Terminator &terminator, int flags) {
   // Top-level assignments to allocatable variables (*not* components)
   // may first deallocate existing content if there's about to be a
@@ -83,7 +83,7 @@ static inline bool MustDeallocateLHS(
 
 // Utility: allocate the allocatable left-hand side, either because it was
 // originally deallocated or because it required reallocation
-static int AllocateAssignmentLHS(
+static RT_API_ATTRS int AllocateAssignmentLHS(
     Descriptor &to, const Descriptor &from, Terminator &terminator, int flags) {
   to.raw().type = from.raw().type;
   if (!(flags & ExplicitLengthCharacterLHS)) {
@@ -118,7 +118,7 @@ static int AllocateAssignmentLHS(
 }
 
 // least <= 0, most >= 0
-static void MaximalByteOffsetRange(
+static RT_API_ATTRS void MaximalByteOffsetRange(
     const Descriptor &desc, std::int64_t &least, std::int64_t &most) {
   least = most = 0;
   if (desc.ElementBytes() == 0) {
@@ -140,15 +140,15 @@ static void MaximalByteOffsetRange(
   most += desc.ElementBytes() - 1;
 }
 
-static inline bool RangesOverlap(const char *aStart, const char *aEnd,
-    const char *bStart, const char *bEnd) {
+static inline RT_API_ATTRS bool RangesOverlap(const char *aStart,
+    const char *aEnd, const char *bStart, const char *bEnd) {
   return aEnd >= bStart && bEnd >= aStart;
 }
 
 // Predicate: could the left-hand and right-hand sides of the assignment
 // possibly overlap in memory?  Note that the descriptors themeselves
 // are included in the test.
-static bool MayAlias(const Descriptor &x, const Descriptor &y) {
+static RT_API_ATTRS bool MayAlias(const Descriptor &x, const Descriptor &y) {
   const char *xBase{x.OffsetElement()};
   const char *yBase{y.OffsetElement()};
   if (!xBase || !yBase) {
@@ -176,7 +176,7 @@ static bool MayAlias(const Descriptor &x, const Descriptor &y) {
   return true;
 }
 
-static void DoScalarDefinedAssignment(const Descriptor &to,
+static RT_API_ATTRS void DoScalarDefinedAssignment(const Descriptor &to,
     const Descriptor &from, const typeInfo::SpecialBinding &special) {
   bool toIsDesc{special.IsArgDescriptor(0)};
   bool fromIsDesc{special.IsArgDescriptor(1)};
@@ -200,7 +200,7 @@ static void DoScalarDefinedAssignment(const Descriptor &to,
   }
 }
 
-static void DoElementalDefinedAssignment(const Descriptor &to,
+static RT_API_ATTRS void DoElementalDefinedAssignment(const Descriptor &to,
     const Descriptor &from, const typeInfo::DerivedType &derived,
     const typeInfo::SpecialBinding &special) {
   SubscriptValue toAt[maxRank], fromAt[maxRank];
@@ -221,15 +221,16 @@ static void DoElementalDefinedAssignment(const Descriptor &to,
 }
 
 template <typename CHAR>
-static void BlankPadCharacterAssignment(Descriptor &to, const Descriptor &from,
-    SubscriptValue toAt[], SubscriptValue fromAt[], std::size_t elements,
-    std::size_t toElementBytes, std::size_t fromElementBytes) {
+static RT_API_ATTRS void BlankPadCharacterAssignment(Descriptor &to,
+    const Descriptor &from, SubscriptValue toAt[], SubscriptValue fromAt[],
+    std::size_t elements, std::size_t toElementBytes,
+    std::size_t fromElementBytes) {
   std::size_t padding{(toElementBytes - fromElementBytes) / sizeof(CHAR)};
   std::size_t copiedCharacters{fromElementBytes / sizeof(CHAR)};
   for (; elements-- > 0;
        to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
     CHAR *p{to.Element<CHAR>(toAt)};
-    std::memmove(
+    Fortran::runtime::memmove(
         p, from.Element<std::add_const_t<CHAR>>(fromAt), fromElementBytes);
     p += copiedCharacters;
     for (auto n{padding}; n-- > 0;) {
@@ -249,7 +250,7 @@ static void BlankPadCharacterAssignment(Descriptor &to, const Descriptor &from,
 // of elements, but their shape need not to conform (the assignment is done in
 // element sequence order). This facilitates some internal usages, like when
 // dealing with array constructors.
-static void Assign(
+RT_API_ATTRS static void Assign(
     Descriptor &to, const Descriptor &from, Terminator &terminator, int flags) {
   bool mustDeallocateLHS{(flags & DeallocateLHS) ||
       MustDeallocateLHS(to, from, terminator, flags)};
@@ -418,7 +419,7 @@ static void Assign(
           std::size_t componentByteSize{comp.SizeInBytes(to)};
           for (std::size_t j{0}; j < toElements; ++j,
                to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
-            std::memmove(to.Element<char>(toAt) + comp.offset(),
+            Fortran::runtime::memmove(to.Element<char>(toAt) + comp.offset(),
                 from.Element<const char>(fromAt) + comp.offset(),
                 componentByteSize);
           }
@@ -428,7 +429,7 @@ static void Assign(
         std::size_t componentByteSize{comp.SizeInBytes(to)};
         for (std::size_t j{0}; j < toElements; ++j,
              to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
-          std::memmove(to.Element<char>(toAt) + comp.offset(),
+          Fortran::runtime::memmove(to.Element<char>(toAt) + comp.offset(),
               from.Element<const char>(fromAt) + comp.offset(),
               componentByteSize);
         }
@@ -479,14 +480,14 @@ static void Assign(
           *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
       for (std::size_t j{0}; j < toElements; ++j, to.IncrementSubscripts(toAt),
            from.IncrementSubscripts(fromAt)) {
-        std::memmove(to.Element<char>(toAt) + procPtr.offset,
+        Fortran::runtime::memmove(to.Element<char>(toAt) + procPtr.offset,
             from.Element<const char>(fromAt) + procPtr.offset,
             sizeof(typeInfo::ProcedurePointer));
       }
     }
   } else { // intrinsic type, intrinsic assignment
     if (isSimpleMemmove()) {
-      std::memmove(to.raw().base_addr, from.raw().base_addr,
+      Fortran::runtime::memmove(to.raw().base_addr, from.raw().base_addr,
           toElements * toElementBytes);
     } else if (toElementBytes > fromElementBytes) { // blank padding
       switch (to.type().raw()) {
@@ -510,8 +511,8 @@ static void Assign(
     } else { // elemental copies, possibly with character truncation
       for (std::size_t n{toElements}; n-- > 0;
            to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
-        std::memmove(to.Element<char>(toAt), from.Element<const char>(fromAt),
-            toElementBytes);
+        Fortran::runtime::memmove(to.Element<char>(toAt),
+            from.Element<const char>(fromAt), toElementBytes);
       }
     }
   }
@@ -523,7 +524,9 @@ static void Assign(
   }
 }
 
-void DoFromSourceAssign(
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS void DoFromSourceAssign(
     Descriptor &alloc, const Descriptor &source, Terminator &terminator) {
   if (alloc.rank() > 0 && source.rank() == 0) {
     // The value of each element of allocate object becomes the value of source.
@@ -542,8 +545,8 @@ void DoFromSourceAssign(
     } else { // intrinsic type
       for (std::size_t n{alloc.Elements()}; n-- > 0;
            alloc.IncrementSubscripts(allocAt)) {
-        std::memmove(alloc.Element<char>(allocAt), source.raw().base_addr,
-            alloc.ElementBytes());
+        Fortran::runtime::memmove(alloc.Element<char>(allocAt),
+            source.raw().base_addr, alloc.ElementBytes());
       }
     }
   } else {
@@ -551,8 +554,12 @@ void DoFromSourceAssign(
   }
 }
 
+RT_OFFLOAD_API_GROUP_END
+
 extern "C" {
-void RTNAME(Assign)(Descriptor &to, const Descriptor &from,
+RT_EXT_API_GROUP_BEGIN
+
+void RTDEF(Assign)(Descriptor &to, const Descriptor &from,
     const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   // All top-level defined assignments can be recognized in semantics and
@@ -562,7 +569,7 @@ void RTNAME(Assign)(Descriptor &to, const Descriptor &from,
       MaybeReallocate | NeedFinalization | ComponentCanBeDefinedAssignment);
 }
 
-void RTNAME(AssignTemporary)(Descriptor &to, const Descriptor &from,
+void RTDEF(AssignTemporary)(Descriptor &to, const Descriptor &from,
     const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   // Initialize the "to" if it is of derived type that needs initialization.
@@ -591,7 +598,7 @@ void RTNAME(AssignTemporary)(Descriptor &to, const Descriptor &from,
   Assign(to, from, terminator, PolymorphicLHS);
 }
 
-void RTNAME(CopyOutAssign)(Descriptor &to, const Descriptor &from,
+void RTDEF(CopyOutAssign)(Descriptor &to, const Descriptor &from,
     bool skipToInit, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   // Initialize the "to" if it is of derived type that needs initialization.
@@ -613,7 +620,7 @@ void RTNAME(CopyOutAssign)(Descriptor &to, const Descriptor &from,
   Assign(to, from, terminator, NoAssignFlags);
 }
 
-void RTNAME(AssignExplicitLengthCharacter)(Descriptor &to,
+void RTDEF(AssignExplicitLengthCharacter)(Descriptor &to,
     const Descriptor &from, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   Assign(to, from, terminator,
@@ -621,12 +628,14 @@ void RTNAME(AssignExplicitLengthCharacter)(Descriptor &to,
           ExplicitLengthCharacterLHS);
 }
 
-void RTNAME(AssignPolymorphic)(Descriptor &to, const Descriptor &from,
+void RTDEF(AssignPolymorphic)(Descriptor &to, const Descriptor &from,
     const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
   Assign(to, from, terminator,
       MaybeReallocate | NeedFinalization | ComponentCanBeDefinedAssignment |
           PolymorphicLHS);
 }
+
+RT_EXT_API_GROUP_END
 } // extern "C"
 } // namespace Fortran::runtime
diff --git a/flang/runtime/derived.cpp b/flang/runtime/derived.cpp
index 6e87e010df2edaf..8a0d0ab2bb7836a 100644
--- a/flang/runtime/derived.cpp
+++ b/flang/runtime/derived.cpp
@@ -15,8 +15,11 @@
 
 namespace Fortran::runtime {
 
-int Initialize(const Descriptor &instance, const typeInfo::DerivedType &derived,
-    Terminator &terminator, bool hasStat, const Descriptor *errMsg) {
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS int Initialize(const Descriptor &instance,
+    const typeInfo::DerivedType &derived, Terminator &terminator, bool hasStat,
+    const Descriptor *errMsg) {
   const Descriptor &componentDesc{derived.component()};
   std::size_t elements{instance.Elements()};
   int stat{StatOk};
@@ -114,7 +117,7 @@ int Initialize(const Descriptor &instance, const typeInfo::DerivedType &derived,
   return stat;
 }
 
-static const typeInfo::SpecialBinding *FindFinal(
+static RT_API_ATTRS const typeInfo::SpecialBinding *FindFinal(
     const typeInfo::DerivedType &derived, int rank) {
   if (const auto *ranked{derived.FindSpecialBinding(
           typeInfo::SpecialBinding::RankFinal(rank))}) {
@@ -128,7 +131,7 @@ static const typeInfo::SpecialBinding *FindFinal(
   }
 }
 
-static void CallFinalSubroutine(const Descriptor &descriptor,
+static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
     const typeInfo::DerivedType &derived, Terminator *terminator) {
   if (const auto *special{FindFinal(derived, descriptor.rank())}) {
     if (special->which() == typeInfo::SpecialBinding::Which::ElementalFinal) {
@@ -193,7 +196,7 @@ static void CallFinalSubroutine(const Descriptor &descriptor,
 }
 
 // Fortran 2018 subclause 7.5.6.2
-void Finalize(const Descriptor &descriptor,
+RT_API_ATTRS void Finalize(const Descriptor &descriptor,
     const typeInfo::DerivedType &derived, Terminator *terminator) {
   if (derived.noFinalizationNeeded() || !descriptor.IsAllocated()) {
     return;
@@ -285,7 +288,7 @@ void Finalize(const Descriptor &descriptor,
 // elementwise finalization of non-parent components taking place
 // before parent component finalization, and with all finalization
 // preceding any deallocation.
-void Destroy(const Descriptor &descriptor, bool finalize,
+RT_API_ATTRS void Destroy(const Descriptor &descriptor, bool finalize,
     const typeInfo::DerivedType &derived, Terminator *terminator) {
   if (derived.noDestructionNeeded() || !descriptor.IsAllocated()) {
     return;
@@ -313,7 +316,7 @@ void Destroy(const Descriptor &descriptor, bool finalize,
   }
 }
 
-bool HasDynamicComponent(const Descriptor &descriptor) {
+RT_API_ATTRS bool HasDynamicComponent(const Descriptor &descriptor) {
   if (const DescriptorAddendum * addendum{descriptor.Addendum()}) {
     if (const auto *derived = addendum->derivedType()) {
       const Descriptor &componentDesc{derived->component()};
@@ -331,4 +334,5 @@ bool HasDynamicComponent(const Descriptor &descriptor) {
   return false;
 }
 
+RT_OFFLOAD_API_GROUP_END
 } // namespace Fortran::runtime
diff --git a/flang/runtime/freestanding-tools.h b/flang/runtime/freestanding-tools.h
index 6acfb8a532d30d1..28248f76e882af1 100644
--- a/flang/runtime/freestanding-tools.h
+++ b/flang/runtime/freestanding-tools.h
@@ -10,7 +10,9 @@
 #define FORTRAN_RUNTIME_FREESTANDING_TOOLS_H_
 
 #include "flang/Runtime/api-attrs.h"
+#include "flang/Runtime/c-or-cpp.h"
 #include <algorithm>
+#include <cstring>
 
 // The file defines a set of utilities/classes that might be
 // used to get reduce the dependency on external libraries (e.g. libstdc++).
@@ -20,6 +22,21 @@
 #define STD_FILL_N_UNSUPPORTED 1
 #endif
 
+#if !defined(STD_MEMMOVE_UNSUPPORTED) && \
+    (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)
+#define STD_MEMMOVE_UNSUPPORTED 1
+#endif
+
+#if !defined(STD_STRLEN_UNSUPPORTED) && \
+    (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)
+#define STD_STRLEN_UNSUPPORTED 1
+#endif
+
+#if !defined(STD_MEMCMP_UNSUPPORTED) && \
+    (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)
+#define STD_MEMCMP_UNSUPPORTED 1
+#endif
+
 namespace Fortran::runtime {
 
 #if STD_FILL_N_UNSUPPORTED
@@ -28,16 +45,78 @@ namespace Fortran::runtime {
 template <typename A>
 static inline RT_API_ATTRS void fill_n(
     A *start, std::size_t count, const A &value) {
-#if STD_FILL_N_UNSUPPORTED
-  for (std::size_t j{0}; j < count; ++j)
+  for (std::size_t j{0}; j < count; ++j) {
     start[j] = value;
-#else
-  std::fill_n(start, count, value);
-#endif
+  }
 }
 #else // !STD_FILL_N_UNSUPPORTED
 using std::fill_n;
 #endif // !STD_FILL_N_UNSUPPORTED
 
+#if STD_MEMMOVE_UNSUPPORTED
+// Provides alternative implementation for std::memmove(), if
+// it is not supported.
+static inline RT_API_ATTRS void memmove(
+    void *dest, const void *src, std::size_t count) {
+  char *to{reinterpret_cast<char *>(dest)};
+  const char *from{reinterpret_cast<const char *>(src)};
+
+  if (to == from) {
+    return;
+  }
+  if (to + count <= from || from + count <= to) {
+    std::memcpy(dest, src, count);
+  } else if (to < from) {
+    while (count--) {
+      *to++ = *from++;
+    }
+  } else {
+    to += count;
+    from += count;
+    while (count--) {
+      *--to = *--from;
+    }
+  }
+}
+#else // !STD_MEMMOVE_UNSUPPORTED
+using std::memmove;
+#endif // !STD_MEMMOVE_UNSUPPORTED
+
+#if STD_STRLEN_UNSUPPORTED
+// Provides alternative implementation for std::strlen(), if
+// it is not supported.
+static inline RT_API_ATTRS std::size_t strlen(const char *str) {
+  if (!str) {
+    // Return 0 for nullptr.
+    return 0;
+  }
+  const char *end = str;
+  for (; *end != '\0'; ++end)
+    ;
+  return end - str;
+}
+#else // !STD_STRLEN_UNSUPPORTED
+using std::strlen;
+#endif // !STD_STRLEN_UNSUPPORTED
+
+#if STD_MEMCMP_UNSUPPORTED
+// Provides alternative implementation for std::memcmp(), if
+// it is not supported.
+static inline RT_API_ATTRS int memcmp(
+    const void *RESTRICT lhs, const void *RESTRICT rhs, std::size_t count) {
+  auto m1{reinterpret_cast<const unsigned char *>(lhs)};
+  auto m2{reinterpret_cast<const unsigned char *>(rhs)};
+  for (; count--; ++m1, ++m2) {
+    int diff = *m1 - *m2;
+    if (diff != 0) {
+      return diff;
+    }
+  }
+  return 0;
+}
+#else // !STD_MEMCMP_UNSUPPORTED
+using std::memcmp;
+#endif // !STD_MEMCMP_UNSUPPORTED
+
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_FREESTANDING_TOOLS_H_
diff --git a/flang/runtime/stat.cpp b/flang/runtime/stat.cpp
index 63284bbea7f2310..24368fa6a1ae1a7 100644
--- a/flang/runtime/stat.cpp
+++ b/flang/runtime/stat.cpp
@@ -8,10 +8,13 @@
 
 #include "stat.h"
 #include "terminator.h"
+#include "tools.h"
 #include "flang/Runtime/descriptor.h"
 
 namespace Fortran::runtime {
-const char *StatErrorString(int stat) {
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS const char *StatErrorString(int stat) {
   switch (stat) {
   case StatOk:
     return "No error";
@@ -68,14 +71,14 @@ const char *StatErrorString(int stat) {
   }
 }
 
-int ToErrmsg(const Descriptor *errmsg, int stat) {
+RT_API_ATTRS int ToErrmsg(const Descriptor *errmsg, int stat) {
   if (stat != StatOk && errmsg && errmsg->raw().base_addr &&
       errmsg->type() == TypeCode(TypeCategory::Character, 1) &&
       errmsg->rank() == 0) {
     if (const char *msg{StatErrorString(stat)}) {
       char *buffer{errmsg->OffsetElement()};
       std::size_t bufferLength{errmsg->ElementBytes()};
-      std::size_t msgLength{std::strlen(msg)};
+      std::size_t msgLength{Fortran::runtime::strlen(msg)};
       if (msgLength >= bufferLength) {
         std::memcpy(buffer, msg, bufferLength);
       } else {
@@ -87,7 +90,7 @@ int ToErrmsg(const Descriptor *errmsg, int stat) {
   return stat;
 }
 
-int ReturnError(
+RT_API_ATTRS int ReturnError(
     Terminator &terminator, int stat, const Descriptor *errmsg, bool hasStat) {
   if (stat == StatOk || hasStat) {
     return ToErrmsg(errmsg, stat);
@@ -98,4 +101,6 @@ int ReturnError(
   }
   return stat;
 }
+
+RT_OFFLOAD_API_GROUP_END
 } // namespace Fortran::runtime
diff --git a/flang/runtime/stat.h b/flang/runtime/stat.h
index 7ba797c37418692..e2b0658b122c972 100644
--- a/flang/runtime/stat.h
+++ b/flang/runtime/stat.h
@@ -12,6 +12,7 @@
 #ifndef FORTRAN_RUNTIME_STAT_H_
 #define FORTRAN_RUNTIME_STAT_H_
 #include "flang/ISO_Fortran_binding_wrapper.h"
+#include "flang/Runtime/api-attrs.h"
 #include "flang/Runtime/magic-numbers.h"
 namespace Fortran::runtime {
 
@@ -52,9 +53,9 @@ enum Stat {
       FORTRAN_RUNTIME_STAT_MOVE_ALLOC_SAME_ALLOCATABLE,
 };
 
-const char *StatErrorString(int);
-int ToErrmsg(const Descriptor *errmsg, int stat); // returns stat
-int ReturnError(Terminator &, int stat, const Descriptor *errmsg = nullptr,
-    bool hasStat = false);
+RT_API_ATTRS const char *StatErrorString(int);
+RT_API_ATTRS int ToErrmsg(const Descriptor *errmsg, int stat); // returns stat
+RT_API_ATTRS int ReturnError(Terminator &, int stat,
+    const Descriptor *errmsg = nullptr, bool hasStat = false);
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_STAT_H
diff --git a/flang/runtime/tools.cpp b/flang/runtime/tools.cpp
index 36cfa456a08234a..a027559d9f4a740 100644
--- a/flang/runtime/tools.cpp
+++ b/flang/runtime/tools.cpp
@@ -15,14 +15,16 @@
 
 namespace Fortran::runtime {
 
-std::size_t TrimTrailingSpaces(const char *s, std::size_t n) {
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS std::size_t TrimTrailingSpaces(const char *s, std::size_t n) {
   while (n > 0 && s[n - 1] == ' ') {
     --n;
   }
   return n;
 }
 
-OwningPtr<char> SaveDefaultCharacter(
+RT_API_ATTRS OwningPtr<char> SaveDefaultCharacter(
     const char *s, std::size_t length, const Terminator &terminator) {
   if (s) {
     auto *p{static_cast<char *>(AllocateMemoryOrCrash(terminator, length + 1))};
@@ -34,7 +36,7 @@ OwningPtr<char> SaveDefaultCharacter(
   }
 }
 
-static bool CaseInsensitiveMatch(
+static RT_API_ATTRS bool CaseInsensitiveMatch(
     const char *value, std::size_t length, const char *possibility) {
   for (; length-- > 0; ++possibility) {
     char ch{*value++};
@@ -57,7 +59,7 @@ static bool CaseInsensitiveMatch(
   return *possibility == '\0';
 }
 
-int IdentifyValue(
+RT_API_ATTRS int IdentifyValue(
     const char *value, std::size_t length, const char *possibilities[]) {
   if (value) {
     for (int j{0}; possibilities[j]; ++j) {
@@ -69,9 +71,9 @@ int IdentifyValue(
   return -1;
 }
 
-void ToFortranDefaultCharacter(
+RT_API_ATTRS void ToFortranDefaultCharacter(
     char *to, std::size_t toLength, const char *from) {
-  std::size_t len{std::strlen(from)};
+  std::size_t len{Fortran::runtime::strlen(from)};
   if (len < toLength) {
     std::memcpy(to, from, len);
     std::memset(to + len, ' ', toLength - len);
@@ -80,7 +82,7 @@ void ToFortranDefaultCharacter(
   }
 }
 
-void CheckConformability(const Descriptor &to, const Descriptor &x,
+RT_API_ATTRS void CheckConformability(const Descriptor &to, const Descriptor &x,
     Terminator &terminator, const char *funcName, const char *toName,
     const char *xName) {
   if (x.rank() == 0) {
@@ -104,14 +106,15 @@ void CheckConformability(const Descriptor &to, const Descriptor &x,
   }
 }
 
-void CheckIntegerKind(Terminator &terminator, int kind, const char *intrinsic) {
+RT_API_ATTRS void CheckIntegerKind(
+    Terminator &terminator, int kind, const char *intrinsic) {
   if (kind < 1 || kind > 16 || (kind & (kind - 1)) != 0) {
     terminator.Crash(
         "not yet implemented: %s: KIND=%d argument", intrinsic, kind);
   }
 }
 
-void ShallowCopyDiscontiguousToDiscontiguous(
+RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
     const Descriptor &to, const Descriptor &from) {
   SubscriptValue toAt[maxRank], fromAt[maxRank];
   to.GetLowerBounds(toAt);
@@ -124,7 +127,7 @@ void ShallowCopyDiscontiguousToDiscontiguous(
   }
 }
 
-void ShallowCopyDiscontiguousToContiguous(
+RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
     const Descriptor &to, const Descriptor &from) {
   char *toAt{to.OffsetElement()};
   SubscriptValue fromAt[maxRank];
@@ -136,7 +139,7 @@ void ShallowCopyDiscontiguousToContiguous(
   }
 }
 
-void ShallowCopyContiguousToDiscontiguous(
+RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
     const Descriptor &to, const Descriptor &from) {
   SubscriptValue toAt[maxRank];
   to.GetLowerBounds(toAt);
@@ -148,7 +151,7 @@ void ShallowCopyContiguousToDiscontiguous(
   }
 }
 
-void ShallowCopy(const Descriptor &to, const Descriptor &from,
+RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
     bool toIsContiguous, bool fromIsContiguous) {
   if (toIsContiguous) {
     if (fromIsContiguous) {
@@ -166,7 +169,9 @@ void ShallowCopy(const Descriptor &to, const Descriptor &from,
   }
 }
 
-void ShallowCopy(const Descriptor &to, const Descriptor &from) {
+RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from) {
   ShallowCopy(to, from, to.IsContiguous(), from.IsContiguous());
 }
+
+RT_OFFLOAD_API_GROUP_END
 } // namespace Fortran::runtime
diff --git a/flang/runtime/tools.h b/flang/runtime/tools.h
index 34ee8c56aa962a2..ea659190e143913 100644
--- a/flang/runtime/tools.h
+++ b/flang/runtime/tools.h
@@ -23,20 +23,20 @@ namespace Fortran::runtime {
 
 class Terminator;
 
-std::size_t TrimTrailingSpaces(const char *, std::size_t);
+RT_API_ATTRS std::size_t TrimTrailingSpaces(const char *, std::size_t);
 
-OwningPtr<char> SaveDefaultCharacter(
+RT_API_ATTRS OwningPtr<char> SaveDefaultCharacter(
     const char *, std::size_t, const Terminator &);
 
 // For validating and recognizing default CHARACTER values in a
 // case-insensitive manner.  Returns the zero-based index into the
 // null-terminated array of upper-case possibilities when the value is valid,
 // or -1 when it has no match.
-int IdentifyValue(
+RT_API_ATTRS int IdentifyValue(
     const char *value, std::size_t length, const char *possibilities[]);
 
 // Truncates or pads as necessary
-void ToFortranDefaultCharacter(
+RT_API_ATTRS void ToFortranDefaultCharacter(
     char *to, std::size_t toLength, const char *from);
 
 // Utility for dealing with elemental LOGICAL arguments
@@ -59,8 +59,8 @@ RT_API_ATTRS void CheckConformability(const Descriptor &to, const Descriptor &x,
 
 // Helper to store integer value in result[at].
 template <int KIND> struct StoreIntegerAt {
-  void operator()(const Fortran::runtime::Descriptor &result, std::size_t at,
-      std::int64_t value) const {
+  RT_API_ATTRS void operator()(const Fortran::runtime::Descriptor &result,
+      std::size_t at, std::int64_t value) const {
     *result.ZeroBasedIndexedElement<Fortran::runtime::CppTypeFor<
         Fortran::common::TypeCategory::Integer, KIND>>(at) = value;
   }
@@ -71,7 +71,8 @@ RT_API_ATTRS void CheckIntegerKind(
     Terminator &, int kind, const char *intrinsic);
 
 template <typename TO, typename FROM>
-inline void PutContiguousConverted(TO *to, FROM *from, std::size_t count) {
+inline RT_API_ATTRS void PutContiguousConverted(
+    TO *to, FROM *from, std::size_t count) {
   while (count-- > 0) {
     *to++ = *from++;
   }
@@ -94,7 +95,7 @@ static inline RT_API_ATTRS std::int64_t GetInt64(
 }
 
 template <typename INT>
-inline bool SetInteger(INT &x, int kind, std::int64_t value) {
+inline RT_API_ATTRS bool SetInteger(INT &x, int kind, std::int64_t value) {
   switch (kind) {
   case 1:
     reinterpret_cast<CppTypeFor<TypeCategory::Integer, 1> &>(x) = value;
@@ -300,8 +301,8 @@ inline RT_API_ATTRS RESULT ApplyLogicalKind(
 }
 
 // Calculate result type of (X op Y) for *, //, DOT_PRODUCT, &c.
-std::optional<std::pair<TypeCategory, int>> inline constexpr GetResultType(
-    TypeCategory xCat, int xKind, TypeCategory yCat, int yKind) {
+std::optional<std::pair<TypeCategory, int>> inline constexpr RT_API_ATTRS
+GetResultType(TypeCategory xCat, int xKind, TypeCategory yCat, int yKind) {
   int maxKind{std::max(xKind, yKind)};
   switch (xCat) {
   case TypeCategory::Integer:
@@ -379,7 +380,7 @@ using AccumulationType = CppTypeFor<CAT,
 
 // memchr() for any character type
 template <typename CHAR>
-static inline const CHAR *FindCharacter(
+static inline RT_API_ATTRS const CHAR *FindCharacter(
     const CHAR *data, CHAR ch, std::size_t chars) {
   const CHAR *end{data + chars};
   for (const CHAR *p{data}; p < end; ++p) {
@@ -391,7 +392,8 @@ static inline const CHAR *FindCharacter(
 }
 
 template <>
-inline const char *FindCharacter(const char *data, char ch, std::size_t chars) {
+inline RT_API_ATTRS const char *FindCharacter(
+    const char *data, char ch, std::size_t chars) {
   return reinterpret_cast<const char *>(
       std::memchr(data, static_cast<int>(ch), chars));
 }
@@ -399,15 +401,15 @@ inline const char *FindCharacter(const char *data, char ch, std::size_t chars) {
 // Copy payload data from one allocated descriptor to another.
 // Assumes element counts and element sizes match, and that both
 // descriptors are allocated.
-void ShallowCopyDiscontiguousToDiscontiguous(
+RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
     const Descriptor &to, const Descriptor &from);
-void ShallowCopyDiscontiguousToContiguous(
+RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
     const Descriptor &to, const Descriptor &from);
-void ShallowCopyContiguousToDiscontiguous(
+RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
     const Descriptor &to, const Descriptor &from);
-void ShallowCopy(const Descriptor &to, const Descriptor &from,
+RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
     bool toIsContiguous, bool fromIsContiguous);
-void ShallowCopy(const Descriptor &to, const Descriptor &from);
+RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from);
 
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_TOOLS_H_
diff --git a/flang/runtime/type-info.cpp b/flang/runtime/type-info.cpp
index 5bd0258cbbf7ff4..baf446e0c79d3c9 100644
--- a/flang/runtime/type-info.cpp
+++ b/flang/runtime/type-info.cpp
@@ -8,11 +8,14 @@
 
 #include "type-info.h"
 #include "terminator.h"
+#include "tools.h"
 #include <cstdio>
 
 namespace Fortran::runtime::typeInfo {
 
-std::optional<TypeParameterValue> Value::GetValue(
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS std::optional<TypeParameterValue> Value::GetValue(
     const Descriptor *descriptor) const {
   switch (genre_) {
   case Genre::Explicit:
@@ -29,7 +32,8 @@ std::optional<TypeParameterValue> Value::GetValue(
   }
 }
 
-std::size_t Component::GetElementByteSize(const Descriptor &instance) const {
+RT_API_ATTRS std::size_t Component::GetElementByteSize(
+    const Descriptor &instance) const {
   switch (category()) {
   case TypeCategory::Integer:
   case TypeCategory::Real:
@@ -51,7 +55,8 @@ std::size_t Component::GetElementByteSize(const Descriptor &instance) const {
   return 0;
 }
 
-std::size_t Component::GetElements(const Descriptor &instance) const {
+RT_API_ATTRS std::size_t Component::GetElements(
+    const Descriptor &instance) const {
   std::size_t elements{1};
   if (int rank{rank_}) {
     if (const Value * boundValues{bounds()}) {
@@ -73,7 +78,8 @@ std::size_t Component::GetElements(const Descriptor &instance) const {
   return elements;
 }
 
-std::size_t Component::SizeInBytes(const Descriptor &instance) const {
+RT_API_ATTRS std::size_t Component::SizeInBytes(
+    const Descriptor &instance) const {
   if (genre() == Genre::Data) {
     return GetElementByteSize(instance) * GetElements(instance);
   } else if (category() == TypeCategory::Derived) {
@@ -85,7 +91,7 @@ std::size_t Component::SizeInBytes(const Descriptor &instance) const {
   }
 }
 
-void Component::EstablishDescriptor(Descriptor &descriptor,
+RT_API_ATTRS void Component::EstablishDescriptor(Descriptor &descriptor,
     const Descriptor &container, Terminator &terminator) const {
   ISO::CFI_attribute_t attribute{static_cast<ISO::CFI_attribute_t>(
       genre_ == Genre::Allocatable   ? CFI_attribute_allocatable
@@ -128,7 +134,7 @@ void Component::EstablishDescriptor(Descriptor &descriptor,
   }
 }
 
-void Component::CreatePointerDescriptor(Descriptor &descriptor,
+RT_API_ATTRS void Component::CreatePointerDescriptor(Descriptor &descriptor,
     const Descriptor &container, Terminator &terminator,
     const SubscriptValue *subscripts) const {
   RUNTIME_CHECK(terminator, genre_ == Genre::Data);
@@ -141,7 +147,7 @@ void Component::CreatePointerDescriptor(Descriptor &descriptor,
   descriptor.raw().attribute = CFI_attribute_pointer;
 }
 
-const DerivedType *DerivedType::GetParentType() const {
+RT_API_ATTRS const DerivedType *DerivedType::GetParentType() const {
   if (hasParent_) {
     const Descriptor &compDesc{component()};
     const Component &component{*compDesc.OffsetElement<const Component>()};
@@ -151,7 +157,7 @@ const DerivedType *DerivedType::GetParentType() const {
   }
 }
 
-const Component *DerivedType::FindDataComponent(
+RT_API_ATTRS const Component *DerivedType::FindDataComponent(
     const char *compName, std::size_t compNameLen) const {
   const Descriptor &compDesc{component()};
   std::size_t n{compDesc.Elements()};
@@ -162,7 +168,8 @@ const Component *DerivedType::FindDataComponent(
     INTERNAL_CHECK(component != nullptr);
     const Descriptor &nameDesc{component->name()};
     if (nameDesc.ElementBytes() == compNameLen &&
-        std::memcmp(compName, nameDesc.OffsetElement(), compNameLen) == 0) {
+        Fortran::runtime::memcmp(
+            compName, nameDesc.OffsetElement(), compNameLen) == 0) {
       return component;
     }
   }
@@ -170,6 +177,8 @@ const Component *DerivedType::FindDataComponent(
   return parent ? parent->FindDataComponent(compName, compNameLen) : nullptr;
 }
 
+RT_OFFLOAD_API_GROUP_END
+
 static void DumpScalarCharacter(
     FILE *f, const Descriptor &desc, const char *what) {
   if (desc.raw().version == CFI_VERSION &&

>From 6fd457698589bfade62378392698b026283c794a Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 3 Oct 2023 21:25:00 -0700
Subject: [PATCH 2/2] Added commas.

---
 flang/include/flang/Runtime/api-attrs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/flang/include/flang/Runtime/api-attrs.h b/flang/include/flang/Runtime/api-attrs.h
index 9835adbfb0f8da5..61da2c06d3a4dc9 100644
--- a/flang/include/flang/Runtime/api-attrs.h
+++ b/flang/include/flang/Runtime/api-attrs.h
@@ -45,7 +45,7 @@
 /*
  * RT_OFFLOAD_API_GROUP_BEGIN/END pair is placed around definitions
  * of functions that can be referenced in other modules of Flang
- * runtime. For OpenMP offload these functions are made "declare target"
+ * runtime. For OpenMP offload, these functions are made "declare target"
  * making sure they are compiled for the target even though direct
  * references to them from other "declare target" functions may not
  * be seen. Host-only functions should not be put in between these
@@ -58,7 +58,7 @@
  * RT_OFFLOAD_VAR_GROUP_BEGIN/END pair is placed around definitions
  * of variables (e.g. globals or static class members) that can be
  * referenced in functions marked with RT_OFFLOAD_API_GROUP_BEGIN/END.
- * For OpenMP offload these variables are made "declare target".
+ * For OpenMP offload, these variables are made "declare target".
  */
 #define RT_OFFLOAD_VAR_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
 #define RT_OFFLOAD_VAR_GROUP_END RT_EXT_API_GROUP_END



More information about the flang-commits mailing list