[flang-commits] [flang] 4bdec58 - [flang][runtime] Enable more code for offload device builds. (#67489)

via flang-commits flang-commits at lists.llvm.org
Wed Sep 27 08:20:22 PDT 2023


Author: Slava Zakharin
Date: 2023-09-27T08:20:17-07:00
New Revision: 4bdec5830bc398cecf6e775cc54d9dd511e6e237

URL: https://github.com/llvm/llvm-project/commit/4bdec5830bc398cecf6e775cc54d9dd511e6e237
DIFF: https://github.com/llvm/llvm-project/commit/4bdec5830bc398cecf6e775cc54d9dd511e6e237.diff

LOG: [flang][runtime] Enable more code for offload device builds. (#67489)

I extended the "closure" of the device code containing the initial
transformational.cpp. The device side of the library should now be
complete at least for some APIs. For example, I tested with C OpenMP
code calling BesselJnX0 with a nullptr descriptor that failed with
a runtime error when executing on a GPU.

I added `--expt-relaxed-constexpr` for the NVCC compiler to avoid multiple
warnings about missing `__attribute__((device))` on constexpr methods
coming from C++ header files.

Added: 
    

Modified: 
    flang/include/flang/Runtime/api-attrs.h
    flang/include/flang/Runtime/descriptor.h
    flang/include/flang/Runtime/memory.h
    flang/include/flang/Runtime/type-code.h
    flang/runtime/CMakeLists.txt
    flang/runtime/ISO_Fortran_util.h
    flang/runtime/derived.h
    flang/runtime/descriptor.cpp
    flang/runtime/terminator.cpp
    flang/runtime/terminator.h
    flang/runtime/type-code.cpp
    flang/runtime/type-info.h

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Runtime/api-attrs.h b/flang/include/flang/Runtime/api-attrs.h
index a866625a7b95ba4..7420472aed670e4 100644
--- a/flang/include/flang/Runtime/api-attrs.h
+++ b/flang/include/flang/Runtime/api-attrs.h
@@ -42,6 +42,18 @@
 #endif
 #endif /* !defined(RT_EXT_API_GROUP_END) */
 
+/*
+ * RT_OFFLOAD_API_GROUP_BEGIN/END pair is placed around definitions
+ * of functions that can be referenced in other modules of Flang
+ * runtime. For OpenMP offload these functions are made "declare target"
+ * making sure they are compiled for the target even though direct
+ * references to them from other "declare target" functions may not
+ * be seen. Host-only functions should not be put in between these
+ * two macros.
+ */
+#define RT_OFFLOAD_API_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
+#define RT_OFFLOAD_API_GROUP_END RT_EXT_API_GROUP_END
+
 /*
  * RT_VAR_GROUP_BEGIN/END pair is placed around definitions
  * of module scope variables referenced by Flang runtime (directly
@@ -88,4 +100,16 @@
 #endif
 #endif /* !defined(RT_CONST_VAR_ATTRS) */
 
+/*
+ * RT_DEVICE_COMPILATION is defined for any device compilation.
+ * Note that it can only be used reliably with compilers that perform
+ * separate host and device compilations.
+ */
+#if ((defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)) || \
+    (defined(_OPENMP) && (defined(__AMDGCN__) || defined(__NVPTX__)))
+#define RT_DEVICE_COMPILATION 1
+#else
+#undef RT_DEVICE_COMPILATION
+#endif
+
 #endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */

diff  --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h
index 32fb6d646d1bda7..34f497ecf261588 100644
--- a/flang/include/flang/Runtime/descriptor.h
+++ b/flang/include/flang/Runtime/descriptor.h
@@ -181,20 +181,21 @@ class Descriptor {
       ISO::CFI_attribute_t attribute = CFI_attribute_other);
 
   // CUDA_TODO: Clang does not support unique_ptr on device.
-  static OwningPtr<Descriptor> Create(TypeCode t, std::size_t elementBytes,
-      void *p = nullptr, int rank = maxRank,
+  static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCode t,
+      std::size_t elementBytes, void *p = nullptr, int rank = maxRank,
       const SubscriptValue *extent = nullptr,
       ISO::CFI_attribute_t attribute = CFI_attribute_other,
       int derivedTypeLenParameters = 0);
-  static OwningPtr<Descriptor> Create(TypeCategory, int kind, void *p = nullptr,
-      int rank = maxRank, const SubscriptValue *extent = nullptr,
+  static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCategory, int kind,
+      void *p = nullptr, int rank = maxRank,
+      const SubscriptValue *extent = nullptr,
       ISO::CFI_attribute_t attribute = CFI_attribute_other);
-  static OwningPtr<Descriptor> Create(int characterKind,
+  static RT_API_ATTRS OwningPtr<Descriptor> Create(int characterKind,
       SubscriptValue characters, void *p = nullptr, int rank = maxRank,
       const SubscriptValue *extent = nullptr,
       ISO::CFI_attribute_t attribute = CFI_attribute_other);
-  static OwningPtr<Descriptor> Create(const typeInfo::DerivedType &dt,
-      void *p = nullptr, int rank = maxRank,
+  static RT_API_ATTRS OwningPtr<Descriptor> Create(
+      const typeInfo::DerivedType &dt, void *p = nullptr, int rank = maxRank,
       const SubscriptValue *extent = nullptr,
       ISO::CFI_attribute_t attribute = CFI_attribute_other);
 

diff  --git a/flang/include/flang/Runtime/memory.h b/flang/include/flang/Runtime/memory.h
index 0afe5250169d0b4..bde056f439a5ee7 100644
--- a/flang/include/flang/Runtime/memory.h
+++ b/flang/include/flang/Runtime/memory.h
@@ -12,19 +12,22 @@
 #ifndef FORTRAN_RUNTIME_MEMORY_H_
 #define FORTRAN_RUNTIME_MEMORY_H_
 
+#include "flang/Runtime/api-attrs.h"
+#include <cassert>
 #include <memory>
+#include <type_traits>
 
 namespace Fortran::runtime {
 
 class Terminator;
 
-[[nodiscard]] void *AllocateMemoryOrCrash(
+[[nodiscard]] RT_API_ATTRS void *AllocateMemoryOrCrash(
     const Terminator &, std::size_t bytes);
 template <typename A> [[nodiscard]] A &AllocateOrCrash(const Terminator &t) {
   return *reinterpret_cast<A *>(AllocateMemoryOrCrash(t, sizeof(A)));
 }
-void FreeMemory(void *);
-template <typename A> void FreeMemory(A *p) {
+RT_API_ATTRS void FreeMemory(void *);
+template <typename A> RT_API_ATTRS void FreeMemory(A *p) {
   FreeMemory(reinterpret_cast<void *>(p));
 }
 template <typename A> void FreeMemoryAndNullify(A *&p) {
@@ -32,11 +35,93 @@ template <typename A> void FreeMemoryAndNullify(A *&p) {
   p = nullptr;
 }
 
-template <typename A> struct OwningPtrDeleter {
-  void operator()(A *p) { FreeMemory(p); }
+// Very basic implementation mimicking std::unique_ptr.
+// It should work for any offload device compiler.
+// It uses a fixed memory deleter based on FreeMemory(),
+// and does not support array objects with runtime length.
+template <typename A> class OwningPtr {
+public:
+  using pointer_type = A *;
+
+  OwningPtr() = default;
+  RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
+  RT_API_ATTRS OwningPtr(const OwningPtr &) = delete;
+  RT_API_ATTRS OwningPtr &operator=(const OwningPtr &) = delete;
+  RT_API_ATTRS OwningPtr(OwningPtr &&other) {
+    ptr_ = other.ptr_;
+    other.ptr_ = pointer_type{};
+  }
+  RT_API_ATTRS OwningPtr &operator=(OwningPtr &&other) {
+    if (this != &other) {
+      delete_ptr(ptr_);
+      ptr_ = other.ptr_;
+      other.ptr_ = pointer_type{};
+    }
+    return *this;
+  }
+  constexpr RT_API_ATTRS OwningPtr(std::nullptr_t) : OwningPtr() {}
+
+  // Delete the pointer, if owns one.
+  RT_API_ATTRS ~OwningPtr() {
+    if (ptr_ != pointer_type{}) {
+      delete_ptr(ptr_);
+      ptr_ = pointer_type{};
+    }
+  }
+
+  // Release the ownership.
+  RT_API_ATTRS pointer_type release() {
+    pointer_type p = ptr_;
+    ptr_ = pointer_type{};
+    return p;
+  }
+
+  // Replace the pointer.
+  RT_API_ATTRS void reset(pointer_type p = pointer_type{}) {
+    std::swap(ptr_, p);
+    if (p != pointer_type{}) {
+      // Delete the owned pointer.
+      delete_ptr(p);
+    }
+  }
+
+  // Exchange the pointer with another object.
+  RT_API_ATTRS void swap(OwningPtr &other) { std::swap(ptr_, other.ptr_); }
+
+  // Get the stored pointer.
+  RT_API_ATTRS pointer_type get() const { return ptr_; }
+
+  RT_API_ATTRS explicit operator bool() const {
+    return get() != pointer_type{};
+  }
+
+  RT_API_ATTRS typename std::add_lvalue_reference<A>::type operator*() const {
+    assert(get() != pointer_type{});
+    return *get();
+  }
+
+  RT_API_ATTRS pointer_type operator->() const { return get(); }
+
+private:
+  RT_API_ATTRS void delete_ptr(pointer_type p) { FreeMemory(p); }
+  pointer_type ptr_{};
 };
 
-template <typename A> using OwningPtr = std::unique_ptr<A, OwningPtrDeleter<A>>;
+template <typename X, typename Y>
+inline RT_API_ATTRS bool operator!=(
+    const OwningPtr<X> &x, const OwningPtr<Y> &y) {
+  return x.get() != y.get();
+}
+
+template <typename X>
+inline RT_API_ATTRS bool operator!=(const OwningPtr<X> &x, std::nullptr_t) {
+  return (bool)x;
+}
+
+template <typename X>
+inline RT_API_ATTRS bool operator!=(std::nullptr_t, const OwningPtr<X> &x) {
+  return (bool)x;
+}
 
 template <typename A> class SizedNew {
 public:

diff  --git a/flang/include/flang/Runtime/type-code.h b/flang/include/flang/Runtime/type-code.h
index fb18dba54980f69..3757840cfdef963 100644
--- a/flang/include/flang/Runtime/type-code.h
+++ b/flang/include/flang/Runtime/type-code.h
@@ -26,29 +26,33 @@ class TypeCode {
 
   RT_API_ATTRS int raw() const { return raw_; }
 
-  constexpr bool IsValid() const {
+  constexpr RT_API_ATTRS bool IsValid() const {
     return raw_ >= CFI_type_signed_char && raw_ <= CFI_TYPE_LAST;
   }
-  constexpr bool IsInteger() const {
+  constexpr RT_API_ATTRS bool IsInteger() const {
     return raw_ >= CFI_type_signed_char && raw_ <= CFI_type_ptrdiff_t;
   }
-  constexpr bool IsReal() const {
+  constexpr RT_API_ATTRS bool IsReal() const {
     return raw_ >= CFI_type_half_float && raw_ <= CFI_type_float128;
   }
-  constexpr bool IsComplex() const {
+  constexpr RT_API_ATTRS bool IsComplex() const {
     return raw_ >= CFI_type_half_float_Complex &&
         raw_ <= CFI_type_float128_Complex;
   }
-  constexpr bool IsCharacter() const {
+  constexpr RT_API_ATTRS bool IsCharacter() const {
     return raw_ == CFI_type_char || raw_ == CFI_type_char16_t ||
         raw_ == CFI_type_char32_t;
   }
-  constexpr bool IsLogical() const {
+  constexpr RT_API_ATTRS bool IsLogical() const {
     return raw_ == CFI_type_Bool ||
         (raw_ >= CFI_type_int_least8_t && raw_ <= CFI_type_int_least64_t);
   }
-  constexpr bool IsDerived() const { return raw_ == CFI_type_struct; }
-  constexpr bool IsIntrinsic() const { return IsValid() && !IsDerived(); }
+  constexpr RT_API_ATTRS bool IsDerived() const {
+    return raw_ == CFI_type_struct;
+  }
+  constexpr RT_API_ATTRS bool IsIntrinsic() const {
+    return IsValid() && !IsDerived();
+  }
 
   RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
   GetCategoryAndKind() const;
@@ -65,7 +69,7 @@ class TypeCode {
       return thisCK && thatCK && *thisCK == *thatCK;
     }
   }
-  bool operator!=(TypeCode that) const { return !(*this == that); }
+  RT_API_ATTRS bool operator!=(TypeCode that) const { return !(*this == that); }
 
 private:
   ISO::CFI_type_t raw_{CFI_type_other};

diff  --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 5b23065a32d1699..e7d416749219ef6 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -150,7 +150,10 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
 
 # List of files that are buildable for all devices.
 set(supported_files
+  descriptor.cpp
+  terminator.cpp
   transformational.cpp
+  type-code.cpp
   )
 
 if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
@@ -175,6 +178,11 @@ if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
       -Xclang -fcuda-allow-variadic-functions
       )
   endif()
+  if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
+    set(CUDA_COMPILE_OPTIONS
+      --expt-relaxed-constexpr
+      )
+  endif()
   set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
     "${CUDA_COMPILE_OPTIONS}"
     )

diff  --git a/flang/runtime/ISO_Fortran_util.h b/flang/runtime/ISO_Fortran_util.h
index 7d527bfd65789d8..469067600bd9033 100644
--- a/flang/runtime/ISO_Fortran_util.h
+++ b/flang/runtime/ISO_Fortran_util.h
@@ -18,15 +18,15 @@
 #include <cstdlib>
 
 namespace Fortran::ISO {
-static inline constexpr bool IsCharacterType(CFI_type_t ty) {
+static inline constexpr RT_API_ATTRS bool IsCharacterType(CFI_type_t ty) {
   return ty == CFI_type_char || ty == CFI_type_char16_t ||
       ty == CFI_type_char32_t;
 }
-static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
+static inline constexpr RT_API_ATTRS bool IsAssumedSize(const CFI_cdesc_t *dv) {
   return dv->rank > 0 && dv->dim[dv->rank - 1].extent == -1;
 }
 
-static inline std::size_t MinElemLen(CFI_type_t type) {
+static inline RT_API_ATTRS std::size_t MinElemLen(CFI_type_t type) {
   auto typeParams{Fortran::runtime::TypeCode{type}.GetCategoryAndKind()};
   if (!typeParams) {
     Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
@@ -38,10 +38,10 @@ static inline std::size_t MinElemLen(CFI_type_t type) {
       typeParams->first, typeParams->second);
 }
 
-static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
-    void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
-    std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[],
-    bool external) {
+static inline RT_API_ATTRS int VerifyEstablishParameters(
+    CFI_cdesc_t *descriptor, void *base_addr, CFI_attribute_t attribute,
+    CFI_type_t type, std::size_t elem_len, CFI_rank_t rank,
+    const CFI_index_t extents[], bool external) {
   if (attribute != CFI_attribute_other && attribute != CFI_attribute_pointer &&
       attribute != CFI_attribute_allocatable) {
     return CFI_INVALID_ATTRIBUTE;
@@ -77,9 +77,9 @@ static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
   return CFI_SUCCESS;
 }
 
-static inline void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
-    CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
-    CFI_rank_t rank, const CFI_index_t extents[]) {
+static inline RT_API_ATTRS void EstablishDescriptor(CFI_cdesc_t *descriptor,
+    void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
+    std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[]) {
   descriptor->base_addr = base_addr;
   descriptor->elem_len = elem_len;
   descriptor->version = CFI_VERSION;

diff  --git a/flang/runtime/derived.h b/flang/runtime/derived.h
index 747a93303e0dbc0..e43ecc34a31d1b0 100644
--- a/flang/runtime/derived.h
+++ b/flang/runtime/derived.h
@@ -11,6 +11,8 @@
 #ifndef FORTRAN_RUNTIME_DERIVED_H_
 #define FORTRAN_RUNTIME_DERIVED_H_
 
+#include "flang/Runtime/api-attrs.h"
+
 namespace Fortran::runtime::typeInfo {
 class DerivedType;
 }
@@ -21,21 +23,21 @@ class Terminator;
 
 // Perform default component initialization, allocate automatic components.
 // Returns a STAT= code (0 when all's well).
-int Initialize(const Descriptor &, const typeInfo::DerivedType &, Terminator &,
-    bool hasStat = false, const Descriptor *errMsg = nullptr);
+RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &,
+    Terminator &, bool hasStat = false, const Descriptor *errMsg = nullptr);
 
 // Call FINAL subroutines, if any
-void Finalize(
+RT_API_ATTRS void Finalize(
     const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);
 
 // Call FINAL subroutines, deallocate allocatable & automatic components.
 // Does not deallocate the original descriptor.
-void Destroy(const Descriptor &, bool finalize, const typeInfo::DerivedType &,
-    Terminator *);
+RT_API_ATTRS void Destroy(const Descriptor &, bool finalize,
+    const typeInfo::DerivedType &, Terminator *);
 
 // Return true if the passed descriptor is for a derived type
 // entity that has a dynamic (allocatable, automatic) component.
-bool HasDynamicComponent(const Descriptor &);
+RT_API_ATTRS bool HasDynamicComponent(const Descriptor &);
 
 } // namespace Fortran::runtime
 #endif // FORTRAN_RUNTIME_DERIVED_H_

diff  --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp
index ab6460708e9b68f..b36e4e409f04169 100644
--- a/flang/runtime/descriptor.cpp
+++ b/flang/runtime/descriptor.cpp
@@ -20,16 +20,18 @@
 
 namespace Fortran::runtime {
 
-Descriptor::Descriptor(const Descriptor &that) { *this = that; }
+RT_OFFLOAD_API_GROUP_BEGIN
 
-Descriptor &Descriptor::operator=(const Descriptor &that) {
+RT_API_ATTRS Descriptor::Descriptor(const Descriptor &that) { *this = that; }
+
+RT_API_ATTRS Descriptor &Descriptor::operator=(const Descriptor &that) {
   std::memcpy(this, &that, that.SizeInBytes());
   return *this;
 }
 
-void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
-    int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
-    bool addendum) {
+RT_API_ATTRS void Descriptor::Establish(TypeCode t, std::size_t elementBytes,
+    void *p, int rank, const SubscriptValue *extent,
+    ISO::CFI_attribute_t attribute, bool addendum) {
   Terminator terminator{__FILE__, __LINE__};
   int cfiStatus{ISO::VerifyEstablishParameters(&raw_, p, attribute, t.raw(),
       elementBytes, rank, extent, /*external=*/false)};
@@ -58,34 +60,35 @@ void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
 
 namespace {
 template <TypeCategory CAT, int KIND> struct TypeSizeGetter {
-  constexpr std::size_t operator()() const {
+  constexpr RT_API_ATTRS std::size_t operator()() const {
     CppTypeFor<CAT, KIND> arr[2];
     return sizeof arr / 2;
   }
 };
 } // namespace
 
-std::size_t Descriptor::BytesFor(TypeCategory category, int kind) {
+RT_API_ATTRS std::size_t Descriptor::BytesFor(TypeCategory category, int kind) {
   Terminator terminator{__FILE__, __LINE__};
   return ApplyType<TypeSizeGetter, std::size_t>(category, kind, terminator);
 }
 
-void Descriptor::Establish(TypeCategory c, int kind, void *p, int rank,
-    const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
+RT_API_ATTRS void Descriptor::Establish(TypeCategory c, int kind, void *p,
+    int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
     bool addendum) {
   Establish(TypeCode(c, kind), BytesFor(c, kind), p, rank, extent, attribute,
       addendum);
 }
 
-void Descriptor::Establish(int characterKind, std::size_t characters, void *p,
-    int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
-    bool addendum) {
+RT_API_ATTRS void Descriptor::Establish(int characterKind,
+    std::size_t characters, void *p, int rank, const SubscriptValue *extent,
+    ISO::CFI_attribute_t attribute, bool addendum) {
   Establish(TypeCode{TypeCategory::Character, characterKind},
       characterKind * characters, p, rank, extent, attribute, addendum);
 }
 
-void Descriptor::Establish(const typeInfo::DerivedType &dt, void *p, int rank,
-    const SubscriptValue *extent, ISO::CFI_attribute_t attribute) {
+RT_API_ATTRS void Descriptor::Establish(const typeInfo::DerivedType &dt,
+    void *p, int rank, const SubscriptValue *extent,
+    ISO::CFI_attribute_t attribute) {
   Establish(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, rank,
       extent, attribute, true);
   DescriptorAddendum *a{Addendum()};
@@ -94,8 +97,8 @@ void Descriptor::Establish(const typeInfo::DerivedType &dt, void *p, int rank,
   new (a) DescriptorAddendum{&dt};
 }
 
-OwningPtr<Descriptor> Descriptor::Create(TypeCode t, std::size_t elementBytes,
-    void *p, int rank, const SubscriptValue *extent,
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(TypeCode t,
+    std::size_t elementBytes, void *p, int rank, const SubscriptValue *extent,
     ISO::CFI_attribute_t attribute, int derivedTypeLenParameters) {
   std::size_t bytes{SizeInBytes(rank, true, derivedTypeLenParameters)};
   Terminator terminator{__FILE__, __LINE__};
@@ -105,33 +108,34 @@ OwningPtr<Descriptor> Descriptor::Create(TypeCode t, std::size_t elementBytes,
   return OwningPtr<Descriptor>{result};
 }
 
-OwningPtr<Descriptor> Descriptor::Create(TypeCategory c, int kind, void *p,
-    int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute) {
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(TypeCategory c, int kind,
+    void *p, int rank, const SubscriptValue *extent,
+    ISO::CFI_attribute_t attribute) {
   return Create(
       TypeCode(c, kind), BytesFor(c, kind), p, rank, extent, attribute);
 }
 
-OwningPtr<Descriptor> Descriptor::Create(int characterKind,
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(int characterKind,
     SubscriptValue characters, void *p, int rank, const SubscriptValue *extent,
     ISO::CFI_attribute_t attribute) {
   return Create(TypeCode{TypeCategory::Character, characterKind},
       characterKind * characters, p, rank, extent, attribute);
 }
 
-OwningPtr<Descriptor> Descriptor::Create(const typeInfo::DerivedType &dt,
-    void *p, int rank, const SubscriptValue *extent,
-    ISO::CFI_attribute_t attribute) {
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(
+    const typeInfo::DerivedType &dt, void *p, int rank,
+    const SubscriptValue *extent, ISO::CFI_attribute_t attribute) {
   return Create(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, rank,
       extent, attribute, dt.LenParameters());
 }
 
-std::size_t Descriptor::SizeInBytes() const {
+RT_API_ATTRS std::size_t Descriptor::SizeInBytes() const {
   const DescriptorAddendum *addendum{Addendum()};
   return sizeof *this - sizeof(Dimension) + raw_.rank * sizeof(Dimension) +
       (addendum ? addendum->SizeInBytes() : 0);
 }
 
-std::size_t Descriptor::Elements() const {
+RT_API_ATTRS std::size_t Descriptor::Elements() const {
   int n{rank()};
   std::size_t elements{1};
   for (int j{0}; j < n; ++j) {
@@ -140,7 +144,7 @@ std::size_t Descriptor::Elements() const {
   return elements;
 }
 
-int Descriptor::Allocate() {
+RT_API_ATTRS int Descriptor::Allocate() {
   std::size_t byteSize{Elements() * ElementBytes()};
   // Zero size allocation is possible in Fortran and the resulting
   // descriptor must be allocated/associated. Since std::malloc(0)
@@ -162,7 +166,7 @@ int Descriptor::Allocate() {
   return 0;
 }
 
-int Descriptor::Destroy(
+RT_API_ATTRS int Descriptor::Destroy(
     bool finalize, bool destroyPointers, Terminator *terminator) {
   if (!destroyPointers && raw_.attribute == CFI_attribute_pointer) {
     return StatOk;
@@ -178,9 +182,9 @@ int Descriptor::Destroy(
   }
 }
 
-int Descriptor::Deallocate() { return ISO::CFI_deallocate(&raw_); }
+RT_API_ATTRS int Descriptor::Deallocate() { return ISO::CFI_deallocate(&raw_); }
 
-bool Descriptor::DecrementSubscripts(
+RT_API_ATTRS bool Descriptor::DecrementSubscripts(
     SubscriptValue *subscript, const int *permutation) const {
   for (int j{raw_.rank - 1}; j >= 0; --j) {
     int k{permutation ? permutation[j] : j};
@@ -193,7 +197,7 @@ bool Descriptor::DecrementSubscripts(
   return false;
 }
 
-std::size_t Descriptor::ZeroBasedElementNumber(
+RT_API_ATTRS std::size_t Descriptor::ZeroBasedElementNumber(
     const SubscriptValue *subscript, const int *permutation) const {
   std::size_t result{0};
   std::size_t coefficient{1};
@@ -206,7 +210,7 @@ std::size_t Descriptor::ZeroBasedElementNumber(
   return result;
 }
 
-bool Descriptor::EstablishPointerSection(const Descriptor &source,
+RT_API_ATTRS bool Descriptor::EstablishPointerSection(const Descriptor &source,
     const SubscriptValue *lower, const SubscriptValue *upper,
     const SubscriptValue *stride) {
   *this = source;
@@ -232,7 +236,7 @@ bool Descriptor::EstablishPointerSection(const Descriptor &source,
   return CFI_section(&raw_, &source.raw_, lower, upper, stride) == CFI_SUCCESS;
 }
 
-void Descriptor::Check() const {
+RT_API_ATTRS void Descriptor::Check() const {
   // TODO
 }
 
@@ -258,7 +262,7 @@ void Descriptor::Dump(FILE *f) const {
   }
 }
 
-DescriptorAddendum &DescriptorAddendum::operator=(
+RT_API_ATTRS DescriptorAddendum &DescriptorAddendum::operator=(
     const DescriptorAddendum &that) {
   derivedType_ = that.derivedType_;
   auto lenParms{that.LenParameters()};
@@ -268,11 +272,11 @@ DescriptorAddendum &DescriptorAddendum::operator=(
   return *this;
 }
 
-std::size_t DescriptorAddendum::SizeInBytes() const {
+RT_API_ATTRS std::size_t DescriptorAddendum::SizeInBytes() const {
   return SizeInBytes(LenParameters());
 }
 
-std::size_t DescriptorAddendum::LenParameters() const {
+RT_API_ATTRS std::size_t DescriptorAddendum::LenParameters() const {
   const auto *type{derivedType()};
   return type ? type->LenParameters() : 0;
 }
@@ -285,4 +289,7 @@ void DescriptorAddendum::Dump(FILE *f) const {
     std::fprintf(f, "  len[%zd] %jd\n", j, static_cast<std::intmax_t>(len_[j]));
   }
 }
+
+RT_OFFLOAD_API_GROUP_END
+
 } // namespace Fortran::runtime

diff  --git a/flang/runtime/terminator.cpp b/flang/runtime/terminator.cpp
index f242ac6f2de2293..bab9edc64fa35b8 100644
--- a/flang/runtime/terminator.cpp
+++ b/flang/runtime/terminator.cpp
@@ -12,26 +12,47 @@
 
 namespace Fortran::runtime {
 
-[[noreturn]] void Terminator::Crash(const char *message, ...) const {
-  va_list ap;
-  va_start(ap, message);
-  CrashArgs(message, ap);
-  va_end(ap);
-}
-
-static void (*crashHandler)(const char *, int, const char *, va_list &){
-    nullptr};
+#if !defined(RT_DEVICE_COMPILATION)
+[[maybe_unused]] static void (*crashHandler)(
+    const char *, int, const char *, va_list &){nullptr};
 
 void Terminator::RegisterCrashHandler(
     void (*handler)(const char *, int, const char *, va_list &)) {
   crashHandler = handler;
 }
 
-[[noreturn]] void Terminator::CrashArgs(
-    const char *message, va_list &ap) const {
+void Terminator::InvokeCrashHandler(const char *message, ...) const {
   if (crashHandler) {
+    va_list ap;
+    va_start(ap, message);
     crashHandler(sourceFileName_, sourceLine_, message, ap);
+    va_end(ap);
+  }
+}
+
+[[noreturn]] void Terminator::CrashArgs(
+    const char *message, va_list &ap) const {
+  CrashHeader();
+  std::vfprintf(stderr, message, ap);
+  va_end(ap);
+  CrashFooter();
+}
+#endif
+
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS void Terminator::CrashHeader() const {
+#if defined(RT_DEVICE_COMPILATION)
+  std::printf("\nfatal Fortran runtime error");
+  if (sourceFileName_) {
+    std::printf("(%s", sourceFileName_);
+    if (sourceLine_) {
+      std::printf(":%d", sourceLine_);
+    }
+    std::printf(")");
   }
+  std::printf(": ");
+#else
   std::fputs("\nfatal Fortran runtime error", stderr);
   if (sourceFileName_) {
     std::fprintf(stderr, "(%s", sourceFileName_);
@@ -41,27 +62,50 @@ void Terminator::RegisterCrashHandler(
     fputc(')', stderr);
   }
   std::fputs(": ", stderr);
-  std::vfprintf(stderr, message, ap);
+#endif
+}
+
+[[noreturn]] RT_API_ATTRS void Terminator::CrashFooter() const {
+#if defined(RT_DEVICE_COMPILATION)
+  std::printf("\n");
+#else
   fputc('\n', stderr);
-  va_end(ap);
+  // FIXME: re-enable the flush along with the IO enabling.
   io::FlushOutputOnCrash(*this);
+#endif
   NotifyOtherImagesOfErrorTermination();
+#if defined(RT_DEVICE_COMPILATION)
+#if defined(__CUDACC__)
+  // NVCC supports __trap().
+  __trap();
+#elif defined(__clang__)
+  // Clang supports __builtin_trap().
+  __builtin_trap();
+#else
+#error "unsupported compiler"
+#endif
+#else
   std::abort();
+#endif
 }
 
-[[noreturn]] void Terminator::CheckFailed(
+[[noreturn]] RT_API_ATTRS void Terminator::CheckFailed(
     const char *predicate, const char *file, int line) const {
   Crash("Internal error: RUNTIME_CHECK(%s) failed at %s(%d)", predicate, file,
       line);
 }
 
-[[noreturn]] void Terminator::CheckFailed(const char *predicate) const {
+[[noreturn]] RT_API_ATTRS void Terminator::CheckFailed(
+    const char *predicate) const {
   Crash("Internal error: RUNTIME_CHECK(%s) failed at %s(%d)", predicate,
       sourceFileName_, sourceLine_);
 }
 
 // TODO: These will be defined in the coarray runtime library
-void NotifyOtherImagesOfNormalEnd() {}
-void NotifyOtherImagesOfFailImageStatement() {}
-void NotifyOtherImagesOfErrorTermination() {}
+RT_API_ATTRS void NotifyOtherImagesOfNormalEnd() {}
+RT_API_ATTRS void NotifyOtherImagesOfFailImageStatement() {}
+RT_API_ATTRS void NotifyOtherImagesOfErrorTermination() {}
+
+RT_OFFLOAD_API_GROUP_END
+
 } // namespace Fortran::runtime

diff  --git a/flang/runtime/terminator.h b/flang/runtime/terminator.h
index 84b4b1d79bf76e2..444c68d109eedf6 100644
--- a/flang/runtime/terminator.h
+++ b/flang/runtime/terminator.h
@@ -13,6 +13,8 @@
 
 #include "flang/Runtime/api-attrs.h"
 #include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
 
 namespace Fortran::runtime {
 
@@ -20,26 +22,70 @@ namespace Fortran::runtime {
 // for errors detected in the runtime library
 class Terminator {
 public:
-  Terminator() {}
+  RT_API_ATTRS Terminator() {}
   Terminator(const Terminator &) = default;
   explicit RT_API_ATTRS Terminator(
       const char *sourceFileName, int sourceLine = 0)
       : sourceFileName_{sourceFileName}, sourceLine_{sourceLine} {}
 
-  const char *sourceFileName() const { return sourceFileName_; }
-  int sourceLine() const { return sourceLine_; }
+  RT_API_ATTRS const char *sourceFileName() const { return sourceFileName_; }
+  RT_API_ATTRS int sourceLine() const { return sourceLine_; }
 
-  void SetLocation(const char *sourceFileName = nullptr, int sourceLine = 0) {
+  RT_API_ATTRS void SetLocation(
+      const char *sourceFileName = nullptr, int sourceLine = 0) {
     sourceFileName_ = sourceFileName;
     sourceLine_ = sourceLine;
   }
 
-  // CUDA_TODO: Clang for CUDA does not support varargs, though
-  // it compiles it with -fcuda-allow-variadic-functions.
-  // We can try to replace varargs functions with variadic templates.
-  [[noreturn]] RT_API_ATTRS void Crash(const char *message, ...) const;
-  [[noreturn]] RT_API_ATTRS void CrashArgs(
-      const char *message, va_list &) const;
+  // Silence compiler warnings about the format string being
+  // non-literal. A more precise control would be
+  // __attribute__((format_arg(2))), but it requires the function
+  // to return 'char *', which does not work well with noreturn.
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wformat-security"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-security"
+#endif
+
+  // Device offload compilers do not normally support varargs and va_list,
+  // so use C++ variadic templates to forward the crash arguments
+  // to regular printf for the device compilation.
+  // Try to keep the inline implementations as small as possible.
+  template <typename... Args>
+  [[noreturn]] RT_API_ATTRS const char *Crash(
+      const char *message, Args... args) const {
+#if !defined(RT_DEVICE_COMPILATION)
+    // Invoke handler set up by the test harness.
+    InvokeCrashHandler(message, args...);
+#endif
+    CrashHeader();
+    PrintCrashArgs(message, args...);
+    CrashFooter();
+  }
+
+  template <typename... Args>
+  RT_API_ATTRS void PrintCrashArgs(const char *message, Args... args) const {
+#if RT_DEVICE_COMPILATION
+    std::printf(message, args...);
+#else
+    std::fprintf(stderr, message, args...);
+#endif
+  }
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
+  RT_API_ATTRS void CrashHeader() const;
+  [[noreturn]] RT_API_ATTRS void CrashFooter() const;
+#if !defined(RT_DEVICE_COMPILATION)
+  void InvokeCrashHandler(const char *message, ...) const;
+  [[noreturn]] void CrashArgs(const char *message, va_list &) const;
+#endif
   [[noreturn]] RT_API_ATTRS void CheckFailed(
       const char *predicate, const char *file, int line) const;
   [[noreturn]] RT_API_ATTRS void CheckFailed(const char *predicate) const;
@@ -66,13 +112,13 @@ class Terminator {
   else \
     Terminator{__FILE__, __LINE__}.CheckFailed(#pred)
 
-void NotifyOtherImagesOfNormalEnd();
-void NotifyOtherImagesOfFailImageStatement();
-void NotifyOtherImagesOfErrorTermination();
+RT_API_ATTRS void NotifyOtherImagesOfNormalEnd();
+RT_API_ATTRS void NotifyOtherImagesOfFailImageStatement();
+RT_API_ATTRS void NotifyOtherImagesOfErrorTermination();
 } // namespace Fortran::runtime
 
 namespace Fortran::runtime::io {
-void FlushOutputOnCrash(const Terminator &);
+RT_API_ATTRS void FlushOutputOnCrash(const Terminator &);
 }
 
 #endif // FORTRAN_RUNTIME_TERMINATOR_H_

diff  --git a/flang/runtime/type-code.cpp b/flang/runtime/type-code.cpp
index b9ce519dc14941b..b9ef307835dfba3 100644
--- a/flang/runtime/type-code.cpp
+++ b/flang/runtime/type-code.cpp
@@ -10,7 +10,9 @@
 
 namespace Fortran::runtime {
 
-TypeCode::TypeCode(TypeCategory f, int kind) {
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS TypeCode::TypeCode(TypeCategory f, int kind) {
   switch (f) {
   case TypeCategory::Integer:
     switch (kind) {
@@ -110,7 +112,7 @@ TypeCode::TypeCode(TypeCategory f, int kind) {
   }
 }
 
-std::optional<std::pair<TypeCategory, int>>
+RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
 TypeCode::GetCategoryAndKind() const {
   switch (raw_) {
   case CFI_type_signed_char:
@@ -205,4 +207,7 @@ TypeCode::GetCategoryAndKind() const {
     return std::nullopt;
   }
 }
+
+RT_OFFLOAD_API_GROUP_END
+
 } // namespace Fortran::runtime

diff  --git a/flang/runtime/type-info.h b/flang/runtime/type-info.h
index 1f6c56742b6f7c2..bd8112d9d6d8c06 100644
--- a/flang/runtime/type-info.h
+++ b/flang/runtime/type-info.h
@@ -38,8 +38,9 @@ class Value {
     Explicit = 2,
     LenParameter = 3
   };
-  Genre genre() const { return genre_; }
-  std::optional<TypeParameterValue> GetValue(const Descriptor *) const;
+  RT_API_ATTRS Genre genre() const { return genre_; }
+  RT_API_ATTRS std::optional<TypeParameterValue> GetValue(
+      const Descriptor *) const;
 
 private:
   Genre genre_{Genre::Explicit};
@@ -57,39 +58,42 @@ class Component {
     Automatic = 4
   };
 
-  const Descriptor &name() const { return name_.descriptor(); }
-  Genre genre() const { return genre_; }
-  TypeCategory category() const { return static_cast<TypeCategory>(category_); }
-  int kind() const { return kind_; }
-  int rank() const { return rank_; }
-  std::uint64_t offset() const { return offset_; }
-  const Value &characterLen() const { return characterLen_; }
-  const DerivedType *derivedType() const {
+  const RT_API_ATTRS Descriptor &name() const { return name_.descriptor(); }
+  RT_API_ATTRS Genre genre() const { return genre_; }
+  RT_API_ATTRS TypeCategory category() const {
+    return static_cast<TypeCategory>(category_);
+  }
+  RT_API_ATTRS int kind() const { return kind_; }
+  RT_API_ATTRS int rank() const { return rank_; }
+  RT_API_ATTRS std::uint64_t offset() const { return offset_; }
+  const RT_API_ATTRS Value &characterLen() const { return characterLen_; }
+  const RT_API_ATTRS DerivedType *derivedType() const {
     return derivedType_.descriptor().OffsetElement<const DerivedType>();
   }
-  const Value *lenValue() const {
+  const RT_API_ATTRS Value *lenValue() const {
     return lenValue_.descriptor().OffsetElement<const Value>();
   }
-  const Value *bounds() const {
+  const RT_API_ATTRS Value *bounds() const {
     return bounds_.descriptor().OffsetElement<const Value>();
   }
-  const char *initialization() const { return initialization_; }
+  const RT_API_ATTRS char *initialization() const { return initialization_; }
 
-  std::size_t GetElementByteSize(const Descriptor &) const;
-  std::size_t GetElements(const Descriptor &) const;
+  RT_API_ATTRS std::size_t GetElementByteSize(const Descriptor &) const;
+  RT_API_ATTRS std::size_t GetElements(const Descriptor &) const;
 
   // For components that are descriptors, returns size of descriptor;
   // for Genre::Data, returns elemental byte size times element count.
-  std::size_t SizeInBytes(const Descriptor &) const;
+  RT_API_ATTRS std::size_t SizeInBytes(const Descriptor &) const;
 
   // Establishes a descriptor from this component description.
-  void EstablishDescriptor(
+  RT_API_ATTRS void EstablishDescriptor(
       Descriptor &, const Descriptor &container, Terminator &) const;
 
   // Creates a pointer descriptor from this component description, possibly
   // with subscripts
-  void CreatePointerDescriptor(Descriptor &, const Descriptor &container,
-      Terminator &, const SubscriptValue * = nullptr) const;
+  RT_API_ATTRS void CreatePointerDescriptor(Descriptor &,
+      const Descriptor &container, Terminator &,
+      const SubscriptValue * = nullptr) const;
 
   FILE *Dump(FILE * = stdout) const;
 
@@ -135,25 +139,26 @@ class SpecialBinding {
 
   // Special bindings can be created during execution to handle defined
   // I/O procedures that are not type-bound.
-  SpecialBinding(Which which, ProcedurePointer proc, std::uint8_t isArgDescSet,
-      std::uint8_t isTypeBound, std::uint8_t isArgContiguousSet)
+  RT_API_ATTRS SpecialBinding(Which which, ProcedurePointer proc,
+      std::uint8_t isArgDescSet, std::uint8_t isTypeBound,
+      std::uint8_t isArgContiguousSet)
       : which_{which}, isArgDescriptorSet_{isArgDescSet},
         isTypeBound_{isTypeBound}, isArgContiguousSet_{isArgContiguousSet},
         proc_{proc} {}
 
-  static constexpr Which RankFinal(int rank) {
+  static constexpr RT_API_ATTRS Which RankFinal(int rank) {
     return static_cast<Which>(static_cast<int>(Which::ScalarFinal) + rank);
   }
 
-  Which which() const { return which_; }
-  bool IsArgDescriptor(int zeroBasedArg) const {
+  RT_API_ATTRS Which which() const { return which_; }
+  RT_API_ATTRS bool IsArgDescriptor(int zeroBasedArg) const {
     return (isArgDescriptorSet_ >> zeroBasedArg) & 1;
   }
-  bool isTypeBound() const { return isTypeBound_; }
-  bool IsArgContiguous(int zeroBasedArg) const {
+  RT_API_ATTRS bool isTypeBound() const { return isTypeBound_; }
+  RT_API_ATTRS bool IsArgContiguous(int zeroBasedArg) const {
     return (isArgContiguousSet_ >> zeroBasedArg) & 1;
   }
-  template <typename PROC> PROC GetProc() const {
+  template <typename PROC> RT_API_ATTRS PROC GetProc() const {
     return reinterpret_cast<PROC>(proc_);
   }
 
@@ -200,36 +205,51 @@ class DerivedType {
 public:
   ~DerivedType(); // never defined
 
-  const Descriptor &binding() const { return binding_.descriptor(); }
-  const Descriptor &name() const { return name_.descriptor(); }
-  std::uint64_t sizeInBytes() const { return sizeInBytes_; }
-  const Descriptor &uninstatiated() const {
+  const RT_API_ATTRS Descriptor &binding() const {
+    return binding_.descriptor();
+  }
+  const RT_API_ATTRS Descriptor &name() const { return name_.descriptor(); }
+  RT_API_ATTRS std::uint64_t sizeInBytes() const { return sizeInBytes_; }
+  const RT_API_ATTRS Descriptor &uninstatiated() const {
     return uninstantiated_.descriptor();
   }
-  const Descriptor &kindParameter() const {
+  const RT_API_ATTRS Descriptor &kindParameter() const {
     return kindParameter_.descriptor();
   }
-  const Descriptor &lenParameterKind() const {
+  const RT_API_ATTRS Descriptor &lenParameterKind() const {
     return lenParameterKind_.descriptor();
   }
-  const Descriptor &component() const { return component_.descriptor(); }
-  const Descriptor &procPtr() const { return procPtr_.descriptor(); }
-  const Descriptor &special() const { return special_.descriptor(); }
-  bool hasParent() const { return hasParent_; }
-  bool noInitializationNeeded() const { return noInitializationNeeded_; }
-  bool noDestructionNeeded() const { return noDestructionNeeded_; }
-  bool noFinalizationNeeded() const { return noFinalizationNeeded_; }
+  const RT_API_ATTRS Descriptor &component() const {
+    return component_.descriptor();
+  }
+  const RT_API_ATTRS Descriptor &procPtr() const {
+    return procPtr_.descriptor();
+  }
+  const RT_API_ATTRS Descriptor &special() const {
+    return special_.descriptor();
+  }
+  RT_API_ATTRS bool hasParent() const { return hasParent_; }
+  RT_API_ATTRS bool noInitializationNeeded() const {
+    return noInitializationNeeded_;
+  }
+  RT_API_ATTRS bool noDestructionNeeded() const { return noDestructionNeeded_; }
+  RT_API_ATTRS bool noFinalizationNeeded() const {
+    return noFinalizationNeeded_;
+  }
 
-  std::size_t LenParameters() const { return lenParameterKind().Elements(); }
+  RT_API_ATTRS std::size_t LenParameters() const {
+    return lenParameterKind().Elements();
+  }
 
-  const DerivedType *GetParentType() const;
+  const RT_API_ATTRS DerivedType *GetParentType() const;
 
   // Finds a data component by name in this derived type or its ancestors.
-  const Component *FindDataComponent(
+  const RT_API_ATTRS Component *FindDataComponent(
       const char *name, std::size_t nameLen) const;
 
   // O(1) look-up of special procedure bindings
-  const SpecialBinding *FindSpecialBinding(SpecialBinding::Which which) const {
+  const RT_API_ATTRS SpecialBinding *FindSpecialBinding(
+      SpecialBinding::Which which) const {
     auto bitIndex{static_cast<std::uint32_t>(which)};
     auto bit{std::uint32_t{1} << bitIndex};
     if (specialBitSet_ & bit) {


        


More information about the flang-commits mailing list