[flang-commits] [flang] [flang][runtime] Enable more code for offload device builds. (PR #67489)
Slava Zakharin via flang-commits
flang-commits at lists.llvm.org
Tue Sep 26 14:14:02 PDT 2023
https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/67489
I extended the "closure" of the device code containing the initial
transformational.cpp. The device side of the library should now be
complete, at least for some APIs. For example, I tested with C OpenMP
code calling BesselJnX0 with a nullptr descriptor, which failed with
a runtime error when executing on a GPU.
I added `--expt-relaxed-constexpr` for the NVCC compiler to avoid the
many warnings about a missing __attribute__((device)) on constexpr
methods coming from C++ header files.
>From cc925ab9cabca65b2e872146c435d33956a5eddc Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 26 Sep 2023 13:22:01 -0700
Subject: [PATCH] [flang][runtime] Enable more code for offload device builds.
I extended the "closure" of the device code containing the initial
transformational.cpp. The device side of the library should now be
complete, at least for some APIs. For example, I tested with C OpenMP
code calling BesselJnX0 with a nullptr descriptor, which failed with
a runtime error when executing on a GPU.
I added `--expt-relaxed-constexpr` for the NVCC compiler to avoid the
many warnings about a missing __attribute__((device)) on constexpr
methods coming from C++ header files.
---
flang/include/flang/Runtime/api-attrs.h | 23 +++++
flang/include/flang/Runtime/descriptor.h | 8 +-
flang/include/flang/Runtime/memory.h | 105 +++++++++++++++++++++--
flang/include/flang/Runtime/type-code.h | 18 ++--
flang/runtime/CMakeLists.txt | 8 ++
flang/runtime/ISO_Fortran_util.h | 10 +--
flang/runtime/derived.h | 10 ++-
flang/runtime/descriptor.cpp | 53 ++++++------
flang/runtime/terminator.cpp | 77 +++++++++++++----
flang/runtime/terminator.h | 71 ++++++++++++---
flang/runtime/type-code.cpp | 9 +-
flang/runtime/type-info.h | 84 +++++++++---------
12 files changed, 350 insertions(+), 126 deletions(-)
diff --git a/flang/include/flang/Runtime/api-attrs.h b/flang/include/flang/Runtime/api-attrs.h
index a866625a7b95ba4..0768682cadbdcbb 100644
--- a/flang/include/flang/Runtime/api-attrs.h
+++ b/flang/include/flang/Runtime/api-attrs.h
@@ -42,6 +42,18 @@
#endif
#endif /* !defined(RT_EXT_API_GROUP_END) */
+/*
+ * RT_OFFLOAD_API_GROUP_BEGIN/END pair is placed around definitions
+ * of functions that can be referenced in other modules of Flang
+ * runtime. For OpenMP offload these functions are made "declare target"
+ * making sure they are compiled for the target even though direct
+ * references to them from other "declare target" functions may not
+ * be seen. Host-only functions should not be put in between these
+ * two macros.
+ */
+#define RT_OFFLOAD_API_GROUP_BEGIN RT_EXT_API_GROUP_BEGIN
+#define RT_OFFLOAD_API_GROUP_END RT_EXT_API_GROUP_END
+
/*
* RT_VAR_GROUP_BEGIN/END pair is placed around definitions
* of module scope variables referenced by Flang runtime (directly
@@ -88,4 +100,15 @@
#endif
#endif /* !defined(RT_CONST_VAR_ATTRS) */
+/*
+ * RT_DEVICE_COMPILATION is defined for any device compilation.
+ * Note that it can only be used reliably with compilers that perform
+ * separate host and device compilations.
+ */
+#if ((defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__)) || (defined(_OPENMP) && (defined(__AMDGCN__) || defined(__NVPTX__)))
+#define RT_DEVICE_COMPILATION 1
+#else
+#undef RT_DEVICE_COMPILATION
+#endif
+
#endif /* !FORTRAN_RUNTIME_API_ATTRS_H_ */
diff --git a/flang/include/flang/Runtime/descriptor.h b/flang/include/flang/Runtime/descriptor.h
index 62a8d123bf2ee06..09077ec849acee0 100644
--- a/flang/include/flang/Runtime/descriptor.h
+++ b/flang/include/flang/Runtime/descriptor.h
@@ -181,19 +181,19 @@ class Descriptor {
ISO::CFI_attribute_t attribute = CFI_attribute_other);
// CUDA_TODO: Clang does not support unique_ptr on device.
- static OwningPtr<Descriptor> Create(TypeCode t, std::size_t elementBytes,
+ static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCode t, std::size_t elementBytes,
void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other,
int derivedTypeLenParameters = 0);
- static OwningPtr<Descriptor> Create(TypeCategory, int kind, void *p = nullptr,
+ static RT_API_ATTRS OwningPtr<Descriptor> Create(TypeCategory, int kind, void *p = nullptr,
int rank = maxRank, const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
- static OwningPtr<Descriptor> Create(int characterKind,
+ static RT_API_ATTRS OwningPtr<Descriptor> Create(int characterKind,
SubscriptValue characters, void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
- static OwningPtr<Descriptor> Create(const typeInfo::DerivedType &dt,
+ static RT_API_ATTRS OwningPtr<Descriptor> Create(const typeInfo::DerivedType &dt,
void *p = nullptr, int rank = maxRank,
const SubscriptValue *extent = nullptr,
ISO::CFI_attribute_t attribute = CFI_attribute_other);
diff --git a/flang/include/flang/Runtime/memory.h b/flang/include/flang/Runtime/memory.h
index 0afe5250169d0b4..579ba78a1c93b20 100644
--- a/flang/include/flang/Runtime/memory.h
+++ b/flang/include/flang/Runtime/memory.h
@@ -12,19 +12,22 @@
#ifndef FORTRAN_RUNTIME_MEMORY_H_
#define FORTRAN_RUNTIME_MEMORY_H_
+#include "flang/Runtime/api-attrs.h"
+#include <cassert>
#include <memory>
+#include <type_traits>
namespace Fortran::runtime {
class Terminator;
-[[nodiscard]] void *AllocateMemoryOrCrash(
+[[nodiscard]] RT_API_ATTRS void *AllocateMemoryOrCrash(
const Terminator &, std::size_t bytes);
template <typename A> [[nodiscard]] A &AllocateOrCrash(const Terminator &t) {
return *reinterpret_cast<A *>(AllocateMemoryOrCrash(t, sizeof(A)));
}
-void FreeMemory(void *);
-template <typename A> void FreeMemory(A *p) {
+RT_API_ATTRS void FreeMemory(void *);
+template <typename A> RT_API_ATTRS void FreeMemory(A *p) {
FreeMemory(reinterpret_cast<void *>(p));
}
template <typename A> void FreeMemoryAndNullify(A *&p) {
@@ -32,11 +35,101 @@ template <typename A> void FreeMemoryAndNullify(A *&p) {
p = nullptr;
}
-template <typename A> struct OwningPtrDeleter {
- void operator()(A *p) { FreeMemory(p); }
+// Very basic implementation mimicking std::unique_ptr.
+// It should work for any offload device compiler.
+// It uses a fixed memory deleter based on FreeMemory(),
+// and does not support array objects with runtime length.
+template <typename A>
+class OwningPtr {
+public:
+ using pointer_type = A *;
+
+ OwningPtr() = default;
+ RT_API_ATTRS explicit OwningPtr(pointer_type p) : ptr_(p) {}
+ RT_API_ATTRS OwningPtr(const OwningPtr &) = delete;
+ RT_API_ATTRS OwningPtr& operator=(const OwningPtr &) = delete;
+ RT_API_ATTRS OwningPtr(OwningPtr &&other) {
+ ptr_ = other.ptr_;
+ other.ptr_ = pointer_type();
+ }
+ RT_API_ATTRS OwningPtr &operator=(OwningPtr &&other) {
+ if (this != &other) {
+ delete_ptr(ptr_);
+ ptr_ = other.ptr_;
+ other.ptr_ = pointer_type();
+ }
+ return *this;
+ }
+ constexpr RT_API_ATTRS OwningPtr(std::nullptr_t) : OwningPtr() { }
+
+ // Delete the pointer, if it owns one.
+ RT_API_ATTRS ~OwningPtr() {
+ if (ptr_ != pointer_type()) {
+ delete_ptr(ptr_);
+ ptr_ = pointer_type();
+ }
+ }
+
+ // Release the ownership.
+ RT_API_ATTRS pointer_type release() {
+ pointer_type p = ptr_;
+ ptr_ = pointer_type();
+ return p;
+ }
+
+ // Replace the pointer.
+ RT_API_ATTRS void reset(pointer_type p = pointer_type()) {
+ std::swap(ptr_, p);
+ if (p != pointer_type()) {
+ // Delete the owned pointer.
+ delete_ptr(p);
+ }
+ }
+
+ // Exchange the pointer with another object.
+ RT_API_ATTRS void swap(OwningPtr &other) {
+ std::swap(ptr_, other.ptr_);
+ }
+
+ // Get the stored pointer.
+ RT_API_ATTRS pointer_type get() const {
+ return ptr_;
+ }
+
+ RT_API_ATTRS explicit operator bool() const {
+ return get() == pointer_type() ? false : true;
+ }
+
+ RT_API_ATTRS typename std::add_lvalue_reference<A>::type operator*() const {
+ assert(get() != pointer_type());
+ return *get();
+ }
+
+ RT_API_ATTRS pointer_type operator->() const {
+ return get();
+ }
+
+private:
+ RT_API_ATTRS void delete_ptr(pointer_type p) {
+ FreeMemory(p);
+ }
+ pointer_type ptr_{};
};
-template <typename A> using OwningPtr = std::unique_ptr<A, OwningPtrDeleter<A>>;
+template <typename X, typename Y>
+inline RT_API_ATTRS bool operator!=(const OwningPtr<X> &x, const OwningPtr<Y> &y) {
+ return x.get() != y.get();
+}
+
+template <typename X>
+inline RT_API_ATTRS bool operator!=(const OwningPtr<X> &x, std::nullptr_t) {
+ return (bool)x;
+}
+
+template <typename X>
+inline RT_API_ATTRS bool operator!=(std::nullptr_t, const OwningPtr<X> &x) {
+ return (bool)x;
+}
template <typename A> class SizedNew {
public:
diff --git a/flang/include/flang/Runtime/type-code.h b/flang/include/flang/Runtime/type-code.h
index fb18dba54980f69..172355609e26128 100644
--- a/flang/include/flang/Runtime/type-code.h
+++ b/flang/include/flang/Runtime/type-code.h
@@ -26,29 +26,29 @@ class TypeCode {
RT_API_ATTRS int raw() const { return raw_; }
- constexpr bool IsValid() const {
+ constexpr RT_API_ATTRS bool IsValid() const {
return raw_ >= CFI_type_signed_char && raw_ <= CFI_TYPE_LAST;
}
- constexpr bool IsInteger() const {
+ constexpr RT_API_ATTRS bool IsInteger() const {
return raw_ >= CFI_type_signed_char && raw_ <= CFI_type_ptrdiff_t;
}
- constexpr bool IsReal() const {
+ constexpr RT_API_ATTRS bool IsReal() const {
return raw_ >= CFI_type_half_float && raw_ <= CFI_type_float128;
}
- constexpr bool IsComplex() const {
+ constexpr RT_API_ATTRS bool IsComplex() const {
return raw_ >= CFI_type_half_float_Complex &&
raw_ <= CFI_type_float128_Complex;
}
- constexpr bool IsCharacter() const {
+ constexpr RT_API_ATTRS bool IsCharacter() const {
return raw_ == CFI_type_char || raw_ == CFI_type_char16_t ||
raw_ == CFI_type_char32_t;
}
- constexpr bool IsLogical() const {
+ constexpr RT_API_ATTRS bool IsLogical() const {
return raw_ == CFI_type_Bool ||
(raw_ >= CFI_type_int_least8_t && raw_ <= CFI_type_int_least64_t);
}
- constexpr bool IsDerived() const { return raw_ == CFI_type_struct; }
- constexpr bool IsIntrinsic() const { return IsValid() && !IsDerived(); }
+ constexpr RT_API_ATTRS bool IsDerived() const { return raw_ == CFI_type_struct; }
+ constexpr RT_API_ATTRS bool IsIntrinsic() const { return IsValid() && !IsDerived(); }
RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
GetCategoryAndKind() const;
@@ -65,7 +65,7 @@ class TypeCode {
return thisCK && thatCK && *thisCK == *thatCK;
}
}
- bool operator!=(TypeCode that) const { return !(*this == that); }
+ RT_API_ATTRS bool operator!=(TypeCode that) const { return !(*this == that); }
private:
ISO::CFI_type_t raw_{CFI_type_other};
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 5b23065a32d1699..e7d416749219ef6 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -150,7 +150,10 @@ option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
# List of files that are buildable for all devices.
set(supported_files
+ descriptor.cpp
+ terminator.cpp
transformational.cpp
+ type-code.cpp
)
if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
@@ -175,6 +178,11 @@ if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
-Xclang -fcuda-allow-variadic-functions
)
endif()
+ if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
+ set(CUDA_COMPILE_OPTIONS
+ --expt-relaxed-constexpr
+ )
+ endif()
set_source_files_properties(${supported_files} PROPERTIES COMPILE_OPTIONS
"${CUDA_COMPILE_OPTIONS}"
)
diff --git a/flang/runtime/ISO_Fortran_util.h b/flang/runtime/ISO_Fortran_util.h
index 7d527bfd65789d8..d63cda8931f37b7 100644
--- a/flang/runtime/ISO_Fortran_util.h
+++ b/flang/runtime/ISO_Fortran_util.h
@@ -18,15 +18,15 @@
#include <cstdlib>
namespace Fortran::ISO {
-static inline constexpr bool IsCharacterType(CFI_type_t ty) {
+static inline constexpr RT_API_ATTRS bool IsCharacterType(CFI_type_t ty) {
return ty == CFI_type_char || ty == CFI_type_char16_t ||
ty == CFI_type_char32_t;
}
-static inline constexpr bool IsAssumedSize(const CFI_cdesc_t *dv) {
+static inline constexpr RT_API_ATTRS bool IsAssumedSize(const CFI_cdesc_t *dv) {
return dv->rank > 0 && dv->dim[dv->rank - 1].extent == -1;
}
-static inline std::size_t MinElemLen(CFI_type_t type) {
+static inline RT_API_ATTRS std::size_t MinElemLen(CFI_type_t type) {
auto typeParams{Fortran::runtime::TypeCode{type}.GetCategoryAndKind()};
if (!typeParams) {
Fortran::runtime::Terminator terminator{__FILE__, __LINE__};
@@ -38,7 +38,7 @@ static inline std::size_t MinElemLen(CFI_type_t type) {
typeParams->first, typeParams->second);
}
-static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
+static inline RT_API_ATTRS int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
void *base_addr, CFI_attribute_t attribute, CFI_type_t type,
std::size_t elem_len, CFI_rank_t rank, const CFI_index_t extents[],
bool external) {
@@ -77,7 +77,7 @@ static inline int VerifyEstablishParameters(CFI_cdesc_t *descriptor,
return CFI_SUCCESS;
}
-static inline void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
+static inline RT_API_ATTRS void EstablishDescriptor(CFI_cdesc_t *descriptor, void *base_addr,
CFI_attribute_t attribute, CFI_type_t type, std::size_t elem_len,
CFI_rank_t rank, const CFI_index_t extents[]) {
descriptor->base_addr = base_addr;
diff --git a/flang/runtime/derived.h b/flang/runtime/derived.h
index 747a93303e0dbc0..6b9ea907fda9b8b 100644
--- a/flang/runtime/derived.h
+++ b/flang/runtime/derived.h
@@ -11,6 +11,8 @@
#ifndef FORTRAN_RUNTIME_DERIVED_H_
#define FORTRAN_RUNTIME_DERIVED_H_
+#include "flang/Runtime/api-attrs.h"
+
namespace Fortran::runtime::typeInfo {
class DerivedType;
}
@@ -21,21 +23,21 @@ class Terminator;
// Perform default component initialization, allocate automatic components.
// Returns a STAT= code (0 when all's well).
-int Initialize(const Descriptor &, const typeInfo::DerivedType &, Terminator &,
+RT_API_ATTRS int Initialize(const Descriptor &, const typeInfo::DerivedType &, Terminator &,
bool hasStat = false, const Descriptor *errMsg = nullptr);
// Call FINAL subroutines, if any
-void Finalize(
+RT_API_ATTRS void Finalize(
const Descriptor &, const typeInfo::DerivedType &derived, Terminator *);
// Call FINAL subroutines, deallocate allocatable & automatic components.
// Does not deallocate the original descriptor.
-void Destroy(const Descriptor &, bool finalize, const typeInfo::DerivedType &,
+RT_API_ATTRS void Destroy(const Descriptor &, bool finalize, const typeInfo::DerivedType &,
Terminator *);
// Return true if the passed descriptor is for a derived type
// entity that has a dynamic (allocatable, automatic) component.
-bool HasDynamicComponent(const Descriptor &);
+RT_API_ATTRS bool HasDynamicComponent(const Descriptor &);
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_DERIVED_H_
diff --git a/flang/runtime/descriptor.cpp b/flang/runtime/descriptor.cpp
index ab6460708e9b68f..043b73255ab6686 100644
--- a/flang/runtime/descriptor.cpp
+++ b/flang/runtime/descriptor.cpp
@@ -20,14 +20,16 @@
namespace Fortran::runtime {
-Descriptor::Descriptor(const Descriptor &that) { *this = that; }
+RT_OFFLOAD_API_GROUP_BEGIN
-Descriptor &Descriptor::operator=(const Descriptor &that) {
+RT_API_ATTRS Descriptor::Descriptor(const Descriptor &that) { *this = that; }
+
+RT_API_ATTRS Descriptor &Descriptor::operator=(const Descriptor &that) {
std::memcpy(this, &that, that.SizeInBytes());
return *this;
}
-void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
+RT_API_ATTRS void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
bool addendum) {
Terminator terminator{__FILE__, __LINE__};
@@ -58,33 +60,33 @@ void Descriptor::Establish(TypeCode t, std::size_t elementBytes, void *p,
namespace {
template <TypeCategory CAT, int KIND> struct TypeSizeGetter {
- constexpr std::size_t operator()() const {
+ constexpr RT_API_ATTRS std::size_t operator()() const {
CppTypeFor<CAT, KIND> arr[2];
return sizeof arr / 2;
}
};
} // namespace
-std::size_t Descriptor::BytesFor(TypeCategory category, int kind) {
+RT_API_ATTRS std::size_t Descriptor::BytesFor(TypeCategory category, int kind) {
Terminator terminator{__FILE__, __LINE__};
return ApplyType<TypeSizeGetter, std::size_t>(category, kind, terminator);
}
-void Descriptor::Establish(TypeCategory c, int kind, void *p, int rank,
+RT_API_ATTRS void Descriptor::Establish(TypeCategory c, int kind, void *p, int rank,
const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
bool addendum) {
Establish(TypeCode(c, kind), BytesFor(c, kind), p, rank, extent, attribute,
addendum);
}
-void Descriptor::Establish(int characterKind, std::size_t characters, void *p,
+RT_API_ATTRS void Descriptor::Establish(int characterKind, std::size_t characters, void *p,
int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute,
bool addendum) {
Establish(TypeCode{TypeCategory::Character, characterKind},
characterKind * characters, p, rank, extent, attribute, addendum);
}
-void Descriptor::Establish(const typeInfo::DerivedType &dt, void *p, int rank,
+RT_API_ATTRS void Descriptor::Establish(const typeInfo::DerivedType &dt, void *p, int rank,
const SubscriptValue *extent, ISO::CFI_attribute_t attribute) {
Establish(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, rank,
extent, attribute, true);
@@ -94,7 +96,7 @@ void Descriptor::Establish(const typeInfo::DerivedType &dt, void *p, int rank,
new (a) DescriptorAddendum{&dt};
}
-OwningPtr<Descriptor> Descriptor::Create(TypeCode t, std::size_t elementBytes,
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(TypeCode t, std::size_t elementBytes,
void *p, int rank, const SubscriptValue *extent,
ISO::CFI_attribute_t attribute, int derivedTypeLenParameters) {
std::size_t bytes{SizeInBytes(rank, true, derivedTypeLenParameters)};
@@ -105,33 +107,33 @@ OwningPtr<Descriptor> Descriptor::Create(TypeCode t, std::size_t elementBytes,
return OwningPtr<Descriptor>{result};
}
-OwningPtr<Descriptor> Descriptor::Create(TypeCategory c, int kind, void *p,
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(TypeCategory c, int kind, void *p,
int rank, const SubscriptValue *extent, ISO::CFI_attribute_t attribute) {
return Create(
TypeCode(c, kind), BytesFor(c, kind), p, rank, extent, attribute);
}
-OwningPtr<Descriptor> Descriptor::Create(int characterKind,
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(int characterKind,
SubscriptValue characters, void *p, int rank, const SubscriptValue *extent,
ISO::CFI_attribute_t attribute) {
return Create(TypeCode{TypeCategory::Character, characterKind},
characterKind * characters, p, rank, extent, attribute);
}
-OwningPtr<Descriptor> Descriptor::Create(const typeInfo::DerivedType &dt,
+RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(const typeInfo::DerivedType &dt,
void *p, int rank, const SubscriptValue *extent,
ISO::CFI_attribute_t attribute) {
return Create(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, rank,
extent, attribute, dt.LenParameters());
}
-std::size_t Descriptor::SizeInBytes() const {
+RT_API_ATTRS std::size_t Descriptor::SizeInBytes() const {
const DescriptorAddendum *addendum{Addendum()};
return sizeof *this - sizeof(Dimension) + raw_.rank * sizeof(Dimension) +
(addendum ? addendum->SizeInBytes() : 0);
}
-std::size_t Descriptor::Elements() const {
+RT_API_ATTRS std::size_t Descriptor::Elements() const {
int n{rank()};
std::size_t elements{1};
for (int j{0}; j < n; ++j) {
@@ -140,7 +142,7 @@ std::size_t Descriptor::Elements() const {
return elements;
}
-int Descriptor::Allocate() {
+RT_API_ATTRS int Descriptor::Allocate() {
std::size_t byteSize{Elements() * ElementBytes()};
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
@@ -162,7 +164,7 @@ int Descriptor::Allocate() {
return 0;
}
-int Descriptor::Destroy(
+RT_API_ATTRS int Descriptor::Destroy(
bool finalize, bool destroyPointers, Terminator *terminator) {
if (!destroyPointers && raw_.attribute == CFI_attribute_pointer) {
return StatOk;
@@ -178,9 +180,9 @@ int Descriptor::Destroy(
}
}
-int Descriptor::Deallocate() { return ISO::CFI_deallocate(&raw_); }
+RT_API_ATTRS int Descriptor::Deallocate() { return ISO::CFI_deallocate(&raw_); }
-bool Descriptor::DecrementSubscripts(
+RT_API_ATTRS bool Descriptor::DecrementSubscripts(
SubscriptValue *subscript, const int *permutation) const {
for (int j{raw_.rank - 1}; j >= 0; --j) {
int k{permutation ? permutation[j] : j};
@@ -193,7 +195,7 @@ bool Descriptor::DecrementSubscripts(
return false;
}
-std::size_t Descriptor::ZeroBasedElementNumber(
+RT_API_ATTRS std::size_t Descriptor::ZeroBasedElementNumber(
const SubscriptValue *subscript, const int *permutation) const {
std::size_t result{0};
std::size_t coefficient{1};
@@ -206,7 +208,7 @@ std::size_t Descriptor::ZeroBasedElementNumber(
return result;
}
-bool Descriptor::EstablishPointerSection(const Descriptor &source,
+RT_API_ATTRS bool Descriptor::EstablishPointerSection(const Descriptor &source,
const SubscriptValue *lower, const SubscriptValue *upper,
const SubscriptValue *stride) {
*this = source;
@@ -232,7 +234,7 @@ bool Descriptor::EstablishPointerSection(const Descriptor &source,
return CFI_section(&raw_, &source.raw_, lower, upper, stride) == CFI_SUCCESS;
}
-void Descriptor::Check() const {
+RT_API_ATTRS void Descriptor::Check() const {
// TODO
}
@@ -258,7 +260,7 @@ void Descriptor::Dump(FILE *f) const {
}
}
-DescriptorAddendum &DescriptorAddendum::operator=(
+RT_API_ATTRS DescriptorAddendum &DescriptorAddendum::operator=(
const DescriptorAddendum &that) {
derivedType_ = that.derivedType_;
auto lenParms{that.LenParameters()};
@@ -268,11 +270,11 @@ DescriptorAddendum &DescriptorAddendum::operator=(
return *this;
}
-std::size_t DescriptorAddendum::SizeInBytes() const {
+RT_API_ATTRS std::size_t DescriptorAddendum::SizeInBytes() const {
return SizeInBytes(LenParameters());
}
-std::size_t DescriptorAddendum::LenParameters() const {
+RT_API_ATTRS std::size_t DescriptorAddendum::LenParameters() const {
const auto *type{derivedType()};
return type ? type->LenParameters() : 0;
}
@@ -285,4 +287,7 @@ void DescriptorAddendum::Dump(FILE *f) const {
std::fprintf(f, " len[%zd] %jd\n", j, static_cast<std::intmax_t>(len_[j]));
}
}
+
+RT_OFFLOAD_API_GROUP_END
+
} // namespace Fortran::runtime
diff --git a/flang/runtime/terminator.cpp b/flang/runtime/terminator.cpp
index f242ac6f2de2293..bd86912cd53b0bc 100644
--- a/flang/runtime/terminator.cpp
+++ b/flang/runtime/terminator.cpp
@@ -12,14 +12,8 @@
namespace Fortran::runtime {
-[[noreturn]] void Terminator::Crash(const char *message, ...) const {
- va_list ap;
- va_start(ap, message);
- CrashArgs(message, ap);
- va_end(ap);
-}
-
-static void (*crashHandler)(const char *, int, const char *, va_list &){
+#if !defined(RT_DEVICE_COMPILATION)
+[[maybe_unused]] static void (*crashHandler)(const char *, int, const char *, va_list &){
nullptr};
void Terminator::RegisterCrashHandler(
@@ -27,11 +21,38 @@ void Terminator::RegisterCrashHandler(
crashHandler = handler;
}
-[[noreturn]] void Terminator::CrashArgs(
- const char *message, va_list &ap) const {
+void Terminator::InvokeCrashHandler(const char *message, ...) const {
if (crashHandler) {
+ va_list ap;
+ va_start(ap, message);
crashHandler(sourceFileName_, sourceLine_, message, ap);
+ va_end(ap);
+ }
+}
+
+[[noreturn]] void Terminator::CrashArgs(
+ const char *message, va_list &ap) const {
+ CrashHeader();
+ std::vfprintf(stderr, message, ap);
+ va_end(ap);
+ CrashFooter();
+}
+#endif
+
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS void Terminator::CrashHeader() const {
+#if defined(RT_DEVICE_COMPILATION)
+ std::printf("\nfatal Fortran runtime error");
+ if (sourceFileName_) {
+ std::printf("(%s", sourceFileName_);
+ if (sourceLine_) {
+ std::printf(":%d", sourceLine_);
+ }
+ std::printf(")");
}
+ std::printf(": ");
+#else
std::fputs("\nfatal Fortran runtime error", stderr);
if (sourceFileName_) {
std::fprintf(stderr, "(%s", sourceFileName_);
@@ -41,27 +62,49 @@ void Terminator::RegisterCrashHandler(
fputc(')', stderr);
}
std::fputs(": ", stderr);
- std::vfprintf(stderr, message, ap);
+#endif
+}
+
+[[noreturn]] RT_API_ATTRS void Terminator::CrashFooter() const {
+#if defined(RT_DEVICE_COMPILATION)
+ std::printf("\n");
+#else
fputc('\n', stderr);
- va_end(ap);
+ // FIXME: re-enable the flush along with the IO enabling.
io::FlushOutputOnCrash(*this);
+#endif
NotifyOtherImagesOfErrorTermination();
+#if defined(RT_DEVICE_COMPILATION)
+#if defined(__CUDACC__)
+ // NVCC supports __trap().
+ __trap();
+#elif defined(__clang__)
+ // Clang supports __builtin_trap().
+ __builtin_trap();
+#else
+#error "unsupported compiler"
+#endif
+#else
std::abort();
+#endif
}
-[[noreturn]] void Terminator::CheckFailed(
+[[noreturn]] RT_API_ATTRS void Terminator::CheckFailed(
const char *predicate, const char *file, int line) const {
Crash("Internal error: RUNTIME_CHECK(%s) failed at %s(%d)", predicate, file,
line);
}
-[[noreturn]] void Terminator::CheckFailed(const char *predicate) const {
+[[noreturn]] RT_API_ATTRS void Terminator::CheckFailed(const char *predicate) const {
Crash("Internal error: RUNTIME_CHECK(%s) failed at %s(%d)", predicate,
sourceFileName_, sourceLine_);
}
// TODO: These will be defined in the coarray runtime library
-void NotifyOtherImagesOfNormalEnd() {}
-void NotifyOtherImagesOfFailImageStatement() {}
-void NotifyOtherImagesOfErrorTermination() {}
+RT_API_ATTRS void NotifyOtherImagesOfNormalEnd() {}
+RT_API_ATTRS void NotifyOtherImagesOfFailImageStatement() {}
+RT_API_ATTRS void NotifyOtherImagesOfErrorTermination() {}
+
+RT_OFFLOAD_API_GROUP_END
+
} // namespace Fortran::runtime
diff --git a/flang/runtime/terminator.h b/flang/runtime/terminator.h
index 84b4b1d79bf76e2..dc73407093fda23 100644
--- a/flang/runtime/terminator.h
+++ b/flang/runtime/terminator.h
@@ -13,6 +13,8 @@
#include "flang/Runtime/api-attrs.h"
#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
namespace Fortran::runtime {
@@ -20,26 +22,69 @@ namespace Fortran::runtime {
// for errors detected in the runtime library
class Terminator {
public:
- Terminator() {}
+ RT_API_ATTRS Terminator() {}
Terminator(const Terminator &) = default;
explicit RT_API_ATTRS Terminator(
const char *sourceFileName, int sourceLine = 0)
: sourceFileName_{sourceFileName}, sourceLine_{sourceLine} {}
- const char *sourceFileName() const { return sourceFileName_; }
- int sourceLine() const { return sourceLine_; }
+ RT_API_ATTRS const char *sourceFileName() const { return sourceFileName_; }
+ RT_API_ATTRS int sourceLine() const { return sourceLine_; }
- void SetLocation(const char *sourceFileName = nullptr, int sourceLine = 0) {
+ RT_API_ATTRS void SetLocation(const char *sourceFileName = nullptr, int sourceLine = 0) {
sourceFileName_ = sourceFileName;
sourceLine_ = sourceLine;
}
- // CUDA_TODO: Clang for CUDA does not support varargs, though
- // it compiles it with -fcuda-allow-variadic-functions.
- // We can try to replace varargs functions with variadic templates.
- [[noreturn]] RT_API_ATTRS void Crash(const char *message, ...) const;
- [[noreturn]] RT_API_ATTRS void CrashArgs(
+ // Silence compiler warnings about the format string being
+ // non-literal. A more precise control would be
+ // __attribute__((format_arg(2))), but it requires the function
+ // to return 'char *', which does not work well with noreturn.
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wformat-security"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wformat-security"
+#endif
+
+ // Device offload compilers do not normally support varargs and va_list,
+ // so use C++ variadic templates to forward the crash arguments
+ // to regular printf for the device compilation.
+ // Try to keep the inline implementations as small as possible.
+ template <typename... Args>
+ [[noreturn]] RT_API_ATTRS const char *Crash(const char *message, Args... args) const {
+#if !defined(RT_DEVICE_COMPILATION)
+ // Invoke handler set up by the test harness.
+ InvokeCrashHandler(message, args...);
+#endif
+ CrashHeader();
+ PrintCrashArgs(message, args...);
+ CrashFooter();
+ }
+
+ template <typename... Args>
+ RT_API_ATTRS void PrintCrashArgs(const char *message, Args... args) const {
+#if RT_DEVICE_COMPILATION
+ std::printf(message, args...);
+#else
+ std::fprintf(stderr, message, args...);
+#endif
+ }
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#elif defined(__GNUC__)
+#pragma GCC diagnostic pop
+#endif
+
+ RT_API_ATTRS void CrashHeader() const;
+ [[noreturn]] RT_API_ATTRS void CrashFooter() const;
+#if !defined(RT_DEVICE_COMPILATION)
+ void InvokeCrashHandler(const char *message, ...) const;
+ [[noreturn]] void CrashArgs(
const char *message, va_list &) const;
+#endif
[[noreturn]] RT_API_ATTRS void CheckFailed(
const char *predicate, const char *file, int line) const;
[[noreturn]] RT_API_ATTRS void CheckFailed(const char *predicate) const;
@@ -66,13 +111,13 @@ class Terminator {
else \
Terminator{__FILE__, __LINE__}.CheckFailed(#pred)
-void NotifyOtherImagesOfNormalEnd();
-void NotifyOtherImagesOfFailImageStatement();
-void NotifyOtherImagesOfErrorTermination();
+RT_API_ATTRS void NotifyOtherImagesOfNormalEnd();
+RT_API_ATTRS void NotifyOtherImagesOfFailImageStatement();
+RT_API_ATTRS void NotifyOtherImagesOfErrorTermination();
} // namespace Fortran::runtime
namespace Fortran::runtime::io {
-void FlushOutputOnCrash(const Terminator &);
+RT_API_ATTRS void FlushOutputOnCrash(const Terminator &);
}
#endif // FORTRAN_RUNTIME_TERMINATOR_H_
diff --git a/flang/runtime/type-code.cpp b/flang/runtime/type-code.cpp
index b9ce519dc14941b..b9ef307835dfba3 100644
--- a/flang/runtime/type-code.cpp
+++ b/flang/runtime/type-code.cpp
@@ -10,7 +10,9 @@
namespace Fortran::runtime {
-TypeCode::TypeCode(TypeCategory f, int kind) {
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS TypeCode::TypeCode(TypeCategory f, int kind) {
switch (f) {
case TypeCategory::Integer:
switch (kind) {
@@ -110,7 +112,7 @@ TypeCode::TypeCode(TypeCategory f, int kind) {
}
}
-std::optional<std::pair<TypeCategory, int>>
+RT_API_ATTRS std::optional<std::pair<TypeCategory, int>>
TypeCode::GetCategoryAndKind() const {
switch (raw_) {
case CFI_type_signed_char:
@@ -205,4 +207,7 @@ TypeCode::GetCategoryAndKind() const {
return std::nullopt;
}
}
+
+RT_OFFLOAD_API_GROUP_END
+
} // namespace Fortran::runtime
diff --git a/flang/runtime/type-info.h b/flang/runtime/type-info.h
index 1f6c56742b6f7c2..7ed17d9ede93000 100644
--- a/flang/runtime/type-info.h
+++ b/flang/runtime/type-info.h
@@ -38,8 +38,8 @@ class Value {
Explicit = 2,
LenParameter = 3
};
- Genre genre() const { return genre_; }
- std::optional<TypeParameterValue> GetValue(const Descriptor *) const;
+ RT_API_ATTRS Genre genre() const { return genre_; }
+ RT_API_ATTRS std::optional<TypeParameterValue> GetValue(const Descriptor *) const;
private:
Genre genre_{Genre::Explicit};
@@ -57,38 +57,38 @@ class Component {
Automatic = 4
};
- const Descriptor &name() const { return name_.descriptor(); }
- Genre genre() const { return genre_; }
- TypeCategory category() const { return static_cast<TypeCategory>(category_); }
- int kind() const { return kind_; }
- int rank() const { return rank_; }
- std::uint64_t offset() const { return offset_; }
- const Value &characterLen() const { return characterLen_; }
- const DerivedType *derivedType() const {
+ const RT_API_ATTRS Descriptor &name() const { return name_.descriptor(); }
+ RT_API_ATTRS Genre genre() const { return genre_; }
+ RT_API_ATTRS TypeCategory category() const { return static_cast<TypeCategory>(category_); }
+ RT_API_ATTRS int kind() const { return kind_; }
+ RT_API_ATTRS int rank() const { return rank_; }
+ RT_API_ATTRS std::uint64_t offset() const { return offset_; }
+ const RT_API_ATTRS Value &characterLen() const { return characterLen_; }
+ const RT_API_ATTRS DerivedType *derivedType() const {
return derivedType_.descriptor().OffsetElement<const DerivedType>();
}
- const Value *lenValue() const {
+ const RT_API_ATTRS Value *lenValue() const {
return lenValue_.descriptor().OffsetElement<const Value>();
}
- const Value *bounds() const {
+ const RT_API_ATTRS Value *bounds() const {
return bounds_.descriptor().OffsetElement<const Value>();
}
- const char *initialization() const { return initialization_; }
+ const RT_API_ATTRS char *initialization() const { return initialization_; }
- std::size_t GetElementByteSize(const Descriptor &) const;
- std::size_t GetElements(const Descriptor &) const;
+ RT_API_ATTRS std::size_t GetElementByteSize(const Descriptor &) const;
+ RT_API_ATTRS std::size_t GetElements(const Descriptor &) const;
// For components that are descriptors, returns size of descriptor;
// for Genre::Data, returns elemental byte size times element count.
- std::size_t SizeInBytes(const Descriptor &) const;
+ RT_API_ATTRS std::size_t SizeInBytes(const Descriptor &) const;
// Establishes a descriptor from this component description.
- void EstablishDescriptor(
+ RT_API_ATTRS void EstablishDescriptor(
Descriptor &, const Descriptor &container, Terminator &) const;
// Creates a pointer descriptor from this component description, possibly
// with subscripts
- void CreatePointerDescriptor(Descriptor &, const Descriptor &container,
+ RT_API_ATTRS void CreatePointerDescriptor(Descriptor &, const Descriptor &container,
Terminator &, const SubscriptValue * = nullptr) const;
FILE *Dump(FILE * = stdout) const;
@@ -135,25 +135,25 @@ class SpecialBinding {
// Special bindings can be created during execution to handle defined
// I/O procedures that are not type-bound.
- SpecialBinding(Which which, ProcedurePointer proc, std::uint8_t isArgDescSet,
+ RT_API_ATTRS SpecialBinding(Which which, ProcedurePointer proc, std::uint8_t isArgDescSet,
std::uint8_t isTypeBound, std::uint8_t isArgContiguousSet)
: which_{which}, isArgDescriptorSet_{isArgDescSet},
isTypeBound_{isTypeBound}, isArgContiguousSet_{isArgContiguousSet},
proc_{proc} {}
- static constexpr Which RankFinal(int rank) {
+ static constexpr RT_API_ATTRS Which RankFinal(int rank) {
return static_cast<Which>(static_cast<int>(Which::ScalarFinal) + rank);
}
- Which which() const { return which_; }
- bool IsArgDescriptor(int zeroBasedArg) const {
+ RT_API_ATTRS Which which() const { return which_; }
+ RT_API_ATTRS bool IsArgDescriptor(int zeroBasedArg) const {
return (isArgDescriptorSet_ >> zeroBasedArg) & 1;
}
- bool isTypeBound() const { return isTypeBound_; }
- bool IsArgContiguous(int zeroBasedArg) const {
+ RT_API_ATTRS bool isTypeBound() const { return isTypeBound_; }
+ RT_API_ATTRS bool IsArgContiguous(int zeroBasedArg) const {
return (isArgContiguousSet_ >> zeroBasedArg) & 1;
}
- template <typename PROC> PROC GetProc() const {
+ template <typename PROC> RT_API_ATTRS PROC GetProc() const {
return reinterpret_cast<PROC>(proc_);
}
@@ -200,36 +200,36 @@ class DerivedType {
public:
~DerivedType(); // never defined
- const Descriptor &binding() const { return binding_.descriptor(); }
- const Descriptor &name() const { return name_.descriptor(); }
- std::uint64_t sizeInBytes() const { return sizeInBytes_; }
- const Descriptor &uninstatiated() const {
+ const RT_API_ATTRS Descriptor &binding() const { return binding_.descriptor(); }
+ const RT_API_ATTRS Descriptor &name() const { return name_.descriptor(); }
+ RT_API_ATTRS std::uint64_t sizeInBytes() const { return sizeInBytes_; }
+ const RT_API_ATTRS Descriptor &uninstatiated() const {
return uninstantiated_.descriptor();
}
- const Descriptor &kindParameter() const {
+ const RT_API_ATTRS Descriptor &kindParameter() const {
return kindParameter_.descriptor();
}
- const Descriptor &lenParameterKind() const {
+ const RT_API_ATTRS Descriptor &lenParameterKind() const {
return lenParameterKind_.descriptor();
}
- const Descriptor &component() const { return component_.descriptor(); }
- const Descriptor &procPtr() const { return procPtr_.descriptor(); }
- const Descriptor &special() const { return special_.descriptor(); }
- bool hasParent() const { return hasParent_; }
- bool noInitializationNeeded() const { return noInitializationNeeded_; }
- bool noDestructionNeeded() const { return noDestructionNeeded_; }
- bool noFinalizationNeeded() const { return noFinalizationNeeded_; }
+ const RT_API_ATTRS Descriptor &component() const { return component_.descriptor(); }
+ const RT_API_ATTRS Descriptor &procPtr() const { return procPtr_.descriptor(); }
+ const RT_API_ATTRS Descriptor &special() const { return special_.descriptor(); }
+ RT_API_ATTRS bool hasParent() const { return hasParent_; }
+ RT_API_ATTRS bool noInitializationNeeded() const { return noInitializationNeeded_; }
+ RT_API_ATTRS bool noDestructionNeeded() const { return noDestructionNeeded_; }
+ RT_API_ATTRS bool noFinalizationNeeded() const { return noFinalizationNeeded_; }
- std::size_t LenParameters() const { return lenParameterKind().Elements(); }
+ RT_API_ATTRS std::size_t LenParameters() const { return lenParameterKind().Elements(); }
- const DerivedType *GetParentType() const;
+ const RT_API_ATTRS DerivedType *GetParentType() const;
// Finds a data component by name in this derived type or its ancestors.
- const Component *FindDataComponent(
+ const RT_API_ATTRS Component *FindDataComponent(
const char *name, std::size_t nameLen) const;
// O(1) look-up of special procedure bindings
- const SpecialBinding *FindSpecialBinding(SpecialBinding::Which which) const {
+ const RT_API_ATTRS SpecialBinding *FindSpecialBinding(SpecialBinding::Which which) const {
auto bitIndex{static_cast<std::uint32_t>(which)};
auto bit{std::uint32_t{1} << bitIndex};
if (specialBitSet_ & bit) {
More information about the flang-commits
mailing list