[llvm] [flang][runtime] Speed up initialization & destruction (PR #148087)
Peter Klausler via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 16:53:47 PDT 2025
https://github.com/klausler updated https://github.com/llvm/llvm-project/pull/148087
>From b9be6901f419923f735a829c42d28a4b69c74172 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Tue, 8 Jul 2025 18:05:44 -0700
Subject: [PATCH] [flang][runtime] Speed up initialization & destruction
Rework derived type initialization in the runtime to just
initialize the first element of any array, and then memcpy
it to the others, rather than exercising the per-component
paths for each element.
Rework derived type destruction in the runtime to detect and
exploit a fast path for allocatable components whose types
themselves don't need nested destruction.
Small tweaks were made in hot paths exposed by profiling in
descriptor operations and derived type assignment.
---
.../include/flang-rt/runtime/descriptor.h | 87 ++++++++++-
flang-rt/include/flang-rt/runtime/type-info.h | 4 +-
.../include/flang-rt/runtime/work-queue.h | 36 +++--
flang-rt/lib/runtime/assign.cpp | 17 ++-
flang-rt/lib/runtime/derived.cpp | 138 ++++++++++--------
flang-rt/lib/runtime/descriptor.cpp | 51 ++-----
flang-rt/lib/runtime/pointer.cpp | 6 +-
flang-rt/lib/runtime/work-queue.cpp | 40 ++---
8 files changed, 229 insertions(+), 150 deletions(-)
diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index 75092a1fd2bfc..4a5674cb6596c 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -20,13 +20,16 @@
#include "memory.h"
#include "type-code.h"
+#include "flang-rt/runtime/allocator-registry.h"
#include "flang/Common/ISO_Fortran_binding_wrapper.h"
+#include "flang/Common/optional.h"
#include "flang/Runtime/descriptor-consts.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
#include <cstddef>
#include <cstdio>
+#include <cstdlib>
#include <cstring>
/// Value used for asyncObject when no specific stream is specified.
@@ -262,9 +265,20 @@ class Descriptor {
template <typename A>
RT_API_ATTRS A *ZeroBasedIndexedElement(std::size_t n) const {
- SubscriptValue at[maxRank];
- if (SubscriptsForZeroBasedElementNumber(at, n)) {
- return Element<A>(at);
+ if (raw_.rank == 0) {
+ if (n == 0) {
+ return OffsetElement<A>();
+ }
+ } else if (raw_.rank == 1) {
+ const auto &dim{GetDimension(0)};
+ if (n < static_cast<std::size_t>(dim.Extent())) {
+ return OffsetElement<A>(n * dim.ByteStride());
+ }
+ } else {
+ SubscriptValue at[maxRank];
+ if (SubscriptsForZeroBasedElementNumber(at, n)) {
+ return Element<A>(at);
+ }
}
return nullptr;
}
@@ -365,7 +379,18 @@ class Descriptor {
RT_API_ATTRS std::size_t SizeInBytes() const;
- RT_API_ATTRS std::size_t Elements() const;
+ RT_API_ATTRS std::size_t Elements() const {
+ int n{rank()};
+ if (n == 0) {
+ return 1;
+ } else {
+ auto elements{static_cast<std::size_t>(GetDimension(0).Extent())};
+ for (int j{1}; j < n; ++j) {
+ elements *= GetDimension(j).Extent();
+ }
+ return elements;
+ }
+ }
// Allocate() assumes Elements() and ElementBytes() work;
// define the extents of the dimensions and the element length
@@ -377,7 +402,22 @@ class Descriptor {
// Deallocates storage; does not call FINAL subroutines or
// deallocate allocatable/automatic components.
- RT_API_ATTRS int Deallocate();
+ RT_API_ATTRS int Deallocate() {
+ ISO::CFI_cdesc_t &descriptor{raw()};
+ void *pointer{descriptor.base_addr};
+ if (!pointer) {
+ return CFI_ERROR_BASE_ADDR_NULL;
+ } else {
+ int allocIndex{MapAllocIdx()};
+ if (allocIndex == kDefaultAllocator) {
+ std::free(pointer);
+ } else {
+ allocatorRegistry.GetDeallocator(MapAllocIdx())(pointer);
+ }
+ descriptor.base_addr = nullptr;
+ return CFI_SUCCESS;
+ }
+ }
// Deallocates storage, including allocatable and automatic
// components. Optionally invokes FINAL subroutines.
@@ -392,8 +432,7 @@ class Descriptor {
bool stridesAreContiguous{true};
for (int j{0}; j < leadingDimensions; ++j) {
const Dimension &dim{GetDimension(j)};
- stridesAreContiguous &=
- (bytes == dim.ByteStride()) || (dim.Extent() == 1);
+ stridesAreContiguous &= bytes == dim.ByteStride() || dim.Extent() == 1;
bytes *= dim.Extent();
}
// One and zero element arrays are contiguous even if the descriptor
@@ -406,6 +445,32 @@ class Descriptor {
return stridesAreContiguous || bytes == 0;
}
+ // The result, if any, is a fixed stride value that can be used to
+ // address all elements. It generalizes contiguity by also allowing
+ // the case of an array with extent 1 on all but one dimension.
+ RT_API_ATTRS common::optional<SubscriptValue> FixedStride() const {
+ auto rank{static_cast<std::size_t>(raw_.rank)};
+ common::optional<SubscriptValue> stride;
+ for (std::size_t j{0}; j < rank; ++j) {
+ const Dimension &dim{GetDimension(j)};
+ auto extent{dim.Extent()};
+ if (extent == 0) {
+ break; // empty array
+ } else if (extent == 1) { // ok
+ } else if (stride) {
+ // Extent > 1 on multiple dimensions
+ if (IsContiguous()) {
+ return ElementBytes();
+ } else {
+ return common::nullopt;
+ }
+ } else {
+ stride = dim.ByteStride();
+ }
+ }
+ return stride.value_or(0); // 0 for scalars and empty arrays
+ }
+
// Establishes a pointer to a section or element.
RT_API_ATTRS bool EstablishPointerSection(const Descriptor &source,
const SubscriptValue *lower = nullptr,
@@ -427,6 +492,14 @@ class Descriptor {
RT_API_ATTRS inline int GetAllocIdx() const {
return (raw_.extra & _CFI_ALLOCATOR_IDX_MASK) >> _CFI_ALLOCATOR_IDX_SHIFT;
}
+ RT_API_ATTRS int MapAllocIdx() const {
+#ifdef RT_DEVICE_COMPILATION
+ // Force default allocator in device code.
+ return kDefaultAllocator;
+#else
+ return GetAllocIdx();
+#endif
+ }
RT_API_ATTRS inline void SetAllocIdx(int pos) {
raw_.extra &= ~_CFI_ALLOCATOR_IDX_MASK; // Clear the allocator index bits.
raw_.extra |= pos << _CFI_ALLOCATOR_IDX_SHIFT;
diff --git a/flang-rt/include/flang-rt/runtime/type-info.h b/flang-rt/include/flang-rt/runtime/type-info.h
index 80301a313282f..a8d39f4f8a1a3 100644
--- a/flang-rt/include/flang-rt/runtime/type-info.h
+++ b/flang-rt/include/flang-rt/runtime/type-info.h
@@ -68,7 +68,9 @@ class Component {
RT_API_ATTRS std::uint64_t offset() const { return offset_; }
RT_API_ATTRS const Value &characterLen() const { return characterLen_; }
RT_API_ATTRS const DerivedType *derivedType() const {
- return derivedType_.descriptor().OffsetElement<const DerivedType>();
+ return category() == TypeCategory::Derived
+ ? derivedType_.descriptor().OffsetElement<const DerivedType>()
+ : nullptr;
}
RT_API_ATTRS const Value *lenValue() const {
return lenValue_.descriptor().OffsetElement<const Value>();
diff --git a/flang-rt/include/flang-rt/runtime/work-queue.h b/flang-rt/include/flang-rt/runtime/work-queue.h
index 0daa7bc4d3384..14a9d720e2ef2 100644
--- a/flang-rt/include/flang-rt/runtime/work-queue.h
+++ b/flang-rt/include/flang-rt/runtime/work-queue.h
@@ -62,6 +62,7 @@
#include "flang-rt/runtime/stat.h"
#include "flang-rt/runtime/type-info.h"
#include "flang/Common/api-attrs.h"
+#include "flang/Common/optional.h"
#include "flang/Runtime/freestanding-tools.h"
#include <flang/Common/variant.h>
@@ -131,11 +132,19 @@ class Elementwise {
// Base class for ticket workers that operate over derived type components.
class Componentwise {
public:
- RT_API_ATTRS Componentwise(const typeInfo::DerivedType &);
+ RT_API_ATTRS Componentwise(const typeInfo::DerivedType &derived)
+ : derived_{derived}, components_{derived_.component().Elements()} {
+ GetFirstComponent();
+ }
+
RT_API_ATTRS bool IsComplete() const { return componentAt_ >= components_; }
RT_API_ATTRS void Advance() {
++componentAt_;
- GetComponent();
+ if (IsComplete()) {
+ component_ = nullptr;
+ } else {
+ ++component_;
+ }
}
RT_API_ATTRS void SkipToEnd() {
component_ = nullptr;
@@ -144,15 +153,21 @@ class Componentwise {
RT_API_ATTRS void Reset() {
component_ = nullptr;
componentAt_ = 0;
- GetComponent();
+ GetFirstComponent();
}
- RT_API_ATTRS void GetComponent();
protected:
const typeInfo::DerivedType &derived_;
std::size_t components_{0}, componentAt_{0};
const typeInfo::Component *component_{nullptr};
StaticDescriptor<common::maxRank, true, 0> componentDescriptor_;
+
+private:
+ RT_API_ATTRS void GetFirstComponent() {
+ if (components_ > 0) {
+ component_ = derived_.component().OffsetElement<typeInfo::Component>();
+ }
+ }
};
// Base class for ticket workers that operate over derived type components
@@ -228,14 +243,14 @@ class ElementsOverComponents : public Elementwise, public Componentwise {
// Ticket worker classes
-// Implements derived type instance initialization
+// Implements derived type instance initialization.
class InitializeTicket : public ImmediateTicketRunner<InitializeTicket>,
- private ComponentsOverElements {
+ private ElementsOverComponents {
public:
RT_API_ATTRS InitializeTicket(
const Descriptor &instance, const typeInfo::DerivedType &derived)
: ImmediateTicketRunner<InitializeTicket>{*this},
- ComponentsOverElements{instance, derived} {}
+ ElementsOverComponents{instance, derived} {}
RT_API_ATTRS int Begin(WorkQueue &);
RT_API_ATTRS int Continue(WorkQueue &);
};
@@ -283,12 +298,14 @@ class DestroyTicket : public ImmediateTicketRunner<DestroyTicket>,
RT_API_ATTRS DestroyTicket(const Descriptor &instance,
const typeInfo::DerivedType &derived, bool finalize)
: ImmediateTicketRunner<DestroyTicket>{*this},
- ComponentsOverElements{instance, derived}, finalize_{finalize} {}
+ ComponentsOverElements{instance, derived}, finalize_{finalize},
+ fixedStride_{instance.FixedStride()} {}
RT_API_ATTRS int Begin(WorkQueue &);
RT_API_ATTRS int Continue(WorkQueue &);
private:
bool finalize_{false};
+ std::optional<SubscriptValue> fixedStride_;
};
// Implements general intrinsic assignment
@@ -302,11 +319,11 @@ class AssignTicket : public ImmediateTicketRunner<AssignTicket> {
RT_API_ATTRS int Continue(WorkQueue &);
private:
+ RT_API_ATTRS Descriptor &GetTempDescriptor();
RT_API_ATTRS bool IsSimpleMemmove() const {
return !toDerived_ && to_.rank() == from_->rank() && to_.IsContiguous() &&
from_->IsContiguous() && to_.ElementBytes() == from_->ElementBytes();
}
- RT_API_ATTRS Descriptor &GetTempDescriptor();
Descriptor &to_;
const Descriptor *from_{nullptr};
@@ -549,6 +566,7 @@ class WorkQueue {
TicketList *first_{nullptr}, *last_{nullptr}, *insertAfter_{nullptr};
TicketList static_[numStatic_];
TicketList *firstFree_{static_};
+ bool anyDynamicAllocation_{false};
};
} // namespace Fortran::runtime
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index f936a4192a33c..78c3f60256543 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -432,10 +432,13 @@ RT_API_ATTRS int AssignTicket::Continue(WorkQueue &workQueue) {
}
// Intrinsic assignment
std::size_t toElements{to_.Elements()};
- if (from_->rank() > 0 && toElements != from_->Elements()) {
- workQueue.terminator().Crash("Assign: mismatching element counts in array "
- "assignment (to %zd, from %zd)",
- toElements, from_->Elements());
+ if (from_->rank() > 0) {
+ std::size_t fromElements{from_->Elements()};
+ if (toElements != fromElements) {
+ workQueue.terminator().Crash("Assign: mismatching element counts in "
+ "array assignment (to %zd, from %zd)",
+ toElements, fromElements);
+ }
}
if (to_.type() != from_->type()) {
workQueue.terminator().Crash(
@@ -614,7 +617,7 @@ RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
memmoveFct_(to, from, componentByteSize);
}
}
- this->Componentwise::Advance();
+ this->SkipToNextComponent();
} else {
memmoveFct_(
this->instance_.template Element<char>(this->subscripts_) +
@@ -646,7 +649,7 @@ RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
memmoveFct_(to, from, componentByteSize);
}
}
- this->Componentwise::Advance();
+ this->SkipToNextComponent();
} else {
memmoveFct_(this->instance_.template Element<char>(this->subscripts_) +
this->component_->offset(),
@@ -668,11 +671,11 @@ RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
if (toDesc->IsAllocatable() && !fromDesc->IsAllocated()) {
if (toDesc->IsAllocated()) {
if (this->phase_ == 0) {
- this->phase_++;
if (componentDerived && !componentDerived->noDestructionNeeded()) {
if (int status{workQueue.BeginDestroy(
*toDesc, *componentDerived, /*finalize=*/false)};
status != StatOk) {
+ this->phase_++;
return status;
}
}
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index 4e36b1e2edfc8..576fbf1ecdff5 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -39,64 +39,46 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
}
RT_API_ATTRS int InitializeTicket::Begin(WorkQueue &) {
- // Initialize procedure pointer components in each element
- const Descriptor &procPtrDesc{derived_.procPtr()};
- if (std::size_t numProcPtrs{procPtrDesc.Elements()}) {
- for (std::size_t k{0}; k < numProcPtrs; ++k) {
- const auto &comp{
- *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
- // Loop only over elements
- if (k > 0) {
- Elementwise::Reset();
- }
- for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
- auto &pptr{*instance_.ElementComponent<typeInfo::ProcedurePointer>(
- subscripts_, comp.offset)};
- pptr = comp.procInitialization;
- }
- }
- if (IsComplete()) {
- return StatOk;
+ if (elements_ == 0) {
+ return StatOk;
+ } else {
+ // Initialize procedure pointer components in the first element,
+ // whence they will be copied later into all others.
+ const Descriptor &procPtrDesc{derived_.procPtr()};
+ std::size_t numProcPtrs{procPtrDesc.Elements()};
+ char *raw{instance_.OffsetElement<char>()};
+ const auto *ppComponent{
+ procPtrDesc.OffsetElement<typeInfo::ProcPtrComponent>()};
+ for (std::size_t k{0}; k < numProcPtrs; ++k, ++ppComponent) {
+ auto &pptr{*reinterpret_cast<typeInfo::ProcedurePointer *>(
+ raw + ppComponent->offset)};
+ pptr = ppComponent->procInitialization;
}
- Elementwise::Reset();
+ return StatContinue;
}
- return StatContinue;
}
RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
- while (!IsComplete()) {
+ // Initialize the data components of the first element.
+ char *rawInstance{instance_.OffsetElement<char>()};
+ for (; !Componentwise::IsComplete(); SkipToNextComponent()) {
+ char *rawComponent{rawInstance + component_->offset()};
if (component_->genre() == typeInfo::Component::Genre::Allocatable) {
- // Establish allocatable descriptors
- for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
- Descriptor &allocDesc{*instance_.ElementComponent<Descriptor>(
- subscripts_, component_->offset())};
- component_->EstablishDescriptor(
- allocDesc, instance_, workQueue.terminator());
- allocDesc.raw().attribute = CFI_attribute_allocatable;
- }
- SkipToNextComponent();
+ Descriptor &allocDesc{*reinterpret_cast<Descriptor *>(rawComponent)};
+ component_->EstablishDescriptor(
+ allocDesc, instance_, workQueue.terminator());
} else if (const void *init{component_->initialization()}) {
// Explicit initialization of data pointers and
// non-allocatable non-automatic components
std::size_t bytes{component_->SizeInBytes(instance_)};
- for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
- char *ptr{instance_.ElementComponent<char>(
- subscripts_, component_->offset())};
- std::memcpy(ptr, init, bytes);
- }
- SkipToNextComponent();
+ std::memcpy(rawComponent, init, bytes);
} else if (component_->genre() == typeInfo::Component::Genre::Pointer) {
// Data pointers without explicit initialization are established
// so that they are valid right-hand side targets of pointer
// assignment statements.
- for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
- Descriptor &ptrDesc{*instance_.ElementComponent<Descriptor>(
- subscripts_, component_->offset())};
- component_->EstablishDescriptor(
- ptrDesc, instance_, workQueue.terminator());
- ptrDesc.raw().attribute = CFI_attribute_pointer;
- }
- SkipToNextComponent();
+ Descriptor &ptrDesc{*reinterpret_cast<Descriptor *>(rawComponent)};
+ component_->EstablishDescriptor(
+ ptrDesc, instance_, workQueue.terminator());
} else if (component_->genre() == typeInfo::Component::Genre::Data &&
component_->derivedType() &&
!component_->derivedType()->noInitializationNeeded()) {
@@ -106,16 +88,41 @@ RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
GetComponentExtents(extents, *component_, instance_);
Descriptor &compDesc{componentDescriptor_.descriptor()};
const typeInfo::DerivedType &compType{*component_->derivedType()};
- compDesc.Establish(compType,
- instance_.ElementComponent<char>(subscripts_, component_->offset()),
- component_->rank(), extents);
- Advance();
+ compDesc.Establish(compType, rawComponent, component_->rank(), extents);
if (int status{workQueue.BeginInitialize(compDesc, compType)};
status != StatOk) {
+ SkipToNextComponent();
return status;
}
- } else {
- SkipToNextComponent();
+ }
+ }
+ // The first element is now complete. Copy it into the others.
+ if (elements_ < 2) {
+ } else {
+ auto elementBytes{static_cast<SubscriptValue>(instance_.ElementBytes())};
+ if (auto stride{instance_.FixedStride()}) {
+ if (*stride == elementBytes) { // contiguous
+ for (std::size_t done{1}; done < elements_;) {
+ std::size_t chunk{elements_ - done};
+ if (chunk > done) {
+ chunk = done;
+ }
+ char *uninitialized{rawInstance + done * *stride};
+ std::memcpy(uninitialized, rawInstance, chunk * *stride);
+ done += chunk;
+ }
+ } else {
+ for (std::size_t done{1}; done < elements_; ++done) {
+ char *uninitialized{rawInstance + done * *stride};
+ std::memcpy(uninitialized, rawInstance, elementBytes);
+ }
+ }
+ } else { // one at a time with subscription
+ for (Elementwise::Advance(); !Elementwise::IsComplete();
+ Elementwise::Advance()) {
+ char *element{instance_.Element<char>(subscripts_)};
+ std::memcpy(element, rawInstance, elementBytes);
+ }
}
}
return StatOk;
@@ -415,24 +422,33 @@ RT_API_ATTRS int DestroyTicket::Continue(WorkQueue &workQueue) {
// Contrary to finalization, the order of deallocation does not matter.
while (!IsComplete()) {
const auto *componentDerived{component_->derivedType()};
- if (component_->genre() == typeInfo::Component::Genre::Allocatable ||
- component_->genre() == typeInfo::Component::Genre::Automatic) {
- Descriptor *d{instance_.ElementComponent<Descriptor>(
- subscripts_, component_->offset())};
- if (d->IsAllocated()) {
- if (phase_ == 0) {
- ++phase_;
- if (componentDerived && !componentDerived->noDestructionNeeded()) {
+ if (component_->genre() == typeInfo::Component::Genre::Allocatable) {
+ if (fixedStride_ &&
+ (!componentDerived || componentDerived->noDestructionNeeded())) {
+ // common fast path, just deallocate in every element
+ char *p{instance_.OffsetElement<char>(component_->offset())};
+ for (std::size_t j{0}; j < elements_; ++j, p += *fixedStride_) {
+ Descriptor &d{*reinterpret_cast<Descriptor *>(p)};
+ d.Deallocate();
+ }
+ SkipToNextComponent();
+ } else {
+ Descriptor &d{*instance_.ElementComponent<Descriptor>(
+ subscripts_, component_->offset())};
+ if (d.IsAllocated()) {
+ if (componentDerived && !componentDerived->noDestructionNeeded() &&
+ phase_ == 0) {
if (int status{workQueue.BeginDestroy(
- *d, *componentDerived, /*finalize=*/false)};
+ d, *componentDerived, /*finalize=*/false)};
status != StatOk) {
+ ++phase_;
return status;
}
}
+ d.Deallocate();
}
- d->Deallocate();
+ Advance();
}
- Advance();
} else if (component_->genre() == typeInfo::Component::Genre::Data) {
if (!componentDerived || componentDerived->noDestructionNeeded()) {
SkipToNextComponent();
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 67336d01380e0..7392270e14a8e 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -85,12 +85,19 @@ RT_API_ATTRS void Descriptor::Establish(int characterKind,
RT_API_ATTRS void Descriptor::Establish(const typeInfo::DerivedType &dt,
void *p, int rank, const SubscriptValue *extent,
ISO::CFI_attribute_t attribute) {
- Establish(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, rank,
- extent, attribute, true);
- DescriptorAddendum *a{Addendum()};
- Terminator terminator{__FILE__, __LINE__};
- RUNTIME_CHECK(terminator, a != nullptr);
- new (a) DescriptorAddendum{&dt};
+ std::size_t elementBytes{dt.sizeInBytes()};
+ ISO::EstablishDescriptor(
+ &raw_, p, attribute, CFI_type_struct, elementBytes, rank, extent);
+ if (elementBytes == 0) {
+ raw_.elem_len = 0;
+ // Reset byte strides of the dimensions, since EstablishDescriptor()
+ // only does that when the base address is not nullptr.
+ for (int j{0}; j < rank; ++j) {
+ GetDimension(j).SetByteStride(0);
+ }
+ }
+ SetHasAddendum();
+ new (Addendum()) DescriptorAddendum{&dt};
}
RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(TypeCode t,
@@ -140,24 +147,6 @@ RT_API_ATTRS std::size_t Descriptor::SizeInBytes() const {
return bytes;
}
-RT_API_ATTRS std::size_t Descriptor::Elements() const {
- int n{rank()};
- std::size_t elements{1};
- for (int j{0}; j < n; ++j) {
- elements *= GetDimension(j).Extent();
- }
- return elements;
-}
-
-RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
-#ifdef RT_DEVICE_COMPILATION
- // Force default allocator in device code.
- return kDefaultAllocator;
-#else
- return desc.GetAllocIdx();
-#endif
-}
-
RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
std::size_t elementBytes{ElementBytes()};
if (static_cast<std::int64_t>(elementBytes) < 0) {
@@ -166,7 +155,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
elementBytes = raw_.elem_len = 0;
}
std::size_t byteSize{Elements() * elementBytes};
- AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx(*this))};
+ AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx())};
// Zero size allocation is possible in Fortran and the resulting
// descriptor must be allocated/associated. Since std::malloc(0)
// result is implementation defined, always allocate at least one byte.
@@ -207,18 +196,6 @@ RT_API_ATTRS int Descriptor::Destroy(
}
}
-RT_API_ATTRS int Descriptor::Deallocate() {
- ISO::CFI_cdesc_t &descriptor{raw()};
- if (!descriptor.base_addr) {
- return CFI_ERROR_BASE_ADDR_NULL;
- } else {
- FreeFct free{allocatorRegistry.GetDeallocator(MapAllocIdx(*this))};
- free(descriptor.base_addr);
- descriptor.base_addr = nullptr;
- return CFI_SUCCESS;
- }
-}
-
RT_API_ATTRS bool Descriptor::DecrementSubscripts(
SubscriptValue *subscript, const int *permutation) const {
for (int j{raw_.rank - 1}; j >= 0; --j) {
diff --git a/flang-rt/lib/runtime/pointer.cpp b/flang-rt/lib/runtime/pointer.cpp
index 7331f7bbc3a75..04487abd3272e 100644
--- a/flang-rt/lib/runtime/pointer.cpp
+++ b/flang-rt/lib/runtime/pointer.cpp
@@ -115,10 +115,12 @@ void RTDEF(PointerAssociateRemapping)(Descriptor &pointer,
byteStride *= dim.Extent();
}
}
- if (pointer.Elements() > target.Elements()) {
+ std::size_t pointerElements{pointer.Elements()};
+ std::size_t targetElements{target.Elements()};
+ if (pointerElements > targetElements) {
terminator.Crash("PointerAssociateRemapping: too many elements in remapped "
"pointer (%zd > %zd)",
- pointer.Elements(), target.Elements());
+ pointerElements, targetElements);
}
}
diff --git a/flang-rt/lib/runtime/work-queue.cpp b/flang-rt/lib/runtime/work-queue.cpp
index a508ecb637102..42dbc9064b03b 100644
--- a/flang-rt/lib/runtime/work-queue.cpp
+++ b/flang-rt/lib/runtime/work-queue.cpp
@@ -21,21 +21,6 @@ static constexpr bool enableDebugOutput{false};
RT_OFFLOAD_API_GROUP_BEGIN
-RT_API_ATTRS Componentwise::Componentwise(const typeInfo::DerivedType &derived)
- : derived_{derived}, components_{derived_.component().Elements()} {
- GetComponent();
-}
-
-RT_API_ATTRS void Componentwise::GetComponent() {
- if (IsComplete()) {
- component_ = nullptr;
- } else {
- const Descriptor &componentDesc{derived_.component()};
- component_ = componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
- componentAt_);
- }
-}
-
RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) {
if (!begun) {
begun = true;
@@ -53,19 +38,21 @@ RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) {
}
RT_API_ATTRS WorkQueue::~WorkQueue() {
- if (last_) {
- if ((last_->next = firstFree_)) {
- last_->next->previous = last_;
+ if (anyDynamicAllocation_) {
+ if (last_) {
+ if ((last_->next = firstFree_)) {
+ last_->next->previous = last_;
+ }
+ firstFree_ = first_;
+ first_ = last_ = nullptr;
}
- firstFree_ = first_;
- first_ = last_ = nullptr;
- }
- while (firstFree_) {
- TicketList *next{firstFree_->next};
- if (!firstFree_->isStatic) {
- FreeMemory(firstFree_);
+ while (firstFree_) {
+ TicketList *next{firstFree_->next};
+ if (!firstFree_->isStatic) {
+ FreeMemory(firstFree_);
+ }
+ firstFree_ = next;
}
- firstFree_ = next;
}
}
@@ -74,6 +61,7 @@ RT_API_ATTRS Ticket &WorkQueue::StartTicket() {
void *p{AllocateMemoryOrCrash(terminator_, sizeof(TicketList))};
firstFree_ = new (p) TicketList;
firstFree_->isStatic = false;
+ anyDynamicAllocation_ = true;
}
TicketList *newTicket{firstFree_};
if ((firstFree_ = newTicket->next)) {
More information about the llvm-commits
mailing list