[llvm] [flang][runtime] Speed up initialization & destruction (PR #148087)

Peter Klausler via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 11 16:53:47 PDT 2025


https://github.com/klausler updated https://github.com/llvm/llvm-project/pull/148087

>From b9be6901f419923f735a829c42d28a4b69c74172 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Tue, 8 Jul 2025 18:05:44 -0700
Subject: [PATCH] [flang][runtime] Speed up initialization & destruction

Rework derived type initialization in the runtime to just
initialize the first element of any array, and then memcpy
it to the others, rather than exercising the per-component
paths for each element.

Rework derived type destruction in the runtime to detect and
exploit a fast path for allocatable components whose types
themselves don't need nested destruction.

Small tweaks were made in hot paths exposed by profiling in
descriptor operations and derived type assignment.
---
 .../include/flang-rt/runtime/descriptor.h     |  87 ++++++++++-
 flang-rt/include/flang-rt/runtime/type-info.h |   4 +-
 .../include/flang-rt/runtime/work-queue.h     |  36 +++--
 flang-rt/lib/runtime/assign.cpp               |  17 ++-
 flang-rt/lib/runtime/derived.cpp              | 138 ++++++++++--------
 flang-rt/lib/runtime/descriptor.cpp           |  51 ++-----
 flang-rt/lib/runtime/pointer.cpp              |   6 +-
 flang-rt/lib/runtime/work-queue.cpp           |  40 ++---
 8 files changed, 229 insertions(+), 150 deletions(-)

diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h
index 75092a1fd2bfc..4a5674cb6596c 100644
--- a/flang-rt/include/flang-rt/runtime/descriptor.h
+++ b/flang-rt/include/flang-rt/runtime/descriptor.h
@@ -20,13 +20,16 @@
 
 #include "memory.h"
 #include "type-code.h"
+#include "flang-rt/runtime/allocator-registry.h"
 #include "flang/Common/ISO_Fortran_binding_wrapper.h"
+#include "flang/Common/optional.h"
 #include "flang/Runtime/descriptor-consts.h"
 #include <algorithm>
 #include <cassert>
 #include <cinttypes>
 #include <cstddef>
 #include <cstdio>
+#include <cstdlib>
 #include <cstring>
 
 /// Value used for asyncObject when no specific stream is specified.
@@ -262,9 +265,20 @@ class Descriptor {
 
   template <typename A>
   RT_API_ATTRS A *ZeroBasedIndexedElement(std::size_t n) const {
-    SubscriptValue at[maxRank];
-    if (SubscriptsForZeroBasedElementNumber(at, n)) {
-      return Element<A>(at);
+    if (raw_.rank == 0) {
+      if (n == 0) {
+        return OffsetElement<A>();
+      }
+    } else if (raw_.rank == 1) {
+      const auto &dim{GetDimension(0)};
+      if (n < static_cast<std::size_t>(dim.Extent())) {
+        return OffsetElement<A>(n * dim.ByteStride());
+      }
+    } else {
+      SubscriptValue at[maxRank];
+      if (SubscriptsForZeroBasedElementNumber(at, n)) {
+        return Element<A>(at);
+      }
     }
     return nullptr;
   }
@@ -365,7 +379,18 @@ class Descriptor {
 
   RT_API_ATTRS std::size_t SizeInBytes() const;
 
-  RT_API_ATTRS std::size_t Elements() const;
+  RT_API_ATTRS std::size_t Elements() const {
+    int n{rank()};
+    if (n == 0) {
+      return 1;
+    } else {
+      auto elements{static_cast<std::size_t>(GetDimension(0).Extent())};
+      for (int j{1}; j < n; ++j) {
+        elements *= GetDimension(j).Extent();
+      }
+      return elements;
+    }
+  }
 
   // Allocate() assumes Elements() and ElementBytes() work;
   // define the extents of the dimensions and the element length
@@ -377,7 +402,22 @@ class Descriptor {
 
   // Deallocates storage; does not call FINAL subroutines or
   // deallocate allocatable/automatic components.
-  RT_API_ATTRS int Deallocate();
+  RT_API_ATTRS int Deallocate() {
+    ISO::CFI_cdesc_t &descriptor{raw()};
+    void *pointer{descriptor.base_addr};
+    if (!pointer) {
+      return CFI_ERROR_BASE_ADDR_NULL;
+    } else {
+      int allocIndex{MapAllocIdx()};
+      if (allocIndex == kDefaultAllocator) {
+        std::free(pointer);
+      } else {
+        allocatorRegistry.GetDeallocator(MapAllocIdx())(pointer);
+      }
+      descriptor.base_addr = nullptr;
+      return CFI_SUCCESS;
+    }
+  }
 
   // Deallocates storage, including allocatable and automatic
   // components.  Optionally invokes FINAL subroutines.
@@ -392,8 +432,7 @@ class Descriptor {
     bool stridesAreContiguous{true};
     for (int j{0}; j < leadingDimensions; ++j) {
       const Dimension &dim{GetDimension(j)};
-      stridesAreContiguous &=
-          (bytes == dim.ByteStride()) || (dim.Extent() == 1);
+      stridesAreContiguous &= bytes == dim.ByteStride() || dim.Extent() == 1;
       bytes *= dim.Extent();
     }
     // One and zero element arrays are contiguous even if the descriptor
@@ -406,6 +445,32 @@ class Descriptor {
     return stridesAreContiguous || bytes == 0;
   }
 
+  // The result, if any, is a fixed stride value that can be used to
+  // address all elements.  It generalizes contiguity by also allowing
+  // the case of an array with extent 1 on all but one dimension.
+  RT_API_ATTRS common::optional<SubscriptValue> FixedStride() const {
+    auto rank{static_cast<std::size_t>(raw_.rank)};
+    common::optional<SubscriptValue> stride;
+    for (std::size_t j{0}; j < rank; ++j) {
+      const Dimension &dim{GetDimension(j)};
+      auto extent{dim.Extent()};
+      if (extent == 0) {
+        break; // empty array
+      } else if (extent == 1) { // ok
+      } else if (stride) {
+        // Extent > 1 on multiple dimensions
+        if (IsContiguous()) {
+          return ElementBytes();
+        } else {
+          return common::nullopt;
+        }
+      } else {
+        stride = dim.ByteStride();
+      }
+    }
+    return stride.value_or(0); // 0 for scalars and empty arrays
+  }
+
   // Establishes a pointer to a section or element.
   RT_API_ATTRS bool EstablishPointerSection(const Descriptor &source,
       const SubscriptValue *lower = nullptr,
@@ -427,6 +492,14 @@ class Descriptor {
   RT_API_ATTRS inline int GetAllocIdx() const {
     return (raw_.extra & _CFI_ALLOCATOR_IDX_MASK) >> _CFI_ALLOCATOR_IDX_SHIFT;
   }
+  RT_API_ATTRS int MapAllocIdx() const {
+#ifdef RT_DEVICE_COMPILATION
+    // Force default allocator in device code.
+    return kDefaultAllocator;
+#else
+    return GetAllocIdx();
+#endif
+  }
   RT_API_ATTRS inline void SetAllocIdx(int pos) {
     raw_.extra &= ~_CFI_ALLOCATOR_IDX_MASK; // Clear the allocator index bits.
     raw_.extra |= pos << _CFI_ALLOCATOR_IDX_SHIFT;
diff --git a/flang-rt/include/flang-rt/runtime/type-info.h b/flang-rt/include/flang-rt/runtime/type-info.h
index 80301a313282f..a8d39f4f8a1a3 100644
--- a/flang-rt/include/flang-rt/runtime/type-info.h
+++ b/flang-rt/include/flang-rt/runtime/type-info.h
@@ -68,7 +68,9 @@ class Component {
   RT_API_ATTRS std::uint64_t offset() const { return offset_; }
   RT_API_ATTRS const Value &characterLen() const { return characterLen_; }
   RT_API_ATTRS const DerivedType *derivedType() const {
-    return derivedType_.descriptor().OffsetElement<const DerivedType>();
+    return category() == TypeCategory::Derived
+        ? derivedType_.descriptor().OffsetElement<const DerivedType>()
+        : nullptr;
   }
   RT_API_ATTRS const Value *lenValue() const {
     return lenValue_.descriptor().OffsetElement<const Value>();
diff --git a/flang-rt/include/flang-rt/runtime/work-queue.h b/flang-rt/include/flang-rt/runtime/work-queue.h
index 0daa7bc4d3384..14a9d720e2ef2 100644
--- a/flang-rt/include/flang-rt/runtime/work-queue.h
+++ b/flang-rt/include/flang-rt/runtime/work-queue.h
@@ -62,6 +62,7 @@
 #include "flang-rt/runtime/stat.h"
 #include "flang-rt/runtime/type-info.h"
 #include "flang/Common/api-attrs.h"
+#include "flang/Common/optional.h"
 #include "flang/Runtime/freestanding-tools.h"
 #include <flang/Common/variant.h>
 
@@ -131,11 +132,19 @@ class Elementwise {
 // Base class for ticket workers that operate over derived type components.
 class Componentwise {
 public:
-  RT_API_ATTRS Componentwise(const typeInfo::DerivedType &);
+  RT_API_ATTRS Componentwise(const typeInfo::DerivedType &derived)
+      : derived_{derived}, components_{derived_.component().Elements()} {
+    GetFirstComponent();
+  }
+
   RT_API_ATTRS bool IsComplete() const { return componentAt_ >= components_; }
   RT_API_ATTRS void Advance() {
     ++componentAt_;
-    GetComponent();
+    if (IsComplete()) {
+      component_ = nullptr;
+    } else {
+      ++component_;
+    }
   }
   RT_API_ATTRS void SkipToEnd() {
     component_ = nullptr;
@@ -144,15 +153,21 @@ class Componentwise {
   RT_API_ATTRS void Reset() {
     component_ = nullptr;
     componentAt_ = 0;
-    GetComponent();
+    GetFirstComponent();
   }
-  RT_API_ATTRS void GetComponent();
 
 protected:
   const typeInfo::DerivedType &derived_;
   std::size_t components_{0}, componentAt_{0};
   const typeInfo::Component *component_{nullptr};
   StaticDescriptor<common::maxRank, true, 0> componentDescriptor_;
+
+private:
+  RT_API_ATTRS void GetFirstComponent() {
+    if (components_ > 0) {
+      component_ = derived_.component().OffsetElement<typeInfo::Component>();
+    }
+  }
 };
 
 // Base class for ticket workers that operate over derived type components
@@ -228,14 +243,14 @@ class ElementsOverComponents : public Elementwise, public Componentwise {
 
 // Ticket worker classes
 
-// Implements derived type instance initialization
+// Implements derived type instance initialization.
 class InitializeTicket : public ImmediateTicketRunner<InitializeTicket>,
-                         private ComponentsOverElements {
+                         private ElementsOverComponents {
 public:
   RT_API_ATTRS InitializeTicket(
       const Descriptor &instance, const typeInfo::DerivedType &derived)
       : ImmediateTicketRunner<InitializeTicket>{*this},
-        ComponentsOverElements{instance, derived} {}
+        ElementsOverComponents{instance, derived} {}
   RT_API_ATTRS int Begin(WorkQueue &);
   RT_API_ATTRS int Continue(WorkQueue &);
 };
@@ -283,12 +298,14 @@ class DestroyTicket : public ImmediateTicketRunner<DestroyTicket>,
   RT_API_ATTRS DestroyTicket(const Descriptor &instance,
       const typeInfo::DerivedType &derived, bool finalize)
       : ImmediateTicketRunner<DestroyTicket>{*this},
-        ComponentsOverElements{instance, derived}, finalize_{finalize} {}
+        ComponentsOverElements{instance, derived}, finalize_{finalize},
+        fixedStride_{instance.FixedStride()} {}
   RT_API_ATTRS int Begin(WorkQueue &);
   RT_API_ATTRS int Continue(WorkQueue &);
 
 private:
   bool finalize_{false};
+  std::optional<SubscriptValue> fixedStride_;
 };
 
 // Implements general intrinsic assignment
@@ -302,11 +319,11 @@ class AssignTicket : public ImmediateTicketRunner<AssignTicket> {
   RT_API_ATTRS int Continue(WorkQueue &);
 
 private:
+  RT_API_ATTRS Descriptor &GetTempDescriptor();
   RT_API_ATTRS bool IsSimpleMemmove() const {
     return !toDerived_ && to_.rank() == from_->rank() && to_.IsContiguous() &&
         from_->IsContiguous() && to_.ElementBytes() == from_->ElementBytes();
   }
-  RT_API_ATTRS Descriptor &GetTempDescriptor();
 
   Descriptor &to_;
   const Descriptor *from_{nullptr};
@@ -549,6 +566,7 @@ class WorkQueue {
   TicketList *first_{nullptr}, *last_{nullptr}, *insertAfter_{nullptr};
   TicketList static_[numStatic_];
   TicketList *firstFree_{static_};
+  bool anyDynamicAllocation_{false};
 };
 
 } // namespace Fortran::runtime
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index f936a4192a33c..78c3f60256543 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -432,10 +432,13 @@ RT_API_ATTRS int AssignTicket::Continue(WorkQueue &workQueue) {
   }
   // Intrinsic assignment
   std::size_t toElements{to_.Elements()};
-  if (from_->rank() > 0 && toElements != from_->Elements()) {
-    workQueue.terminator().Crash("Assign: mismatching element counts in array "
-                                 "assignment (to %zd, from %zd)",
-        toElements, from_->Elements());
+  if (from_->rank() > 0) {
+    std::size_t fromElements{from_->Elements()};
+    if (toElements != fromElements) {
+      workQueue.terminator().Crash("Assign: mismatching element counts in "
+                                   "array assignment (to %zd, from %zd)",
+          toElements, fromElements);
+    }
   }
   if (to_.type() != from_->type()) {
     workQueue.terminator().Crash(
@@ -614,7 +617,7 @@ RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
               memmoveFct_(to, from, componentByteSize);
             }
           }
-          this->Componentwise::Advance();
+          this->SkipToNextComponent();
         } else {
           memmoveFct_(
               this->instance_.template Element<char>(this->subscripts_) +
@@ -646,7 +649,7 @@ RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
             memmoveFct_(to, from, componentByteSize);
           }
         }
-        this->Componentwise::Advance();
+        this->SkipToNextComponent();
       } else {
         memmoveFct_(this->instance_.template Element<char>(this->subscripts_) +
                 this->component_->offset(),
@@ -668,11 +671,11 @@ RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
       if (toDesc->IsAllocatable() && !fromDesc->IsAllocated()) {
         if (toDesc->IsAllocated()) {
           if (this->phase_ == 0) {
-            this->phase_++;
             if (componentDerived && !componentDerived->noDestructionNeeded()) {
               if (int status{workQueue.BeginDestroy(
                       *toDesc, *componentDerived, /*finalize=*/false)};
                   status != StatOk) {
+                this->phase_++;
                 return status;
               }
             }
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index 4e36b1e2edfc8..576fbf1ecdff5 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -39,64 +39,46 @@ RT_API_ATTRS int Initialize(const Descriptor &instance,
 }
 
 RT_API_ATTRS int InitializeTicket::Begin(WorkQueue &) {
-  // Initialize procedure pointer components in each element
-  const Descriptor &procPtrDesc{derived_.procPtr()};
-  if (std::size_t numProcPtrs{procPtrDesc.Elements()}) {
-    for (std::size_t k{0}; k < numProcPtrs; ++k) {
-      const auto &comp{
-          *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
-      // Loop only over elements
-      if (k > 0) {
-        Elementwise::Reset();
-      }
-      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
-        auto &pptr{*instance_.ElementComponent<typeInfo::ProcedurePointer>(
-            subscripts_, comp.offset)};
-        pptr = comp.procInitialization;
-      }
-    }
-    if (IsComplete()) {
-      return StatOk;
+  if (elements_ == 0) {
+    return StatOk;
+  } else {
+    // Initialize procedure pointer components in the first element,
+    // whence they will be copied later into all others.
+    const Descriptor &procPtrDesc{derived_.procPtr()};
+    std::size_t numProcPtrs{procPtrDesc.Elements()};
+    char *raw{instance_.OffsetElement<char>()};
+    const auto *ppComponent{
+        procPtrDesc.OffsetElement<typeInfo::ProcPtrComponent>()};
+    for (std::size_t k{0}; k < numProcPtrs; ++k, ++ppComponent) {
+      auto &pptr{*reinterpret_cast<typeInfo::ProcedurePointer *>(
+          raw + ppComponent->offset)};
+      pptr = ppComponent->procInitialization;
     }
-    Elementwise::Reset();
+    return StatContinue;
   }
-  return StatContinue;
 }
 
 RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
-  while (!IsComplete()) {
+  // Initialize the data components of the first element.
+  char *rawInstance{instance_.OffsetElement<char>()};
+  for (; !Componentwise::IsComplete(); SkipToNextComponent()) {
+    char *rawComponent{rawInstance + component_->offset()};
     if (component_->genre() == typeInfo::Component::Genre::Allocatable) {
-      // Establish allocatable descriptors
-      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
-        Descriptor &allocDesc{*instance_.ElementComponent<Descriptor>(
-            subscripts_, component_->offset())};
-        component_->EstablishDescriptor(
-            allocDesc, instance_, workQueue.terminator());
-        allocDesc.raw().attribute = CFI_attribute_allocatable;
-      }
-      SkipToNextComponent();
+      Descriptor &allocDesc{*reinterpret_cast<Descriptor *>(rawComponent)};
+      component_->EstablishDescriptor(
+          allocDesc, instance_, workQueue.terminator());
     } else if (const void *init{component_->initialization()}) {
       // Explicit initialization of data pointers and
       // non-allocatable non-automatic components
       std::size_t bytes{component_->SizeInBytes(instance_)};
-      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
-        char *ptr{instance_.ElementComponent<char>(
-            subscripts_, component_->offset())};
-        std::memcpy(ptr, init, bytes);
-      }
-      SkipToNextComponent();
+      std::memcpy(rawComponent, init, bytes);
     } else if (component_->genre() == typeInfo::Component::Genre::Pointer) {
       // Data pointers without explicit initialization are established
       // so that they are valid right-hand side targets of pointer
       // assignment statements.
-      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
-        Descriptor &ptrDesc{*instance_.ElementComponent<Descriptor>(
-            subscripts_, component_->offset())};
-        component_->EstablishDescriptor(
-            ptrDesc, instance_, workQueue.terminator());
-        ptrDesc.raw().attribute = CFI_attribute_pointer;
-      }
-      SkipToNextComponent();
+      Descriptor &ptrDesc{*reinterpret_cast<Descriptor *>(rawComponent)};
+      component_->EstablishDescriptor(
+          ptrDesc, instance_, workQueue.terminator());
     } else if (component_->genre() == typeInfo::Component::Genre::Data &&
         component_->derivedType() &&
         !component_->derivedType()->noInitializationNeeded()) {
@@ -106,16 +88,41 @@ RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
       GetComponentExtents(extents, *component_, instance_);
       Descriptor &compDesc{componentDescriptor_.descriptor()};
       const typeInfo::DerivedType &compType{*component_->derivedType()};
-      compDesc.Establish(compType,
-          instance_.ElementComponent<char>(subscripts_, component_->offset()),
-          component_->rank(), extents);
-      Advance();
+      compDesc.Establish(compType, rawComponent, component_->rank(), extents);
       if (int status{workQueue.BeginInitialize(compDesc, compType)};
           status != StatOk) {
+        SkipToNextComponent();
         return status;
       }
-    } else {
-      SkipToNextComponent();
+    }
+  }
+  // The first element is now complete.  Copy it into the others.
+  if (elements_ < 2) {
+  } else {
+    auto elementBytes{static_cast<SubscriptValue>(instance_.ElementBytes())};
+    if (auto stride{instance_.FixedStride()}) {
+      if (*stride == elementBytes) { // contiguous
+        for (std::size_t done{1}; done < elements_;) {
+          std::size_t chunk{elements_ - done};
+          if (chunk > done) {
+            chunk = done;
+          }
+          char *uninitialized{rawInstance + done * *stride};
+          std::memcpy(uninitialized, rawInstance, chunk * *stride);
+          done += chunk;
+        }
+      } else {
+        for (std::size_t done{1}; done < elements_; ++done) {
+          char *uninitialized{rawInstance + done * *stride};
+          std::memcpy(uninitialized, rawInstance, elementBytes);
+        }
+      }
+    } else { // one at a time with subscription
+      for (Elementwise::Advance(); !Elementwise::IsComplete();
+          Elementwise::Advance()) {
+        char *element{instance_.Element<char>(subscripts_)};
+        std::memcpy(element, rawInstance, elementBytes);
+      }
     }
   }
   return StatOk;
@@ -415,24 +422,33 @@ RT_API_ATTRS int DestroyTicket::Continue(WorkQueue &workQueue) {
   // Contrary to finalization, the order of deallocation does not matter.
   while (!IsComplete()) {
     const auto *componentDerived{component_->derivedType()};
-    if (component_->genre() == typeInfo::Component::Genre::Allocatable ||
-        component_->genre() == typeInfo::Component::Genre::Automatic) {
-      Descriptor *d{instance_.ElementComponent<Descriptor>(
-          subscripts_, component_->offset())};
-      if (d->IsAllocated()) {
-        if (phase_ == 0) {
-          ++phase_;
-          if (componentDerived && !componentDerived->noDestructionNeeded()) {
+    if (component_->genre() == typeInfo::Component::Genre::Allocatable) {
+      if (fixedStride_ &&
+          (!componentDerived || componentDerived->noDestructionNeeded())) {
+        // common fast path, just deallocate in every element
+        char *p{instance_.OffsetElement<char>(component_->offset())};
+        for (std::size_t j{0}; j < elements_; ++j, p += *fixedStride_) {
+          Descriptor &d{*reinterpret_cast<Descriptor *>(p)};
+          d.Deallocate();
+        }
+        SkipToNextComponent();
+      } else {
+        Descriptor &d{*instance_.ElementComponent<Descriptor>(
+            subscripts_, component_->offset())};
+        if (d.IsAllocated()) {
+          if (componentDerived && !componentDerived->noDestructionNeeded() &&
+              phase_ == 0) {
             if (int status{workQueue.BeginDestroy(
-                    *d, *componentDerived, /*finalize=*/false)};
+                    d, *componentDerived, /*finalize=*/false)};
                 status != StatOk) {
+              ++phase_;
               return status;
             }
           }
+          d.Deallocate();
         }
-        d->Deallocate();
+        Advance();
       }
-      Advance();
     } else if (component_->genre() == typeInfo::Component::Genre::Data) {
       if (!componentDerived || componentDerived->noDestructionNeeded()) {
         SkipToNextComponent();
diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp
index 67336d01380e0..7392270e14a8e 100644
--- a/flang-rt/lib/runtime/descriptor.cpp
+++ b/flang-rt/lib/runtime/descriptor.cpp
@@ -85,12 +85,19 @@ RT_API_ATTRS void Descriptor::Establish(int characterKind,
 RT_API_ATTRS void Descriptor::Establish(const typeInfo::DerivedType &dt,
     void *p, int rank, const SubscriptValue *extent,
     ISO::CFI_attribute_t attribute) {
-  Establish(TypeCode{TypeCategory::Derived, 0}, dt.sizeInBytes(), p, rank,
-      extent, attribute, true);
-  DescriptorAddendum *a{Addendum()};
-  Terminator terminator{__FILE__, __LINE__};
-  RUNTIME_CHECK(terminator, a != nullptr);
-  new (a) DescriptorAddendum{&dt};
+  std::size_t elementBytes{dt.sizeInBytes()};
+  ISO::EstablishDescriptor(
+      &raw_, p, attribute, CFI_type_struct, elementBytes, rank, extent);
+  if (elementBytes == 0) {
+    raw_.elem_len = 0;
+    // Reset byte strides of the dimensions, since EstablishDescriptor()
+    // only does that when the base address is not nullptr.
+    for (int j{0}; j < rank; ++j) {
+      GetDimension(j).SetByteStride(0);
+    }
+  }
+  SetHasAddendum();
+  new (Addendum()) DescriptorAddendum{&dt};
 }
 
 RT_API_ATTRS OwningPtr<Descriptor> Descriptor::Create(TypeCode t,
@@ -140,24 +147,6 @@ RT_API_ATTRS std::size_t Descriptor::SizeInBytes() const {
   return bytes;
 }
 
-RT_API_ATTRS std::size_t Descriptor::Elements() const {
-  int n{rank()};
-  std::size_t elements{1};
-  for (int j{0}; j < n; ++j) {
-    elements *= GetDimension(j).Extent();
-  }
-  return elements;
-}
-
-RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) {
-#ifdef RT_DEVICE_COMPILATION
-  // Force default allocator in device code.
-  return kDefaultAllocator;
-#else
-  return desc.GetAllocIdx();
-#endif
-}
-
 RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
   std::size_t elementBytes{ElementBytes()};
   if (static_cast<std::int64_t>(elementBytes) < 0) {
@@ -166,7 +155,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) {
     elementBytes = raw_.elem_len = 0;
   }
   std::size_t byteSize{Elements() * elementBytes};
-  AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx(*this))};
+  AllocFct alloc{allocatorRegistry.GetAllocator(MapAllocIdx())};
   // Zero size allocation is possible in Fortran and the resulting
   // descriptor must be allocated/associated. Since std::malloc(0)
   // result is implementation defined, always allocate at least one byte.
@@ -207,18 +196,6 @@ RT_API_ATTRS int Descriptor::Destroy(
   }
 }
 
-RT_API_ATTRS int Descriptor::Deallocate() {
-  ISO::CFI_cdesc_t &descriptor{raw()};
-  if (!descriptor.base_addr) {
-    return CFI_ERROR_BASE_ADDR_NULL;
-  } else {
-    FreeFct free{allocatorRegistry.GetDeallocator(MapAllocIdx(*this))};
-    free(descriptor.base_addr);
-    descriptor.base_addr = nullptr;
-    return CFI_SUCCESS;
-  }
-}
-
 RT_API_ATTRS bool Descriptor::DecrementSubscripts(
     SubscriptValue *subscript, const int *permutation) const {
   for (int j{raw_.rank - 1}; j >= 0; --j) {
diff --git a/flang-rt/lib/runtime/pointer.cpp b/flang-rt/lib/runtime/pointer.cpp
index 7331f7bbc3a75..04487abd3272e 100644
--- a/flang-rt/lib/runtime/pointer.cpp
+++ b/flang-rt/lib/runtime/pointer.cpp
@@ -115,10 +115,12 @@ void RTDEF(PointerAssociateRemapping)(Descriptor &pointer,
       byteStride *= dim.Extent();
     }
   }
-  if (pointer.Elements() > target.Elements()) {
+  std::size_t pointerElements{pointer.Elements()};
+  std::size_t targetElements{target.Elements()};
+  if (pointerElements > targetElements) {
     terminator.Crash("PointerAssociateRemapping: too many elements in remapped "
                      "pointer (%zd > %zd)",
-        pointer.Elements(), target.Elements());
+        pointerElements, targetElements);
   }
 }
 
diff --git a/flang-rt/lib/runtime/work-queue.cpp b/flang-rt/lib/runtime/work-queue.cpp
index a508ecb637102..42dbc9064b03b 100644
--- a/flang-rt/lib/runtime/work-queue.cpp
+++ b/flang-rt/lib/runtime/work-queue.cpp
@@ -21,21 +21,6 @@ static constexpr bool enableDebugOutput{false};
 
 RT_OFFLOAD_API_GROUP_BEGIN
 
-RT_API_ATTRS Componentwise::Componentwise(const typeInfo::DerivedType &derived)
-    : derived_{derived}, components_{derived_.component().Elements()} {
-  GetComponent();
-}
-
-RT_API_ATTRS void Componentwise::GetComponent() {
-  if (IsComplete()) {
-    component_ = nullptr;
-  } else {
-    const Descriptor &componentDesc{derived_.component()};
-    component_ = componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
-        componentAt_);
-  }
-}
-
 RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) {
   if (!begun) {
     begun = true;
@@ -53,19 +38,21 @@ RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) {
 }
 
 RT_API_ATTRS WorkQueue::~WorkQueue() {
-  if (last_) {
-    if ((last_->next = firstFree_)) {
-      last_->next->previous = last_;
+  if (anyDynamicAllocation_) {
+    if (last_) {
+      if ((last_->next = firstFree_)) {
+        last_->next->previous = last_;
+      }
+      firstFree_ = first_;
+      first_ = last_ = nullptr;
     }
-    firstFree_ = first_;
-    first_ = last_ = nullptr;
-  }
-  while (firstFree_) {
-    TicketList *next{firstFree_->next};
-    if (!firstFree_->isStatic) {
-      FreeMemory(firstFree_);
+    while (firstFree_) {
+      TicketList *next{firstFree_->next};
+      if (!firstFree_->isStatic) {
+        FreeMemory(firstFree_);
+      }
+      firstFree_ = next;
     }
-    firstFree_ = next;
   }
 }
 
@@ -74,6 +61,7 @@ RT_API_ATTRS Ticket &WorkQueue::StartTicket() {
     void *p{AllocateMemoryOrCrash(terminator_, sizeof(TicketList))};
     firstFree_ = new (p) TicketList;
     firstFree_->isStatic = false;
+    anyDynamicAllocation_ = true;
   }
   TicketList *newTicket{firstFree_};
   if ((firstFree_ = newTicket->next)) {



More information about the llvm-commits mailing list