[flang] [llvm] [flang][runtime] Replace recursion with iterative work queue (PR #137727)

Peter Klausler via llvm-commits llvm-commits at lists.llvm.org
Fri May 30 10:18:13 PDT 2025


https://github.com/klausler updated https://github.com/llvm/llvm-project/pull/137727

>From 61187093aa87f00484d9ddc486c415255092a919 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Wed, 23 Apr 2025 14:44:23 -0700
Subject: [PATCH] [flang][runtime] Replace recursion with iterative work queue

Recursion, both direct and indirect, prevents accurate stack size
calculation at link time for GPU device code.  Restructure these
recursive (often mutually so) routines in the Fortran runtime
with new implementations based on an iterative work queue with
suspendable/resumable work tickets: Assign, Initialize, initializeClone,
Finalize, Destroy, and DescriptorIO.

Note that derived type FINAL subroutine calls, defined assignments,
and defined I/O procedures all perform callbacks into user code,
which may well reenter the runtime library.  This kind of recursion
is not handled by this change, although it may be possible to do so
in the future using thread-local work queues.

The effects of this restructuring on CPU performance are yet to be
measured.

There is a fast(?) mode in the work queue implementation that causes
new work items to be executed to completion immediately upon
creation, saving the overhead of actually representing and managing
the work queue.  This mode can't be used on GPU devices, but it
is enabled by default for CPU hosts.  It can be disabled easily
for debugging and performance testing.
---
 .../include/flang-rt/runtime/environment.h    |   3 +
 flang-rt/include/flang-rt/runtime/stat.h      |  10 +-
 flang-rt/include/flang-rt/runtime/type-info.h |   2 +
 .../include/flang-rt/runtime/work-queue.h     | 548 +++++++++++++++
 flang-rt/lib/runtime/CMakeLists.txt           |   2 +
 flang-rt/lib/runtime/assign.cpp               | 622 +++++++++++------
 flang-rt/lib/runtime/derived.cpp              | 517 +++++++-------
 flang-rt/lib/runtime/descriptor-io.cpp        | 651 +++++++++++++++++-
 flang-rt/lib/runtime/descriptor-io.h          | 620 +----------------
 flang-rt/lib/runtime/environment.cpp          |   4 +
 flang-rt/lib/runtime/namelist.cpp             |   1 +
 flang-rt/lib/runtime/tools.cpp                |   4 +-
 flang-rt/lib/runtime/type-info.cpp            |   6 +-
 flang-rt/lib/runtime/work-queue.cpp           | 161 +++++
 flang-rt/unittests/Runtime/ExternalIOTest.cpp |   2 +-
 flang/docs/Extensions.md                      |  10 +
 flang/include/flang/Runtime/assign.h          |   2 +-
 flang/include/flang/Semantics/tools.h         |   7 +-
 flang/lib/Semantics/runtime-type-info.cpp     |   4 +
 flang/lib/Semantics/tools.cpp                 |  32 +
 flang/module/__fortran_type_info.f90          |   3 +-
 flang/test/Lower/volatile-openmp.f90          |   8 +-
 flang/test/Semantics/typeinfo01.f90           |  26 +-
 flang/test/Semantics/typeinfo03.f90           |   2 +-
 flang/test/Semantics/typeinfo04.f90           |   8 +-
 flang/test/Semantics/typeinfo05.f90           |   4 +-
 flang/test/Semantics/typeinfo06.f90           |   4 +-
 flang/test/Semantics/typeinfo07.f90           |   8 +-
 flang/test/Semantics/typeinfo08.f90           |   2 +-
 flang/test/Semantics/typeinfo11.f90           |   2 +-
 flang/test/Semantics/typeinfo12.f90           |  67 ++
 31 files changed, 2225 insertions(+), 1117 deletions(-)
 create mode 100644 flang-rt/include/flang-rt/runtime/work-queue.h
 create mode 100644 flang-rt/lib/runtime/work-queue.cpp
 create mode 100644 flang/test/Semantics/typeinfo12.f90

diff --git a/flang-rt/include/flang-rt/runtime/environment.h b/flang-rt/include/flang-rt/runtime/environment.h
index 16258b3bbba9b..e579f6012ce86 100644
--- a/flang-rt/include/flang-rt/runtime/environment.h
+++ b/flang-rt/include/flang-rt/runtime/environment.h
@@ -64,6 +64,9 @@ struct ExecutionEnvironment {
   bool defaultUTF8{false}; // DEFAULT_UTF8
   bool checkPointerDeallocation{true}; // FORT_CHECK_POINTER_DEALLOCATION
 
+  enum InternalDebugging { WorkQueue = 1 };
+  int internalDebugging{0}; // FLANG_RT_DEBUG
+
   // CUDA related variables
   std::size_t cudaStackLimit{0}; // ACC_OFFLOAD_STACK_SIZE
   bool cudaDeviceIsManaged{false}; // NV_CUDAFOR_DEVICE_IS_MANAGED
diff --git a/flang-rt/include/flang-rt/runtime/stat.h b/flang-rt/include/flang-rt/runtime/stat.h
index 070d0bf8673fb..dc372de53506a 100644
--- a/flang-rt/include/flang-rt/runtime/stat.h
+++ b/flang-rt/include/flang-rt/runtime/stat.h
@@ -24,7 +24,7 @@ class Terminator;
 enum Stat {
   StatOk = 0, // required to be zero by Fortran
 
-  // Interoperable STAT= codes
+  // Interoperable STAT= codes (>= 11)
   StatBaseNull = CFI_ERROR_BASE_ADDR_NULL,
   StatBaseNotNull = CFI_ERROR_BASE_ADDR_NOT_NULL,
   StatInvalidElemLen = CFI_INVALID_ELEM_LEN,
@@ -36,7 +36,7 @@ enum Stat {
   StatMemAllocation = CFI_ERROR_MEM_ALLOCATION,
   StatOutOfBounds = CFI_ERROR_OUT_OF_BOUNDS,
 
-  // Standard STAT= values
+  // Standard STAT= values (>= 101)
   StatFailedImage = FORTRAN_RUNTIME_STAT_FAILED_IMAGE,
   StatLocked = FORTRAN_RUNTIME_STAT_LOCKED,
   StatLockedOtherImage = FORTRAN_RUNTIME_STAT_LOCKED_OTHER_IMAGE,
@@ -49,10 +49,14 @@ enum Stat {
   // Additional "processor-defined" STAT= values
   StatInvalidArgumentNumber = FORTRAN_RUNTIME_STAT_INVALID_ARG_NUMBER,
   StatMissingArgument = FORTRAN_RUNTIME_STAT_MISSING_ARG,
-  StatValueTooShort = FORTRAN_RUNTIME_STAT_VALUE_TOO_SHORT,
+  StatValueTooShort = FORTRAN_RUNTIME_STAT_VALUE_TOO_SHORT, // -1
   StatMoveAllocSameAllocatable =
       FORTRAN_RUNTIME_STAT_MOVE_ALLOC_SAME_ALLOCATABLE,
   StatBadPointerDeallocation = FORTRAN_RUNTIME_STAT_BAD_POINTER_DEALLOCATION,
+
+  // Dummy status for work queue continuation, declared here to perhaps
+  // avoid collisions
+  StatContinue = 201
 };
 
 RT_API_ATTRS const char *StatErrorString(int);
diff --git a/flang-rt/include/flang-rt/runtime/type-info.h b/flang-rt/include/flang-rt/runtime/type-info.h
index 5e79efde164f2..9bde3adba87f5 100644
--- a/flang-rt/include/flang-rt/runtime/type-info.h
+++ b/flang-rt/include/flang-rt/runtime/type-info.h
@@ -240,6 +240,7 @@ class DerivedType {
   RT_API_ATTRS bool noFinalizationNeeded() const {
     return noFinalizationNeeded_;
   }
+  RT_API_ATTRS bool noDefinedAssignment() const { return noDefinedAssignment_; }
 
   RT_API_ATTRS std::size_t LenParameters() const {
     return lenParameterKind().Elements();
@@ -322,6 +323,7 @@ class DerivedType {
   bool noInitializationNeeded_{false};
   bool noDestructionNeeded_{false};
   bool noFinalizationNeeded_{false};
+  bool noDefinedAssignment_{false};
 };
 
 } // namespace Fortran::runtime::typeInfo
diff --git a/flang-rt/include/flang-rt/runtime/work-queue.h b/flang-rt/include/flang-rt/runtime/work-queue.h
new file mode 100644
index 0000000000000..878b18373e1d2
--- /dev/null
+++ b/flang-rt/include/flang-rt/runtime/work-queue.h
@@ -0,0 +1,548 @@
+//===-- include/flang-rt/runtime/work-queue.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Internal runtime utilities for work queues that replace the use of recursion
+// for better GPU device support.
+//
+// A work queue comprises a list of tickets.  Each ticket class has a Begin()
+// member function, which is called once, and a Continue() member function
+// that can be called zero or more times.  A ticket's execution terminates
+// when either of these member functions returns a status other than
+// StatContinue.  When that status is not StatOk, then the whole queue
+// is shut down.
+//
+// By returning StatContinue from its Continue() member function,
+// a ticket suspends its execution so that any nested tickets that it
+// may have created can be run to completion.  It is the reponsibility
+// of each ticket class to maintain resumption information in its state
+// and manage its own progress.  Most ticket classes inherit from
+// class ComponentsOverElements, which implements an outer loop over all
+// components of a derived type, and an inner loop over all elements
+// of a descriptor, possibly with multiple phases of execution per element.
+//
+// Tickets are created by WorkQueue::Begin...() member functions.
+// There is one of these for each "top level" recursive function in the
+// Fortran runtime support library that has been restructured into this
+// ticket framework.
+//
+// When the work queue is running tickets, it always selects the last ticket
+// on the list for execution -- "work stack" might have been a more accurate
+// name for this framework.  This ticket may, while doing its job, create
+// new tickets, and since those are pushed after the active one, the first
+// such nested ticket will be the next one executed to completion -- i.e.,
+// the order of nested WorkQueue::Begin...() calls is respected.
+// Note that a ticket's Continue() member function won't be called again
+// until all nested tickets have run to completion and it is once again
+// the last ticket on the queue.
+//
+// Example for an assignment to a derived type:
+// 1. Assign() is called, and its work queue is created.  It calls
+//    WorkQueue::BeginAssign() and then WorkQueue::Run().
+// 2. Run calls AssignTicket::Begin(), which pushes a tickets via
+//    BeginFinalize() and returns StatContinue.
+// 3. FinalizeTicket::Begin() and FinalizeTicket::Continue() are called
+//    until one of them returns StatOk, which ends the finalization ticket.
+// 4. AssignTicket::Continue() is then called; it creates a DerivedAssignTicket
+//    and then returns StatOk, which ends the ticket.
+// 5. At this point, only one ticket remains.  DerivedAssignTicket::Begin()
+//    and ::Continue() are called until they are done (not StatContinue).
+//    Along the way, it may create nested AssignTickets for components,
+//    and suspend itself so that they may each run to completion.
+
+#ifndef FLANG_RT_RUNTIME_WORK_QUEUE_H_
+#define FLANG_RT_RUNTIME_WORK_QUEUE_H_
+
+#include "flang-rt/runtime/connection.h"
+#include "flang-rt/runtime/descriptor.h"
+#include "flang-rt/runtime/stat.h"
+#include "flang-rt/runtime/type-info.h"
+#include "flang/Common/api-attrs.h"
+#include "flang/Runtime/freestanding-tools.h"
+#include <flang/Common/variant.h>
+
+namespace Fortran::runtime::io {
+class IoStatementState;
+struct NonTbpDefinedIoTable;
+} // namespace Fortran::runtime::io
+
+namespace Fortran::runtime {
+class Terminator;
+class WorkQueue;
+
+// Ticket worker base classes
+
+template <typename TICKET> class ImmediateTicketRunner {
+public:
+  RT_API_ATTRS explicit ImmediateTicketRunner(TICKET &ticket)
+      : ticket_{ticket} {}
+  RT_API_ATTRS int Run(WorkQueue &workQueue) {
+    int status{ticket_.Begin(workQueue)};
+    while (status == StatContinue) {
+      status = ticket_.Continue(workQueue);
+    }
+    return status;
+  }
+
+private:
+  TICKET &ticket_;
+};
+
+// Base class for ticket workers that operate elementwise over descriptors
+class Elementwise {
+protected:
+  RT_API_ATTRS Elementwise(
+      const Descriptor &instance, const Descriptor *from = nullptr)
+      : instance_{instance}, from_{from} {
+    instance_.GetLowerBounds(subscripts_);
+    if (from_) {
+      from_->GetLowerBounds(fromSubscripts_);
+    }
+  }
+  RT_API_ATTRS bool IsComplete() const { return elementAt_ >= elements_; }
+  RT_API_ATTRS void Advance() {
+    ++elementAt_;
+    instance_.IncrementSubscripts(subscripts_);
+    if (from_) {
+      from_->IncrementSubscripts(fromSubscripts_);
+    }
+  }
+  RT_API_ATTRS void SkipToEnd() { elementAt_ = elements_; }
+  RT_API_ATTRS void Reset() {
+    elementAt_ = 0;
+    instance_.GetLowerBounds(subscripts_);
+    if (from_) {
+      from_->GetLowerBounds(fromSubscripts_);
+    }
+  }
+
+  const Descriptor &instance_, *from_{nullptr};
+  std::size_t elements_{instance_.Elements()};
+  std::size_t elementAt_{0};
+  SubscriptValue subscripts_[common::maxRank];
+  SubscriptValue fromSubscripts_[common::maxRank];
+};
+
+// Base class for ticket workers that operate over derived type components.
+class Componentwise {
+protected:
+  RT_API_ATTRS Componentwise(const typeInfo::DerivedType &);
+  RT_API_ATTRS bool IsComplete() const { return componentAt_ >= components_; }
+  RT_API_ATTRS void Advance() {
+    ++componentAt_;
+    GetComponent();
+  }
+  RT_API_ATTRS void SkipToEnd() {
+    component_ = nullptr;
+    componentAt_ = components_;
+  }
+  RT_API_ATTRS void Reset() {
+    component_ = nullptr;
+    componentAt_ = 0;
+    GetComponent();
+  }
+  RT_API_ATTRS void GetComponent();
+
+  const typeInfo::DerivedType &derived_;
+  std::size_t components_{0}, componentAt_{0};
+  const typeInfo::Component *component_{nullptr};
+  StaticDescriptor<common::maxRank, true, 0> componentDescriptor_;
+};
+
+// Base class for ticket workers that operate over derived type components
+// in an outer loop, and elements in an inner loop.
+class ComponentsOverElements : protected Componentwise, protected Elementwise {
+protected:
+  RT_API_ATTRS ComponentsOverElements(const Descriptor &instance,
+      const typeInfo::DerivedType &derived, const Descriptor *from = nullptr)
+      : Componentwise{derived}, Elementwise{instance, from} {
+    if (Elementwise::IsComplete()) {
+      Componentwise::SkipToEnd();
+    }
+  }
+  RT_API_ATTRS bool IsComplete() const { return Componentwise::IsComplete(); }
+  RT_API_ATTRS void Advance() {
+    SkipToNextElement();
+    if (Elementwise::IsComplete()) {
+      Elementwise::Reset();
+      Componentwise::Advance();
+    }
+  }
+  RT_API_ATTRS void SkipToNextElement() {
+    phase_ = 0;
+    Elementwise::Advance();
+  }
+  RT_API_ATTRS void SkipToNextComponent() {
+    phase_ = 0;
+    Elementwise::Reset();
+    Componentwise::Advance();
+  }
+  RT_API_ATTRS void Reset() {
+    phase_ = 0;
+    Elementwise::Reset();
+    Componentwise::Reset();
+  }
+
+  int phase_{0};
+};
+
+// Base class for ticket workers that operate over elements in an outer loop,
+// type components in an inner loop.
+class ElementsOverComponents : protected Elementwise, protected Componentwise {
+protected:
+  RT_API_ATTRS ElementsOverComponents(const Descriptor &instance,
+      const typeInfo::DerivedType &derived, const Descriptor *from = nullptr)
+      : Elementwise{instance, from}, Componentwise{derived} {
+    if (Componentwise::IsComplete()) {
+      Elementwise::SkipToEnd();
+    }
+  }
+  RT_API_ATTRS bool IsComplete() const { return Elementwise::IsComplete(); }
+  RT_API_ATTRS void Advance() {
+    SkipToNextComponent();
+    if (Componentwise::IsComplete()) {
+      Componentwise::Reset();
+      Elementwise::Advance();
+    }
+  }
+  RT_API_ATTRS void SkipToNextComponent() {
+    phase_ = 0;
+    Componentwise::Advance();
+  }
+  RT_API_ATTRS void SkipToNextElement() {
+    phase_ = 0;
+    Componentwise::Reset();
+    Elementwise::Advance();
+  }
+
+  int phase_{0};
+};
+
+// Ticket worker classes
+
+// Implements derived type instance initialization
+class InitializeTicket : public ImmediateTicketRunner<InitializeTicket>,
+                         private ComponentsOverElements {
+public:
+  RT_API_ATTRS InitializeTicket(
+      const Descriptor &instance, const typeInfo::DerivedType &derived)
+      : ImmediateTicketRunner<InitializeTicket>{*this},
+        ComponentsOverElements{instance, derived} {}
+  RT_API_ATTRS int Begin(WorkQueue &);
+  RT_API_ATTRS int Continue(WorkQueue &);
+};
+
+// Initializes one derived type instance from the value of another
+class InitializeCloneTicket
+    : public ImmediateTicketRunner<InitializeCloneTicket>,
+      private ComponentsOverElements {
+public:
+  RT_API_ATTRS InitializeCloneTicket(const Descriptor &clone,
+      const Descriptor &original, const typeInfo::DerivedType &derived,
+      bool hasStat, const Descriptor *errMsg)
+      : ImmediateTicketRunner<InitializeCloneTicket>{*this},
+        ComponentsOverElements{original, derived}, clone_{clone},
+        hasStat_{hasStat}, errMsg_{errMsg} {}
+  RT_API_ATTRS int Begin(WorkQueue &) { return StatContinue; }
+  RT_API_ATTRS int Continue(WorkQueue &);
+
+private:
+  const Descriptor &clone_;
+  bool hasStat_{false};
+  const Descriptor *errMsg_{nullptr};
+  StaticDescriptor<common::maxRank, true, 0> cloneComponentDescriptor_;
+};
+
+// Implements derived type instance finalization
+class FinalizeTicket : public ImmediateTicketRunner<FinalizeTicket>,
+                       private ComponentsOverElements {
+public:
+  RT_API_ATTRS FinalizeTicket(
+      const Descriptor &instance, const typeInfo::DerivedType &derived)
+      : ImmediateTicketRunner<FinalizeTicket>{*this},
+        ComponentsOverElements{instance, derived} {}
+  RT_API_ATTRS int Begin(WorkQueue &);
+  RT_API_ATTRS int Continue(WorkQueue &);
+
+private:
+  const typeInfo::DerivedType *finalizableParentType_{nullptr};
+};
+
+// Implements derived type instance destruction
+class DestroyTicket : public ImmediateTicketRunner<DestroyTicket>,
+                      private ComponentsOverElements {
+public:
+  RT_API_ATTRS DestroyTicket(const Descriptor &instance,
+      const typeInfo::DerivedType &derived, bool finalize)
+      : ImmediateTicketRunner<DestroyTicket>{*this},
+        ComponentsOverElements{instance, derived}, finalize_{finalize} {}
+  RT_API_ATTRS int Begin(WorkQueue &);
+  RT_API_ATTRS int Continue(WorkQueue &);
+
+private:
+  bool finalize_{false};
+};
+
+// Implements general intrinsic assignment
+class AssignTicket : public ImmediateTicketRunner<AssignTicket> {
+public:
+  RT_API_ATTRS AssignTicket(
+      Descriptor &to, const Descriptor &from, int flags, MemmoveFct memmoveFct)
+      : ImmediateTicketRunner<AssignTicket>{*this}, to_{to}, from_{&from},
+        flags_{flags}, memmoveFct_{memmoveFct} {}
+  RT_API_ATTRS int Begin(WorkQueue &);
+  RT_API_ATTRS int Continue(WorkQueue &);
+
+private:
+  RT_API_ATTRS bool IsSimpleMemmove() const {
+    return !toDerived_ && to_.rank() == from_->rank() && to_.IsContiguous() &&
+        from_->IsContiguous() && to_.ElementBytes() == from_->ElementBytes();
+  }
+  RT_API_ATTRS Descriptor &GetTempDescriptor();
+
+  Descriptor &to_;
+  const Descriptor *from_{nullptr};
+  int flags_{0}; // enum AssignFlags
+  MemmoveFct memmoveFct_{nullptr};
+  StaticDescriptor<common::maxRank, true, 0> tempDescriptor_;
+  const typeInfo::DerivedType *toDerived_{nullptr};
+  Descriptor *toDeallocate_{nullptr};
+  bool persist_{false};
+  bool done_{false};
+};
+
+// Implements derived type intrinsic assignment.
+template <bool IS_COMPONENTWISE>
+class DerivedAssignTicket
+    : public ImmediateTicketRunner<DerivedAssignTicket<IS_COMPONENTWISE>>,
+      private std::conditional_t<IS_COMPONENTWISE, ComponentsOverElements,
+          ElementsOverComponents> {
+public:
+  using Base = std::conditional_t<IS_COMPONENTWISE, ComponentsOverElements,
+      ElementsOverComponents>;
+  RT_API_ATTRS DerivedAssignTicket(const Descriptor &to, const Descriptor &from,
+      const typeInfo::DerivedType &derived, int flags, MemmoveFct memmoveFct,
+      Descriptor *deallocateAfter)
+      : ImmediateTicketRunner<DerivedAssignTicket>{*this},
+        Base{to, derived, &from}, flags_{flags}, memmoveFct_{memmoveFct},
+        deallocateAfter_{deallocateAfter} {}
+  RT_API_ATTRS int Begin(WorkQueue &);
+  RT_API_ATTRS int Continue(WorkQueue &);
+
+private:
+  static constexpr bool isComponentwise_{IS_COMPONENTWISE};
+  bool toIsContiguous_{this->instance_.IsContiguous()};
+  bool fromIsContiguous_{this->from_->IsContiguous()};
+  int flags_{0};
+  MemmoveFct memmoveFct_{nullptr};
+  Descriptor *deallocateAfter_{nullptr};
+  StaticDescriptor<common::maxRank, true, 0> fromComponentDescriptor_;
+};
+
+namespace io::descr {
+
+template <io::Direction DIR>
+class DescriptorIoTicket
+    : public ImmediateTicketRunner<DescriptorIoTicket<DIR>>,
+      private Elementwise {
+public:
+  RT_API_ATTRS DescriptorIoTicket(io::IoStatementState &io,
+      const Descriptor &descriptor, const io::NonTbpDefinedIoTable *table,
+      bool &anyIoTookPlace)
+      : ImmediateTicketRunner<DescriptorIoTicket>(*this),
+        Elementwise{descriptor}, io_{io}, table_{table},
+        anyIoTookPlace_{anyIoTookPlace} {}
+  RT_API_ATTRS int Begin(WorkQueue &);
+  RT_API_ATTRS int Continue(WorkQueue &);
+  RT_API_ATTRS bool &anyIoTookPlace() { return anyIoTookPlace_; }
+
+private:
+  io::IoStatementState &io_;
+  const io::NonTbpDefinedIoTable *table_{nullptr};
+  bool &anyIoTookPlace_;
+  common::optional<typeInfo::SpecialBinding> nonTbpSpecial_;
+  const typeInfo::DerivedType *derived_{nullptr};
+  const typeInfo::SpecialBinding *special_{nullptr};
+  StaticDescriptor<common::maxRank, true, 0> elementDescriptor_;
+};
+
+template <io::Direction DIR>
+class DerivedIoTicket : public ImmediateTicketRunner<DerivedIoTicket<DIR>>,
+                        private ElementsOverComponents {
+public:
+  RT_API_ATTRS DerivedIoTicket(io::IoStatementState &io,
+      const Descriptor &descriptor, const typeInfo::DerivedType &derived,
+      const io::NonTbpDefinedIoTable *table, bool &anyIoTookPlace)
+      : ImmediateTicketRunner<DerivedIoTicket>(*this),
+        ElementsOverComponents{descriptor, derived}, io_{io}, table_{table},
+        anyIoTookPlace_{anyIoTookPlace} {}
+  RT_API_ATTRS int Begin(WorkQueue &) { return StatContinue; }
+  RT_API_ATTRS int Continue(WorkQueue &);
+
+private:
+  io::IoStatementState &io_;
+  const io::NonTbpDefinedIoTable *table_{nullptr};
+  bool &anyIoTookPlace_;
+};
+
+} // namespace io::descr
+
+struct NullTicket {
+  RT_API_ATTRS int Begin(WorkQueue &) const { return StatOk; }
+  RT_API_ATTRS int Continue(WorkQueue &) const { return StatOk; }
+};
+
+struct Ticket {
+  RT_API_ATTRS int Continue(WorkQueue &);
+  bool begun{false};
+  std::variant<NullTicket, InitializeTicket, InitializeCloneTicket,
+      FinalizeTicket, DestroyTicket, AssignTicket, DerivedAssignTicket<false>,
+      DerivedAssignTicket<true>,
+      io::descr::DescriptorIoTicket<io::Direction::Output>,
+      io::descr::DescriptorIoTicket<io::Direction::Input>,
+      io::descr::DerivedIoTicket<io::Direction::Output>,
+      io::descr::DerivedIoTicket<io::Direction::Input>>
+      u;
+};
+
+class WorkQueue {
+public:
+  RT_API_ATTRS explicit WorkQueue(Terminator &terminator)
+      : terminator_{terminator} {
+    for (int j{1}; j < numStatic_; ++j) {
+      static_[j].previous = &static_[j - 1];
+      static_[j - 1].next = &static_[j];
+    }
+  }
+  RT_API_ATTRS ~WorkQueue();
+  RT_API_ATTRS Terminator &terminator() { return terminator_; };
+
+  // APIs for particular tasks.  These can return StatOk if the work is
+  // completed immediately.
+  RT_API_ATTRS int BeginInitialize(
+      const Descriptor &descriptor, const typeInfo::DerivedType &derived) {
+    if (runTicketsImmediately_) {
+      return InitializeTicket{descriptor, derived}.Run(*this);
+    } else {
+      StartTicket().u.emplace<InitializeTicket>(descriptor, derived);
+      return StatContinue;
+    }
+  }
+  RT_API_ATTRS int BeginInitializeClone(const Descriptor &clone,
+      const Descriptor &original, const typeInfo::DerivedType &derived,
+      bool hasStat, const Descriptor *errMsg) {
+    if (runTicketsImmediately_) {
+      return InitializeCloneTicket{clone, original, derived, hasStat, errMsg}
+          .Run(*this);
+    } else {
+      StartTicket().u.emplace<InitializeCloneTicket>(
+          clone, original, derived, hasStat, errMsg);
+      return StatContinue;
+    }
+  }
+  RT_API_ATTRS int BeginFinalize(
+      const Descriptor &descriptor, const typeInfo::DerivedType &derived) {
+    if (runTicketsImmediately_) {
+      return FinalizeTicket{descriptor, derived}.Run(*this);
+    } else {
+      StartTicket().u.emplace<FinalizeTicket>(descriptor, derived);
+      return StatContinue;
+    }
+  }
+  RT_API_ATTRS int BeginDestroy(const Descriptor &descriptor,
+      const typeInfo::DerivedType &derived, bool finalize) {
+    if (runTicketsImmediately_) {
+      return DestroyTicket{descriptor, derived, finalize}.Run(*this);
+    } else {
+      StartTicket().u.emplace<DestroyTicket>(descriptor, derived, finalize);
+      return StatContinue;
+    }
+  }
+  RT_API_ATTRS int BeginAssign(Descriptor &to, const Descriptor &from,
+      int flags, MemmoveFct memmoveFct) {
+    if (runTicketsImmediately_) {
+      return AssignTicket{to, from, flags, memmoveFct}.Run(*this);
+    } else {
+      StartTicket().u.emplace<AssignTicket>(to, from, flags, memmoveFct);
+      return StatContinue;
+    }
+  }
+  template <bool IS_COMPONENTWISE>
+  RT_API_ATTRS int BeginDerivedAssign(Descriptor &to, const Descriptor &from,
+      const typeInfo::DerivedType &derived, int flags, MemmoveFct memmoveFct,
+      Descriptor *deallocateAfter) {
+    if (runTicketsImmediately_) {
+      return DerivedAssignTicket<IS_COMPONENTWISE>{
+          to, from, derived, flags, memmoveFct, deallocateAfter}
+          .Run(*this);
+    } else {
+      StartTicket().u.emplace<DerivedAssignTicket<IS_COMPONENTWISE>>(
+          to, from, derived, flags, memmoveFct, deallocateAfter);
+      return StatContinue;
+    }
+  }
+  template <io::Direction DIR>
+  RT_API_ATTRS int BeginDescriptorIo(io::IoStatementState &io,
+      const Descriptor &descriptor, const io::NonTbpDefinedIoTable *table,
+      bool &anyIoTookPlace) {
+    if (runTicketsImmediately_) {
+      return io::descr::DescriptorIoTicket<DIR>{
+          io, descriptor, table, anyIoTookPlace}
+          .Run(*this);
+    } else {
+      StartTicket().u.emplace<io::descr::DescriptorIoTicket<DIR>>(
+          io, descriptor, table, anyIoTookPlace);
+      return StatContinue;
+    }
+  }
+  template <io::Direction DIR>
+  RT_API_ATTRS int BeginDerivedIo(io::IoStatementState &io,
+      const Descriptor &descriptor, const typeInfo::DerivedType &derived,
+      const io::NonTbpDefinedIoTable *table, bool &anyIoTookPlace) {
+    if (runTicketsImmediately_) {
+      return io::descr::DerivedIoTicket<DIR>{
+          io, descriptor, derived, table, anyIoTookPlace}
+          .Run(*this);
+    } else {
+      StartTicket().u.emplace<io::descr::DerivedIoTicket<DIR>>(
+          io, descriptor, derived, table, anyIoTookPlace);
+      return StatContinue;
+    }
+  }
+
+  RT_API_ATTRS int Run();
+
+private:
+#if RT_DEVICE_COMPILATION
+  // Always use the work queue on a GPU device to avoid recursion.
+  static constexpr bool runTicketsImmediately_{false};
+#else
+  // Avoid the work queue overhead on the host, unless it needs
+  // debugging, which is so much easier there.
+  static constexpr bool runTicketsImmediately_{true};
+#endif
+
+  // Most uses of the work queue won't go very deep.
+  static constexpr int numStatic_{2};
+
+  struct TicketList {
+    bool isStatic{true};
+    Ticket ticket;
+    TicketList *previous{nullptr}, *next{nullptr};
+  };
+
+  RT_API_ATTRS Ticket &StartTicket();
+  RT_API_ATTRS void Stop();
+
+  Terminator &terminator_;
+  TicketList *first_{nullptr}, *last_{nullptr}, *insertAfter_{nullptr};
+  TicketList static_[numStatic_];
+  TicketList *firstFree_{static_};
+};
+
+} // namespace Fortran::runtime
+#endif // FLANG_RT_RUNTIME_WORK_QUEUE_H_
diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt
index a3f63b4315644..332c0872e065f 100644
--- a/flang-rt/lib/runtime/CMakeLists.txt
+++ b/flang-rt/lib/runtime/CMakeLists.txt
@@ -68,6 +68,7 @@ set(supported_sources
   type-info.cpp
   unit.cpp
   utf.cpp
+  work-queue.cpp
 )
 
 # List of source not used for GPU offloading.
@@ -131,6 +132,7 @@ set(gpu_sources
   type-code.cpp
   type-info.cpp
   utf.cpp
+  work-queue.cpp
   complex-powi.cpp
   reduce.cpp
   reduction.cpp
diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp
index 86aeeaa88f2d1..41b130cc8f257 100644
--- a/flang-rt/lib/runtime/assign.cpp
+++ b/flang-rt/lib/runtime/assign.cpp
@@ -14,6 +14,7 @@
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/tools.h"
 #include "flang-rt/runtime/type-info.h"
+#include "flang-rt/runtime/work-queue.h"
 
 namespace Fortran::runtime {
 
@@ -102,11 +103,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS(
     toDim.SetByteStride(stride);
     stride *= toDim.Extent();
   }
-  int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))};
-  if (result == StatOk && derived && !derived->noInitializationNeeded()) {
-    result = ReturnError(terminator, Initialize(to, *derived, terminator));
-  }
-  return result;
+  return ReturnError(terminator, to.Allocate(kNoAsyncObject));
 }
 
 // least <= 0, most >= 0
@@ -231,6 +228,8 @@ static RT_API_ATTRS void BlankPadCharacterAssignment(Descriptor &to,
   }
 }
 
+RT_OFFLOAD_API_GROUP_BEGIN
+
 // Common implementation of assignments, both intrinsic assignments and
 // those cases of polymorphic user-defined ASSIGNMENT(=) TBPs that could not
 // be resolved in semantics.  Most assignment statements do not need any
@@ -244,274 +243,453 @@ static RT_API_ATTRS void BlankPadCharacterAssignment(Descriptor &to,
 // dealing with array constructors.
 RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from,
     Terminator &terminator, int flags, MemmoveFct memmoveFct) {
-  bool mustDeallocateLHS{(flags & DeallocateLHS) ||
-      MustDeallocateLHS(to, from, terminator, flags)};
-  DescriptorAddendum *toAddendum{to.Addendum()};
-  const typeInfo::DerivedType *toDerived{
-      toAddendum ? toAddendum->derivedType() : nullptr};
-  if (toDerived && (flags & NeedFinalization) &&
-      toDerived->noFinalizationNeeded()) {
-    flags &= ~NeedFinalization;
-  }
-  std::size_t toElementBytes{to.ElementBytes()};
-  std::size_t fromElementBytes{from.ElementBytes()};
-  // The following lambda definition violates the conding style,
-  // but cuda-11.8 nvcc hits an internal error with the brace initialization.
-  auto isSimpleMemmove = [&]() {
-    return !toDerived && to.rank() == from.rank() && to.IsContiguous() &&
-        from.IsContiguous() && toElementBytes == fromElementBytes;
-  };
-  StaticDescriptor<maxRank, true, 10 /*?*/> deferredDeallocStatDesc;
-  Descriptor *deferDeallocation{nullptr};
-  if (MayAlias(to, from)) {
+  WorkQueue workQueue{terminator};
+  if (workQueue.BeginAssign(to, from, flags, memmoveFct) == StatContinue) {
+    workQueue.Run();
+  }
+}
+
+RT_API_ATTRS int AssignTicket::Begin(WorkQueue &workQueue) {
+  bool mustDeallocateLHS{(flags_ & DeallocateLHS) ||
+      MustDeallocateLHS(to_, *from_, workQueue.terminator(), flags_)};
+  DescriptorAddendum *toAddendum{to_.Addendum()};
+  toDerived_ = toAddendum ? toAddendum->derivedType() : nullptr;
+  if (toDerived_ && (flags_ & NeedFinalization) &&
+      toDerived_->noFinalizationNeeded()) {
+    flags_ &= ~NeedFinalization;
+  }
+  if (MayAlias(to_, *from_)) {
     if (mustDeallocateLHS) {
-      deferDeallocation = &deferredDeallocStatDesc.descriptor();
+      // Convert the LHS into a temporary, then make it look deallocated.
+      toDeallocate_ = &tempDescriptor_.descriptor();
+      persist_ = true; // tempDescriptor_ state must outlive child tickets
       std::memcpy(
-          reinterpret_cast<void *>(deferDeallocation), &to, to.SizeInBytes());
-      to.set_base_addr(nullptr);
-    } else if (!isSimpleMemmove()) {
+          reinterpret_cast<void *>(toDeallocate_), &to_, to_.SizeInBytes());
+      to_.set_base_addr(nullptr);
+      if (toDerived_ && (flags_ & NeedFinalization)) {
+        if (int status{workQueue.BeginFinalize(*toDeallocate_, *toDerived_)};
+            status != StatOk && status != StatContinue) {
+          return status;
+        }
+        flags_ &= ~NeedFinalization;
+      }
+    } else if (!IsSimpleMemmove()) {
       // Handle LHS/RHS aliasing by copying RHS into a temp, then
       // recursively assigning from that temp.
-      auto descBytes{from.SizeInBytes()};
-      StaticDescriptor<maxRank, true, 16> staticDesc;
-      Descriptor &newFrom{staticDesc.descriptor()};
-      std::memcpy(reinterpret_cast<void *>(&newFrom), &from, descBytes);
+      auto descBytes{from_->SizeInBytes()};
+      Descriptor &newFrom{tempDescriptor_.descriptor()};
+      persist_ = true; // tempDescriptor_ state must outlive child tickets
+      std::memcpy(reinterpret_cast<void *>(&newFrom), from_, descBytes);
       // Pretend the temporary descriptor is for an ALLOCATABLE
       // entity, otherwise, the Deallocate() below will not
       // free the descriptor memory.
       newFrom.raw().attribute = CFI_attribute_allocatable;
-      auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))};
-      if (stat == StatOk) {
-        if (HasDynamicComponent(from)) {
-          // If 'from' has allocatable/automatic component, we cannot
-          // just make a shallow copy of the descriptor member.
-          // This will still leave data overlap in 'to' and 'newFrom'.
-          // For example:
-          //   type t
-          //     character, allocatable :: c(:)
-          //   end type t
-          //   type(t) :: x(3)
-          //   x(2:3) = x(1:2)
-          // We have to make a deep copy into 'newFrom' in this case.
-          RTNAME(AssignTemporary)
-          (newFrom, from, terminator.sourceFileName(), terminator.sourceLine());
-        } else {
-          ShallowCopy(newFrom, from, true, from.IsContiguous());
+      if (int stat{ReturnError(
+              workQueue.terminator(), newFrom.Allocate(kNoAsyncObject))};
+          stat != StatOk) {
+        return stat;
+      }
+      if (HasDynamicComponent(*from_)) {
+        // If 'from' has allocatable/automatic component, we cannot
+        // just make a shallow copy of the descriptor member.
+        // This will still leave data overlap in 'to' and 'newFrom'.
+        // For example:
+        //   type t
+        //     character, allocatable :: c(:)
+        //   end type t
+        //   type(t) :: x(3)
+        //   x(2:3) = x(1:2)
+        // We have to make a deep copy into 'newFrom' in this case.
+        if (const DescriptorAddendum *addendum{newFrom.Addendum()}) {
+          if (const auto *derived{addendum->derivedType()}) {
+            if (!derived->noInitializationNeeded()) {
+              if (int status{workQueue.BeginInitialize(newFrom, *derived)};
+                  status != StatOk && status != StatContinue) {
+                return status;
+              }
+            }
+          }
+        }
+        static constexpr int nestedFlags{MaybeReallocate | PolymorphicLHS};
+        if (int status{workQueue.BeginAssign(
+                newFrom, *from_, nestedFlags, memmoveFct_)};
+            status != StatOk && status != StatContinue) {
+          return status;
         }
-        Assign(to, newFrom, terminator,
-            flags &
-                (NeedFinalization | ComponentCanBeDefinedAssignment |
-                    ExplicitLengthCharacterLHS | CanBeDefinedAssignment));
-        newFrom.Deallocate();
+      } else {
+        ShallowCopy(newFrom, *from_, true, from_->IsContiguous());
       }
-      return;
+      from_ = &newFrom;
+      flags_ &= NeedFinalization | ComponentCanBeDefinedAssignment |
+          ExplicitLengthCharacterLHS | CanBeDefinedAssignment;
+      toDeallocate_ = &newFrom;
     }
   }
-  if (to.IsAllocatable()) {
+  if (to_.IsAllocatable()) {
     if (mustDeallocateLHS) {
-      if (deferDeallocation) {
-        if ((flags & NeedFinalization) && toDerived) {
-          Finalize(*deferDeallocation, *toDerived, &terminator);
-          flags &= ~NeedFinalization;
-        }
-      } else {
-        to.Destroy((flags & NeedFinalization) != 0, /*destroyPointers=*/false,
-            &terminator);
-        flags &= ~NeedFinalization;
+      if (!toDeallocate_ && to_.IsAllocated()) {
+        toDeallocate_ = &to_;
       }
-    } else if (to.rank() != from.rank() && !to.IsAllocated()) {
-      terminator.Crash("Assign: mismatched ranks (%d != %d) in assignment to "
-                       "unallocated allocatable",
-          to.rank(), from.rank());
+    } else if (to_.rank() != from_->rank() && !to_.IsAllocated()) {
+      workQueue.terminator().Crash("Assign: mismatched ranks (%d != %d) in "
+                                   "assignment to unallocated allocatable",
+          to_.rank(), from_->rank());
     }
-    if (!to.IsAllocated()) {
-      if (AllocateAssignmentLHS(to, from, terminator, flags) != StatOk) {
-        return;
+  } else if (!to_.IsAllocated()) {
+    workQueue.terminator().Crash(
+        "Assign: left-hand side variable is neither allocated nor allocatable");
+  }
+  if (toDerived_ && to_.IsAllocated()) {
+    // Schedule finalization or destruction of the LHS.
+    if (flags_ & NeedFinalization) {
+      if (int status{workQueue.BeginFinalize(to_, *toDerived_)};
+          status != StatOk && status != StatContinue) {
+        return status;
+      }
+    } else if (!toDerived_->noDestructionNeeded()) {
+      if (int status{
+              workQueue.BeginDestroy(to_, *toDerived_, /*finalize=*/false)};
+          status != StatOk && status != StatContinue) {
+        return status;
       }
-      flags &= ~NeedFinalization;
-      toElementBytes = to.ElementBytes(); // may have changed
     }
   }
-  if (toDerived && (flags & CanBeDefinedAssignment)) {
-    // Check for a user-defined assignment type-bound procedure;
-    // see 10.2.1.4-5.  A user-defined assignment TBP defines all of
-    // the semantics, including allocatable (re)allocation and any
-    // finalization.
-    //
-    // Note that the aliasing and LHS (re)allocation handling above
-    // needs to run even with CanBeDefinedAssignment flag, when
-    // the Assign() is invoked recursively for component-per-component
-    // assignments.
-    if (to.rank() == 0) {
-      if (const auto *special{toDerived->FindSpecialBinding(
+  return StatContinue;
+}
+
+RT_API_ATTRS int AssignTicket::Continue(WorkQueue &workQueue) {
+  if (done_) {
+    // All child tickets are complete; can release this ticket's state.
+    if (toDeallocate_) {
+      toDeallocate_->Deallocate();
+    }
+    return StatOk;
+  }
+  // All necessary finalization or destruction that was initiated by Begin()
+  // has been completed.  Deallocation may be pending, and if it's for the LHS,
+  // do it now so that the LHS gets reallocated.
+  if (toDeallocate_ == &to_) {
+    toDeallocate_ = nullptr;
+    to_.Deallocate();
+  }
+  // Allocate the LHS if needed
+  if (!to_.IsAllocated()) {
+    if (int stat{
+            AllocateAssignmentLHS(to_, *from_, workQueue.terminator(), flags_)};
+        stat != StatOk) {
+      return stat;
+    }
+    const auto *addendum{to_.Addendum()};
+    toDerived_ = addendum ? addendum->derivedType() : nullptr;
+    if (toDerived_ && !toDerived_->noInitializationNeeded()) {
+      if (int status{workQueue.BeginInitialize(to_, *toDerived_)};
+          status != StatOk) {
+        return status;
+      }
+    }
+  }
+  // Check for a user-defined assignment type-bound procedure;
+  // see 10.2.1.4-5.
+  // Note that the aliasing and LHS (re)allocation handling above
+  // needs to run even with CanBeDefinedAssignment flag, since
+  // Assign() can be invoked recursively for component-wise assignments.
+  if (toDerived_ && (flags_ & CanBeDefinedAssignment)) {
+    if (to_.rank() == 0) {
+      if (const auto *special{toDerived_->FindSpecialBinding(
               typeInfo::SpecialBinding::Which::ScalarAssignment)}) {
-        return DoScalarDefinedAssignment(to, from, *special);
+        DoScalarDefinedAssignment(to_, *from_, *special);
+        done_ = true;
+        return StatContinue;
       }
     }
-    if (const auto *special{toDerived->FindSpecialBinding(
+    if (const auto *special{toDerived_->FindSpecialBinding(
             typeInfo::SpecialBinding::Which::ElementalAssignment)}) {
-      return DoElementalDefinedAssignment(to, from, *toDerived, *special);
+      DoElementalDefinedAssignment(to_, *from_, *toDerived_, *special);
+      done_ = true;
+      return StatContinue;
     }
   }
-  SubscriptValue toAt[maxRank];
-  to.GetLowerBounds(toAt);
-  // Scalar expansion of the RHS is implied by using the same empty
-  // subscript values on each (seemingly) elemental reference into
-  // "from".
-  SubscriptValue fromAt[maxRank];
-  from.GetLowerBounds(fromAt);
-  std::size_t toElements{to.Elements()};
-  if (from.rank() > 0 && toElements != from.Elements()) {
-    terminator.Crash("Assign: mismatching element counts in array assignment "
-                     "(to %zd, from %zd)",
-        toElements, from.Elements());
+  // Intrinsic assignment
+  std::size_t toElements{to_.Elements()};
+  if (from_->rank() > 0 && toElements != from_->Elements()) {
+    workQueue.terminator().Crash("Assign: mismatching element counts in array "
+                                 "assignment (to %zd, from %zd)",
+        toElements, from_->Elements());
   }
-  if (to.type() != from.type()) {
-    terminator.Crash("Assign: mismatching types (to code %d != from code %d)",
-        to.type().raw(), from.type().raw());
+  if (to_.type() != from_->type()) {
+    workQueue.terminator().Crash(
+        "Assign: mismatching types (to code %d != from code %d)",
+        to_.type().raw(), from_->type().raw());
   }
-  if (toElementBytes > fromElementBytes && !to.type().IsCharacter()) {
-    terminator.Crash("Assign: mismatching non-character element sizes (to %zd "
-                     "bytes != from %zd bytes)",
+  std::size_t toElementBytes{to_.ElementBytes()};
+  std::size_t fromElementBytes{from_->ElementBytes()};
+  if (toElementBytes > fromElementBytes && !to_.type().IsCharacter()) {
+    workQueue.terminator().Crash("Assign: mismatching non-character element "
+                                 "sizes (to %zd bytes != from %zd bytes)",
         toElementBytes, fromElementBytes);
   }
-  if (const typeInfo::DerivedType *
-      updatedToDerived{toAddendum ? toAddendum->derivedType() : nullptr}) {
-    // Derived type intrinsic assignment, which is componentwise and elementwise
-    // for all components, including parent components (10.2.1.2-3).
-    // The target is first finalized if still necessary (7.5.6.3(1))
-    if (flags & NeedFinalization) {
-      Finalize(to, *updatedToDerived, &terminator);
-    } else if (updatedToDerived && !updatedToDerived->noDestructionNeeded()) {
-      Destroy(to, /*finalize=*/false, *updatedToDerived, &terminator);
-    }
-    // Copy the data components (incl. the parent) first.
-    const Descriptor &componentDesc{updatedToDerived->component()};
-    std::size_t numComponents{componentDesc.Elements()};
-    for (std::size_t j{0}; j < toElements;
-         ++j, to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
-      for (std::size_t k{0}; k < numComponents; ++k) {
-        const auto &comp{
-            *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
-                k)}; // TODO: exploit contiguity here
-        // Use PolymorphicLHS for components so that the right things happen
-        // when the components are polymorphic; when they're not, they're both
-        // not, and their declared types will match.
-        int nestedFlags{MaybeReallocate | PolymorphicLHS};
-        if (flags & ComponentCanBeDefinedAssignment) {
-          nestedFlags |=
-              CanBeDefinedAssignment | ComponentCanBeDefinedAssignment;
-        }
-        switch (comp.genre()) {
-        case typeInfo::Component::Genre::Data:
-          if (comp.category() == TypeCategory::Derived) {
-            StaticDescriptor<maxRank, true, 10 /*?*/> statDesc[2];
-            Descriptor &toCompDesc{statDesc[0].descriptor()};
-            Descriptor &fromCompDesc{statDesc[1].descriptor()};
-            comp.CreatePointerDescriptor(toCompDesc, to, terminator, toAt);
-            comp.CreatePointerDescriptor(
-                fromCompDesc, from, terminator, fromAt);
-            Assign(toCompDesc, fromCompDesc, terminator, nestedFlags);
-          } else { // Component has intrinsic type; simply copy raw bytes
-            std::size_t componentByteSize{comp.SizeInBytes(to)};
-            memmoveFct(to.Element<char>(toAt) + comp.offset(),
-                from.Element<const char>(fromAt) + comp.offset(),
-                componentByteSize);
-          }
-          break;
-        case typeInfo::Component::Genre::Pointer: {
-          std::size_t componentByteSize{comp.SizeInBytes(to)};
-          memmoveFct(to.Element<char>(toAt) + comp.offset(),
-              from.Element<const char>(fromAt) + comp.offset(),
-              componentByteSize);
-        } break;
-        case typeInfo::Component::Genre::Allocatable:
-        case typeInfo::Component::Genre::Automatic: {
-          auto *toDesc{reinterpret_cast<Descriptor *>(
-              to.Element<char>(toAt) + comp.offset())};
-          const auto *fromDesc{reinterpret_cast<const Descriptor *>(
-              from.Element<char>(fromAt) + comp.offset())};
-          // Allocatable components of the LHS are unconditionally
-          // deallocated before assignment (F'2018 10.2.1.3(13)(1)),
-          // unlike a "top-level" assignment to a variable, where
-          // deallocation is optional.
-          //
-          // Be careful not to destroy/reallocate the LHS, if there is
-          // overlap between LHS and RHS (it seems that partial overlap
-          // is not possible, though).
-          // Invoke Assign() recursively to deal with potential aliasing.
-          if (toDesc->IsAllocatable()) {
-            if (!fromDesc->IsAllocated()) {
-              // No aliasing.
-              //
-              // If to is not allocated, the Destroy() call is a no-op.
-              // This is just a shortcut, because the recursive Assign()
-              // below would initiate the destruction for to.
-              // No finalization is required.
-              toDesc->Destroy(
-                  /*finalize=*/false, /*destroyPointers=*/false, &terminator);
-              continue; // F'2018 10.2.1.3(13)(2)
-            }
-          }
-          // Force LHS deallocation with DeallocateLHS flag.
-          // The actual deallocation may be avoided, if the existing
-          // location can be reoccupied.
-          Assign(*toDesc, *fromDesc, terminator, nestedFlags | DeallocateLHS);
-        } break;
-        }
+  if (toDerived_) {
+    if (toDerived_->noDefinedAssignment()) { // componentwise
+      if (int status{workQueue.BeginDerivedAssign<true>(
+              to_, *from_, *toDerived_, flags_, memmoveFct_, toDeallocate_)};
+          status != StatOk && status != StatContinue) {
+        return status;
       }
-      // Copy procedure pointer components
-      const Descriptor &procPtrDesc{updatedToDerived->procPtr()};
-      std::size_t numProcPtrs{procPtrDesc.Elements()};
-      for (std::size_t k{0}; k < numProcPtrs; ++k) {
-        const auto &procPtr{
-            *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(
-                k)};
-        memmoveFct(to.Element<char>(toAt) + procPtr.offset,
-            from.Element<const char>(fromAt) + procPtr.offset,
-            sizeof(typeInfo::ProcedurePointer));
+    } else { // elementwise
+      if (int status{workQueue.BeginDerivedAssign<false>(
+              to_, *from_, *toDerived_, flags_, memmoveFct_, toDeallocate_)};
+          status != StatOk && status != StatContinue) {
+        return status;
       }
     }
-  } else { // intrinsic type, intrinsic assignment
-    if (isSimpleMemmove()) {
-      memmoveFct(to.raw().base_addr, from.raw().base_addr,
-          toElements * toElementBytes);
-    } else if (toElementBytes > fromElementBytes) { // blank padding
-      switch (to.type().raw()) {
+    toDeallocate_ = nullptr;
+  } else if (IsSimpleMemmove()) {
+    memmoveFct_(to_.raw().base_addr, from_->raw().base_addr,
+        toElements * toElementBytes);
+  } else {
+    // Scalar expansion of the RHS is implied by using the same empty
+    // subscript values on each (seemingly) elemental reference into
+    // "from".
+    SubscriptValue toAt[maxRank];
+    to_.GetLowerBounds(toAt);
+    SubscriptValue fromAt[maxRank];
+    from_->GetLowerBounds(fromAt);
+    if (toElementBytes > fromElementBytes) { // blank padding
+      switch (to_.type().raw()) {
       case CFI_type_signed_char:
       case CFI_type_char:
-        BlankPadCharacterAssignment<char>(to, from, toAt, fromAt, toElements,
+        BlankPadCharacterAssignment<char>(to_, *from_, toAt, fromAt, toElements,
             toElementBytes, fromElementBytes);
         break;
       case CFI_type_char16_t:
-        BlankPadCharacterAssignment<char16_t>(to, from, toAt, fromAt,
+        BlankPadCharacterAssignment<char16_t>(to_, *from_, toAt, fromAt,
             toElements, toElementBytes, fromElementBytes);
         break;
       case CFI_type_char32_t:
-        BlankPadCharacterAssignment<char32_t>(to, from, toAt, fromAt,
+        BlankPadCharacterAssignment<char32_t>(to_, *from_, toAt, fromAt,
             toElements, toElementBytes, fromElementBytes);
         break;
       default:
-        terminator.Crash("unexpected type code %d in blank padded Assign()",
-            to.type().raw());
+        workQueue.terminator().Crash(
+            "unexpected type code %d in blank padded Assign()",
+            to_.type().raw());
       }
     } else { // elemental copies, possibly with character truncation
       for (std::size_t n{toElements}; n-- > 0;
-          to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
-        memmoveFct(to.Element<char>(toAt), from.Element<const char>(fromAt),
+          to_.IncrementSubscripts(toAt), from_->IncrementSubscripts(fromAt)) {
+        memmoveFct_(to_.Element<char>(toAt), from_->Element<const char>(fromAt),
             toElementBytes);
       }
     }
   }
-  if (deferDeallocation) {
-    // deferDeallocation is used only when LHS is an allocatable.
-    // The finalization has already been run for it.
-    deferDeallocation->Destroy(
-        /*finalize=*/false, /*destroyPointers=*/false, &terminator);
+  if (persist_) {
+    done_ = true;
+    return StatContinue;
+  } else {
+    if (toDeallocate_) {
+      toDeallocate_->Deallocate();
+      toDeallocate_ = nullptr;
+    }
+    return StatOk;
   }
 }
 
-RT_OFFLOAD_API_GROUP_BEGIN
+template <bool IS_COMPONENTWISE>
+RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Begin(
+    WorkQueue &workQueue) {
+  if (toIsContiguous_ && fromIsContiguous_ &&
+      this->derived_.noDestructionNeeded() &&
+      this->derived_.noDefinedAssignment() &&
+      this->instance_.rank() == this->from_->rank()) {
+    if (std::size_t elementBytes{this->instance_.ElementBytes()};
+        elementBytes == this->from_->ElementBytes()) {
+      // Fastest path.  Both LHS and RHS are contiguous, RHS is not a scalar
+      // to be expanded, the types have the same size, and there are no
+      // allocatable components or defined ASSIGNMENT(=) at any level.
+      memmoveFct_(this->instance_.template OffsetElement<char>(),
+          this->from_->template OffsetElement<const char *>(),
+          this->instance_.Elements() * elementBytes);
+      return StatOk;
+    }
+  }
+  // Use PolymorphicLHS for components so that the right things happen
+  // when the components are polymorphic; when they're not, they're both
+  // not, and their declared types will match.
+  int nestedFlags{MaybeReallocate | PolymorphicLHS};
+  if (flags_ & ComponentCanBeDefinedAssignment) {
+    nestedFlags |= CanBeDefinedAssignment | ComponentCanBeDefinedAssignment;
+  }
+  flags_ = nestedFlags;
+  // Copy procedure pointer components
+  const Descriptor &procPtrDesc{this->derived_.procPtr()};
+  bool noDataComponents{this->IsComplete()};
+  if (std::size_t numProcPtrs{procPtrDesc.Elements()}) {
+    for (std::size_t k{0}; k < numProcPtrs; ++k) {
+      const auto &procPtr{
+          *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
+      // Loop only over elements
+      if (noDataComponents) {
+        Elementwise::Reset();
+      }
+      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
+        memmoveFct_(this->instance_.template ElementComponent<char>(
+                        this->subscripts_, procPtr.offset),
+            this->from_->template ElementComponent<const char>(
+                this->fromSubscripts_, procPtr.offset),
+            sizeof(typeInfo::ProcedurePointer));
+      }
+    }
+    if (noDataComponents) {
+      return StatOk;
+    }
+    Elementwise::Reset();
+  }
+  if (noDataComponents) {
+    return StatOk;
+  }
+  return StatContinue;
+}
+template RT_API_ATTRS int DerivedAssignTicket<false>::Begin(WorkQueue &);
+template RT_API_ATTRS int DerivedAssignTicket<true>::Begin(WorkQueue &);
+
+template <bool IS_COMPONENTWISE>
+RT_API_ATTRS int DerivedAssignTicket<IS_COMPONENTWISE>::Continue(
+    WorkQueue &workQueue) {
+  while (!this->IsComplete()) {
+    // Copy the data components (incl. the parent) first.
+    switch (this->component_->genre()) {
+    case typeInfo::Component::Genre::Data:
+      if (this->component_->category() == TypeCategory::Derived) {
+        Descriptor &toCompDesc{this->componentDescriptor_.descriptor()};
+        Descriptor &fromCompDesc{this->fromComponentDescriptor_.descriptor()};
+        this->component_->CreatePointerDescriptor(toCompDesc, this->instance_,
+            workQueue.terminator(), this->subscripts_);
+        this->component_->CreatePointerDescriptor(fromCompDesc, *this->from_,
+            workQueue.terminator(), this->fromSubscripts_);
+        this->Advance();
+        if (int status{workQueue.BeginAssign(
+                toCompDesc, fromCompDesc, flags_, memmoveFct_)};
+            status != StatOk) {
+          return status;
+        }
+      } else { // Component has intrinsic type; simply copy raw bytes
+        std::size_t componentByteSize{
+            this->component_->SizeInBytes(this->instance_)};
+        if (IS_COMPONENTWISE && toIsContiguous_ && fromIsContiguous_) {
+          std::size_t offset{this->component_->offset()};
+          char *to{this->instance_.template OffsetElement<char>(offset)};
+          const char *from{
+              this->from_->template OffsetElement<const char>(offset)};
+          std::size_t toElementStride{this->instance_.ElementBytes()};
+          std::size_t fromElementStride{
+              this->from_->rank() == 0 ? 0 : this->from_->ElementBytes()};
+          if (toElementStride == fromElementStride &&
+              toElementStride == componentByteSize) {
+            memmoveFct_(to, from, this->elements_ * componentByteSize);
+          } else {
+            for (std::size_t n{this->elements_}; n--;
+                to += toElementStride, from += fromElementStride) {
+              memmoveFct_(to, from, componentByteSize);
+            }
+          }
+          this->Componentwise::Advance();
+        } else {
+          memmoveFct_(
+              this->instance_.template Element<char>(this->subscripts_) +
+                  this->component_->offset(),
+              this->from_->template Element<const char>(this->fromSubscripts_) +
+                  this->component_->offset(),
+              componentByteSize);
+          this->Advance();
+        }
+      }
+      break;
+    case typeInfo::Component::Genre::Pointer: {
+      std::size_t componentByteSize{
+          this->component_->SizeInBytes(this->instance_)};
+      if (IS_COMPONENTWISE && toIsContiguous_ && fromIsContiguous_) {
+        std::size_t offset{this->component_->offset()};
+        char *to{this->instance_.template OffsetElement<char>(offset)};
+        const char *from{
+            this->from_->template OffsetElement<const char>(offset)};
+        std::size_t toElementStride{this->instance_.ElementBytes()};
+        std::size_t fromElementStride{
+            this->from_->rank() == 0 ? 0 : this->from_->ElementBytes()};
+        if (toElementStride == fromElementStride &&
+            toElementStride == componentByteSize) {
+          memmoveFct_(to, from, this->elements_ * componentByteSize);
+        } else {
+          for (std::size_t n{this->elements_}; n--;
+              to += toElementStride, from += fromElementStride) {
+            memmoveFct_(to, from, componentByteSize);
+          }
+        }
+        this->Componentwise::Advance();
+      } else {
+        memmoveFct_(this->instance_.template Element<char>(this->subscripts_) +
+                this->component_->offset(),
+            this->from_->template Element<const char>(this->fromSubscripts_) +
+                this->component_->offset(),
+            componentByteSize);
+        this->Advance();
+      }
+    } break;
+    case typeInfo::Component::Genre::Allocatable:
+    case typeInfo::Component::Genre::Automatic: {
+      auto *toDesc{reinterpret_cast<Descriptor *>(
+          this->instance_.template Element<char>(this->subscripts_) +
+          this->component_->offset())};
+      const auto *fromDesc{reinterpret_cast<const Descriptor *>(
+          this->from_->template Element<char>(this->fromSubscripts_) +
+          this->component_->offset())};
+      if (toDesc->IsAllocatable() && !fromDesc->IsAllocated()) {
+        if (toDesc->IsAllocated()) {
+          if (this->phase_ == 0) {
+            this->phase_++;
+            if (const auto *componentDerived{this->component_->derivedType()};
+                componentDerived && !componentDerived->noDestructionNeeded()) {
+              if (int status{workQueue.BeginDestroy(
+                      *toDesc, *componentDerived, /*finalize=*/false)};
+                  status != StatOk) {
+                return status;
+              }
+            }
+          }
+          toDesc->Deallocate();
+        }
+        this->Advance();
+      } else {
+        // Allocatable components of the LHS are unconditionally
+        // deallocated before assignment (F'2018 10.2.1.3(13)(1)),
+        // unlike a "top-level" assignment to a variable, where
+        // deallocation is optional.
+        this->Advance();
+        int nestedFlags{flags_};
+        if (this->derived_.noFinalizationNeeded() &&
+            this->derived_.noInitializationNeeded() &&
+            this->derived_.noDestructionNeeded()) {
+          // The actual deallocation may be avoided, if the existing
+          // location can be reoccupied.
+        } else {
+          // Force LHS deallocation with DeallocateLHS flag.
+          nestedFlags |= DeallocateLHS;
+        }
+        if (int status{workQueue.BeginAssign(
+                *toDesc, *fromDesc, nestedFlags, memmoveFct_)};
+            status != StatOk) {
+          return status;
+        }
+      }
+    } break;
+    }
+  }
+  if (deallocateAfter_) {
+    deallocateAfter_->Deallocate();
+  }
+  return StatOk;
+}
+template RT_API_ATTRS int DerivedAssignTicket<false>::Continue(WorkQueue &);
+template RT_API_ATTRS int DerivedAssignTicket<true>::Continue(WorkQueue &);
 
 RT_API_ATTRS void DoFromSourceAssign(Descriptor &alloc,
     const Descriptor &source, Terminator &terminator, MemmoveFct memmoveFct) {
@@ -581,7 +759,6 @@ void RTDEF(AssignTemporary)(Descriptor &to, const Descriptor &from,
       }
     }
   }
-
   Assign(to, from, terminator, MaybeReallocate | PolymorphicLHS);
 }
 
@@ -598,7 +775,6 @@ void RTDEF(CopyInAssign)(Descriptor &temp, const Descriptor &var,
 void RTDEF(CopyOutAssign)(
     Descriptor *var, Descriptor &temp, const char *sourceFile, int sourceLine) {
   Terminator terminator{sourceFile, sourceLine};
-
   // Copyout from the temporary must not cause any finalizations
   // for LHS. The variable must be properly initialized already.
   if (var) {
diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp
index 35037036f63e7..8ab737c701b01 100644
--- a/flang-rt/lib/runtime/derived.cpp
+++ b/flang-rt/lib/runtime/derived.cpp
@@ -12,6 +12,7 @@
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/tools.h"
 #include "flang-rt/runtime/type-info.h"
+#include "flang-rt/runtime/work-queue.h"
 
 namespace Fortran::runtime {
 
@@ -30,180 +31,193 @@ static RT_API_ATTRS void GetComponentExtents(SubscriptValue (&extents)[maxRank],
 }
 
 RT_API_ATTRS int Initialize(const Descriptor &instance,
-    const typeInfo::DerivedType &derived, Terminator &terminator, bool hasStat,
-    const Descriptor *errMsg) {
-  const Descriptor &componentDesc{derived.component()};
-  std::size_t elements{instance.Elements()};
-  int stat{StatOk};
-  // Initialize data components in each element; the per-element iterations
-  // constitute the inner loops, not the outer ones
-  std::size_t myComponents{componentDesc.Elements()};
-  for (std::size_t k{0}; k < myComponents; ++k) {
-    const auto &comp{
-        *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(k)};
-    SubscriptValue at[maxRank];
-    instance.GetLowerBounds(at);
-    if (comp.genre() == typeInfo::Component::Genre::Allocatable ||
-        comp.genre() == typeInfo::Component::Genre::Automatic) {
-      for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) {
-        Descriptor &allocDesc{
-            *instance.ElementComponent<Descriptor>(at, comp.offset())};
-        comp.EstablishDescriptor(allocDesc, instance, terminator);
+    const typeInfo::DerivedType &derived, Terminator &terminator, bool,
+    const Descriptor *) {
+  WorkQueue workQueue{terminator};
+  int status{workQueue.BeginInitialize(instance, derived)};
+  return status == StatContinue ? workQueue.Run() : status;
+}
+
+RT_API_ATTRS int InitializeTicket::Begin(WorkQueue &) {
+  // Initialize procedure pointer components in each element
+  const Descriptor &procPtrDesc{derived_.procPtr()};
+  if (std::size_t numProcPtrs{procPtrDesc.Elements()}) {
+    bool noDataComponents{IsComplete()};
+    for (std::size_t k{0}; k < numProcPtrs; ++k) {
+      const auto &comp{
+          *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
+      // Loop only over elements
+      if (noDataComponents) {
+        Elementwise::Reset();
+      }
+      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
+        auto &pptr{*instance_.ElementComponent<typeInfo::ProcedurePointer>(
+            subscripts_, comp.offset)};
+        pptr = comp.procInitialization;
+      }
+    }
+    if (noDataComponents) {
+      return StatOk;
+    }
+    Elementwise::Reset();
+  }
+  return StatContinue;
+}
+
+RT_API_ATTRS int InitializeTicket::Continue(WorkQueue &workQueue) {
+  while (!IsComplete()) {
+    if (component_->genre() == typeInfo::Component::Genre::Allocatable) {
+      // Establish allocatable descriptors
+      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
+        Descriptor &allocDesc{*instance_.ElementComponent<Descriptor>(
+            subscripts_, component_->offset())};
+        component_->EstablishDescriptor(
+            allocDesc, instance_, workQueue.terminator());
         allocDesc.raw().attribute = CFI_attribute_allocatable;
-        if (comp.genre() == typeInfo::Component::Genre::Automatic) {
-          stat = ReturnError(
-              terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
-          if (stat == StatOk) {
-            if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) {
-              if (const auto *derived{addendum->derivedType()}) {
-                if (!derived->noInitializationNeeded()) {
-                  stat = Initialize(
-                      allocDesc, *derived, terminator, hasStat, errMsg);
-                }
-              }
-            }
-          }
-          if (stat != StatOk) {
-            break;
-          }
-        }
       }
-    } else if (const void *init{comp.initialization()}) {
+      SkipToNextComponent();
+    } else if (const void *init{component_->initialization()}) {
       // Explicit initialization of data pointers and
       // non-allocatable non-automatic components
-      std::size_t bytes{comp.SizeInBytes(instance)};
-      for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) {
-        char *ptr{instance.ElementComponent<char>(at, comp.offset())};
+      std::size_t bytes{component_->SizeInBytes(instance_)};
+      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
+        char *ptr{instance_.ElementComponent<char>(
+            subscripts_, component_->offset())};
         std::memcpy(ptr, init, bytes);
       }
-    } else if (comp.genre() == typeInfo::Component::Genre::Pointer) {
+      SkipToNextComponent();
+    } else if (component_->genre() == typeInfo::Component::Genre::Pointer) {
       // Data pointers without explicit initialization are established
       // so that they are valid right-hand side targets of pointer
       // assignment statements.
-      for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) {
-        Descriptor &ptrDesc{
-            *instance.ElementComponent<Descriptor>(at, comp.offset())};
-        comp.EstablishDescriptor(ptrDesc, instance, terminator);
+      for (; !Elementwise::IsComplete(); Elementwise::Advance()) {
+        Descriptor &ptrDesc{*instance_.ElementComponent<Descriptor>(
+            subscripts_, component_->offset())};
+        component_->EstablishDescriptor(
+            ptrDesc, instance_, workQueue.terminator());
         ptrDesc.raw().attribute = CFI_attribute_pointer;
       }
-    } else if (comp.genre() == typeInfo::Component::Genre::Data &&
-        comp.derivedType() && !comp.derivedType()->noInitializationNeeded()) {
+      SkipToNextComponent();
+    } else if (component_->genre() == typeInfo::Component::Genre::Data &&
+        component_->derivedType() &&
+        !component_->derivedType()->noInitializationNeeded()) {
       // Default initialization of non-pointer non-allocatable/automatic
-      // data component.  Handles parent component's elements.  Recursive.
+      // data component.  Handles parent component's elements.
       SubscriptValue extents[maxRank];
-      GetComponentExtents(extents, comp, instance);
-      StaticDescriptor<maxRank, true, 0> staticDescriptor;
-      Descriptor &compDesc{staticDescriptor.descriptor()};
-      const typeInfo::DerivedType &compType{*comp.derivedType()};
-      for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) {
-        compDesc.Establish(compType,
-            instance.ElementComponent<char>(at, comp.offset()), comp.rank(),
-            extents);
-        stat = Initialize(compDesc, compType, terminator, hasStat, errMsg);
-        if (stat != StatOk) {
-          break;
-        }
+      GetComponentExtents(extents, *component_, instance_);
+      Descriptor &compDesc{componentDescriptor_.descriptor()};
+      const typeInfo::DerivedType &compType{*component_->derivedType()};
+      compDesc.Establish(compType,
+          instance_.ElementComponent<char>(subscripts_, component_->offset()),
+          component_->rank(), extents);
+      Advance();
+      if (int status{workQueue.BeginInitialize(compDesc, compType)};
+          status != StatOk) {
+        return status;
       }
+    } else {
+      SkipToNextComponent();
     }
   }
-  // Initialize procedure pointer components in each element
-  const Descriptor &procPtrDesc{derived.procPtr()};
-  std::size_t myProcPtrs{procPtrDesc.Elements()};
-  for (std::size_t k{0}; k < myProcPtrs; ++k) {
-    const auto &comp{
-        *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
-    SubscriptValue at[maxRank];
-    instance.GetLowerBounds(at);
-    for (std::size_t j{0}; j++ < elements; instance.IncrementSubscripts(at)) {
-      auto &pptr{*instance.ElementComponent<typeInfo::ProcedurePointer>(
-          at, comp.offset)};
-      pptr = comp.procInitialization;
-    }
-  }
-  return stat;
+  return StatOk;
 }
 
 RT_API_ATTRS int InitializeClone(const Descriptor &clone,
-    const Descriptor &orig, const typeInfo::DerivedType &derived,
+    const Descriptor &original, const typeInfo::DerivedType &derived,
     Terminator &terminator, bool hasStat, const Descriptor *errMsg) {
-  const Descriptor &componentDesc{derived.component()};
-  std::size_t elements{orig.Elements()};
-  int stat{StatOk};
-
-  // Skip pointers and unallocated variables.
-  if (orig.IsPointer() || !orig.IsAllocated()) {
-    return stat;
+  if (original.IsPointer() || !original.IsAllocated()) {
+    return StatOk; // nothing to do
+  } else {
+    WorkQueue workQueue{terminator};
+    int status{workQueue.BeginInitializeClone(
+        clone, original, derived, hasStat, errMsg)};
+    return status == StatContinue ? workQueue.Run() : status;
   }
-  // Initialize each data component.
-  std::size_t components{componentDesc.Elements()};
-  for (std::size_t i{0}; i < components; ++i) {
-    const typeInfo::Component &comp{
-        *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(i)};
-    SubscriptValue at[maxRank];
-    orig.GetLowerBounds(at);
-    // Allocate allocatable components that are also allocated in the original
-    // object.
-    if (comp.genre() == typeInfo::Component::Genre::Allocatable) {
-      // Initialize each element.
-      for (std::size_t j{0}; j < elements; ++j, orig.IncrementSubscripts(at)) {
-        Descriptor &origDesc{
-            *orig.ElementComponent<Descriptor>(at, comp.offset())};
-        Descriptor &cloneDesc{
-            *clone.ElementComponent<Descriptor>(at, comp.offset())};
-        if (origDesc.IsAllocated()) {
+}
+
+RT_API_ATTRS int InitializeCloneTicket::Continue(WorkQueue &workQueue) {
+  while (!IsComplete()) {
+    if (component_->genre() == typeInfo::Component::Genre::Allocatable) {
+      Descriptor &origDesc{*instance_.ElementComponent<Descriptor>(
+          subscripts_, component_->offset())};
+      if (origDesc.IsAllocated()) {
+        Descriptor &cloneDesc{*clone_.ElementComponent<Descriptor>(
+            subscripts_, component_->offset())};
+        if (phase_ == 0) {
+          ++phase_;
           cloneDesc.ApplyMold(origDesc, origDesc.rank());
-          stat = ReturnError(
-              terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat);
-          if (stat == StatOk) {
-            if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) {
-              if (const typeInfo::DerivedType *
-                  derived{addendum->derivedType()}) {
-                if (!derived->noInitializationNeeded()) {
-                  // Perform default initialization for the allocated element.
-                  stat = Initialize(
-                      cloneDesc, *derived, terminator, hasStat, errMsg);
-                }
-                // Initialize derived type's allocatables.
-                if (stat == StatOk) {
-                  stat = InitializeClone(cloneDesc, origDesc, *derived,
-                      terminator, hasStat, errMsg);
+          if (int stat{ReturnError(workQueue.terminator(),
+                  cloneDesc.Allocate(kNoAsyncObject), errMsg_, hasStat_)};
+              stat != StatOk) {
+            return stat;
+          }
+          if (const DescriptorAddendum *addendum{cloneDesc.Addendum()}) {
+            if (const typeInfo::DerivedType *derived{addendum->derivedType()}) {
+              if (!derived->noInitializationNeeded()) {
+                // Perform default initialization for the allocated element.
+                if (int status{workQueue.BeginInitialize(cloneDesc, *derived)};
+                    status != StatOk) {
+                  return status;
                 }
               }
             }
           }
         }
-        if (stat != StatOk) {
-          break;
+        if (phase_ == 1) {
+          ++phase_;
+          if (const DescriptorAddendum *addendum{cloneDesc.Addendum()}) {
+            if (const typeInfo::DerivedType *derived{addendum->derivedType()}) {
+              // Initialize derived type's allocatables.
+              if (int status{workQueue.BeginInitializeClone(
+                      cloneDesc, origDesc, *derived, hasStat_, errMsg_)};
+                  status != StatOk) {
+                return status;
+              }
+            }
+          }
         }
       }
-    } else if (comp.genre() == typeInfo::Component::Genre::Data &&
-        comp.derivedType()) {
-      // Handle nested derived types.
-      const typeInfo::DerivedType &compType{*comp.derivedType()};
-      SubscriptValue extents[maxRank];
-      GetComponentExtents(extents, comp, orig);
-      // Data components don't have descriptors, allocate them.
-      StaticDescriptor<maxRank, true, 0> origStaticDesc;
-      StaticDescriptor<maxRank, true, 0> cloneStaticDesc;
-      Descriptor &origDesc{origStaticDesc.descriptor()};
-      Descriptor &cloneDesc{cloneStaticDesc.descriptor()};
-      // Initialize each element.
-      for (std::size_t j{0}; j < elements; ++j, orig.IncrementSubscripts(at)) {
+      Advance();
+    } else if (component_->genre() == typeInfo::Component::Genre::Data) {
+      if (component_->derivedType()) {
+        // Handle nested derived types.
+        const typeInfo::DerivedType &compType{*component_->derivedType()};
+        SubscriptValue extents[maxRank];
+        GetComponentExtents(extents, *component_, instance_);
+        Descriptor &origDesc{componentDescriptor_.descriptor()};
+        Descriptor &cloneDesc{cloneComponentDescriptor_.descriptor()};
         origDesc.Establish(compType,
-            orig.ElementComponent<char>(at, comp.offset()), comp.rank(),
-            extents);
+            instance_.ElementComponent<char>(subscripts_, component_->offset()),
+            component_->rank(), extents);
         cloneDesc.Establish(compType,
-            clone.ElementComponent<char>(at, comp.offset()), comp.rank(),
-            extents);
-        stat = InitializeClone(
-            cloneDesc, origDesc, compType, terminator, hasStat, errMsg);
-        if (stat != StatOk) {
-          break;
+            clone_.ElementComponent<char>(subscripts_, component_->offset()),
+            component_->rank(), extents);
+        Advance();
+        if (int status{workQueue.BeginInitializeClone(
+                cloneDesc, origDesc, compType, hasStat_, errMsg_)};
+            status != StatOk) {
+          return status;
         }
+      } else {
+        SkipToNextComponent();
       }
+    } else {
+      SkipToNextComponent();
+    }
+  }
+  return StatOk;
+}
+
+// Fortran 2018 subclause 7.5.6.2
+RT_API_ATTRS void Finalize(const Descriptor &descriptor,
+    const typeInfo::DerivedType &derived, Terminator *terminator) {
+  if (!derived.noFinalizationNeeded() && descriptor.IsAllocated()) {
+    Terminator stubTerminator{"Finalize() in Fortran runtime", 0};
+    WorkQueue workQueue{terminator ? *terminator : stubTerminator};
+    if (workQueue.BeginFinalize(descriptor, derived) == StatContinue) {
+      workQueue.Run();
     }
   }
-  return stat;
 }
 
 static RT_API_ATTRS const typeInfo::SpecialBinding *FindFinal(
@@ -221,7 +235,7 @@ static RT_API_ATTRS const typeInfo::SpecialBinding *FindFinal(
 }
 
 static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
-    const typeInfo::DerivedType &derived, Terminator *terminator) {
+    const typeInfo::DerivedType &derived, Terminator &terminator) {
   if (const auto *special{FindFinal(derived, descriptor.rank())}) {
     if (special->which() == typeInfo::SpecialBinding::Which::ElementalFinal) {
       std::size_t elements{descriptor.Elements()};
@@ -258,9 +272,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
         copy = descriptor;
         copy.set_base_addr(nullptr);
         copy.raw().attribute = CFI_attribute_allocatable;
-        Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0};
-        RUNTIME_CHECK(terminator ? *terminator : stubTerminator,
-            copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
+        RUNTIME_CHECK(terminator, copy.Allocate(kNoAsyncObject) == CFI_SUCCESS);
         ShallowCopyDiscontiguousToContiguous(copy, descriptor);
         argDescriptor = ©
       }
@@ -284,87 +296,94 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor,
   }
 }
 
-// Fortran 2018 subclause 7.5.6.2
-RT_API_ATTRS void Finalize(const Descriptor &descriptor,
-    const typeInfo::DerivedType &derived, Terminator *terminator) {
-  if (derived.noFinalizationNeeded() || !descriptor.IsAllocated()) {
-    return;
-  }
-  CallFinalSubroutine(descriptor, derived, terminator);
-  const auto *parentType{derived.GetParentType()};
-  bool recurse{parentType && !parentType->noFinalizationNeeded()};
+RT_API_ATTRS int FinalizeTicket::Begin(WorkQueue &workQueue) {
+  CallFinalSubroutine(instance_, derived_, workQueue.terminator());
   // If there's a finalizable parent component, handle it last, as required
   // by the Fortran standard (7.5.6.2), and do so recursively with the same
   // descriptor so that the rank is preserved.
-  const Descriptor &componentDesc{derived.component()};
-  std::size_t myComponents{componentDesc.Elements()};
-  std::size_t elements{descriptor.Elements()};
-  for (auto k{recurse ? std::size_t{1}
-                      /* skip first component, it's the parent */
-                      : 0};
-       k < myComponents; ++k) {
-    const auto &comp{
-        *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(k)};
-    SubscriptValue at[maxRank];
-    descriptor.GetLowerBounds(at);
-    if (comp.genre() == typeInfo::Component::Genre::Allocatable &&
-        comp.category() == TypeCategory::Derived) {
+  finalizableParentType_ = derived_.GetParentType();
+  if (finalizableParentType_) {
+    if (finalizableParentType_->noFinalizationNeeded()) {
+      finalizableParentType_ = nullptr;
+    } else {
+      SkipToNextComponent();
+    }
+  }
+  return StatContinue;
+}
+
+RT_API_ATTRS int FinalizeTicket::Continue(WorkQueue &workQueue) {
+  while (!IsComplete()) {
+    if (component_->genre() == typeInfo::Component::Genre::Allocatable &&
+        component_->category() == TypeCategory::Derived) {
       // Component may be polymorphic or unlimited polymorphic. Need to use the
       // dynamic type to check whether finalization is needed.
-      for (std::size_t j{0}; j++ < elements;
-           descriptor.IncrementSubscripts(at)) {
-        const Descriptor &compDesc{
-            *descriptor.ElementComponent<Descriptor>(at, comp.offset())};
-        if (compDesc.IsAllocated()) {
-          if (const DescriptorAddendum * addendum{compDesc.Addendum()}) {
-            if (const typeInfo::DerivedType *
-                compDynamicType{addendum->derivedType()}) {
-              if (!compDynamicType->noFinalizationNeeded()) {
-                Finalize(compDesc, *compDynamicType, terminator);
+      const Descriptor &compDesc{*instance_.ElementComponent<Descriptor>(
+          subscripts_, component_->offset())};
+      Advance();
+      if (compDesc.IsAllocated()) {
+        if (const DescriptorAddendum *addendum{compDesc.Addendum()}) {
+          if (const typeInfo::DerivedType *compDynamicType{
+                  addendum->derivedType()}) {
+            if (!compDynamicType->noFinalizationNeeded()) {
+              if (int status{
+                      workQueue.BeginFinalize(compDesc, *compDynamicType)};
+                  status != StatOk) {
+                return status;
               }
             }
           }
         }
       }
-    } else if (comp.genre() == typeInfo::Component::Genre::Allocatable ||
-        comp.genre() == typeInfo::Component::Genre::Automatic) {
-      if (const typeInfo::DerivedType * compType{comp.derivedType()}) {
-        if (!compType->noFinalizationNeeded()) {
-          for (std::size_t j{0}; j++ < elements;
-               descriptor.IncrementSubscripts(at)) {
-            const Descriptor &compDesc{
-                *descriptor.ElementComponent<Descriptor>(at, comp.offset())};
-            if (compDesc.IsAllocated()) {
-              Finalize(compDesc, *compType, terminator);
-            }
+    } else if (component_->genre() == typeInfo::Component::Genre::Allocatable ||
+        component_->genre() == typeInfo::Component::Genre::Automatic) {
+      if (const typeInfo::DerivedType *compType{component_->derivedType()};
+          compType && !compType->noFinalizationNeeded()) {
+        const Descriptor &compDesc{*instance_.ElementComponent<Descriptor>(
+            subscripts_, component_->offset())};
+        Advance();
+        if (compDesc.IsAllocated()) {
+          if (int status{workQueue.BeginFinalize(compDesc, *compType)};
+              status != StatOk) {
+            return status;
           }
         }
+      } else {
+        SkipToNextComponent();
       }
-    } else if (comp.genre() == typeInfo::Component::Genre::Data &&
-        comp.derivedType() && !comp.derivedType()->noFinalizationNeeded()) {
+    } else if (component_->genre() == typeInfo::Component::Genre::Data &&
+        component_->derivedType() &&
+        !component_->derivedType()->noFinalizationNeeded()) {
       SubscriptValue extents[maxRank];
-      GetComponentExtents(extents, comp, descriptor);
-      StaticDescriptor<maxRank, true, 0> staticDescriptor;
-      Descriptor &compDesc{staticDescriptor.descriptor()};
-      const typeInfo::DerivedType &compType{*comp.derivedType()};
-      for (std::size_t j{0}; j++ < elements;
-           descriptor.IncrementSubscripts(at)) {
-        compDesc.Establish(compType,
-            descriptor.ElementComponent<char>(at, comp.offset()), comp.rank(),
-            extents);
-        Finalize(compDesc, compType, terminator);
+      GetComponentExtents(extents, *component_, instance_);
+      Descriptor &compDesc{componentDescriptor_.descriptor()};
+      const typeInfo::DerivedType &compType{*component_->derivedType()};
+      compDesc.Establish(compType,
+          instance_.ElementComponent<char>(subscripts_, component_->offset()),
+          component_->rank(), extents);
+      Advance();
+      if (int status{workQueue.BeginFinalize(compDesc, compType)};
+          status != StatOk) {
+        return status;
       }
+    } else {
+      SkipToNextComponent();
     }
   }
-  if (recurse) {
-    StaticDescriptor<maxRank, true, 8 /*?*/> statDesc;
-    Descriptor &tmpDesc{statDesc.descriptor()};
-    tmpDesc = descriptor;
+  // Last, do the parent component, if any and finalizable.
+  if (finalizableParentType_) {
+    Descriptor &tmpDesc{componentDescriptor_.descriptor()};
+    tmpDesc = instance_;
     tmpDesc.raw().attribute = CFI_attribute_pointer;
-    tmpDesc.Addendum()->set_derivedType(parentType);
-    tmpDesc.raw().elem_len = parentType->sizeInBytes();
-    Finalize(tmpDesc, *parentType, terminator);
+    tmpDesc.Addendum()->set_derivedType(finalizableParentType_);
+    tmpDesc.raw().elem_len = finalizableParentType_->sizeInBytes();
+    const auto &parentType{*finalizableParentType_};
+    finalizableParentType_ = nullptr;
+    // Don't return StatOk here if the nested FInalize is still running;
+    // it needs this->componentDescriptor_.
+    return workQueue.BeginFinalize(tmpDesc, parentType);
   }
+  return StatOk;
 }
 
 // The order of finalization follows Fortran 2018 7.5.6.2, with
@@ -373,51 +392,71 @@ RT_API_ATTRS void Finalize(const Descriptor &descriptor,
 // preceding any deallocation.
 RT_API_ATTRS void Destroy(const Descriptor &descriptor, bool finalize,
     const typeInfo::DerivedType &derived, Terminator *terminator) {
-  if (derived.noDestructionNeeded() || !descriptor.IsAllocated()) {
-    return;
+  if (!derived.noFinalizationNeeded() && descriptor.IsAllocated()) {
+    Terminator stubTerminator{"Destroy() in Fortran runtime", 0};
+    WorkQueue workQueue{terminator ? *terminator : stubTerminator};
+    if (workQueue.BeginDestroy(descriptor, derived, finalize) == StatContinue) {
+      workQueue.Run();
+    }
   }
-  if (finalize && !derived.noFinalizationNeeded()) {
-    Finalize(descriptor, derived, terminator);
+}
+
+RT_API_ATTRS int DestroyTicket::Begin(WorkQueue &workQueue) {
+  if (finalize_ && !derived_.noFinalizationNeeded()) {
+    if (int status{workQueue.BeginFinalize(instance_, derived_)};
+        status != StatOk && status != StatContinue) {
+      return status;
+    }
   }
+  return StatContinue;
+}
+
+RT_API_ATTRS int DestroyTicket::Continue(WorkQueue &workQueue) {
   // Deallocate all direct and indirect allocatable and automatic components.
   // Contrary to finalization, the order of deallocation does not matter.
-  const Descriptor &componentDesc{derived.component()};
-  std::size_t myComponents{componentDesc.Elements()};
-  std::size_t elements{descriptor.Elements()};
-  SubscriptValue at[maxRank];
-  descriptor.GetLowerBounds(at);
-  for (std::size_t k{0}; k < myComponents; ++k) {
-    const auto &comp{
-        *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(k)};
-    const bool destroyComp{
-        comp.derivedType() && !comp.derivedType()->noDestructionNeeded()};
-    if (comp.genre() == typeInfo::Component::Genre::Allocatable ||
-        comp.genre() == typeInfo::Component::Genre::Automatic) {
-      for (std::size_t j{0}; j < elements; ++j) {
-        Descriptor *d{
-            descriptor.ElementComponent<Descriptor>(at, comp.offset())};
-        if (destroyComp) {
-          Destroy(*d, /*finalize=*/false, *comp.derivedType(), terminator);
+  while (!IsComplete()) {
+    const auto *componentDerived{component_->derivedType()};
+    if (component_->genre() == typeInfo::Component::Genre::Allocatable ||
+        component_->genre() == typeInfo::Component::Genre::Automatic) {
+      Descriptor *d{instance_.ElementComponent<Descriptor>(
+          subscripts_, component_->offset())};
+      if (d->IsAllocated()) {
+        if (phase_ == 0) {
+          ++phase_;
+          if (componentDerived && !componentDerived->noDestructionNeeded()) {
+            if (int status{workQueue.BeginDestroy(
+                    *d, *componentDerived, /*finalize=*/false)};
+                status != StatOk) {
+              return status;
+            }
+          }
         }
         d->Deallocate();
-        descriptor.IncrementSubscripts(at);
       }
-    } else if (destroyComp &&
-        comp.genre() == typeInfo::Component::Genre::Data) {
-      SubscriptValue extents[maxRank];
-      GetComponentExtents(extents, comp, descriptor);
-      StaticDescriptor<maxRank, true, 0> staticDescriptor;
-      Descriptor &compDesc{staticDescriptor.descriptor()};
-      const typeInfo::DerivedType &compType{*comp.derivedType()};
-      for (std::size_t j{0}; j++ < elements;
-           descriptor.IncrementSubscripts(at)) {
+      Advance();
+    } else if (component_->genre() == typeInfo::Component::Genre::Data) {
+      if (!componentDerived || componentDerived->noDestructionNeeded()) {
+        SkipToNextComponent();
+      } else {
+        SubscriptValue extents[maxRank];
+        GetComponentExtents(extents, *component_, instance_);
+        Descriptor &compDesc{componentDescriptor_.descriptor()};
+        const typeInfo::DerivedType &compType{*componentDerived};
         compDesc.Establish(compType,
-            descriptor.ElementComponent<char>(at, comp.offset()), comp.rank(),
-            extents);
-        Destroy(compDesc, /*finalize=*/false, *comp.derivedType(), terminator);
+            instance_.ElementComponent<char>(subscripts_, component_->offset()),
+            component_->rank(), extents);
+        Advance();
+        if (int status{workQueue.BeginDestroy(
+                compDesc, *componentDerived, /*finalize=*/false)};
+            status != StatOk) {
+          return status;
+        }
       }
+    } else {
+      SkipToNextComponent();
     }
   }
+  return StatOk;
 }
 
 RT_API_ATTRS bool HasDynamicComponent(const Descriptor &descriptor) {
diff --git a/flang-rt/lib/runtime/descriptor-io.cpp b/flang-rt/lib/runtime/descriptor-io.cpp
index 3db1455af52fe..364724b89ba0d 100644
--- a/flang-rt/lib/runtime/descriptor-io.cpp
+++ b/flang-rt/lib/runtime/descriptor-io.cpp
@@ -7,15 +7,44 @@
 //===----------------------------------------------------------------------===//
 
 #include "descriptor-io.h"
+#include "edit-input.h"
+#include "edit-output.h"
+#include "unit.h"
+#include "flang-rt/runtime/descriptor.h"
+#include "flang-rt/runtime/io-stmt.h"
+#include "flang-rt/runtime/namelist.h"
+#include "flang-rt/runtime/terminator.h"
+#include "flang-rt/runtime/type-info.h"
+#include "flang-rt/runtime/work-queue.h"
+#include "flang/Common/optional.h"
 #include "flang/Common/restorer.h"
+#include "flang/Common/uint128.h"
+#include "flang/Runtime/cpp-type.h"
 #include "flang/Runtime/freestanding-tools.h"
 
+// Implementation of I/O data list item transfers based on descriptors.
+// (All I/O items come through here so that the code is exercised for test;
+// some scalar I/O data transfer APIs could be changed to bypass their use
+// of descriptors in the future for better efficiency.)
+
 namespace Fortran::runtime::io::descr {
 RT_OFFLOAD_API_GROUP_BEGIN
 
+template <typename A>
+inline RT_API_ATTRS A &ExtractElement(IoStatementState &io,
+    const Descriptor &descriptor, const SubscriptValue subscripts[]) {
+  A *p{descriptor.Element<A>(subscripts)};
+  if (!p) {
+    io.GetIoErrorHandler().Crash("Bad address for I/O item -- null base "
+                                 "address or subscripts out of range");
+  }
+  return *p;
+}
+
 // Defined formatted I/O (maybe)
-Fortran::common::optional<bool> DefinedFormattedIo(IoStatementState &io,
-    const Descriptor &descriptor, const typeInfo::DerivedType &derived,
+static RT_API_ATTRS Fortran::common::optional<bool> DefinedFormattedIo(
+    IoStatementState &io, const Descriptor &descriptor,
+    const typeInfo::DerivedType &derived,
     const typeInfo::SpecialBinding &special,
     const SubscriptValue subscripts[]) {
   Fortran::common::optional<DataEdit> peek{
@@ -104,8 +133,8 @@ Fortran::common::optional<bool> DefinedFormattedIo(IoStatementState &io,
 }
 
 // Defined unformatted I/O
-bool DefinedUnformattedIo(IoStatementState &io, const Descriptor &descriptor,
-    const typeInfo::DerivedType &derived,
+static RT_API_ATTRS bool DefinedUnformattedIo(IoStatementState &io,
+    const Descriptor &descriptor, const typeInfo::DerivedType &derived,
     const typeInfo::SpecialBinding &special) {
   // Unformatted I/O must have an external unit (or child thereof).
   IoErrorHandler &handler{io.GetIoErrorHandler()};
@@ -152,5 +181,619 @@ bool DefinedUnformattedIo(IoStatementState &io, const Descriptor &descriptor,
   return handler.GetIoStat() == IostatOk;
 }
 
+// Per-category descriptor-based I/O templates
+
+// TODO (perhaps as a nontrivial but small starter project): implement
+// automatic repetition counts, like "10*3.14159", for list-directed and
+// NAMELIST array output.
+
+template <int KIND, Direction DIR>
+inline RT_API_ATTRS bool FormattedIntegerIO(IoStatementState &io,
+    const Descriptor &descriptor, [[maybe_unused]] bool isSigned) {
+  std::size_t numElements{descriptor.Elements()};
+  SubscriptValue subscripts[maxRank];
+  descriptor.GetLowerBounds(subscripts);
+  using IntType = CppTypeFor<common::TypeCategory::Integer, KIND>;
+  bool anyInput{false};
+  for (std::size_t j{0}; j < numElements; ++j) {
+    if (auto edit{io.GetNextDataEdit()}) {
+      IntType &x{ExtractElement<IntType>(io, descriptor, subscripts)};
+      if constexpr (DIR == Direction::Output) {
+        if (!EditIntegerOutput<KIND>(io, *edit, x, isSigned)) {
+          return false;
+        }
+      } else if (edit->descriptor != DataEdit::ListDirectedNullValue) {
+        if (EditIntegerInput(
+                io, *edit, reinterpret_cast<void *>(&x), KIND, isSigned)) {
+          anyInput = true;
+        } else {
+          return anyInput && edit->IsNamelist();
+        }
+      }
+      if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
+        io.GetIoErrorHandler().Crash(
+            "FormattedIntegerIO: subscripts out of bounds");
+      }
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+template <int KIND, Direction DIR>
+inline RT_API_ATTRS bool FormattedRealIO(
+    IoStatementState &io, const Descriptor &descriptor) {
+  std::size_t numElements{descriptor.Elements()};
+  SubscriptValue subscripts[maxRank];
+  descriptor.GetLowerBounds(subscripts);
+  using RawType = typename RealOutputEditing<KIND>::BinaryFloatingPoint;
+  bool anyInput{false};
+  for (std::size_t j{0}; j < numElements; ++j) {
+    if (auto edit{io.GetNextDataEdit()}) {
+      RawType &x{ExtractElement<RawType>(io, descriptor, subscripts)};
+      if constexpr (DIR == Direction::Output) {
+        if (!RealOutputEditing<KIND>{io, x}.Edit(*edit)) {
+          return false;
+        }
+      } else if (edit->descriptor != DataEdit::ListDirectedNullValue) {
+        if (EditRealInput<KIND>(io, *edit, reinterpret_cast<void *>(&x))) {
+          anyInput = true;
+        } else {
+          return anyInput && edit->IsNamelist();
+        }
+      }
+      if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
+        io.GetIoErrorHandler().Crash(
+            "FormattedRealIO: subscripts out of bounds");
+      }
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+template <int KIND, Direction DIR>
+inline RT_API_ATTRS bool FormattedComplexIO(
+    IoStatementState &io, const Descriptor &descriptor) {
+  std::size_t numElements{descriptor.Elements()};
+  SubscriptValue subscripts[maxRank];
+  descriptor.GetLowerBounds(subscripts);
+  bool isListOutput{
+      io.get_if<ListDirectedStatementState<Direction::Output>>() != nullptr};
+  using RawType = typename RealOutputEditing<KIND>::BinaryFloatingPoint;
+  bool anyInput{false};
+  for (std::size_t j{0}; j < numElements; ++j) {
+    RawType *x{&ExtractElement<RawType>(io, descriptor, subscripts)};
+    if (isListOutput) {
+      DataEdit rEdit, iEdit;
+      rEdit.descriptor = DataEdit::ListDirectedRealPart;
+      iEdit.descriptor = DataEdit::ListDirectedImaginaryPart;
+      rEdit.modes = iEdit.modes = io.mutableModes();
+      if (!RealOutputEditing<KIND>{io, x[0]}.Edit(rEdit) ||
+          !RealOutputEditing<KIND>{io, x[1]}.Edit(iEdit)) {
+        return false;
+      }
+    } else {
+      for (int k{0}; k < 2; ++k, ++x) {
+        auto edit{io.GetNextDataEdit()};
+        if (!edit) {
+          return false;
+        } else if constexpr (DIR == Direction::Output) {
+          if (!RealOutputEditing<KIND>{io, *x}.Edit(*edit)) {
+            return false;
+          }
+        } else if (edit->descriptor == DataEdit::ListDirectedNullValue) {
+          break;
+        } else if (EditRealInput<KIND>(
+                       io, *edit, reinterpret_cast<void *>(x))) {
+          anyInput = true;
+        } else {
+          return anyInput && edit->IsNamelist();
+        }
+      }
+    }
+    if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
+      io.GetIoErrorHandler().Crash(
+          "FormattedComplexIO: subscripts out of bounds");
+    }
+  }
+  return true;
+}
+
+template <typename A, Direction DIR>
+inline RT_API_ATTRS bool FormattedCharacterIO(
+    IoStatementState &io, const Descriptor &descriptor) {
+  std::size_t numElements{descriptor.Elements()};
+  SubscriptValue subscripts[maxRank];
+  descriptor.GetLowerBounds(subscripts);
+  std::size_t length{descriptor.ElementBytes() / sizeof(A)};
+  auto *listOutput{io.get_if<ListDirectedStatementState<Direction::Output>>()};
+  bool anyInput{false};
+  for (std::size_t j{0}; j < numElements; ++j) {
+    A *x{&ExtractElement<A>(io, descriptor, subscripts)};
+    if (listOutput) {
+      if (!ListDirectedCharacterOutput(io, *listOutput, x, length)) {
+        return false;
+      }
+    } else if (auto edit{io.GetNextDataEdit()}) {
+      if constexpr (DIR == Direction::Output) {
+        if (!EditCharacterOutput(io, *edit, x, length)) {
+          return false;
+        }
+      } else { // input
+        if (edit->descriptor != DataEdit::ListDirectedNullValue) {
+          if (EditCharacterInput(io, *edit, x, length)) {
+            anyInput = true;
+          } else {
+            return anyInput && edit->IsNamelist();
+          }
+        }
+      }
+    } else {
+      return false;
+    }
+    if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
+      io.GetIoErrorHandler().Crash(
+          "FormattedCharacterIO: subscripts out of bounds");
+    }
+  }
+  return true;
+}
+
+template <int KIND, Direction DIR>
+inline RT_API_ATTRS bool FormattedLogicalIO(
+    IoStatementState &io, const Descriptor &descriptor) {
+  std::size_t numElements{descriptor.Elements()};
+  SubscriptValue subscripts[maxRank];
+  descriptor.GetLowerBounds(subscripts);
+  auto *listOutput{io.get_if<ListDirectedStatementState<Direction::Output>>()};
+  using IntType = CppTypeFor<TypeCategory::Integer, KIND>;
+  bool anyInput{false};
+  for (std::size_t j{0}; j < numElements; ++j) {
+    IntType &x{ExtractElement<IntType>(io, descriptor, subscripts)};
+    if (listOutput) {
+      if (!ListDirectedLogicalOutput(io, *listOutput, x != 0)) {
+        return false;
+      }
+    } else if (auto edit{io.GetNextDataEdit()}) {
+      if constexpr (DIR == Direction::Output) {
+        if (!EditLogicalOutput(io, *edit, x != 0)) {
+          return false;
+        }
+      } else {
+        if (edit->descriptor != DataEdit::ListDirectedNullValue) {
+          bool truth{};
+          if (EditLogicalInput(io, *edit, truth)) {
+            x = truth;
+            anyInput = true;
+          } else {
+            return anyInput && edit->IsNamelist();
+          }
+        }
+      }
+    } else {
+      return false;
+    }
+    if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
+      io.GetIoErrorHandler().Crash(
+          "FormattedLogicalIO: subscripts out of bounds");
+    }
+  }
+  return true;
+}
+
+template <Direction DIR>
+RT_API_ATTRS int DerivedIoTicket<DIR>::Continue(WorkQueue &workQueue) {
+  while (!IsComplete()) {
+    if (component_->genre() == typeInfo::Component::Genre::Data) {
+      // Create a descriptor for the component
+      Descriptor &compDesc{componentDescriptor_.descriptor()};
+      component_->CreatePointerDescriptor(
+          compDesc, instance_, io_.GetIoErrorHandler(), subscripts_);
+      Advance();
+      if (int status{workQueue.BeginDescriptorIo<DIR>(
+              io_, compDesc, table_, anyIoTookPlace_)};
+          status != StatOk) {
+        return status;
+      }
+    } else {
+      // Component is itself a descriptor
+      char *pointer{
+          instance_.Element<char>(subscripts_) + component_->offset()};
+      const Descriptor &compDesc{
+          *reinterpret_cast<const Descriptor *>(pointer)};
+      Advance();
+      if (compDesc.IsAllocated()) {
+        if (int status{workQueue.BeginDescriptorIo<DIR>(
+                io_, compDesc, table_, anyIoTookPlace_)};
+            status != StatOk) {
+          return status;
+        }
+      }
+    }
+  }
+  return StatOk;
+}
+
+template RT_API_ATTRS int DerivedIoTicket<Direction::Output>::Continue(
+    WorkQueue &);
+template RT_API_ATTRS int DerivedIoTicket<Direction::Input>::Continue(
+    WorkQueue &);
+
+template <Direction DIR>
+RT_API_ATTRS int DescriptorIoTicket<DIR>::Begin(WorkQueue &workQueue) {
+  IoErrorHandler &handler{io_.GetIoErrorHandler()};
+  if (handler.InError()) {
+    return handler.GetIoStat();
+  }
+  if (!io_.get_if<IoDirectionState<DIR>>()) {
+    handler.Crash("DescriptorIO() called for wrong I/O direction");
+    return handler.GetIoStat();
+  }
+  if constexpr (DIR == Direction::Input) {
+    if (!io_.BeginReadingRecord()) {
+      return StatOk;
+    }
+  }
+  if (!io_.get_if<FormattedIoStatementState<DIR>>()) {
+    // Unformatted I/O
+    IoErrorHandler &handler{io_.GetIoErrorHandler()};
+    const DescriptorAddendum *addendum{instance_.Addendum()};
+    if (const typeInfo::DerivedType *type{
+            addendum ? addendum->derivedType() : nullptr}) {
+      // derived type unformatted I/O
+      if (table_) {
+        if (const auto *definedIo{table_->Find(*type,
+                DIR == Direction::Input
+                    ? common::DefinedIo::ReadUnformatted
+                    : common::DefinedIo::WriteUnformatted)}) {
+          if (definedIo->subroutine) {
+            typeInfo::SpecialBinding special{DIR == Direction::Input
+                    ? typeInfo::SpecialBinding::Which::ReadUnformatted
+                    : typeInfo::SpecialBinding::Which::WriteUnformatted,
+                definedIo->subroutine, definedIo->isDtvArgPolymorphic, false,
+                false};
+            if (DefinedUnformattedIo(io_, instance_, *type, special)) {
+              anyIoTookPlace_ = true;
+              return StatOk;
+            }
+          } else {
+            int status{workQueue.BeginDerivedIo<DIR>(
+                io_, instance_, *type, table_, anyIoTookPlace_)};
+            return status == StatContinue ? StatOk : status; // done here
+          }
+        }
+      }
+      if (const typeInfo::SpecialBinding *special{
+              type->FindSpecialBinding(DIR == Direction::Input
+                      ? typeInfo::SpecialBinding::Which::ReadUnformatted
+                      : typeInfo::SpecialBinding::Which::WriteUnformatted)}) {
+        if (!table_ || !table_->ignoreNonTbpEntries || special->isTypeBound()) {
+          // defined derived type unformatted I/O
+          if (DefinedUnformattedIo(io_, instance_, *type, *special)) {
+            anyIoTookPlace_ = true;
+            return StatOk;
+          } else {
+            return IostatEnd;
+          }
+        }
+      }
+      // Default derived type unformatted I/O
+      // TODO: If no component at any level has defined READ or WRITE
+      // (as appropriate), the elements are contiguous, and no byte swapping
+      // is active, do a block transfer via the code below.
+      int status{workQueue.BeginDerivedIo<DIR>(
+          io_, instance_, *type, table_, anyIoTookPlace_)};
+      return status == StatContinue ? StatOk : status; // done here
+    } else {
+      // intrinsic type unformatted I/O
+      auto *externalUnf{io_.get_if<ExternalUnformattedIoStatementState<DIR>>()};
+      ChildUnformattedIoStatementState<DIR> *childUnf{nullptr};
+      InquireIOLengthState *inq{nullptr};
+      bool swapEndianness{false};
+      if (externalUnf) {
+        swapEndianness = externalUnf->unit().swapEndianness();
+      } else {
+        childUnf = io_.get_if<ChildUnformattedIoStatementState<DIR>>();
+        if (!childUnf) {
+          inq = DIR == Direction::Output ? io_.get_if<InquireIOLengthState>()
+                                         : nullptr;
+          RUNTIME_CHECK(handler, inq != nullptr);
+        }
+      }
+      std::size_t elementBytes{instance_.ElementBytes()};
+      std::size_t swappingBytes{elementBytes};
+      if (auto maybeCatAndKind{instance_.type().GetCategoryAndKind()}) {
+        // Byte swapping units can be smaller than elements, namely
+        // for COMPLEX and CHARACTER.
+        if (maybeCatAndKind->first == TypeCategory::Character) {
+          // swap each character position independently
+          swappingBytes = maybeCatAndKind->second; // kind
+        } else if (maybeCatAndKind->first == TypeCategory::Complex) {
+          // swap real and imaginary components independently
+          swappingBytes /= 2;
+        }
+      }
+      using CharType =
+          std::conditional_t<DIR == Direction::Output, const char, char>;
+      auto Transfer{[=](CharType &x, std::size_t totalBytes) -> bool {
+        if constexpr (DIR == Direction::Output) {
+          return externalUnf ? externalUnf->Emit(&x, totalBytes, swappingBytes)
+              : childUnf     ? childUnf->Emit(&x, totalBytes, swappingBytes)
+                             : inq->Emit(&x, totalBytes, swappingBytes);
+        } else {
+          return externalUnf
+              ? externalUnf->Receive(&x, totalBytes, swappingBytes)
+              : childUnf->Receive(&x, totalBytes, swappingBytes);
+        }
+      }};
+      if (!swapEndianness &&
+          instance_.IsContiguous()) { // contiguous unformatted I/O
+        char &x{ExtractElement<char>(io_, instance_, subscripts_)};
+        if (Transfer(x, elements_ * elementBytes)) {
+          anyIoTookPlace_ = true;
+        } else {
+          return IostatEnd;
+        }
+      } else { // non-contiguous or byte-swapped intrinsic type unformatted I/O
+        for (; !IsComplete(); Advance()) {
+          char &x{ExtractElement<char>(io_, instance_, subscripts_)};
+          if (Transfer(x, elementBytes)) {
+            anyIoTookPlace_ = true;
+          } else {
+            return IostatEnd;
+          }
+        }
+      }
+    }
+    // Unformatted I/O never needs to call Continue().
+    return StatOk;
+  }
+  // Formatted I/O
+  if (auto catAndKind{instance_.type().GetCategoryAndKind()}) {
+    TypeCategory cat{catAndKind->first};
+    int kind{catAndKind->second};
+    bool any{false};
+    switch (cat) {
+    case TypeCategory::Integer:
+      switch (kind) {
+      case 1:
+        any = FormattedIntegerIO<1, DIR>(io_, instance_, true);
+        break;
+      case 2:
+        any = FormattedIntegerIO<2, DIR>(io_, instance_, true);
+        break;
+      case 4:
+        any = FormattedIntegerIO<4, DIR>(io_, instance_, true);
+        break;
+      case 8:
+        any = FormattedIntegerIO<8, DIR>(io_, instance_, true);
+        break;
+      case 16:
+        any = FormattedIntegerIO<16, DIR>(io_, instance_, true);
+        break;
+      default:
+        handler.Crash(
+            "not yet implemented: INTEGER(KIND=%d) in formatted IO", kind);
+        return IostatEnd;
+      }
+      break;
+    case TypeCategory::Unsigned:
+      switch (kind) {
+      case 1:
+        any = FormattedIntegerIO<1, DIR>(io_, instance_, false);
+        break;
+      case 2:
+        any = FormattedIntegerIO<2, DIR>(io_, instance_, false);
+        break;
+      case 4:
+        any = FormattedIntegerIO<4, DIR>(io_, instance_, false);
+        break;
+      case 8:
+        any = FormattedIntegerIO<8, DIR>(io_, instance_, false);
+        break;
+      case 16:
+        any = FormattedIntegerIO<16, DIR>(io_, instance_, false);
+        break;
+      default:
+        handler.Crash(
+            "not yet implemented: UNSIGNED(KIND=%d) in formatted IO", kind);
+        return IostatEnd;
+      }
+      break;
+    case TypeCategory::Real:
+      switch (kind) {
+      case 2:
+        any = FormattedRealIO<2, DIR>(io_, instance_);
+        break;
+      case 3:
+        any = FormattedRealIO<3, DIR>(io_, instance_);
+        break;
+      case 4:
+        any = FormattedRealIO<4, DIR>(io_, instance_);
+        break;
+      case 8:
+        any = FormattedRealIO<8, DIR>(io_, instance_);
+        break;
+      case 10:
+        any = FormattedRealIO<10, DIR>(io_, instance_);
+        break;
+      // TODO: case double/double
+      case 16:
+        any = FormattedRealIO<16, DIR>(io_, instance_);
+        break;
+      default:
+        handler.Crash(
+            "not yet implemented: REAL(KIND=%d) in formatted IO", kind);
+        return IostatEnd;
+      }
+      break;
+    case TypeCategory::Complex:
+      switch (kind) {
+      case 2:
+        any = FormattedComplexIO<2, DIR>(io_, instance_);
+        break;
+      case 3:
+        any = FormattedComplexIO<3, DIR>(io_, instance_);
+        break;
+      case 4:
+        any = FormattedComplexIO<4, DIR>(io_, instance_);
+        break;
+      case 8:
+        any = FormattedComplexIO<8, DIR>(io_, instance_);
+        break;
+      case 10:
+        any = FormattedComplexIO<10, DIR>(io_, instance_);
+        break;
+      // TODO: case double/double
+      case 16:
+        any = FormattedComplexIO<16, DIR>(io_, instance_);
+        break;
+      default:
+        handler.Crash(
+            "not yet implemented: COMPLEX(KIND=%d) in formatted IO", kind);
+        return IostatEnd;
+      }
+      break;
+    case TypeCategory::Character:
+      switch (kind) {
+      case 1:
+        any = FormattedCharacterIO<char, DIR>(io_, instance_);
+        break;
+      case 2:
+        any = FormattedCharacterIO<char16_t, DIR>(io_, instance_);
+        break;
+      case 4:
+        any = FormattedCharacterIO<char32_t, DIR>(io_, instance_);
+        break;
+      default:
+        handler.Crash(
+            "not yet implemented: CHARACTER(KIND=%d) in formatted IO", kind);
+        return IostatEnd;
+      }
+      break;
+    case TypeCategory::Logical:
+      switch (kind) {
+      case 1:
+        any = FormattedLogicalIO<1, DIR>(io_, instance_);
+        break;
+      case 2:
+        any = FormattedLogicalIO<2, DIR>(io_, instance_);
+        break;
+      case 4:
+        any = FormattedLogicalIO<4, DIR>(io_, instance_);
+        break;
+      case 8:
+        any = FormattedLogicalIO<8, DIR>(io_, instance_);
+        break;
+      default:
+        handler.Crash(
+            "not yet implemented: LOGICAL(KIND=%d) in formatted IO", kind);
+        return IostatEnd;
+      }
+      break;
+    case TypeCategory::Derived: {
+      // Derived type information must be present for formatted I/O.
+      IoErrorHandler &handler{io_.GetIoErrorHandler()};
+      const DescriptorAddendum *addendum{instance_.Addendum()};
+      RUNTIME_CHECK(handler, addendum != nullptr);
+      derived_ = addendum->derivedType();
+      RUNTIME_CHECK(handler, derived_ != nullptr);
+      if (table_) {
+        if (const auto *definedIo{table_->Find(*derived_,
+                DIR == Direction::Input ? common::DefinedIo::ReadFormatted
+                                        : common::DefinedIo::WriteFormatted)}) {
+          if (definedIo->subroutine) {
+            nonTbpSpecial_.emplace(DIR == Direction::Input
+                    ? typeInfo::SpecialBinding::Which::ReadFormatted
+                    : typeInfo::SpecialBinding::Which::WriteFormatted,
+                definedIo->subroutine, definedIo->isDtvArgPolymorphic, false,
+                false);
+            special_ = &*nonTbpSpecial_;
+          }
+        }
+      }
+      if (!special_) {
+        if (const typeInfo::SpecialBinding *binding{
+                derived_->FindSpecialBinding(DIR == Direction::Input
+                        ? typeInfo::SpecialBinding::Which::ReadFormatted
+                        : typeInfo::SpecialBinding::Which::WriteFormatted)}) {
+          if (!table_ || !table_->ignoreNonTbpEntries ||
+              binding->isTypeBound()) {
+            special_ = binding;
+          }
+        }
+      }
+      return StatContinue;
+    }
+    }
+    if (any) {
+      anyIoTookPlace_ = true;
+    } else {
+      return IostatEnd;
+    }
+  } else {
+    handler.Crash("DescriptorIO: bad type code (%d) in descriptor",
+        static_cast<int>(instance_.type().raw()));
+    return handler.GetIoStat();
+  }
+  return StatOk;
+}
+
+template RT_API_ATTRS int DescriptorIoTicket<Direction::Output>::Begin(
+    WorkQueue &);
+template RT_API_ATTRS int DescriptorIoTicket<Direction::Input>::Begin(
+    WorkQueue &);
+
+template <Direction DIR>
+RT_API_ATTRS int DescriptorIoTicket<DIR>::Continue(WorkQueue &workQueue) {
+  // Only derived type formatted I/O gets here.
+  while (!IsComplete()) {
+    if (special_) {
+      if (auto defined{DefinedFormattedIo(
+              io_, instance_, *derived_, *special_, subscripts_)}) {
+        anyIoTookPlace_ |= *defined;
+        Advance();
+        continue;
+      }
+    }
+    Descriptor &elementDesc{elementDescriptor_.descriptor()};
+    elementDesc.Establish(
+        *derived_, nullptr, 0, nullptr, CFI_attribute_pointer);
+    elementDesc.set_base_addr(instance_.Element<char>(subscripts_));
+    Advance();
+    if (int status{workQueue.BeginDerivedIo<DIR>(
+            io_, elementDesc, *derived_, table_, anyIoTookPlace_)};
+        status != StatOk) {
+      return status;
+    }
+  }
+  return StatOk;
+}
+
+template RT_API_ATTRS int DescriptorIoTicket<Direction::Output>::Continue(
+    WorkQueue &);
+template RT_API_ATTRS int DescriptorIoTicket<Direction::Input>::Continue(
+    WorkQueue &);
+
+template <Direction DIR>
+RT_API_ATTRS bool DescriptorIO(IoStatementState &io,
+    const Descriptor &descriptor, const NonTbpDefinedIoTable *table) {
+  bool anyIoTookPlace{false};
+  WorkQueue workQueue{io.GetIoErrorHandler()};
+  if (workQueue.BeginDescriptorIo<DIR>(io, descriptor, table, anyIoTookPlace) ==
+      StatContinue) {
+    workQueue.Run();
+  }
+  return anyIoTookPlace;
+}
+
+template RT_API_ATTRS bool DescriptorIO<Direction::Output>(
+    IoStatementState &, const Descriptor &, const NonTbpDefinedIoTable *);
+template RT_API_ATTRS bool DescriptorIO<Direction::Input>(
+    IoStatementState &, const Descriptor &, const NonTbpDefinedIoTable *);
+
 RT_OFFLOAD_API_GROUP_END
 } // namespace Fortran::runtime::io::descr
diff --git a/flang-rt/lib/runtime/descriptor-io.h b/flang-rt/lib/runtime/descriptor-io.h
index eb60f106c9203..88ad59bd24b53 100644
--- a/flang-rt/lib/runtime/descriptor-io.h
+++ b/flang-rt/lib/runtime/descriptor-io.h
@@ -9,619 +9,27 @@
 #ifndef FLANG_RT_RUNTIME_DESCRIPTOR_IO_H_
 #define FLANG_RT_RUNTIME_DESCRIPTOR_IO_H_
 
-// Implementation of I/O data list item transfers based on descriptors.
-// (All I/O items come through here so that the code is exercised for test;
-// some scalar I/O data transfer APIs could be changed to bypass their use
-// of descriptors in the future for better efficiency.)
+#include "flang-rt/runtime/connection.h"
 
-#include "edit-input.h"
-#include "edit-output.h"
-#include "unit.h"
-#include "flang-rt/runtime/descriptor.h"
-#include "flang-rt/runtime/io-stmt.h"
-#include "flang-rt/runtime/namelist.h"
-#include "flang-rt/runtime/terminator.h"
-#include "flang-rt/runtime/type-info.h"
-#include "flang/Common/optional.h"
-#include "flang/Common/uint128.h"
-#include "flang/Runtime/cpp-type.h"
+namespace Fortran::runtime {
+class Descriptor;
+} // namespace Fortran::runtime
 
-namespace Fortran::runtime::io::descr {
-template <typename A>
-inline RT_API_ATTRS A &ExtractElement(IoStatementState &io,
-    const Descriptor &descriptor, const SubscriptValue subscripts[]) {
-  A *p{descriptor.Element<A>(subscripts)};
-  if (!p) {
-    io.GetIoErrorHandler().Crash("Bad address for I/O item -- null base "
-                                 "address or subscripts out of range");
-  }
-  return *p;
-}
-
-// Per-category descriptor-based I/O templates
-
-// TODO (perhaps as a nontrivial but small starter project): implement
-// automatic repetition counts, like "10*3.14159", for list-directed and
-// NAMELIST array output.
-
-template <int KIND, Direction DIR>
-inline RT_API_ATTRS bool FormattedIntegerIO(IoStatementState &io,
-    const Descriptor &descriptor, [[maybe_unused]] bool isSigned) {
-  std::size_t numElements{descriptor.Elements()};
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  using IntType = CppTypeFor<common::TypeCategory::Integer, KIND>;
-  bool anyInput{false};
-  for (std::size_t j{0}; j < numElements; ++j) {
-    if (auto edit{io.GetNextDataEdit()}) {
-      IntType &x{ExtractElement<IntType>(io, descriptor, subscripts)};
-      if constexpr (DIR == Direction::Output) {
-        if (!EditIntegerOutput<KIND>(io, *edit, x, isSigned)) {
-          return false;
-        }
-      } else if (edit->descriptor != DataEdit::ListDirectedNullValue) {
-        if (EditIntegerInput(
-                io, *edit, reinterpret_cast<void *>(&x), KIND, isSigned)) {
-          anyInput = true;
-        } else {
-          return anyInput && edit->IsNamelist();
-        }
-      }
-      if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
-        io.GetIoErrorHandler().Crash(
-            "FormattedIntegerIO: subscripts out of bounds");
-      }
-    } else {
-      return false;
-    }
-  }
-  return true;
-}
-
-template <int KIND, Direction DIR>
-inline RT_API_ATTRS bool FormattedRealIO(
-    IoStatementState &io, const Descriptor &descriptor) {
-  std::size_t numElements{descriptor.Elements()};
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  using RawType = typename RealOutputEditing<KIND>::BinaryFloatingPoint;
-  bool anyInput{false};
-  for (std::size_t j{0}; j < numElements; ++j) {
-    if (auto edit{io.GetNextDataEdit()}) {
-      RawType &x{ExtractElement<RawType>(io, descriptor, subscripts)};
-      if constexpr (DIR == Direction::Output) {
-        if (!RealOutputEditing<KIND>{io, x}.Edit(*edit)) {
-          return false;
-        }
-      } else if (edit->descriptor != DataEdit::ListDirectedNullValue) {
-        if (EditRealInput<KIND>(io, *edit, reinterpret_cast<void *>(&x))) {
-          anyInput = true;
-        } else {
-          return anyInput && edit->IsNamelist();
-        }
-      }
-      if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
-        io.GetIoErrorHandler().Crash(
-            "FormattedRealIO: subscripts out of bounds");
-      }
-    } else {
-      return false;
-    }
-  }
-  return true;
-}
+namespace Fortran::runtime::io {
+class IoStatementState;
+struct NonTbpDefinedIoTable;
+} // namespace Fortran::runtime::io
 
-template <int KIND, Direction DIR>
-inline RT_API_ATTRS bool FormattedComplexIO(
-    IoStatementState &io, const Descriptor &descriptor) {
-  std::size_t numElements{descriptor.Elements()};
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  bool isListOutput{
-      io.get_if<ListDirectedStatementState<Direction::Output>>() != nullptr};
-  using RawType = typename RealOutputEditing<KIND>::BinaryFloatingPoint;
-  bool anyInput{false};
-  for (std::size_t j{0}; j < numElements; ++j) {
-    RawType *x{&ExtractElement<RawType>(io, descriptor, subscripts)};
-    if (isListOutput) {
-      DataEdit rEdit, iEdit;
-      rEdit.descriptor = DataEdit::ListDirectedRealPart;
-      iEdit.descriptor = DataEdit::ListDirectedImaginaryPart;
-      rEdit.modes = iEdit.modes = io.mutableModes();
-      if (!RealOutputEditing<KIND>{io, x[0]}.Edit(rEdit) ||
-          !RealOutputEditing<KIND>{io, x[1]}.Edit(iEdit)) {
-        return false;
-      }
-    } else {
-      for (int k{0}; k < 2; ++k, ++x) {
-        auto edit{io.GetNextDataEdit()};
-        if (!edit) {
-          return false;
-        } else if constexpr (DIR == Direction::Output) {
-          if (!RealOutputEditing<KIND>{io, *x}.Edit(*edit)) {
-            return false;
-          }
-        } else if (edit->descriptor == DataEdit::ListDirectedNullValue) {
-          break;
-        } else if (EditRealInput<KIND>(
-                       io, *edit, reinterpret_cast<void *>(x))) {
-          anyInput = true;
-        } else {
-          return anyInput && edit->IsNamelist();
-        }
-      }
-    }
-    if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
-      io.GetIoErrorHandler().Crash(
-          "FormattedComplexIO: subscripts out of bounds");
-    }
-  }
-  return true;
-}
-
-template <typename A, Direction DIR>
-inline RT_API_ATTRS bool FormattedCharacterIO(
-    IoStatementState &io, const Descriptor &descriptor) {
-  std::size_t numElements{descriptor.Elements()};
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  std::size_t length{descriptor.ElementBytes() / sizeof(A)};
-  auto *listOutput{io.get_if<ListDirectedStatementState<Direction::Output>>()};
-  bool anyInput{false};
-  for (std::size_t j{0}; j < numElements; ++j) {
-    A *x{&ExtractElement<A>(io, descriptor, subscripts)};
-    if (listOutput) {
-      if (!ListDirectedCharacterOutput(io, *listOutput, x, length)) {
-        return false;
-      }
-    } else if (auto edit{io.GetNextDataEdit()}) {
-      if constexpr (DIR == Direction::Output) {
-        if (!EditCharacterOutput(io, *edit, x, length)) {
-          return false;
-        }
-      } else { // input
-        if (edit->descriptor != DataEdit::ListDirectedNullValue) {
-          if (EditCharacterInput(io, *edit, x, length)) {
-            anyInput = true;
-          } else {
-            return anyInput && edit->IsNamelist();
-          }
-        }
-      }
-    } else {
-      return false;
-    }
-    if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
-      io.GetIoErrorHandler().Crash(
-          "FormattedCharacterIO: subscripts out of bounds");
-    }
-  }
-  return true;
-}
-
-template <int KIND, Direction DIR>
-inline RT_API_ATTRS bool FormattedLogicalIO(
-    IoStatementState &io, const Descriptor &descriptor) {
-  std::size_t numElements{descriptor.Elements()};
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  auto *listOutput{io.get_if<ListDirectedStatementState<Direction::Output>>()};
-  using IntType = CppTypeFor<TypeCategory::Integer, KIND>;
-  bool anyInput{false};
-  for (std::size_t j{0}; j < numElements; ++j) {
-    IntType &x{ExtractElement<IntType>(io, descriptor, subscripts)};
-    if (listOutput) {
-      if (!ListDirectedLogicalOutput(io, *listOutput, x != 0)) {
-        return false;
-      }
-    } else if (auto edit{io.GetNextDataEdit()}) {
-      if constexpr (DIR == Direction::Output) {
-        if (!EditLogicalOutput(io, *edit, x != 0)) {
-          return false;
-        }
-      } else {
-        if (edit->descriptor != DataEdit::ListDirectedNullValue) {
-          bool truth{};
-          if (EditLogicalInput(io, *edit, truth)) {
-            x = truth;
-            anyInput = true;
-          } else {
-            return anyInput && edit->IsNamelist();
-          }
-        }
-      }
-    } else {
-      return false;
-    }
-    if (!descriptor.IncrementSubscripts(subscripts) && j + 1 < numElements) {
-      io.GetIoErrorHandler().Crash(
-          "FormattedLogicalIO: subscripts out of bounds");
-    }
-  }
-  return true;
-}
+namespace Fortran::runtime::io::descr {
 
 template <Direction DIR>
-static RT_API_ATTRS bool DescriptorIO(IoStatementState &, const Descriptor &,
+RT_API_ATTRS bool DescriptorIO(IoStatementState &, const Descriptor &,
     const NonTbpDefinedIoTable * = nullptr);
 
-// For intrinsic (not defined) derived type I/O, formatted & unformatted
-template <Direction DIR>
-static RT_API_ATTRS bool DefaultComponentIO(IoStatementState &io,
-    const typeInfo::Component &component, const Descriptor &origDescriptor,
-    const SubscriptValue origSubscripts[], Terminator &terminator,
-    const NonTbpDefinedIoTable *table) {
-#if !defined(RT_DEVICE_AVOID_RECURSION)
-  if (component.genre() == typeInfo::Component::Genre::Data) {
-    // Create a descriptor for the component
-    StaticDescriptor<maxRank, true, 16 /*?*/> statDesc;
-    Descriptor &desc{statDesc.descriptor()};
-    component.CreatePointerDescriptor(
-        desc, origDescriptor, terminator, origSubscripts);
-    return DescriptorIO<DIR>(io, desc, table);
-  } else {
-    // Component is itself a descriptor
-    char *pointer{
-        origDescriptor.Element<char>(origSubscripts) + component.offset()};
-    const Descriptor &compDesc{*reinterpret_cast<const Descriptor *>(pointer)};
-    return compDesc.IsAllocated() && DescriptorIO<DIR>(io, compDesc, table);
-  }
-#else
-  terminator.Crash("not yet implemented: component IO");
-#endif
-}
-
-template <Direction DIR>
-static RT_API_ATTRS bool DefaultComponentwiseFormattedIO(IoStatementState &io,
-    const Descriptor &descriptor, const typeInfo::DerivedType &type,
-    const NonTbpDefinedIoTable *table, const SubscriptValue subscripts[]) {
-  IoErrorHandler &handler{io.GetIoErrorHandler()};
-  const Descriptor &compArray{type.component()};
-  RUNTIME_CHECK(handler, compArray.rank() == 1);
-  std::size_t numComponents{compArray.Elements()};
-  SubscriptValue at[maxRank];
-  compArray.GetLowerBounds(at);
-  for (std::size_t k{0}; k < numComponents;
-       ++k, compArray.IncrementSubscripts(at)) {
-    const typeInfo::Component &component{
-        *compArray.Element<typeInfo::Component>(at)};
-    if (!DefaultComponentIO<DIR>(
-            io, component, descriptor, subscripts, handler, table)) {
-      // Return true for NAMELIST input if any component appeared.
-      auto *listInput{
-          io.get_if<ListDirectedStatementState<Direction::Input>>()};
-      return DIR == Direction::Input && k > 0 && listInput &&
-          listInput->inNamelistSequence();
-    }
-  }
-  return true;
-}
-
-template <Direction DIR>
-static RT_API_ATTRS bool DefaultComponentwiseUnformattedIO(IoStatementState &io,
-    const Descriptor &descriptor, const typeInfo::DerivedType &type,
-    const NonTbpDefinedIoTable *table) {
-  IoErrorHandler &handler{io.GetIoErrorHandler()};
-  const Descriptor &compArray{type.component()};
-  RUNTIME_CHECK(handler, compArray.rank() == 1);
-  std::size_t numComponents{compArray.Elements()};
-  std::size_t numElements{descriptor.Elements()};
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  for (std::size_t j{0}; j < numElements;
-       ++j, descriptor.IncrementSubscripts(subscripts)) {
-    SubscriptValue at[maxRank];
-    compArray.GetLowerBounds(at);
-    for (std::size_t k{0}; k < numComponents;
-         ++k, compArray.IncrementSubscripts(at)) {
-      const typeInfo::Component &component{
-          *compArray.Element<typeInfo::Component>(at)};
-      if (!DefaultComponentIO<DIR>(
-              io, component, descriptor, subscripts, handler, table)) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-RT_API_ATTRS Fortran::common::optional<bool> DefinedFormattedIo(
-    IoStatementState &, const Descriptor &, const typeInfo::DerivedType &,
-    const typeInfo::SpecialBinding &, const SubscriptValue[]);
-
-template <Direction DIR>
-static RT_API_ATTRS bool FormattedDerivedTypeIO(IoStatementState &io,
-    const Descriptor &descriptor, const NonTbpDefinedIoTable *table) {
-  IoErrorHandler &handler{io.GetIoErrorHandler()};
-  // Derived type information must be present for formatted I/O.
-  const DescriptorAddendum *addendum{descriptor.Addendum()};
-  RUNTIME_CHECK(handler, addendum != nullptr);
-  const typeInfo::DerivedType *type{addendum->derivedType()};
-  RUNTIME_CHECK(handler, type != nullptr);
-  Fortran::common::optional<typeInfo::SpecialBinding> nonTbpSpecial;
-  const typeInfo::SpecialBinding *special{nullptr};
-  if (table) {
-    if (const auto *definedIo{table->Find(*type,
-            DIR == Direction::Input ? common::DefinedIo::ReadFormatted
-                                    : common::DefinedIo::WriteFormatted)}) {
-      if (definedIo->subroutine) {
-        nonTbpSpecial.emplace(DIR == Direction::Input
-                ? typeInfo::SpecialBinding::Which::ReadFormatted
-                : typeInfo::SpecialBinding::Which::WriteFormatted,
-            definedIo->subroutine, definedIo->isDtvArgPolymorphic, false,
-            false);
-        special = &*nonTbpSpecial;
-      }
-    }
-  }
-  if (!special) {
-    if (const typeInfo::SpecialBinding *
-        binding{type->FindSpecialBinding(DIR == Direction::Input
-                ? typeInfo::SpecialBinding::Which::ReadFormatted
-                : typeInfo::SpecialBinding::Which::WriteFormatted)}) {
-      if (!table || !table->ignoreNonTbpEntries || binding->isTypeBound()) {
-        special = binding;
-      }
-    }
-  }
-  SubscriptValue subscripts[maxRank];
-  descriptor.GetLowerBounds(subscripts);
-  std::size_t numElements{descriptor.Elements()};
-  for (std::size_t j{0}; j < numElements;
-       ++j, descriptor.IncrementSubscripts(subscripts)) {
-    Fortran::common::optional<bool> result;
-    if (special) {
-      result = DefinedFormattedIo(io, descriptor, *type, *special, subscripts);
-    }
-    if (!result) {
-      result = DefaultComponentwiseFormattedIO<DIR>(
-          io, descriptor, *type, table, subscripts);
-    }
-    if (!result.value()) {
-      // Return true for NAMELIST input if we got anything.
-      auto *listInput{
-          io.get_if<ListDirectedStatementState<Direction::Input>>()};
-      return DIR == Direction::Input && j > 0 && listInput &&
-          listInput->inNamelistSequence();
-    }
-  }
-  return true;
-}
-
-RT_API_ATTRS bool DefinedUnformattedIo(IoStatementState &, const Descriptor &,
-    const typeInfo::DerivedType &, const typeInfo::SpecialBinding &);
+extern template RT_API_ATTRS bool DescriptorIO<Direction::Output>(
+    IoStatementState &, const Descriptor &, const NonTbpDefinedIoTable *);
+extern template RT_API_ATTRS bool DescriptorIO<Direction::Input>(
+    IoStatementState &, const Descriptor &, const NonTbpDefinedIoTable *);
 
-// Unformatted I/O
-template <Direction DIR>
-static RT_API_ATTRS bool UnformattedDescriptorIO(IoStatementState &io,
-    const Descriptor &descriptor, const NonTbpDefinedIoTable *table = nullptr) {
-  IoErrorHandler &handler{io.GetIoErrorHandler()};
-  const DescriptorAddendum *addendum{descriptor.Addendum()};
-  if (const typeInfo::DerivedType *
-      type{addendum ? addendum->derivedType() : nullptr}) {
-    // derived type unformatted I/O
-    if (table) {
-      if (const auto *definedIo{table->Find(*type,
-              DIR == Direction::Input ? common::DefinedIo::ReadUnformatted
-                                      : common::DefinedIo::WriteUnformatted)}) {
-        if (definedIo->subroutine) {
-          typeInfo::SpecialBinding special{DIR == Direction::Input
-                  ? typeInfo::SpecialBinding::Which::ReadUnformatted
-                  : typeInfo::SpecialBinding::Which::WriteUnformatted,
-              definedIo->subroutine, definedIo->isDtvArgPolymorphic, false,
-              false};
-          if (Fortran::common::optional<bool> wasDefined{
-                  DefinedUnformattedIo(io, descriptor, *type, special)}) {
-            return *wasDefined;
-          }
-        } else {
-          return DefaultComponentwiseUnformattedIO<DIR>(
-              io, descriptor, *type, table);
-        }
-      }
-    }
-    if (const typeInfo::SpecialBinding *
-        special{type->FindSpecialBinding(DIR == Direction::Input
-                ? typeInfo::SpecialBinding::Which::ReadUnformatted
-                : typeInfo::SpecialBinding::Which::WriteUnformatted)}) {
-      if (!table || !table->ignoreNonTbpEntries || special->isTypeBound()) {
-        // defined derived type unformatted I/O
-        return DefinedUnformattedIo(io, descriptor, *type, *special);
-      }
-    }
-    // Default derived type unformatted I/O
-    // TODO: If no component at any level has defined READ or WRITE
-    // (as appropriate), the elements are contiguous, and no byte swapping
-    // is active, do a block transfer via the code below.
-    return DefaultComponentwiseUnformattedIO<DIR>(io, descriptor, *type, table);
-  } else {
-    // intrinsic type unformatted I/O
-    auto *externalUnf{io.get_if<ExternalUnformattedIoStatementState<DIR>>()};
-    auto *childUnf{io.get_if<ChildUnformattedIoStatementState<DIR>>()};
-    auto *inq{
-        DIR == Direction::Output ? io.get_if<InquireIOLengthState>() : nullptr};
-    RUNTIME_CHECK(handler, externalUnf || childUnf || inq);
-    std::size_t elementBytes{descriptor.ElementBytes()};
-    std::size_t numElements{descriptor.Elements()};
-    std::size_t swappingBytes{elementBytes};
-    if (auto maybeCatAndKind{descriptor.type().GetCategoryAndKind()}) {
-      // Byte swapping units can be smaller than elements, namely
-      // for COMPLEX and CHARACTER.
-      if (maybeCatAndKind->first == TypeCategory::Character) {
-        // swap each character position independently
-        swappingBytes = maybeCatAndKind->second; // kind
-      } else if (maybeCatAndKind->first == TypeCategory::Complex) {
-        // swap real and imaginary components independently
-        swappingBytes /= 2;
-      }
-    }
-    SubscriptValue subscripts[maxRank];
-    descriptor.GetLowerBounds(subscripts);
-    using CharType =
-        std::conditional_t<DIR == Direction::Output, const char, char>;
-    auto Transfer{[=](CharType &x, std::size_t totalBytes) -> bool {
-      if constexpr (DIR == Direction::Output) {
-        return externalUnf ? externalUnf->Emit(&x, totalBytes, swappingBytes)
-            : childUnf     ? childUnf->Emit(&x, totalBytes, swappingBytes)
-                           : inq->Emit(&x, totalBytes, swappingBytes);
-      } else {
-        return externalUnf ? externalUnf->Receive(&x, totalBytes, swappingBytes)
-                           : childUnf->Receive(&x, totalBytes, swappingBytes);
-      }
-    }};
-    bool swapEndianness{externalUnf && externalUnf->unit().swapEndianness()};
-    if (!swapEndianness &&
-        descriptor.IsContiguous()) { // contiguous unformatted I/O
-      char &x{ExtractElement<char>(io, descriptor, subscripts)};
-      return Transfer(x, numElements * elementBytes);
-    } else { // non-contiguous or byte-swapped intrinsic type unformatted I/O
-      for (std::size_t j{0}; j < numElements; ++j) {
-        char &x{ExtractElement<char>(io, descriptor, subscripts)};
-        if (!Transfer(x, elementBytes)) {
-          return false;
-        }
-        if (!descriptor.IncrementSubscripts(subscripts) &&
-            j + 1 < numElements) {
-          handler.Crash("DescriptorIO: subscripts out of bounds");
-        }
-      }
-      return true;
-    }
-  }
-}
-
-template <Direction DIR>
-static RT_API_ATTRS bool DescriptorIO(IoStatementState &io,
-    const Descriptor &descriptor, const NonTbpDefinedIoTable *table) {
-  IoErrorHandler &handler{io.GetIoErrorHandler()};
-  if (handler.InError()) {
-    return false;
-  }
-  if (!io.get_if<IoDirectionState<DIR>>()) {
-    handler.Crash("DescriptorIO() called for wrong I/O direction");
-    return false;
-  }
-  if constexpr (DIR == Direction::Input) {
-    if (!io.BeginReadingRecord()) {
-      return false;
-    }
-  }
-  if (!io.get_if<FormattedIoStatementState<DIR>>()) {
-    return UnformattedDescriptorIO<DIR>(io, descriptor, table);
-  }
-  if (auto catAndKind{descriptor.type().GetCategoryAndKind()}) {
-    TypeCategory cat{catAndKind->first};
-    int kind{catAndKind->second};
-    switch (cat) {
-    case TypeCategory::Integer:
-      switch (kind) {
-      case 1:
-        return FormattedIntegerIO<1, DIR>(io, descriptor, true);
-      case 2:
-        return FormattedIntegerIO<2, DIR>(io, descriptor, true);
-      case 4:
-        return FormattedIntegerIO<4, DIR>(io, descriptor, true);
-      case 8:
-        return FormattedIntegerIO<8, DIR>(io, descriptor, true);
-      case 16:
-        return FormattedIntegerIO<16, DIR>(io, descriptor, true);
-      default:
-        handler.Crash(
-            "not yet implemented: INTEGER(KIND=%d) in formatted IO", kind);
-        return false;
-      }
-    case TypeCategory::Unsigned:
-      switch (kind) {
-      case 1:
-        return FormattedIntegerIO<1, DIR>(io, descriptor, false);
-      case 2:
-        return FormattedIntegerIO<2, DIR>(io, descriptor, false);
-      case 4:
-        return FormattedIntegerIO<4, DIR>(io, descriptor, false);
-      case 8:
-        return FormattedIntegerIO<8, DIR>(io, descriptor, false);
-      case 16:
-        return FormattedIntegerIO<16, DIR>(io, descriptor, false);
-      default:
-        handler.Crash(
-            "not yet implemented: UNSIGNED(KIND=%d) in formatted IO", kind);
-        return false;
-      }
-    case TypeCategory::Real:
-      switch (kind) {
-      case 2:
-        return FormattedRealIO<2, DIR>(io, descriptor);
-      case 3:
-        return FormattedRealIO<3, DIR>(io, descriptor);
-      case 4:
-        return FormattedRealIO<4, DIR>(io, descriptor);
-      case 8:
-        return FormattedRealIO<8, DIR>(io, descriptor);
-      case 10:
-        return FormattedRealIO<10, DIR>(io, descriptor);
-      // TODO: case double/double
-      case 16:
-        return FormattedRealIO<16, DIR>(io, descriptor);
-      default:
-        handler.Crash(
-            "not yet implemented: REAL(KIND=%d) in formatted IO", kind);
-        return false;
-      }
-    case TypeCategory::Complex:
-      switch (kind) {
-      case 2:
-        return FormattedComplexIO<2, DIR>(io, descriptor);
-      case 3:
-        return FormattedComplexIO<3, DIR>(io, descriptor);
-      case 4:
-        return FormattedComplexIO<4, DIR>(io, descriptor);
-      case 8:
-        return FormattedComplexIO<8, DIR>(io, descriptor);
-      case 10:
-        return FormattedComplexIO<10, DIR>(io, descriptor);
-      // TODO: case double/double
-      case 16:
-        return FormattedComplexIO<16, DIR>(io, descriptor);
-      default:
-        handler.Crash(
-            "not yet implemented: COMPLEX(KIND=%d) in formatted IO", kind);
-        return false;
-      }
-    case TypeCategory::Character:
-      switch (kind) {
-      case 1:
-        return FormattedCharacterIO<char, DIR>(io, descriptor);
-      case 2:
-        return FormattedCharacterIO<char16_t, DIR>(io, descriptor);
-      case 4:
-        return FormattedCharacterIO<char32_t, DIR>(io, descriptor);
-      default:
-        handler.Crash(
-            "not yet implemented: CHARACTER(KIND=%d) in formatted IO", kind);
-        return false;
-      }
-    case TypeCategory::Logical:
-      switch (kind) {
-      case 1:
-        return FormattedLogicalIO<1, DIR>(io, descriptor);
-      case 2:
-        return FormattedLogicalIO<2, DIR>(io, descriptor);
-      case 4:
-        return FormattedLogicalIO<4, DIR>(io, descriptor);
-      case 8:
-        return FormattedLogicalIO<8, DIR>(io, descriptor);
-      default:
-        handler.Crash(
-            "not yet implemented: LOGICAL(KIND=%d) in formatted IO", kind);
-        return false;
-      }
-    case TypeCategory::Derived:
-      return FormattedDerivedTypeIO<DIR>(io, descriptor, table);
-    }
-  }
-  handler.Crash("DescriptorIO: bad type code (%d) in descriptor",
-      static_cast<int>(descriptor.type().raw()));
-  return false;
-}
 } // namespace Fortran::runtime::io::descr
 #endif // FLANG_RT_RUNTIME_DESCRIPTOR_IO_H_
diff --git a/flang-rt/lib/runtime/environment.cpp b/flang-rt/lib/runtime/environment.cpp
index 1d5304254ed0e..0f0564403c0e2 100644
--- a/flang-rt/lib/runtime/environment.cpp
+++ b/flang-rt/lib/runtime/environment.cpp
@@ -143,6 +143,10 @@ void ExecutionEnvironment::Configure(int ac, const char *av[],
     }
   }
 
+  if (auto *x{std::getenv("FLANG_RT_DEBUG")}) {
+    internalDebugging = std::strtol(x, nullptr, 10);
+  }
+
   if (auto *x{std::getenv("ACC_OFFLOAD_STACK_SIZE")}) {
     char *end;
     auto n{std::strtoul(x, &end, 10)};
diff --git a/flang-rt/lib/runtime/namelist.cpp b/flang-rt/lib/runtime/namelist.cpp
index b0cf2180fc6d4..1bef387a9771f 100644
--- a/flang-rt/lib/runtime/namelist.cpp
+++ b/flang-rt/lib/runtime/namelist.cpp
@@ -10,6 +10,7 @@
 #include "descriptor-io.h"
 #include "flang-rt/runtime/emit-encoded.h"
 #include "flang-rt/runtime/io-stmt.h"
+#include "flang-rt/runtime/type-info.h"
 #include "flang/Runtime/io-api.h"
 #include <algorithm>
 #include <cstring>
diff --git a/flang-rt/lib/runtime/tools.cpp b/flang-rt/lib/runtime/tools.cpp
index b08195cd31e05..24d05f369fcbe 100644
--- a/flang-rt/lib/runtime/tools.cpp
+++ b/flang-rt/lib/runtime/tools.cpp
@@ -205,7 +205,7 @@ RT_API_ATTRS void ShallowCopyInner(const Descriptor &to, const Descriptor &from,
 // Doing the recursion upwards instead of downwards puts the more common
 // cases earlier in the if-chain and has a tangible impact on performance.
 template <typename P, int RANK> struct ShallowCopyRankSpecialize {
-  static bool execute(const Descriptor &to, const Descriptor &from,
+  static RT_API_ATTRS bool execute(const Descriptor &to, const Descriptor &from,
       bool toIsContiguous, bool fromIsContiguous) {
     if (to.rank() == RANK && from.rank() == RANK) {
       ShallowCopyInner<P, RANK>(to, from, toIsContiguous, fromIsContiguous);
@@ -217,7 +217,7 @@ template <typename P, int RANK> struct ShallowCopyRankSpecialize {
 };
 
 template <typename P> struct ShallowCopyRankSpecialize<P, maxRank + 1> {
-  static bool execute(const Descriptor &to, const Descriptor &from,
+  static RT_API_ATTRS bool execute(const Descriptor &to, const Descriptor &from,
       bool toIsContiguous, bool fromIsContiguous) {
     return false;
   }
diff --git a/flang-rt/lib/runtime/type-info.cpp b/flang-rt/lib/runtime/type-info.cpp
index 82182696d70c6..451213202acef 100644
--- a/flang-rt/lib/runtime/type-info.cpp
+++ b/flang-rt/lib/runtime/type-info.cpp
@@ -140,11 +140,11 @@ RT_API_ATTRS void Component::CreatePointerDescriptor(Descriptor &descriptor,
     const SubscriptValue *subscripts) const {
   RUNTIME_CHECK(terminator, genre_ == Genre::Data);
   EstablishDescriptor(descriptor, container, terminator);
+  std::size_t offset{offset_};
   if (subscripts) {
-    descriptor.set_base_addr(container.Element<char>(subscripts) + offset_);
-  } else {
-    descriptor.set_base_addr(container.OffsetElement<char>() + offset_);
+    offset += container.SubscriptsToByteOffset(subscripts);
   }
+  descriptor.set_base_addr(container.OffsetElement<char>() + offset);
   descriptor.raw().attribute = CFI_attribute_pointer;
 }
 
diff --git a/flang-rt/lib/runtime/work-queue.cpp b/flang-rt/lib/runtime/work-queue.cpp
new file mode 100644
index 0000000000000..a508ecb637102
--- /dev/null
+++ b/flang-rt/lib/runtime/work-queue.cpp
@@ -0,0 +1,161 @@
+//===-- lib/runtime/work-queue.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang-rt/runtime/work-queue.h"
+#include "flang-rt/runtime/environment.h"
+#include "flang-rt/runtime/memory.h"
+#include "flang-rt/runtime/type-info.h"
+#include "flang/Common/visit.h"
+
+namespace Fortran::runtime {
+
+#if !defined(RT_DEVICE_COMPILATION)
+// FLANG_RT_DEBUG code is disabled when false.
+static constexpr bool enableDebugOutput{false};
+#endif
+
+RT_OFFLOAD_API_GROUP_BEGIN
+
+RT_API_ATTRS Componentwise::Componentwise(const typeInfo::DerivedType &derived)
+    : derived_{derived}, components_{derived_.component().Elements()} {
+  GetComponent();
+}
+
+RT_API_ATTRS void Componentwise::GetComponent() {
+  if (IsComplete()) {
+    component_ = nullptr;
+  } else {
+    const Descriptor &componentDesc{derived_.component()};
+    component_ = componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
+        componentAt_);
+  }
+}
+
+RT_API_ATTRS int Ticket::Continue(WorkQueue &workQueue) {
+  if (!begun) {
+    begun = true;
+    return common::visit(
+        [&workQueue](
+            auto &specificTicket) { return specificTicket.Begin(workQueue); },
+        u);
+  } else {
+    return common::visit(
+        [&workQueue](auto &specificTicket) {
+          return specificTicket.Continue(workQueue);
+        },
+        u);
+  }
+}
+
+RT_API_ATTRS WorkQueue::~WorkQueue() {
+  if (last_) {
+    if ((last_->next = firstFree_)) {
+      last_->next->previous = last_;
+    }
+    firstFree_ = first_;
+    first_ = last_ = nullptr;
+  }
+  while (firstFree_) {
+    TicketList *next{firstFree_->next};
+    if (!firstFree_->isStatic) {
+      FreeMemory(firstFree_);
+    }
+    firstFree_ = next;
+  }
+}
+
+RT_API_ATTRS Ticket &WorkQueue::StartTicket() {
+  if (!firstFree_) {
+    void *p{AllocateMemoryOrCrash(terminator_, sizeof(TicketList))};
+    firstFree_ = new (p) TicketList;
+    firstFree_->isStatic = false;
+  }
+  TicketList *newTicket{firstFree_};
+  if ((firstFree_ = newTicket->next)) {
+    firstFree_->previous = nullptr;
+  }
+  TicketList *after{insertAfter_ ? insertAfter_->next : nullptr};
+  if ((newTicket->previous = insertAfter_ ? insertAfter_ : last_)) {
+    newTicket->previous->next = newTicket;
+  } else {
+    first_ = newTicket;
+  }
+  if ((newTicket->next = after)) {
+    after->previous = newTicket;
+  } else {
+    last_ = newTicket;
+  }
+  newTicket->ticket.begun = false;
+#if !defined(RT_DEVICE_COMPILATION)
+  if (enableDebugOutput &&
+      (executionEnvironment.internalDebugging &
+          ExecutionEnvironment::WorkQueue)) {
+    std::fprintf(stderr, "WQ: new ticket\n");
+  }
+#endif
+  return newTicket->ticket;
+}
+
+RT_API_ATTRS int WorkQueue::Run() {
+  while (last_) {
+    TicketList *at{last_};
+    insertAfter_ = last_;
+#if !defined(RT_DEVICE_COMPILATION)
+    if (enableDebugOutput &&
+        (executionEnvironment.internalDebugging &
+            ExecutionEnvironment::WorkQueue)) {
+      std::fprintf(stderr, "WQ: %zd %s\n", at->ticket.u.index(),
+          at->ticket.begun ? "Continue" : "Begin");
+    }
+#endif
+    int stat{at->ticket.Continue(*this)};
+#if !defined(RT_DEVICE_COMPILATION)
+    if (enableDebugOutput &&
+        (executionEnvironment.internalDebugging &
+            ExecutionEnvironment::WorkQueue)) {
+      std::fprintf(stderr, "WQ: ... stat %d\n", stat);
+    }
+#endif
+    insertAfter_ = nullptr;
+    if (stat == StatOk) {
+      if (at->previous) {
+        at->previous->next = at->next;
+      } else {
+        first_ = at->next;
+      }
+      if (at->next) {
+        at->next->previous = at->previous;
+      } else {
+        last_ = at->previous;
+      }
+      if ((at->next = firstFree_)) {
+        at->next->previous = at;
+      }
+      at->previous = nullptr;
+      firstFree_ = at;
+    } else if (stat != StatContinue) {
+      Stop();
+      return stat;
+    }
+  }
+  return StatOk;
+}
+
+RT_API_ATTRS void WorkQueue::Stop() {
+  if (last_) {
+    if ((last_->next = firstFree_)) {
+      last_->next->previous = last_;
+    }
+    firstFree_ = first_;
+    first_ = last_ = nullptr;
+  }
+}
+
+RT_OFFLOAD_API_GROUP_END
+
+} // namespace Fortran::runtime
diff --git a/flang-rt/unittests/Runtime/ExternalIOTest.cpp b/flang-rt/unittests/Runtime/ExternalIOTest.cpp
index 3833e48be3dd6..6c148b1de6f82 100644
--- a/flang-rt/unittests/Runtime/ExternalIOTest.cpp
+++ b/flang-rt/unittests/Runtime/ExternalIOTest.cpp
@@ -184,7 +184,7 @@ TEST(ExternalIOTests, TestSequentialFixedUnformatted) {
   io = IONAME(BeginInquireIoLength)(__FILE__, __LINE__);
   for (int j{1}; j <= 3; ++j) {
     ASSERT_TRUE(IONAME(OutputDescriptor)(io, desc))
-        << "OutputDescriptor() for InquireIoLength";
+        << "OutputDescriptor() for InquireIoLength " << j;
   }
   ASSERT_EQ(IONAME(GetIoLength)(io), 3 * recl) << "GetIoLength";
   ASSERT_EQ(IONAME(EndIoStatement)(io), IostatOk)
diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md
index 51969de5ac7fe..377d01dbec69a 100644
--- a/flang/docs/Extensions.md
+++ b/flang/docs/Extensions.md
@@ -850,6 +850,16 @@ print *, [(j,j=1,10)]
   warning since such values may have become defined by the time the nested
   expression's value is required.
 
+* Intrinsic assignment of arrays is defined elementally, and intrinsic
+  assignment of derived type components is defined componentwise.
+  However, when intrinsic assignment takes place for an array of derived
+  type, the order of the loop nesting is not defined.
+  Some compilers will loop over the elements, assigning all of the components
+  of each element before proceeding to the next element.
+  This compiler loops over all of the components, and assigns all of
+  the elements for each component before proceeding to the next component.
+  A program using defined assignment might be able to detect the difference.
+
 ## De Facto Standard Features
 
 * `EXTENDS_TYPE_OF()` returns `.TRUE.` if both of its arguments have the
diff --git a/flang/include/flang/Runtime/assign.h b/flang/include/flang/Runtime/assign.h
index bc80997a1bec2..eb1f63184a177 100644
--- a/flang/include/flang/Runtime/assign.h
+++ b/flang/include/flang/Runtime/assign.h
@@ -38,7 +38,7 @@ enum AssignFlags {
   ComponentCanBeDefinedAssignment = 1 << 3,
   ExplicitLengthCharacterLHS = 1 << 4,
   PolymorphicLHS = 1 << 5,
-  DeallocateLHS = 1 << 6
+  DeallocateLHS = 1 << 6,
 };
 
 #ifdef RT_DEVICE_COMPILATION
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 3839bc1d2a215..79f7032aac312 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -182,9 +182,12 @@ const Symbol *HasImpureFinal(
     const Symbol &, std::optional<int> rank = std::nullopt);
 // Is this type finalizable or does it contain any polymorphic allocatable
 // ultimate components?
-bool MayRequireFinalization(const DerivedTypeSpec &derived);
+bool MayRequireFinalization(const DerivedTypeSpec &);
 // Does this type have an allocatable direct component?
-bool HasAllocatableDirectComponent(const DerivedTypeSpec &derived);
+bool HasAllocatableDirectComponent(const DerivedTypeSpec &);
+// Does this type have any defined assignment at any level (or any polymorphic
+// allocatable)?
+bool MayHaveDefinedAssignment(const DerivedTypeSpec &);
 
 bool IsInBlankCommon(const Symbol &);
 bool IsAssumedLengthCharacter(const Symbol &);
diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp
index 98295f3705a71..e1fe087b8740e 100644
--- a/flang/lib/Semantics/runtime-type-info.cpp
+++ b/flang/lib/Semantics/runtime-type-info.cpp
@@ -661,6 +661,10 @@ const Symbol *RuntimeTableBuilder::DescribeType(
     AddValue(dtValues, derivedTypeSchema_, "nofinalizationneeded"s,
         IntExpr<1>(
             derivedTypeSpec && !MayRequireFinalization(*derivedTypeSpec)));
+    // Similarly, a flag to enable optimized runtime assignment.
+    AddValue(dtValues, derivedTypeSchema_, "nodefinedassignment"s,
+        IntExpr<1>(
+            derivedTypeSpec && !MayHaveDefinedAssignment(*derivedTypeSpec)));
   }
   dtObject.get<ObjectEntityDetails>().set_init(MaybeExpr{
       StructureExpr(Structure(derivedTypeSchema_, std::move(dtValues)))});
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 1d1e3ac044166..3247addc905ba 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -813,6 +813,38 @@ bool HasAllocatableDirectComponent(const DerivedTypeSpec &derived) {
   return std::any_of(directs.begin(), directs.end(), IsAllocatable);
 }
 
+static bool MayHaveDefinedAssignment(
+    const DerivedTypeSpec &derived, std::set<const Scope *> &checked) {
+  if (const Scope *scope{derived.GetScope()};
+      scope && checked.find(scope) == checked.end()) {
+    checked.insert(scope);
+    for (const auto &[_, symbolRef] : *scope) {
+      if (const auto *generic{symbolRef->detailsIf<GenericDetails>()}) {
+        if (generic->kind().IsAssignment()) {
+          return true;
+        }
+      } else if (symbolRef->has<ObjectEntityDetails>() &&
+          !IsPointer(*symbolRef)) {
+        if (const DeclTypeSpec *type{symbolRef->GetType()}) {
+          if (type->IsPolymorphic()) {
+            return true;
+          } else if (const DerivedTypeSpec *derived{type->AsDerived()}) {
+            if (MayHaveDefinedAssignment(*derived, checked)) {
+              return true;
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool MayHaveDefinedAssignment(const DerivedTypeSpec &derived) {
+  std::set<const Scope *> checked;
+  return MayHaveDefinedAssignment(derived, checked);
+}
+
 bool IsAssumedLengthCharacter(const Symbol &symbol) {
   if (const DeclTypeSpec * type{symbol.GetType()}) {
     return type->category() == DeclTypeSpec::Character &&
diff --git a/flang/module/__fortran_type_info.f90 b/flang/module/__fortran_type_info.f90
index b30a6bf697563..7226b06504d28 100644
--- a/flang/module/__fortran_type_info.f90
+++ b/flang/module/__fortran_type_info.f90
@@ -52,7 +52,8 @@
     integer(1) :: noInitializationNeeded ! 1 if no component w/ init
     integer(1) :: noDestructionNeeded ! 1 if no component w/ dealloc/final
     integer(1) :: noFinalizationNeeded ! 1 if nothing finalizeable
-    integer(1) :: __padding0(4)
+    integer(1) :: noDefinedAssignment ! 1 if no defined ASSIGNMENT(=)
+    integer(1) :: __padding0(3)
   end type
 
   type :: Binding
diff --git a/flang/test/Lower/volatile-openmp.f90 b/flang/test/Lower/volatile-openmp.f90
index 28f0bf78f33c9..2e05b652822b5 100644
--- a/flang/test/Lower/volatile-openmp.f90
+++ b/flang/test/Lower/volatile-openmp.f90
@@ -23,11 +23,11 @@
 ! CHECK:           %[[VAL_11:.*]] = fir.address_of(@_QFEcontainer) : !fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>>
 ! CHECK:           %[[VAL_12:.*]] = fir.volatile_cast %[[VAL_11]] : (!fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>>) -> !fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>, volatile>
 ! CHECK:           %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {fortran_attrs = #fir.var_attrs<volatile>, uniq_name = "_QFEcontainer"} : (!fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>, volatile>) -> (!fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>, volatile>, !fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>, volatile>)
-! CHECK:           %[[VAL_14:.*]] = fir.address_of(@_QFE.c.t) : !fir.ref<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>
+! CHECK:           %[[VAL_14:.*]] = fir.address_of(@_QFE.c.t) : !fir.ref<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>
 ! CHECK:           %[[VAL_15:.*]] = fir.shape_shift %[[VAL_0]], %[[VAL_1]] : (index, index) -> !fir.shapeshift<1>
-! CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_14]](%[[VAL_15]]) {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFE.c.t"} : (!fir.ref<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.ref<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>)
-! CHECK:           %[[VAL_17:.*]] = fir.address_of(@_QFE.dt.t) : !fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>
-! CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFE.dt.t"} : (!fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>) -> (!fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>, !fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,__padding0:!fir.array<4xi8>}>>)
+! CHECK:           %[[VAL_16:.*]]:2 = hlfir.declare %[[VAL_14]](%[[VAL_15]]) {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFE.c.t"} : (!fir.ref<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.shapeshift<1>) -> (!fir.box<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>, !fir.ref<!fir.array<1x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>)
+! CHECK:           %[[VAL_17:.*]] = fir.address_of(@_QFE.dt.t) : !fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>
+! CHECK:           %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_17]] {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFE.dt.t"} : (!fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>) -> (!fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>, !fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype{binding:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding{proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>}>>>>,name:!fir.box<!fir.ptr<!fir.char<1,?>>>,sizeinbytes:i64,uninstantiated:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,kindparameter:!fir.box<!fir.ptr<!fir.array<?xi64>>>,lenparameterkind:!fir.box<!fir.ptr<!fir.array<?xi8>>>,component:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,genre:i8,category:i8,kind:i8,rank:i8,__padding0:!fir.array<4xi8>,offset:i64,characterlen:!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>,derived:!fir.box<!fir.ptr<!fir.type<_QM__fortran_type_infoTderivedtype>>>,lenvalue:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,bounds:!fir.box<!fir.ptr<!fir.array<?x?x!fir.type<_QM__fortran_type_infoTvalue{{[<]?}}{genre:i8,__padding0:!fir.array<7xi8>,value:i64}{{[>]?}}>>>>,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>}>>>>,procptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTprocptrcomponent{name:!fir.box<!fir.ptr<!fir.char<1,?>>>,offset:i64,initialization:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}>>>>,special:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTspecialbinding{{[<]?}}{which:i8,isargdescriptorset:i8,istypebound:i8,isargcontiguousset:i8,__padding0:!fir.array<4xi8>,proc:!fir.type<_QM__fortran_builtinsT__builtin_c_funptr{__address:i64}>}{{[>]?}}>>>>,specialbitset:i32,hasparent:i8,noinitializationneeded:i8,nodestructionneeded:i8,nofinalizationneeded:i8,nodefinedassignment:i8,__padding0:!fir.array<3xi8>}>>)
 ! CHECK:           %[[VAL_19:.*]] = hlfir.designate %[[VAL_13]]#0{"array"}   {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QFTt{array:!fir.box<!fir.ptr<!fir.array<?xi32>>>}>, volatile>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>, volatile>
 ! CHECK:           %[[VAL_20:.*]] = fir.load %[[VAL_19]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>, volatile>
 ! CHECK:           %[[VAL_21:.*]]:3 = fir.box_dims %[[VAL_20]], %[[VAL_0]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>, index) -> (index, index, index)
diff --git a/flang/test/Semantics/typeinfo01.f90 b/flang/test/Semantics/typeinfo01.f90
index c1427f28753cf..7b1a19c3a9725 100644
--- a/flang/test/Semantics/typeinfo01.f90
+++ b/flang/test/Semantics/typeinfo01.f90
@@ -8,7 +8,7 @@ module m01
   end type
 !CHECK: Module scope: m01
 !CHECK: .c.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:0_8 init:[component::component(name=.n.n,genre=1_1,category=0_1,kind=4_1,rank=0_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=NULL())]
-!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t1,sizeinbytes=4_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t1,sizeinbytes=4_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .n.n, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: CHARACTER(1_8,1) init:"n"
 !CHECK: .n.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: CHARACTER(2_8,1) init:"t1"
 !CHECK: DerivedType scope: t1
@@ -23,8 +23,8 @@ module m02
   end type
 !CHECK: .c.child, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:1_8 init:[component::component(name=.n.parent,genre=1_1,category=6_1,kind=0_1,rank=0_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),derived=.dt.parent,lenvalue=NULL(),bounds=NULL(),initialization=NULL()),component(name=.n.cn,genre=1_1,category=0_1,kind=4_1,rank=0_1,offset=4_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=NULL())]
 !CHECK: .c.parent, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:0_8 init:[component::component(name=.n.pn,genre=1_1,category=0_1,kind=4_1,rank=0_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=NULL())]
-!CHECK: .dt.child, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.child,sizeinbytes=8_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.child,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=1_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
-!CHECK: .dt.parent, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.parent,sizeinbytes=4_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.parent,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.child, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.child,sizeinbytes=8_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.child,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=1_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+!CHECK: .dt.parent, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.parent,sizeinbytes=4_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.parent,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 end module
 
 module m03
@@ -35,7 +35,7 @@ module m03
   type(kpdt(4)) :: x
 !CHECK: .c.kpdt.4, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:0_8 init:[component::component(name=.n.a,genre=1_1,category=2_1,kind=4_1,rank=0_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=NULL())]
 !CHECK: .dt.kpdt, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.kpdt,uninstantiated=NULL(),kindparameter=.kp.kpdt,lenparameterkind=NULL())
-!CHECK: .dt.kpdt.4, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.kpdt,sizeinbytes=4_8,uninstantiated=.dt.kpdt,kindparameter=.kp.kpdt.4,lenparameterkind=NULL(),component=.c.kpdt.4,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.kpdt.4, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.kpdt,sizeinbytes=4_8,uninstantiated=.dt.kpdt,kindparameter=.kp.kpdt.4,lenparameterkind=NULL(),component=.c.kpdt.4,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .kp.kpdt.4, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: INTEGER(8) shape: 0_8:0_8 init:[INTEGER(8)::4_8]
 end module
 
@@ -49,7 +49,7 @@ module m04
   subroutine s1(x)
     class(tbps), intent(in) :: x
   end subroutine
-!CHECK: .dt.tbps, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.tbps,name=.n.tbps,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.tbps, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.tbps,name=.n.tbps,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .v.tbps, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(binding) shape: 0_8:1_8 init:[binding::binding(proc=s1,name=.n.b1),binding(proc=s1,name=.n.b2)]
 end module
 
@@ -61,7 +61,7 @@ module m05
   subroutine s1(x)
     class(t), intent(in) :: x
   end subroutine
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=8_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=.p.t,special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=8_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=.p.t,special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .p.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(procptrcomponent) shape: 0_8:0_8 init:[procptrcomponent::procptrcomponent(name=.n.p1,offset=0_8,initialization=s1)]
 end module
 
@@ -85,8 +85,8 @@ subroutine s2(x, y)
     class(t), intent(in) :: y
   end subroutine
 !CHECK: .c.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:0_8 init:[component::component(name=.n.t,genre=1_1,category=6_1,kind=0_1,rank=0_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),derived=.dt.t,lenvalue=NULL(),bounds=NULL(),initialization=NULL())]
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t,name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=2_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
-!CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t2,name=.n.t2,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=1_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t,name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=2_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=0_1)
+!CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t2,name=.n.t2,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=1_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=0_1)
 !CHECK: .s.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(specialbinding) shape: 0_8:0_8 init:[specialbinding::specialbinding(which=1_1,isargdescriptorset=3_1,istypebound=1_1,isargcontiguousset=0_1,proc=s1)]
 !CHECK: .v.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(binding) shape: 0_8:0_8 init:[binding::binding(proc=s1,name=.n.s1)]
 !CHECK: .v.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(binding) shape: 0_8:0_8 init:[binding::binding(proc=s2,name=.n.s1)]
@@ -103,7 +103,7 @@ impure elemental subroutine s1(x, y)
     class(t), intent(out) :: x
     class(t), intent(in) :: y
   end subroutine
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t,name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=4_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t,name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=4_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=0_1)
 !CHECK: .s.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(specialbinding) shape: 0_8:0_8 init:[specialbinding::specialbinding(which=2_1,isargdescriptorset=3_1,istypebound=1_1,isargcontiguousset=0_1,proc=s1)]
 !CHECK: .v.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(binding) shape: 0_8:0_8 init:[binding::binding(proc=s1,name=.n.s1)]
 end module
@@ -126,7 +126,7 @@ impure elemental subroutine s3(x)
   subroutine s4(x)
     type(t), contiguous :: x(:,:,:)
   end subroutine
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=7296_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=7296_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=1_1)
 !CHECK: .s.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(specialbinding) shape: 0_8:3_8 init:[specialbinding::specialbinding(which=7_1,isargdescriptorset=0_1,istypebound=1_1,isargcontiguousset=0_1,proc=s3),specialbinding(which=10_1,isargdescriptorset=1_1,istypebound=1_1,isargcontiguousset=0_1,proc=s1),specialbinding(which=11_1,isargdescriptorset=0_1,istypebound=1_1,isargcontiguousset=1_1,proc=s2),specialbinding(which=12_1,isargdescriptorset=1_1,istypebound=1_1,isargcontiguousset=1_1,proc=s4)]
 end module
 
@@ -168,7 +168,7 @@ subroutine wu(x,u,iostat,iomsg)
     integer, intent(out) :: iostat
     character(len=*), intent(inout) :: iomsg
   end subroutine
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t,name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=120_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.t,name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=120_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .s.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(specialbinding) shape: 0_8:3_8 init:[specialbinding::specialbinding(which=3_1,isargdescriptorset=1_1,istypebound=1_1,isargcontiguousset=0_1,proc=rf),specialbinding(which=4_1,isargdescriptorset=1_1,istypebound=1_1,isargcontiguousset=0_1,proc=ru),specialbinding(which=5_1,isargdescriptorset=1_1,istypebound=1_1,isargcontiguousset=0_1,proc=wf),specialbinding(which=6_1,isargdescriptorset=1_1,istypebound=1_1,isargcontiguousset=0_1,proc=wu)]
 !CHECK: .v.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(binding) shape: 0_8:3_8 init:[binding::binding(proc=rf,name=.n.rf),binding(proc=ru,name=.n.ru),binding(proc=wf,name=.n.wf),binding(proc=wu,name=.n.wu)]
 end module
@@ -217,7 +217,7 @@ subroutine wu(x,u,iostat,iomsg)
     integer, intent(out) :: iostat
     character(len=*), intent(inout) :: iomsg
   end subroutine
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=120_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.t,specialbitset=120_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .s.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(specialbinding) shape: 0_8:3_8 init:[specialbinding::specialbinding(which=3_1,isargdescriptorset=0_1,istypebound=0_1,isargcontiguousset=0_1,proc=rf),specialbinding(which=4_1,isargdescriptorset=0_1,istypebound=0_1,isargcontiguousset=0_1,proc=ru),specialbinding(which=5_1,isargdescriptorset=0_1,istypebound=0_1,isargcontiguousset=0_1,proc=wf),specialbinding(which=6_1,isargdescriptorset=0_1,istypebound=0_1,isargcontiguousset=0_1,proc=wu)]
 end module
 
@@ -234,7 +234,7 @@ module m11
 !CHECK: .c.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:3_8 init:[component::component(name=.n.allocatable,genre=3_1,category=2_1,kind=4_1,rank=1_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=NULL()),component(name=.n.pointer,genre=2_1,category=2_1,kind=4_1,rank=0_1,offset=48_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=.di.t.pointer),component(name=.n.chauto,genre=4_1,category=4_1,kind=1_1,rank=0_1,offset=72_8,characterlen=value(genre=3_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=NULL(),initialization=NULL()),component(name=.n.automatic,genre=4_1,category=2_1,kind=4_1,rank=1_1,offset=96_8,characterlen=value(genre=1_1,value=0_8),derived=NULL(),lenvalue=NULL(),bounds=.b.t.automatic,initialization=NULL())]
 !CHECK: .di.t.pointer, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(.dp.t.pointer) init:.dp.t.pointer(pointer=target)
 !CHECK: .dp.t.pointer (CompilerCreated): DerivedType components: pointer
-!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=144_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=.lpk.t,component=.c.t,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t,sizeinbytes=144_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=.lpk.t,component=.c.t,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .lpk.t, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: INTEGER(1) shape: 0_8:0_8 init:[INTEGER(1)::8_1]
 !CHECK: DerivedType scope: .dp.t.pointer size=24 alignment=8 instantiation of .dp.t.pointer
 !CHECK: pointer, POINTER size=24 offset=0: ObjectEntity type: REAL(4)
diff --git a/flang/test/Semantics/typeinfo03.f90 b/flang/test/Semantics/typeinfo03.f90
index f0c0a817da4a4..e2552d0a21d6f 100644
--- a/flang/test/Semantics/typeinfo03.f90
+++ b/flang/test/Semantics/typeinfo03.f90
@@ -6,4 +6,4 @@ module m
     class(*), pointer :: sp, ap(:)
   end type
 end module
-!CHECK: .dt.haspointer, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.haspointer,sizeinbytes=104_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.haspointer,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.haspointer, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.haspointer,sizeinbytes=104_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.haspointer,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
diff --git a/flang/test/Semantics/typeinfo04.f90 b/flang/test/Semantics/typeinfo04.f90
index de8464321a409..94dd2199db35a 100644
--- a/flang/test/Semantics/typeinfo04.f90
+++ b/flang/test/Semantics/typeinfo04.f90
@@ -7,18 +7,18 @@ module m
    contains
     final :: final
   end type
-!CHECK: .dt.finalizable, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.finalizable,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.finalizable,specialbitset=128_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
+!CHECK: .dt.finalizable, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.finalizable,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.finalizable,specialbitset=128_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=1_1)
   type, abstract :: t1
   end type
-!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.t1,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.t1,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
   type, abstract :: t2
     real, allocatable :: a(:)
   end type
-!CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.t2,sizeinbytes=48_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.t2,sizeinbytes=48_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
   type, abstract :: t3
     type(finalizable) :: x
   end type
-!CHECK: .dt.t3, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.t3,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t3,procptr=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
+!CHECK: .dt.t3, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(name=.n.t3,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t3,procptr=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=1_1)
  contains
   impure elemental subroutine final(x)
     type(finalizable), intent(in out) :: x
diff --git a/flang/test/Semantics/typeinfo05.f90 b/flang/test/Semantics/typeinfo05.f90
index 2a7f12a153eb8..df1aecf3821de 100644
--- a/flang/test/Semantics/typeinfo05.f90
+++ b/flang/test/Semantics/typeinfo05.f90
@@ -7,10 +7,10 @@ program main
   type t1
     type(t2), pointer :: b
   end type t1
-!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t1,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t1,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
   type :: t2
     type(t1) :: a
   end type t2
-! CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t2,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+! CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t2,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 end program main
 
diff --git a/flang/test/Semantics/typeinfo06.f90 b/flang/test/Semantics/typeinfo06.f90
index 2385709a8eb44..22f37b1a4369d 100644
--- a/flang/test/Semantics/typeinfo06.f90
+++ b/flang/test/Semantics/typeinfo06.f90
@@ -7,10 +7,10 @@ program main
   type t1
     type(t2), allocatable :: b
   end type t1
-!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t1,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1)
+!CHECK: .dt.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t1,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
   type :: t2
     type(t1) :: a
   end type t2
-! CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t2,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1)
+! CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t2,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 end program main
 
diff --git a/flang/test/Semantics/typeinfo07.f90 b/flang/test/Semantics/typeinfo07.f90
index e8766d9811db8..ab20d6f601106 100644
--- a/flang/test/Semantics/typeinfo07.f90
+++ b/flang/test/Semantics/typeinfo07.f90
@@ -16,7 +16,7 @@
     type(t_container_extension) :: wrapper
   end type
 end
-! CHECK: .dt.t_container, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
-! CHECK: .dt.t_container_extension, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
-! CHECK: .dt.t_container_not_polymorphic, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1)
-! CHECK: .dt.t_container_wrapper, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
+! CHECK: .dt.t_container, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=0_1)
+! CHECK: .dt.t_container_extension, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=0_1)
+! CHECK: .dt.t_container_not_polymorphic, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+! CHECK: .dt.t_container_wrapper, SAVE, TARGET (CompilerCreated, ReadOnly): {{.*}}noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=0_1)
diff --git a/flang/test/Semantics/typeinfo08.f90 b/flang/test/Semantics/typeinfo08.f90
index 689cf469dee3b..391a66f3d6664 100644
--- a/flang/test/Semantics/typeinfo08.f90
+++ b/flang/test/Semantics/typeinfo08.f90
@@ -13,7 +13,7 @@ module m
 
 !CHECK: Module scope: m size=0 alignment=1 sourceRange=113 bytes
 !CHECK: .c.s, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(component) shape: 0_8:0_8 init:[component::component(name=.n.t1,genre=1_1,category=6_1,kind=0_1,rank=0_1,offset=0_8,characterlen=value(genre=1_1,value=0_8),lenvalue=NULL(),bounds=NULL(),initialization=NULL())]
-!CHECK: .dt.s, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.s,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=.lpk.s,component=.c.s,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1)
+!CHECK: .dt.s, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.s,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=.lpk.s,component=.c.s,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
 !CHECK: .lpk.s, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: INTEGER(1) shape: 0_8:0_8 init:[INTEGER(1)::4_1]
 !CHECK: .n.s, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: CHARACTER(1_8,1) init:"s"
 !CHECK: .n.t1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: CHARACTER(2_8,1) init:"t1"
diff --git a/flang/test/Semantics/typeinfo11.f90 b/flang/test/Semantics/typeinfo11.f90
index 92efc8f9ea54b..08e0b95abb763 100644
--- a/flang/test/Semantics/typeinfo11.f90
+++ b/flang/test/Semantics/typeinfo11.f90
@@ -14,4 +14,4 @@
 type(t2) x
 end
 
-!CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t2,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1)
+!CHECK: .dt.t2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.t2,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.t2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=0_1)
diff --git a/flang/test/Semantics/typeinfo12.f90 b/flang/test/Semantics/typeinfo12.f90
new file mode 100644
index 0000000000000..6b23b63d28b1d
--- /dev/null
+++ b/flang/test/Semantics/typeinfo12.f90
@@ -0,0 +1,67 @@
+!RUN: bbc --dump-symbols %s | FileCheck %s
+!Check "nodefinedassignment" settings.
+
+module m01
+
+  type hasAsst1
+   contains
+    procedure asst1
+    generic :: assignment(=) => asst1
+  end type
+!CHECK: .dt.hasasst1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.hasasst1,name=.n.hasasst1,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=.s.hasasst1,specialbitset=4_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=0_1)
+
+  type hasAsst2 ! no defined assignment relevant to the runtime
+  end type
+  interface assignment(=)
+    procedure asst2
+  end interface
+!CHECK: .dt.hasasst2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.hasasst2,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=NULL(),procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+
+  type test1
+    type(hasAsst1) c
+  end type
+!CHECK: .dt.test1, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test1,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test1,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=0_1)
+
+  type test2
+    type(hasAsst2) c
+  end type
+!CHECK: .dt.test2, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test2,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test2,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+
+  type test3
+    type(hasAsst1), pointer :: p
+  end type
+!CHECK: .dt.test3, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test3,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test3,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+
+  type test4
+    type(hasAsst2), pointer :: p
+  end type
+!CHECK: .dt.test4, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test4,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test4,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+
+  type, extends(hasAsst1) :: test5
+  end type
+!CHECK: .dt.test5, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=.v.test5,name=.n.test5,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test5,procptr=NULL(),special=.s.test5,specialbitset=4_4,hasparent=1_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=0_1)
+
+  type, extends(hasAsst2) :: test6
+  end type
+!CHECK: .dt.test6, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test6,sizeinbytes=0_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test6,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=1_1,noinitializationneeded=1_1,nodestructionneeded=1_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+
+  type test7
+    type(test7), allocatable :: c
+  end type
+!CHECK: .dt.test7, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test7,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test7,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=1_1,nodefinedassignment=1_1)
+
+  type test8
+    class(test8), allocatable :: c
+  end type
+!CHECK: .dt.test8, SAVE, TARGET (CompilerCreated, ReadOnly): ObjectEntity type: TYPE(derivedtype) init:derivedtype(binding=NULL(),name=.n.test8,sizeinbytes=40_8,uninstantiated=NULL(),kindparameter=NULL(),lenparameterkind=NULL(),component=.c.test8,procptr=NULL(),special=NULL(),specialbitset=0_4,hasparent=0_1,noinitializationneeded=0_1,nodestructionneeded=0_1,nofinalizationneeded=0_1,nodefinedassignment=0_1)
+
+ contains
+  impure elemental subroutine asst1(left, right)
+    class(hasAsst1), intent(out) :: left
+    class(hasAsst1), intent(in) :: right
+  end
+  impure elemental subroutine asst2(left, right)
+    class(hasAsst2), intent(out) :: left
+    class(hasAsst2), intent(in) :: right
+  end
+end



More information about the llvm-commits mailing list