[flang-commits] [flang] [flang][runtime] Invert component/element loops in assignment (PR #78341)
Peter Klausler via flang-commits
flang-commits at lists.llvm.org
Tue Jan 16 12:04:33 PST 2024
https://github.com/klausler created https://github.com/llvm/llvm-project/pull/78341
The general implementation of intrinsic assignment of derived types in the runtime support library has a doubly-nested loop: an outer loop that traverses the components and inner loops that traverse the array elements. It's done this way to amortize the per-component overhead. However, this turns out to be wrong when the program cares about the order in which defined assignment subroutines are called; the Fortran standard allows less latitude here than we need to invert the ordering in this way when any component is itself an array. So invert the two loops: traverse the array elements, and for each element, traverse its components.
From 5c25100589a4ae761b5119322ae2f1bcdce002e9 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler at nvidia.com>
Date: Tue, 16 Jan 2024 11:57:45 -0800
Subject: [PATCH] [flang][runtime] Invert component/element loops in assignment
The general implementation of intrinsic assignment of derived types
in the runtime support library has a doubly-nested loop: an outer
loop that traverses the components and inner loops that traverse
the array elements. It's done this way to amortize the per-component
overhead. However, this turns out to be wrong when the program cares
about the order in which defined assignment subroutines are called;
the Fortran standard allows less latitude here than we need to invert
the ordering in this way when any component is itself an array.
So invert the two loops: traverse the array elements, and for each
element, traverse its components.
---
flang/runtime/assign.cpp | 82 ++++++++++++++++++----------------------
1 file changed, 36 insertions(+), 46 deletions(-)
diff --git a/flang/runtime/assign.cpp b/flang/runtime/assign.cpp
index 237acb0c89fc2e..879b413efe1270 100644
--- a/flang/runtime/assign.cpp
+++ b/flang/runtime/assign.cpp
@@ -393,53 +393,45 @@ RT_API_ATTRS static void Assign(
// Copy the data components (incl. the parent) first.
const Descriptor &componentDesc{updatedToDerived->component()};
std::size_t numComponents{componentDesc.Elements()};
- for (std::size_t k{0}; k < numComponents; ++k) {
- const auto &comp{
- *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
- k)}; // TODO: exploit contiguity here
- // Use PolymorphicLHS for components so that the right things happen
- // when the components are polymorphic; when they're not, they're both
- // not, and their declared types will match.
- int nestedFlags{MaybeReallocate | PolymorphicLHS};
- if (flags & ComponentCanBeDefinedAssignment) {
- nestedFlags |= CanBeDefinedAssignment | ComponentCanBeDefinedAssignment;
- }
- switch (comp.genre()) {
- case typeInfo::Component::Genre::Data:
- if (comp.category() == TypeCategory::Derived) {
- StaticDescriptor<maxRank, true, 10 /*?*/> statDesc[2];
- Descriptor &toCompDesc{statDesc[0].descriptor()};
- Descriptor &fromCompDesc{statDesc[1].descriptor()};
- for (std::size_t j{0}; j < toElements; ++j,
- to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
+ for (std::size_t j{0}; j < toElements;
+ ++j, to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
+ for (std::size_t k{0}; k < numComponents; ++k) {
+ const auto &comp{
+ *componentDesc.ZeroBasedIndexedElement<typeInfo::Component>(
+ k)}; // TODO: exploit contiguity here
+ // Use PolymorphicLHS for components so that the right things happen
+ // when the components are polymorphic; when they're not, they're both
+ // not, and their declared types will match.
+ int nestedFlags{MaybeReallocate | PolymorphicLHS};
+ if (flags & ComponentCanBeDefinedAssignment) {
+ nestedFlags |=
+ CanBeDefinedAssignment | ComponentCanBeDefinedAssignment;
+ }
+ switch (comp.genre()) {
+ case typeInfo::Component::Genre::Data:
+ if (comp.category() == TypeCategory::Derived) {
+ StaticDescriptor<maxRank, true, 10 /*?*/> statDesc[2];
+ Descriptor &toCompDesc{statDesc[0].descriptor()};
+ Descriptor &fromCompDesc{statDesc[1].descriptor()};
comp.CreatePointerDescriptor(toCompDesc, to, terminator, toAt);
comp.CreatePointerDescriptor(
fromCompDesc, from, terminator, fromAt);
Assign(toCompDesc, fromCompDesc, terminator, nestedFlags);
- }
- } else { // Component has intrinsic type; simply copy raw bytes
- std::size_t componentByteSize{comp.SizeInBytes(to)};
- for (std::size_t j{0}; j < toElements; ++j,
- to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
+ } else { // Component has intrinsic type; simply copy raw bytes
+ std::size_t componentByteSize{comp.SizeInBytes(to)};
Fortran::runtime::memmove(to.Element<char>(toAt) + comp.offset(),
from.Element<const char>(fromAt) + comp.offset(),
componentByteSize);
}
- }
- break;
- case typeInfo::Component::Genre::Pointer: {
- std::size_t componentByteSize{comp.SizeInBytes(to)};
- for (std::size_t j{0}; j < toElements; ++j,
- to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
+ break;
+ case typeInfo::Component::Genre::Pointer: {
+ std::size_t componentByteSize{comp.SizeInBytes(to)};
Fortran::runtime::memmove(to.Element<char>(toAt) + comp.offset(),
from.Element<const char>(fromAt) + comp.offset(),
componentByteSize);
- }
- } break;
- case typeInfo::Component::Genre::Allocatable:
- case typeInfo::Component::Genre::Automatic:
- for (std::size_t j{0}; j < toElements; ++j,
- to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
+ } break;
+ case typeInfo::Component::Genre::Allocatable:
+ case typeInfo::Component::Genre::Automatic: {
auto *toDesc{reinterpret_cast<Descriptor *>(
to.Element<char>(toAt) + comp.offset())};
const auto *fromDesc{reinterpret_cast<const Descriptor *>(
@@ -470,18 +462,16 @@ RT_API_ATTRS static void Assign(
// The actual deallocation may be avoided, if the existing
// location can be reoccupied.
Assign(*toDesc, *fromDesc, terminator, nestedFlags | DeallocateLHS);
+ } break;
}
- break;
}
- }
- // Copy procedure pointer components
- const Descriptor &procPtrDesc{updatedToDerived->procPtr()};
- std::size_t numProcPtrs{procPtrDesc.Elements()};
- for (std::size_t k{0}; k < numProcPtrs; ++k) {
- const auto &procPtr{
- *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(k)};
- for (std::size_t j{0}; j < toElements; ++j, to.IncrementSubscripts(toAt),
- from.IncrementSubscripts(fromAt)) {
+ // Copy procedure pointer components
+ const Descriptor &procPtrDesc{updatedToDerived->procPtr()};
+ std::size_t numProcPtrs{procPtrDesc.Elements()};
+ for (std::size_t k{0}; k < numProcPtrs; ++k) {
+ const auto &procPtr{
+ *procPtrDesc.ZeroBasedIndexedElement<typeInfo::ProcPtrComponent>(
+ k)};
Fortran::runtime::memmove(to.Element<char>(toAt) + procPtr.offset,
from.Element<const char>(fromAt) + procPtr.offset,
sizeof(typeInfo::ProcedurePointer));
More information about the flang-commits
mailing list