[flang] [llvm] [flang-rt] Added ShallowCopy API. (PR #131702)

Slava Zakharin via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 17 18:00:25 PDT 2025


https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/131702

This API will be used for copying non-contiguous arrays
into contiguous temporaries to support `-frepack-arrays`.
The builder factory API will be used in the following commits.


>From 6706ef865ade9c8f4333c51bbfbea17e6586c6c7 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 14 Mar 2025 19:50:55 -0700
Subject: [PATCH] [flang-rt] Added ShallowCopy API.

This API will be used for copying non-contiguous arrays
into contiguous temporaries to support `-frepack-arrays`.
The builder factory API will be used in the following commits.
---
 flang-rt/lib/runtime/transformational.cpp     | 79 +++++++++++++++++++
 .../unittests/Runtime/Transformational.cpp    | 64 +++++++++++++++
 flang/docs/ArrayRepacking.md                  | 17 +++-
 .../Builder/Runtime/Transformational.h        |  4 +
 .../include/flang/Runtime/transformational.h  | 21 +++++
 .../Builder/Runtime/Transformational.cpp      | 20 +++++
 6 files changed, 204 insertions(+), 1 deletion(-)

diff --git a/flang-rt/lib/runtime/transformational.cpp b/flang-rt/lib/runtime/transformational.cpp
index eb694a9f2c833..b0f62bae7fbcf 100644
--- a/flang-rt/lib/runtime/transformational.cpp
+++ b/flang-rt/lib/runtime/transformational.cpp
@@ -21,6 +21,7 @@
 #include "flang-rt/runtime/descriptor.h"
 #include "flang-rt/runtime/terminator.h"
 #include "flang-rt/runtime/tools.h"
+#include "flang-rt/runtime/type-info.h"
 #include "flang/Common/float128.h"
 
 namespace Fortran::runtime {
@@ -323,6 +324,71 @@ static inline RT_API_ATTRS void DoBesselYnX0(Descriptor &result, int32_t n1,
   }
 }
 
+// Verifies that descriptors d1 and d2 are compatible for a shallow copy and
+// reports the first mismatch found through terminator.Crash(). The checks
+// run in this order: rank, shape (via CheckConformability), element byte
+// size, type code, derived type identity, and the values of the derived
+// type's LEN parameters. funcName, d1Name and d2Name are only used to build
+// the diagnostic text.
+static inline RT_API_ATTRS void CheckConformabilityForShallowCopy(
+    const Descriptor &d1, const Descriptor &d2, Terminator &terminator,
+    const char *funcName, const char *d1Name, const char *d2Name) {
+  if (d1.rank() != d2.rank()) {
+    // Each rank is reported under the name of the descriptor it belongs to.
+    terminator.Crash(
+        "Incompatible arguments to %s: %s has rank %d, %s has rank %d",
+        funcName, d1Name, d1.rank(), d2Name, d2.rank());
+  }
+
+  // Check that the shapes conform.
+  CheckConformability(d1, d2, terminator, funcName, d1Name, d2Name);
+
+  if (d1.ElementBytes() != d2.ElementBytes()) {
+    terminator.Crash("Incompatible arguments to %s: %s has element byte length "
+                     "%zd, %s has length %zd",
+        funcName, d1Name, d1.ElementBytes(), d2Name, d2.ElementBytes());
+  }
+  if (d1.type() != d2.type()) {
+    // Hand the integer type codes to the printf-style formatter; a TypeCode
+    // object itself is not a valid argument for a %d conversion.
+    terminator.Crash("Incompatible arguments to %s: %s has type code %d, %s "
+                     "has type code %d",
+        funcName, d1Name, d1.type().raw(), d2Name, d2.type().raw());
+  }
+  // A descriptor without an addendum is treated as having no derived type.
+  const DescriptorAddendum *d1Addendum{d1.Addendum()};
+  const typeInfo::DerivedType *d1Derived{
+      d1Addendum ? d1Addendum->derivedType() : nullptr};
+  const DescriptorAddendum *d2Addendum{d2.Addendum()};
+  const typeInfo::DerivedType *d2Derived{
+      d2Addendum ? d2Addendum->derivedType() : nullptr};
+  if (d1Derived != d2Derived) {
+    terminator.Crash(
+        "Incompatible arguments to %s: %s and %s have different derived types",
+        funcName, d1Name, d2Name);
+  }
+  if (d2Derived) {
+    // Compare LEN parameters.
+    // The derived types compared equal above, so a non-null d2Derived implies
+    // that d1Addendum is non-null as well (Crash() is expected not to return).
+    std::size_t lenParms{d2Derived->LenParameters()};
+    for (std::size_t j{0}; j < lenParms; ++j) {
+      if (d1Addendum->LenParameterValue(j) !=
+          d2Addendum->LenParameterValue(j)) {
+        terminator.Crash("Incompatible arguments to %s: type length parameter "
+                         "%zd for %s is %zd, for %s is %zd",
+            funcName, j, d1Name,
+            static_cast<std::size_t>(d1Addendum->LenParameterValue(j)), d2Name,
+            static_cast<std::size_t>(d2Addendum->LenParameterValue(j)));
+      }
+    }
+  }
+}
+
+// Common implementation of the ShallowCopy entry points.
+// With IS_ALLOCATING == true, result is first allocated by AllocateResult
+// using the rank and shape of source. With IS_ALLOCATING == false, result is
+// supplied by the caller and is only checked against source by
+// CheckConformabilityForShallowCopy. In both cases the data is then
+// transferred by the runtime's two-argument ShallowCopy helper.
+template <bool IS_ALLOCATING>
+static inline RT_API_ATTRS void DoShallowCopy(
+    std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor> &result,
+    const Descriptor &source, Terminator &terminator, const char *funcName) {
+  if constexpr (!IS_ALLOCATING) {
+    // Caller-provided result: it must match the source before copying.
+    CheckConformabilityForShallowCopy(
+        result, source, terminator, funcName, "RESULT=", "SOURCE=");
+  } else {
+    // Allocate the result using the extents of the source.
+    SubscriptValue shape[maxRank];
+    source.GetShape(shape);
+    AllocateResult(result, source, source.rank(), shape, terminator, funcName);
+  }
+
+  ShallowCopy(result, source);
+}
+
 extern "C" {
 RT_EXT_API_GROUP_BEGIN
 
@@ -815,6 +881,19 @@ void RTDEF(Reshape)(Descriptor &result, const Descriptor &source,
   }
 }
 
+// ShallowCopy: allocating variant.
+// Builds a Terminator from the caller's source position (sourceFile, line)
+// and forwards to DoShallowCopy<true>, which allocates result from the shape
+// of source before copying.
+void RTDEF(ShallowCopy)(Descriptor &result, const Descriptor &source,
+    const char *sourceFile, int line) {
+  Terminator terminator{sourceFile, line};
+  DoShallowCopy<true>(result, source, terminator, "ShallowCopy");
+}
+
+// ShallowCopyDirect: variant for a caller-provided result.
+// Builds a Terminator from the caller's source position (sourceFile, line)
+// and forwards to DoShallowCopy<false>, which checks that result and source
+// are compatible instead of allocating result.
+void RTDEF(ShallowCopyDirect)(const Descriptor &result,
+    const Descriptor &source, const char *sourceFile, int line) {
+  Terminator terminator{sourceFile, line};
+  DoShallowCopy<false>(result, source, terminator, "ShallowCopyDirect");
+}
+
 // SPREAD
 void RTDEF(Spread)(Descriptor &result, const Descriptor &source, int dim,
     std::int64_t ncopies, const char *sourceFile, int line) {
diff --git a/flang-rt/unittests/Runtime/Transformational.cpp b/flang-rt/unittests/Runtime/Transformational.cpp
index 06df96a3cc45a..e61f5a64fcdb0 100644
--- a/flang-rt/unittests/Runtime/Transformational.cpp
+++ b/flang-rt/unittests/Runtime/Transformational.cpp
@@ -550,3 +550,67 @@ TEST(Transformational, TransposeReal10) {
   result.Destroy();
 }
 #endif
+
+// Exercises both runtime entry points: ShallowCopy (which allocates the
+// result) and ShallowCopyDirect (which copies into an existing result),
+// first on a 2x3 character array and then on a rank-0 integer.
+TEST(Transformational, ShallowCopy) {
+  // Source: 2x3 array of 2-byte character elements with non-default lower
+  // bounds (-1 and 3).
+  auto charArray{MakeArray<TypeCategory::Character, 1>(std::vector<int>{2, 3},
+      std::vector<std::string>{"ab", "cd", "ef", "gh", "ij", "kl"}, 2)};
+  charArray->GetDimension(0).SetBounds(-1, 0);
+  charArray->GetDimension(1).SetBounds(3, 5);
+  StaticDescriptor<2> staticCharResult;
+  Descriptor &charResult{staticCharResult.descriptor()};
+
+  // Test allocating ShallowCopy.
+  // The result must keep the source's type, element size and extents, but
+  // use lower bounds of 1 in every dimension.
+  RTNAME(ShallowCopy)(charResult, *charArray);
+  ASSERT_TRUE(charResult.IsAllocated());
+  ASSERT_TRUE(charResult.IsContiguous());
+  ASSERT_EQ(charResult.type(), charArray->type());
+  ASSERT_EQ(charResult.ElementBytes(), 2u);
+  EXPECT_EQ(charResult.GetDimension(0).LowerBound(), 1);
+  EXPECT_EQ(charResult.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(charResult.GetDimension(1).LowerBound(), 1);
+  EXPECT_EQ(charResult.GetDimension(1).Extent(), 3);
+  // The copied storage is compared byte-wise against the source elements
+  // concatenated in their original order.
+  std::string expectedCharResult{"abcdefghijkl"};
+  EXPECT_EQ(std::memcmp(charResult.OffsetElement<char>(0),
+                expectedCharResult.data(), expectedCharResult.length()),
+      0);
+
+  // Test ShallowCopyDirect with pre-allocated result.
+  // Overwrite the previously copied data so that a successful copy is
+  // observable, and remember the storage address to detect re-allocation.
+  char *allocatedPtr = charResult.OffsetElement<char>(0);
+  std::memset(
+      charResult.OffsetElement<char>(0), 'x', expectedCharResult.length());
+  // Set new lower bounds for charResult.
+  charResult.GetDimension(0).SetBounds(-2, -1);
+  charResult.GetDimension(1).SetBounds(2, 4);
+  RTNAME(ShallowCopyDirect)(charResult, *charArray);
+  ASSERT_TRUE(charResult.IsAllocated());
+  ASSERT_TRUE(charResult.IsContiguous());
+  ASSERT_EQ(charResult.type(), charArray->type());
+  ASSERT_EQ(charResult.ElementBytes(), 2u);
+  // The lower bounds set on the result above must be left untouched.
+  EXPECT_EQ(charResult.GetDimension(0).LowerBound(), -2);
+  EXPECT_EQ(charResult.GetDimension(0).Extent(), 2);
+  EXPECT_EQ(charResult.GetDimension(1).LowerBound(), 2);
+  EXPECT_EQ(charResult.GetDimension(1).Extent(), 3);
+  // Test that the result was not re-allocated.
+  EXPECT_EQ(allocatedPtr, charResult.OffsetElement<char>(0));
+  EXPECT_EQ(std::memcmp(charResult.OffsetElement<char>(0),
+                expectedCharResult.data(), expectedCharResult.length()),
+      0);
+  charResult.Destroy();
+
+  // Repeat both variants for a rank-0 (empty shape) integer holding -1.
+  auto intScalar{MakeArray<TypeCategory::Integer, 4>(
+      std::vector<int>{}, std::vector<std::int32_t>{-1})};
+  StaticDescriptor<0> staticIntResult;
+  Descriptor &intResult{staticIntResult.descriptor()};
+  RTNAME(ShallowCopy)(intResult, *intScalar);
+  ASSERT_TRUE(intResult.IsAllocated());
+  ASSERT_EQ(intResult.rank(), 0);
+  ASSERT_EQ(*intResult.ZeroBasedIndexedElement<std::int32_t>(0), -1);
+  // Clobber the value so that the direct copy below has a visible effect.
+  *intResult.ZeroBasedIndexedElement<std::int32_t>(0) = 0;
+  allocatedPtr = intResult.OffsetElement<char>(0);
+  RTNAME(ShallowCopyDirect)(intResult, *intScalar);
+  ASSERT_TRUE(intResult.IsAllocated());
+  ASSERT_EQ(intResult.rank(), 0);
+  ASSERT_EQ(*intResult.ZeroBasedIndexedElement<std::int32_t>(0), -1);
+  // The direct copy must reuse the existing storage.
+  EXPECT_EQ(allocatedPtr, intResult.OffsetElement<char>(0));
+  intResult.Destroy();
+}
diff --git a/flang/docs/ArrayRepacking.md b/flang/docs/ArrayRepacking.md
index 544904fd968cc..18a6fee4e01ee 100755
--- a/flang/docs/ArrayRepacking.md
+++ b/flang/docs/ArrayRepacking.md
@@ -400,7 +400,22 @@ Lowering of the new operations (after all the optimizations) might be done in a
 
 ### Runtime
 
-[TBD] define the runtime APIs.
+The array copies required for `pack/unpack` actions are done using the `ShallowCopyDirect` API of flang-rt.
+
+```C++
+void RTDECL(ShallowCopyDirect)(
+    const Descriptor &result,
+    const Descriptor &source,
+    const char *sourceFile = nullptr,
+    int line = 0);
+```
+
+It copies values from the `source` array into the pre-allocated `result` array. Its semantics differ from those of the `Assign` runtime for derived types: it does not perform the recursive assignment actions for the components of derived types.
+
+The arrays must be conforming, i.e. they must have:
+  * Same rank.
+  * Same extents.
+  * Same size and type of elements (including the type parameters).
 
 ### Optimization passes
 
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h b/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h
index ae0a0979902f5..eda1a7f86f6bb 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h
@@ -63,6 +63,10 @@ void genPack(fir::FirOpBuilder &builder, mlir::Location loc,
              mlir::Value resultBox, mlir::Value arrayBox, mlir::Value maskBox,
              mlir::Value vectorBox);
 
+/// Generate a call to the ShallowCopy or ShallowCopyDirect runtime routine
+/// that copies \p arrayBox into \p resultBox.
+/// ShallowCopyDirect is called iff \p resultIsAllocated is true.
+void genShallowCopy(fir::FirOpBuilder &builder, mlir::Location loc,
+                    mlir::Value resultBox, mlir::Value arrayBox,
+                    bool resultIsAllocated);
+
 void genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
                 mlir::Value resultBox, mlir::Value sourceBox,
                 mlir::Value shapeBox, mlir::Value padBox, mlir::Value orderBox);
diff --git a/flang/include/flang/Runtime/transformational.h b/flang/include/flang/Runtime/transformational.h
index d04fa68597883..3393ed1dc9813 100644
--- a/flang/include/flang/Runtime/transformational.h
+++ b/flang/include/flang/Runtime/transformational.h
@@ -146,6 +146,27 @@ void RTDECL(Pack)(Descriptor &result, const Descriptor &source,
     const Descriptor &mask, const Descriptor *vector = nullptr,
     const char *sourceFile = nullptr, int line = 0);
 
+/// Produce a shallow copy of the \p source in \p result.
+/// The \p source may have any type and rank.
+/// Unless \p source is unallocated, the \p result will
+/// be allocated using the same shape and dynamic type,
+/// and will contain the same top-level values as the \p source.
+/// The \p result will have the default lower bounds, if it is an array.
+/// As the name suggests, it is different from the Assign runtime,
+/// because it does not perform recursive assign actions
+/// for the components of the derived types.
+/// The \p sourceFile and \p line give the caller's source position,
+/// which is handed to the runtime's error reporting.
+void RTDECL(ShallowCopy)(Descriptor &result, const Descriptor &source,
+    const char *sourceFile = nullptr, int line = 0);
+
+/// Same as ShallowCopy, where the caller provides a pre-allocated
+/// \p result. The \p source and \p result must be conforming:
+///   * Same rank.
+///   * Same extents.
+///   * Same size and type of elements (including the type parameters).
+/// A violation of these requirements is reported as a runtime crash
+/// that refers to \p sourceFile and \p line.
+/// If \p result is an array, its lower bounds are not affected.
+void RTDECL(ShallowCopyDirect)(const Descriptor &result,
+    const Descriptor &source, const char *sourceFile = nullptr, int line = 0);
+
 void RTDECL(Spread)(Descriptor &result, const Descriptor &source, int dim,
     std::int64_t ncopies, const char *sourceFile = nullptr, int line = 0);
 
diff --git a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
index 978524494af9b..47744b0facb53 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
@@ -474,6 +474,26 @@ void fir::runtime::genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
   builder.create<fir::CallOp>(loc, func, args);
 }
 
+/// Generate a call to the ShallowCopy or ShallowCopyDirect runtime routine.
+/// ShallowCopyDirect is selected iff \p resultIsAllocated is true; otherwise
+/// ShallowCopy is called. The call receives \p resultBox and \p arrayBox
+/// followed by the source file name and line number derived from \p loc.
+void fir::runtime::genShallowCopy(fir::FirOpBuilder &builder,
+                                  mlir::Location loc, mlir::Value resultBox,
+                                  mlir::Value arrayBox,
+                                  bool resultIsAllocated) {
+  auto func =
+      resultIsAllocated
+          ? fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopyDirect)>(loc,
+                                                                     builder)
+          : fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopy)>(loc, builder);
+  auto funcTy = func.getFunctionType();
+  auto fileName = fir::factory::locationToFilename(builder, loc);
+  // Input #3 is the `line` parameter of both runtime entry points.
+  auto lineNo =
+      fir::factory::locationToLineNo(builder, loc, funcTy.getInput(3));
+  auto args = fir::runtime::createArguments(builder, loc, funcTy, resultBox,
+                                            arrayBox, fileName, lineNo);
+  builder.create<fir::CallOp>(loc, func, args);
+}
+
 /// Generate call to Spread intrinsic runtime routine.
 void fir::runtime::genSpread(fir::FirOpBuilder &builder, mlir::Location loc,
                              mlir::Value resultBox, mlir::Value sourceBox,



More information about the llvm-commits mailing list