[flang] [llvm] [flang-rt] Added ShallowCopy API. (PR #131702)
Slava Zakharin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 17 18:00:25 PDT 2025
https://github.com/vzakhari created https://github.com/llvm/llvm-project/pull/131702
This API will be used for copying non-contiguous arrays
into contiguous temporaries to support `-frepack-arrays`.
The builder factory API will be used in the following commits.
>From 6706ef865ade9c8f4333c51bbfbea17e6586c6c7 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Fri, 14 Mar 2025 19:50:55 -0700
Subject: [PATCH] [flang-rt] Added ShallowCopy API.
This API will be used for copying non-contiguous arrays
into contiguous temporaries to support `-frepack-arrays`.
The builder factory API will be used in the following commits.
---
flang-rt/lib/runtime/transformational.cpp | 79 +++++++++++++++++++
.../unittests/Runtime/Transformational.cpp | 64 +++++++++++++++
flang/docs/ArrayRepacking.md | 17 +++-
.../Builder/Runtime/Transformational.h | 4 +
.../include/flang/Runtime/transformational.h | 21 +++++
.../Builder/Runtime/Transformational.cpp | 20 +++++
6 files changed, 204 insertions(+), 1 deletion(-)
diff --git a/flang-rt/lib/runtime/transformational.cpp b/flang-rt/lib/runtime/transformational.cpp
index eb694a9f2c833..b0f62bae7fbcf 100644
--- a/flang-rt/lib/runtime/transformational.cpp
+++ b/flang-rt/lib/runtime/transformational.cpp
@@ -21,6 +21,7 @@
#include "flang-rt/runtime/descriptor.h"
#include "flang-rt/runtime/terminator.h"
#include "flang-rt/runtime/tools.h"
+#include "flang-rt/runtime/type-info.h"
#include "flang/Common/float128.h"
namespace Fortran::runtime {
@@ -323,6 +324,71 @@ static inline RT_API_ATTRS void DoBesselYnX0(Descriptor &result, int32_t n1,
}
}
+// Verifies that \p d1 and \p d2 are compatible for a direct shallow copy and
+// crashes through \p terminator, with a diagnostic naming \p funcName and the
+// offending argument(s), otherwise. The checks, in order: equal rank,
+// conforming shapes (CheckConformability), equal element byte size, equal
+// type code, identical derived type description, and equal LEN type
+// parameter values. \p d1Name and \p d2Name are the labels used for the two
+// descriptors in the messages.
+static inline RT_API_ATTRS void CheckConformabilityForShallowCopy(
+    const Descriptor &d1, const Descriptor &d2, Terminator &terminator,
+    const char *funcName, const char *d1Name, const char *d2Name) {
+  if (d1.rank() != d2.rank()) {
+    // Each rank is reported under the label of the descriptor it belongs to.
+    terminator.Crash(
+        "Incompatible arguments to %s: %s has rank %d, %s has rank %d",
+        funcName, d1Name, d1.rank(), d2Name, d2.rank());
+  }
+
+  // Check that the shapes conform.
+  CheckConformability(d1, d2, terminator, funcName, d1Name, d2Name);
+
+  if (d1.ElementBytes() != d2.ElementBytes()) {
+    terminator.Crash("Incompatible arguments to %s: %s has element byte length "
+                     "%zd, %s has length %zd",
+        funcName, d1Name, d1.ElementBytes(), d2Name, d2.ElementBytes());
+  }
+  if (d1.type() != d2.type()) {
+    // NOTE(review): type() is passed directly to a %d conversion; confirm
+    // that its return type is valid as a variadic int argument.
+    terminator.Crash("Incompatible arguments to %s: %s has type code %d, %s "
+                     "has type code %d",
+        funcName, d1Name, d1.type(), d2Name, d2.type());
+  }
+  // A descriptor without an addendum is treated as having no derived type.
+  const DescriptorAddendum *d1Addendum{d1.Addendum()};
+  const typeInfo::DerivedType *d1Derived{
+      d1Addendum ? d1Addendum->derivedType() : nullptr};
+  const DescriptorAddendum *d2Addendum{d2.Addendum()};
+  const typeInfo::DerivedType *d2Derived{
+      d2Addendum ? d2Addendum->derivedType() : nullptr};
+  if (d1Derived != d2Derived) {
+    terminator.Crash(
+        "Incompatible arguments to %s: %s and %s have different derived types",
+        funcName, d1Name, d2Name);
+  }
+  if (d2Derived) {
+    // Compare LEN parameters. Dereferencing d1Addendum here relies on the
+    // derived types having compared equal above (Crash() is assumed not to
+    // return), so both addenda are non-null at this point.
+    std::size_t lenParms{d2Derived->LenParameters()};
+    for (std::size_t j{0}; j < lenParms; ++j) {
+      if (d1Addendum->LenParameterValue(j) !=
+          d2Addendum->LenParameterValue(j)) {
+        terminator.Crash("Incompatible arguments to %s: type length parameter "
+                         "%zd for %s is %zd, for %s is %zd",
+            funcName, j, d1Name,
+            static_cast<std::size_t>(d1Addendum->LenParameterValue(j)), d2Name,
+            static_cast<std::size_t>(d2Addendum->LenParameterValue(j)));
+      }
+    }
+  }
+}
+
+// Shared implementation of the ShallowCopy and ShallowCopyDirect entry points.
+// When IS_ALLOCATING is false, the caller-provided \p result is validated
+// against \p source; otherwise \p result is set up by AllocateResult() using
+// the rank and shape of \p source. In both cases the copy itself is done by
+// ShallowCopy(). \p funcName is forwarded for use in diagnostics.
+template <bool IS_ALLOCATING>
+static inline RT_API_ATTRS void DoShallowCopy(
+    std::conditional_t<IS_ALLOCATING, Descriptor, const Descriptor> &result,
+    const Descriptor &source, Terminator &terminator, const char *funcName) {
+  if constexpr (!IS_ALLOCATING) {
+    // Pre-allocated result: it must conform to the source.
+    CheckConformabilityForShallowCopy(
+        result, source, terminator, funcName, "RESULT=", "SOURCE=");
+  } else {
+    // Allocating variant: shape the result after the source.
+    SubscriptValue sourceShape[maxRank];
+    source.GetShape(sourceShape);
+    AllocateResult(
+        result, source, source.rank(), sourceShape, terminator, funcName);
+  }
+
+  ShallowCopy(result, source);
+}
+
extern "C" {
RT_EXT_API_GROUP_BEGIN
@@ -815,6 +881,19 @@ void RTDEF(Reshape)(Descriptor &result, const Descriptor &source,
}
}
+// ShallowCopy: allocating variant; \p result is set up from \p source.
+void RTDEF(ShallowCopy)(Descriptor &result, const Descriptor &source,
+    const char *sourceFile, int line) {
+  Terminator crashHandler{sourceFile, line};
+  DoShallowCopy</*IS_ALLOCATING=*/true>(
+      result, source, crashHandler, "ShallowCopy");
+}
+
+// ShallowCopyDirect: copies into a caller-provided \p result after checking
+// that it conforms to \p source.
+void RTDEF(ShallowCopyDirect)(const Descriptor &result,
+    const Descriptor &source, const char *sourceFile, int line) {
+  Terminator crashHandler{sourceFile, line};
+  DoShallowCopy</*IS_ALLOCATING=*/false>(
+      result, source, crashHandler, "ShallowCopyDirect");
+}
+
// SPREAD
void RTDEF(Spread)(Descriptor &result, const Descriptor &source, int dim,
std::int64_t ncopies, const char *sourceFile, int line) {
diff --git a/flang-rt/unittests/Runtime/Transformational.cpp b/flang-rt/unittests/Runtime/Transformational.cpp
index 06df96a3cc45a..e61f5a64fcdb0 100644
--- a/flang-rt/unittests/Runtime/Transformational.cpp
+++ b/flang-rt/unittests/Runtime/Transformational.cpp
@@ -550,3 +550,67 @@ TEST(Transformational, TransposeReal10) {
result.Destroy();
}
#endif
+
+TEST(Transformational, ShallowCopy) {
+ auto charArray{MakeArray<TypeCategory::Character, 1>(std::vector<int>{2, 3},
+ std::vector<std::string>{"ab", "cd", "ef", "gh", "ij", "kl"}, 2)}; // source: 2x3 CHARACTER array (extents and 2-byte elements are asserted on the copies below)
+ charArray->GetDimension(0).SetBounds(-1, 0); // give the source non-default lower bounds
+ charArray->GetDimension(1).SetBounds(3, 5);
+ StaticDescriptor<2> staticCharResult;
+ Descriptor &charResult{staticCharResult.descriptor()};
+
+ // Test allocating ShallowCopy: the result must be allocated, contiguous, and use default lower bounds.
+ RTNAME(ShallowCopy)(charResult, *charArray);
+ ASSERT_TRUE(charResult.IsAllocated());
+ ASSERT_TRUE(charResult.IsContiguous());
+ ASSERT_EQ(charResult.type(), charArray->type());
+ ASSERT_EQ(charResult.ElementBytes(), 2u);
+ EXPECT_EQ(charResult.GetDimension(0).LowerBound(), 1);
+ EXPECT_EQ(charResult.GetDimension(0).Extent(), 2);
+ EXPECT_EQ(charResult.GetDimension(1).LowerBound(), 1);
+ EXPECT_EQ(charResult.GetDimension(1).Extent(), 3);
+ std::string expectedCharResult{"abcdefghijkl"}; // the six source elements laid out back to back
+ EXPECT_EQ(std::memcmp(charResult.OffsetElement<char>(0),
+ expectedCharResult.data(), expectedCharResult.length()),
+ 0);
+
+ // Test ShallowCopyDirect with pre-allocated result (the storage allocated above, overwritten with 'x' first).
+ char *allocatedPtr = charResult.OffsetElement<char>(0); // remembered to detect re-allocation
+ std::memset(
+ charResult.OffsetElement<char>(0), 'x', expectedCharResult.length());
+ // Set new lower bounds for charResult; they are expected to survive the direct copy.
+ charResult.GetDimension(0).SetBounds(-2, -1);
+ charResult.GetDimension(1).SetBounds(2, 4);
+ RTNAME(ShallowCopyDirect)(charResult, *charArray);
+ ASSERT_TRUE(charResult.IsAllocated());
+ ASSERT_TRUE(charResult.IsContiguous());
+ ASSERT_EQ(charResult.type(), charArray->type());
+ ASSERT_EQ(charResult.ElementBytes(), 2u);
+ EXPECT_EQ(charResult.GetDimension(0).LowerBound(), -2);
+ EXPECT_EQ(charResult.GetDimension(0).Extent(), 2);
+ EXPECT_EQ(charResult.GetDimension(1).LowerBound(), 2);
+ EXPECT_EQ(charResult.GetDimension(1).Extent(), 3);
+ // Test that the result was not re-allocated.
+ EXPECT_EQ(allocatedPtr, charResult.OffsetElement<char>(0));
+ EXPECT_EQ(std::memcmp(charResult.OffsetElement<char>(0),
+ expectedCharResult.data(), expectedCharResult.length()),
+ 0);
+ charResult.Destroy();
+
+ auto intScalar{MakeArray<TypeCategory::Integer, 4>(
+ std::vector<int>{}, std::vector<std::int32_t>{-1})}; // empty shape vector: the copy is asserted to have rank 0 and value -1
+ StaticDescriptor<0> staticIntResult;
+ Descriptor &intResult{staticIntResult.descriptor()};
+ RTNAME(ShallowCopy)(intResult, *intScalar);
+ ASSERT_TRUE(intResult.IsAllocated());
+ ASSERT_EQ(intResult.rank(), 0);
+ ASSERT_EQ(*intResult.ZeroBasedIndexedElement<std::int32_t>(0), -1);
+ *intResult.ZeroBasedIndexedElement<std::int32_t>(0) = 0; // clobber the value so the direct copy below is observable
+ allocatedPtr = intResult.OffsetElement<char>(0);
+ RTNAME(ShallowCopyDirect)(intResult, *intScalar);
+ ASSERT_TRUE(intResult.IsAllocated());
+ ASSERT_EQ(intResult.rank(), 0);
+ ASSERT_EQ(*intResult.ZeroBasedIndexedElement<std::int32_t>(0), -1);
+ EXPECT_EQ(allocatedPtr, intResult.OffsetElement<char>(0)); // same storage: no re-allocation
+ intResult.Destroy();
+}
diff --git a/flang/docs/ArrayRepacking.md b/flang/docs/ArrayRepacking.md
index 544904fd968cc..18a6fee4e01ee 100755
--- a/flang/docs/ArrayRepacking.md
+++ b/flang/docs/ArrayRepacking.md
@@ -400,7 +400,22 @@ Lowering of the new operations (after all the optimizations) might be done in a
### Runtime
-[TBD] define the runtime APIs.
+The array copies required for `pack/unpack` actions are done using the `ShallowCopyDirect` API of flang-rt.
+
+```C++
+void RTDECL(ShallowCopyDirect)(
+ const Descriptor &result,
+ const Descriptor &source,
+ const char *sourceFile = nullptr,
+ int line = 0);
+```
+
+It copies values from the `source` array into the pre-allocated `result` array. Its semantics differ from those of the `Assign` runtime for derived types: it does not perform the recursive assign actions for the components of derived types.
+
+The arrays must be conforming, i.e. they must have:
+ * Same rank.
+ * Same extents.
+ * Same size and type of elements (including the type parameters).
### Optimization passes
diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h b/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h
index ae0a0979902f5..eda1a7f86f6bb 100644
--- a/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h
+++ b/flang/include/flang/Optimizer/Builder/Runtime/Transformational.h
@@ -63,6 +63,10 @@ void genPack(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value arrayBox, mlir::Value maskBox,
mlir::Value vectorBox);
+void genShallowCopy(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value resultBox, mlir::Value arrayBox,
+ bool resultIsAllocated); // true: call ShallowCopyDirect; false: call ShallowCopy
+
void genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value sourceBox,
mlir::Value shapeBox, mlir::Value padBox, mlir::Value orderBox);
diff --git a/flang/include/flang/Runtime/transformational.h b/flang/include/flang/Runtime/transformational.h
index d04fa68597883..3393ed1dc9813 100644
--- a/flang/include/flang/Runtime/transformational.h
+++ b/flang/include/flang/Runtime/transformational.h
@@ -146,6 +146,27 @@ void RTDECL(Pack)(Descriptor &result, const Descriptor &source,
const Descriptor &mask, const Descriptor *vector = nullptr,
const char *sourceFile = nullptr, int line = 0);
+/// Produce a shallow copy of the \p source in \p result.
+/// The \p source may have any type and rank.
+/// Unless \p source is unallocated, the \p result will
+/// be allocated using the same shape and dynamic type,
+/// and will contain the same top-level values as the \p source.
+/// The \p result will have the default lower bounds, if it is an array.
+/// Unlike the Assign runtime, no recursive assign actions are performed
+/// for the components of the derived types. \p sourceFile and \p line
+/// identify the call site to the runtime's Terminator for error reports.
+void RTDECL(ShallowCopy)(Descriptor &result, const Descriptor &source,
+ const char *sourceFile = nullptr, int line = 0);
+
+/// Same as ShallowCopy, but the caller provides a pre-allocated
+/// \p result. The \p source and \p result must be conforming:
+/// * Same rank.
+/// * Same extents.
+/// * Same size and type of elements (including the type parameters).
+/// Only the elements of \p result are overwritten; its lower bounds are kept.
+void RTDECL(ShallowCopyDirect)(const Descriptor &result,
+ const Descriptor &source, const char *sourceFile = nullptr, int line = 0);
+
void RTDECL(Spread)(Descriptor &result, const Descriptor &source, int dim,
std::int64_t ncopies, const char *sourceFile = nullptr, int line = 0);
diff --git a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
index 978524494af9b..47744b0facb53 100644
--- a/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
+++ b/flang/lib/Optimizer/Builder/Runtime/Transformational.cpp
@@ -474,6 +474,26 @@ void fir::runtime::genReshape(fir::FirOpBuilder &builder, mlir::Location loc,
builder.create<fir::CallOp>(loc, func, args);
}
+/// Generate a call to the ShallowCopy[Direct] runtime routine.
+/// ShallowCopyDirect is called iff \p resultIsAllocated is true;
+/// otherwise the allocating ShallowCopy entry point is called.
+void fir::runtime::genShallowCopy(fir::FirOpBuilder &builder,
+                                  mlir::Location loc, mlir::Value resultBox,
+                                  mlir::Value arrayBox,
+                                  bool resultIsAllocated) {
+  // Pick the runtime entry point matching the allocation state of the result.
+  auto copyFunc =
+      resultIsAllocated
+          ? fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopyDirect)>(loc,
+                                                                     builder)
+          : fir::runtime::getRuntimeFunc<mkRTKey(ShallowCopy)>(loc, builder);
+  auto funcType = copyFunc.getFunctionType();
+  // The runtime arguments are (result, source, sourceFile, line), so the
+  // line number is created with the type of input #3.
+  auto fileName = fir::factory::locationToFilename(builder, loc);
+  auto lineNo =
+      fir::factory::locationToLineNo(builder, loc, funcType.getInput(3));
+  auto callArgs = fir::runtime::createArguments(
+      builder, loc, funcType, resultBox, arrayBox, fileName, lineNo);
+  builder.create<fir::CallOp>(loc, copyFunc, callArgs);
+}
+
/// Generate call to Spread intrinsic runtime routine.
void fir::runtime::genSpread(fir::FirOpBuilder &builder, mlir::Location loc,
mlir::Value resultBox, mlir::Value sourceBox,
More information about the llvm-commits
mailing list