[llvm] [flang-rt] Optimise ShallowCopy and use it in CopyInAssign (PR #140569)
Slava Zakharin via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 13:22:27 PDT 2025
================
@@ -114,61 +114,146 @@ RT_API_ATTRS void CheckIntegerKind(
}
}
+template <typename P, int RANK>
RT_API_ATTRS void ShallowCopyDiscontiguousToDiscontiguous(
const Descriptor &to, const Descriptor &from) {
- SubscriptValue toAt[maxRank], fromAt[maxRank];
- to.GetLowerBounds(toAt);
- from.GetLowerBounds(fromAt);
+ DescriptorIterator<RANK> toIt{to};
+ DescriptorIterator<RANK> fromIt{from};
+ // Knowing the size at compile time can enable memcpy inlining optimisations
+ constexpr std::size_t typeElementBytes{sizeof(P)};
+ // We might still need to check the actual size as a fallback
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
- to.IncrementSubscripts(toAt), from.IncrementSubscripts(fromAt)) {
- std::memcpy(
- to.Element<char>(toAt), from.Element<char>(fromAt), elementBytes);
+ toIt.Advance(), fromIt.Advance()) {
+ // typeElementBytes == 1 when P is a char - the non-specialised case
+ if constexpr (typeElementBytes != 1) {
+ std::memcpy(
+ toIt.template Get<P>(), fromIt.template Get<P>(), typeElementBytes);
+ } else {
+ std::memcpy(
+ toIt.template Get<P>(), fromIt.template Get<P>(), elementBytes);
+ }
}
}
+template <typename P, int RANK>
RT_API_ATTRS void ShallowCopyDiscontiguousToContiguous(
const Descriptor &to, const Descriptor &from) {
char *toAt{to.OffsetElement()};
- SubscriptValue fromAt[maxRank];
- from.GetLowerBounds(fromAt);
+ constexpr std::size_t typeElementBytes{sizeof(P)};
std::size_t elementBytes{to.ElementBytes()};
+ DescriptorIterator<RANK> fromIt{from};
for (std::size_t n{to.Elements()}; n-- > 0;
- toAt += elementBytes, from.IncrementSubscripts(fromAt)) {
- std::memcpy(toAt, from.Element<char>(fromAt), elementBytes);
+ toAt += elementBytes, fromIt.Advance()) {
+ if constexpr (typeElementBytes != 1) {
+ std::memcpy(toAt, fromIt.template Get<P>(), typeElementBytes);
+ } else {
+ std::memcpy(toAt, fromIt.template Get<P>(), elementBytes);
+ }
}
}
+template <typename P, int RANK>
RT_API_ATTRS void ShallowCopyContiguousToDiscontiguous(
const Descriptor &to, const Descriptor &from) {
- SubscriptValue toAt[maxRank];
- to.GetLowerBounds(toAt);
char *fromAt{from.OffsetElement()};
+ DescriptorIterator<RANK> toIt{to};
+ constexpr std::size_t typeElementBytes{sizeof(P)};
std::size_t elementBytes{to.ElementBytes()};
for (std::size_t n{to.Elements()}; n-- > 0;
- to.IncrementSubscripts(toAt), fromAt += elementBytes) {
- std::memcpy(to.Element<char>(toAt), fromAt, elementBytes);
+ toIt.Advance(), fromAt += elementBytes) {
+ if constexpr (typeElementBytes != 1) {
+ std::memcpy(toIt.template Get<P>(), fromAt, typeElementBytes);
+ } else {
+ std::memcpy(toIt.template Get<P>(), fromAt, elementBytes);
+ }
}
}
-RT_API_ATTRS void ShallowCopy(const Descriptor &to, const Descriptor &from,
+// ShallowCopy helper for calling the correct specialised variant based on
+// scenario
+template <typename P, int RANK = -1>
+RT_API_ATTRS void ShallowCopyInner(const Descriptor &to, const Descriptor &from,
bool toIsContiguous, bool fromIsContiguous) {
if (toIsContiguous) {
if (fromIsContiguous) {
std::memcpy(to.OffsetElement(), from.OffsetElement(),
to.Elements() * to.ElementBytes());
} else {
- ShallowCopyDiscontiguousToContiguous(to, from);
+ ShallowCopyDiscontiguousToContiguous<P, RANK>(to, from);
}
} else {
if (fromIsContiguous) {
- ShallowCopyContiguousToDiscontiguous(to, from);
+ ShallowCopyContiguousToDiscontiguous<P, RANK>(to, from);
} else {
- ShallowCopyDiscontiguousToDiscontiguous(to, from);
+ ShallowCopyDiscontiguousToDiscontiguous<P, RANK>(to, from);
}
}
}
+// ShallowCopy helper for specialising the variants based on array rank
+template <typename P>
+RT_API_ATTRS void ShallowCopyRank(const Descriptor &to, const Descriptor &from,
+ bool toIsContiguous, bool fromIsContiguous) {
+ if (to.rank() == 1 && from.rank() == 1) {
----------------
vzakhari wrote:
Please use a recursive template instead. Here is the first result from Google:
```
template <int N>
struct MyTemplate {
static void execute() {
// Code to execute for the current value N
// ...
MyTemplate<N - 1>::execute(); // Recursive call for the next value
}
};
template <>
struct MyTemplate<0> { // Base case for the recursion
static void execute() {}
};
int main() {
MyTemplate<10>::execute(); // Expands the template for values 10 down to 1
return 0;
}
```
https://github.com/llvm/llvm-project/pull/140569
More information about the llvm-commits mailing list