[Openmp-commits] [openmp] 89a8077 - [OpenMP][FIX] Properly align firstprivate variables
Johannes Doerfert via Openmp-commits
openmp-commits at lists.llvm.org
Mon Feb 27 17:36:24 PST 2023
Author: Johannes Doerfert
Date: 2023-02-27T17:34:46-08:00
New Revision: 89a8077f3d68f0f431f3657a8805f7751f5eac69
URL: https://github.com/llvm/llvm-project/commit/89a8077f3d68f0f431f3657a8805f7751f5eac69
DIFF: https://github.com/llvm/llvm-project/commit/89a8077f3d68f0f431f3657a8805f7751f5eac69.diff
LOG: [OpenMP][FIX] Properly align firstprivate variables
The old code didn't actually align the values, and it added padding even
when none was necessary. This approach will pad entries if necessary
and, similar to the struct case, use the host pointer as guidance.
NOTE: This does still not align them as the host has, but it's unclear
if the user really should use the alignment bits anyway. For now
this is a reasonable compromise, only if we have host alignment
information (explicitly not implicitly via the host pointer), we
could do it completely right without wasting lots of resources for
>99% of the cases.
Fixes: https://github.com/llvm/llvm-project/issues/61034
Added:
openmp/libomptarget/test/mapping/firstprivate_aligned.cpp
Modified:
openmp/libomptarget/src/omptarget.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 441da7c452434..9d800d75f0f49 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -16,6 +16,8 @@
#include "private.h"
#include "rtl.h"
+#include "llvm/ADT/bit.h"
+
#include <cassert>
#include <cstdint>
#include <vector>
@@ -105,7 +107,7 @@ static const int64_t MaxAlignment = 16;
/// Return the alignment requirement of partially mapped structs, see
/// MaxAlignment above.
-static int64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
+static uint64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
auto BaseAlignment = reinterpret_cast<uintptr_t>(HstPtrBase) % MaxAlignment;
return BaseAlignment == 0 ? MaxAlignment : BaseAlignment;
}
@@ -1289,22 +1291,27 @@ class PrivateArgumentManagerTy {
/// use this information to optimize data transfer by packing all
/// first-private arguments and transfer them all at once.
struct FirstPrivateArgInfoTy {
- /// The index of the element in \p TgtArgs corresponding to the argument
- int Index;
/// Host pointer begin
char *HstPtrBegin;
/// Host pointer end
char *HstPtrEnd;
- /// Aligned size
- int64_t AlignedSize;
+ /// The index of the element in \p TgtArgs corresponding to the argument
+ int Index;
+ /// Alignment of the entry (base of the entry, not after the entry).
+ uint32_t Alignment;
+ /// Size (without alignment, see padding)
+ uint32_t Size;
+ /// Padding used to align this argument entry, if necessary.
+ uint32_t Padding;
/// Host pointer name
map_var_info_t HstPtrName = nullptr;
- FirstPrivateArgInfoTy(int Index, void *HstPtr, int64_t Size,
+ FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size,
+ uint32_t Alignment, uint32_t Padding,
const map_var_info_t HstPtrName = nullptr)
- : Index(Index), HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
- HstPtrEnd(HstPtrBegin + Size),
- AlignedSize(Size + Size % MaxAlignment), HstPtrName(HstPtrName) {}
+ : HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
+ HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment),
+ Size(Size), Padding(Padding), HstPtrName(HstPtrName) {}
};
/// A vector of target pointers for all private arguments
@@ -1382,9 +1389,34 @@ class PrivateArgumentManagerTy {
// Placeholder value
TgtPtr = nullptr;
+ auto *LastFPArgInfo =
+ FirstPrivateArgInfo.empty() ? nullptr : &FirstPrivateArgInfo.back();
+
+ // Compute the start alignment of this entry, add padding if necessary.
+ // TODO: Consider sorting instead.
+ uint32_t Padding = 0;
+ uint32_t StartAlignment =
+ LastFPArgInfo ? LastFPArgInfo->Alignment : MaxAlignment;
+ if (LastFPArgInfo) {
+ // Check if we keep the start alignment or if it is shrunk due to the
+ // size of the last element.
+ uint32_t Offset = LastFPArgInfo->Size % StartAlignment;
+ if (Offset)
+ StartAlignment = Offset;
+ // We only need as much alignment as the host pointer had (since we
+ // don't know the alignment information from the source we might end up
+ // overaligning accesses but not too much).
+ uint32_t RequiredAlignment =
+ llvm::bit_floor(getPartialStructRequiredAlignment(HstPtr));
+ if (RequiredAlignment > StartAlignment) {
+ Padding = RequiredAlignment - StartAlignment;
+ StartAlignment = RequiredAlignment;
+ }
+ }
+
FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
- HstPtrName);
- FirstPrivateArgSize += FirstPrivateArgInfo.back().AlignedSize;
+ StartAlignment, Padding, HstPtrName);
+ FirstPrivateArgSize += Padding + ArgSize;
}
return OFFLOAD_SUCCESS;
@@ -1400,8 +1432,10 @@ class PrivateArgumentManagerTy {
auto Itr = FirstPrivateArgBuffer.begin();
// Copy all host data to this buffer
for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
+ // First pad the pointer as we (have to) pad it on the device too.
+ Itr = std::next(Itr, Info.Padding);
std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
- Itr = std::next(Itr, Info.AlignedSize);
+ Itr = std::next(Itr, Info.Size);
}
// Allocate target memory
void *TgtPtr =
@@ -1425,8 +1459,10 @@ class PrivateArgumentManagerTy {
for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
void *&Ptr = TgtArgs[Info.Index];
assert(Ptr == nullptr && "Target pointer is already set by mistaken");
+ // Pad the device pointer to get the right alignment.
+ TP += Info.Padding;
Ptr = reinterpret_cast<void *>(TP);
- TP += Info.AlignedSize;
+ TP += Info.Size;
DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD
"\n",
DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,
diff --git a/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp b/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp
new file mode 100644
index 0000000000000..ae6be0f0c07f4
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp
@@ -0,0 +1,37 @@
+// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
+
+#include <stdio.h>
+
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+
+template <bool Aligned> void test() {
+ printf("Test %saligned firstprivate\n", Aligned ? "" : "non-");
+ char z1[3 + Aligned], z2[3 + Aligned];
+ int x[4];
+ int y[4];
+ y[0] = y[1] = y[2] = y[3] = 4;
+ x[0] = x[1] = x[2] = x[3] = 4;
+ int rx = -1, ry = -1;
+#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x)
+ {
+ ry = (y[0] + y[1] + y[2] + y[3]);
+ rx = (x[0] + x[1] + x[2] + x[3]);
+ }
+ printf(" rx:%i, ry:%i\n", rx, ry);
+#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x)
+ {
+ z1[2] += 5;
+ ry = (y[0] + y[1] + y[2] + y[3]);
+ rx = (x[0] + x[1] + x[2] + x[3]);
+ z2[2] += 7;
+ }
+ printf(" rx:%i, ry:%i\n", rx, ry);
+}
+
+int main() {
+ test<true>();
+ test<false>();
+}
More information about the Openmp-commits
mailing list