[Openmp-commits] [openmp] 89a8077 - [OpenMP][FIX] Properly align firstprivate variables

Johannes Doerfert via Openmp-commits openmp-commits at lists.llvm.org
Mon Feb 27 17:36:24 PST 2023


Author: Johannes Doerfert
Date: 2023-02-27T17:34:46-08:00
New Revision: 89a8077f3d68f0f431f3657a8805f7751f5eac69

URL: https://github.com/llvm/llvm-project/commit/89a8077f3d68f0f431f3657a8805f7751f5eac69
DIFF: https://github.com/llvm/llvm-project/commit/89a8077f3d68f0f431f3657a8805f7751f5eac69.diff

LOG: [OpenMP][FIX] Properly align firstprivate variables

The old code didn't actually align the values, and it added padding even
when none was necessary. This approach will pad entries if necessary
and, similar to the struct case, use the host pointer as guidance.

NOTE: This does still not align them as the host has, but it's unclear
      if the user really should use the alignment bits anyway. For now
      this is a reasonable compromise, only if we have host alignment
      information (explicitly not implicitly via the host pointer), we
      could do it completely right without wasting lots of resources for
      >99% of the cases.

Fixes: https://github.com/llvm/llvm-project/issues/61034

Added: 
    openmp/libomptarget/test/mapping/firstprivate_aligned.cpp

Modified: 
    openmp/libomptarget/src/omptarget.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 441da7c452434..9d800d75f0f49 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -16,6 +16,8 @@
 #include "private.h"
 #include "rtl.h"
 
+#include "llvm/ADT/bit.h"
+
 #include <cassert>
 #include <cstdint>
 #include <vector>
@@ -105,7 +107,7 @@ static const int64_t MaxAlignment = 16;
 
 /// Return the alignment requirement of partially mapped structs, see
 /// MaxAlignment above.
-static int64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
+static uint64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
   auto BaseAlignment = reinterpret_cast<uintptr_t>(HstPtrBase) % MaxAlignment;
   return BaseAlignment == 0 ? MaxAlignment : BaseAlignment;
 }
@@ -1289,22 +1291,27 @@ class PrivateArgumentManagerTy {
   /// use this information to optimize data transfer by packing all
   /// first-private arguments and transfer them all at once.
   struct FirstPrivateArgInfoTy {
-    /// The index of the element in \p TgtArgs corresponding to the argument
-    int Index;
     /// Host pointer begin
     char *HstPtrBegin;
     /// Host pointer end
     char *HstPtrEnd;
-    /// Aligned size
-    int64_t AlignedSize;
+    /// The index of the element in \p TgtArgs corresponding to the argument
+    int Index;
+    /// Alignment of the entry (base of the entry, not after the entry).
+    uint32_t Alignment;
+    /// Size (without alignment, see padding)
+    uint32_t Size;
+    /// Padding used to align this argument entry, if necessary.
+    uint32_t Padding;
     /// Host pointer name
     map_var_info_t HstPtrName = nullptr;
 
-    FirstPrivateArgInfoTy(int Index, void *HstPtr, int64_t Size,
+    FirstPrivateArgInfoTy(int Index, void *HstPtr, uint32_t Size,
+                          uint32_t Alignment, uint32_t Padding,
                           const map_var_info_t HstPtrName = nullptr)
-        : Index(Index), HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
-          HstPtrEnd(HstPtrBegin + Size),
-          AlignedSize(Size + Size % MaxAlignment), HstPtrName(HstPtrName) {}
+        : HstPtrBegin(reinterpret_cast<char *>(HstPtr)),
+          HstPtrEnd(HstPtrBegin + Size), Index(Index), Alignment(Alignment),
+          Size(Size), Padding(Padding), HstPtrName(HstPtrName) {}
   };
 
   /// A vector of target pointers for all private arguments
@@ -1382,9 +1389,34 @@ class PrivateArgumentManagerTy {
 
       // Placeholder value
       TgtPtr = nullptr;
+      auto *LastFPArgInfo =
+          FirstPrivateArgInfo.empty() ? nullptr : &FirstPrivateArgInfo.back();
+
+      // Compute the start alignment of this entry, add padding if necessary.
+      // TODO: Consider sorting instead.
+      uint32_t Padding = 0;
+      uint32_t StartAlignment =
+          LastFPArgInfo ? LastFPArgInfo->Alignment : MaxAlignment;
+      if (LastFPArgInfo) {
+        // Check if we keep the start alignment or if it is shrunk due to the
+        // size of the last element.
+        uint32_t Offset = LastFPArgInfo->Size % StartAlignment;
+        if (Offset)
+          StartAlignment = Offset;
+        // We only need as much alignment as the host pointer had (since we
+        // don't know the alignment information from the source we might end up
+        // overaligning accesses but not too much).
+        uint32_t RequiredAlignment =
+            llvm::bit_floor(getPartialStructRequiredAlignment(HstPtr));
+        if (RequiredAlignment > StartAlignment) {
+          Padding = RequiredAlignment - StartAlignment;
+          StartAlignment = RequiredAlignment;
+        }
+      }
+
       FirstPrivateArgInfo.emplace_back(TgtArgsIndex, HstPtr, ArgSize,
-                                       HstPtrName);
-      FirstPrivateArgSize += FirstPrivateArgInfo.back().AlignedSize;
+                                       StartAlignment, Padding, HstPtrName);
+      FirstPrivateArgSize += Padding + ArgSize;
     }
 
     return OFFLOAD_SUCCESS;
@@ -1400,8 +1432,10 @@ class PrivateArgumentManagerTy {
       auto Itr = FirstPrivateArgBuffer.begin();
       // Copy all host data to this buffer
       for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
+        // First pad the pointer as we (have to) pad it on the device too.
+        Itr = std::next(Itr, Info.Padding);
         std::copy(Info.HstPtrBegin, Info.HstPtrEnd, Itr);
-        Itr = std::next(Itr, Info.AlignedSize);
+        Itr = std::next(Itr, Info.Size);
       }
       // Allocate target memory
       void *TgtPtr =
@@ -1425,8 +1459,10 @@ class PrivateArgumentManagerTy {
       for (FirstPrivateArgInfoTy &Info : FirstPrivateArgInfo) {
         void *&Ptr = TgtArgs[Info.Index];
         assert(Ptr == nullptr && "Target pointer is already set by mistaken");
+        // Pad the device pointer to get the right alignment.
+        TP += Info.Padding;
         Ptr = reinterpret_cast<void *>(TP);
-        TP += Info.AlignedSize;
+        TP += Info.Size;
         DP("Firstprivate array " DPxMOD " of size %" PRId64 " mapped to " DPxMOD
            "\n",
            DPxPTR(Info.HstPtrBegin), Info.HstPtrEnd - Info.HstPtrBegin,

diff  --git a/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp b/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp
new file mode 100644
index 0000000000000..ae6be0f0c07f4
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/firstprivate_aligned.cpp
@@ -0,0 +1,37 @@
+// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
+
+#include <stdio.h>
+
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+// CHECK: rx: 16, ry: 16;
+
+template <bool Aligned> void test() {
+  printf("Test %saligned firstprivate\n", Aligned ? "" : "non-");
+  char z1[3 + Aligned], z2[3 + Aligned];
+  int x[4];
+  int y[4];
+  y[0] = y[1] = y[2] = y[3] = 4;
+  x[0] = x[1] = x[2] = x[3] = 4;
+  int rx = -1, ry = -1;
+#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x)
+  {
+    ry = (y[0] + y[1] + y[2] + y[3]);
+    rx = (x[0] + x[1] + x[2] + x[3]);
+  }
+  printf(" rx:%i, ry:%i\n", rx, ry);
+#pragma omp target firstprivate(z1, y, z2) map(from : ry, rx) map(to : x)
+  {
+    z1[2] += 5;
+    ry = (y[0] + y[1] + y[2] + y[3]);
+    rx = (x[0] + x[1] + x[2] + x[3]);
+    z2[2] += 7;
+  }
+  printf(" rx:%i, ry:%i\n", rx, ry);
+}
+
+int main() {
+  test<true>();
+  test<false>();
+}


        


More information about the Openmp-commits mailing list