[Openmp-commits] [openmp] fa280c1 - [OpenMP] In libomptarget, assume alignment at powers of two

Joel E. Denny via Openmp-commits openmp-commits at lists.llvm.org
Tue May 2 06:46:00 PDT 2023


Author: Joel E. Denny
Date: 2023-05-02T09:44:58-04:00
New Revision: fa280c199420729330454e655335fcdf49522042

URL: https://github.com/llvm/llvm-project/commit/fa280c199420729330454e655335fcdf49522042
DIFF: https://github.com/llvm/llvm-project/commit/fa280c199420729330454e655335fcdf49522042.diff

LOG: [OpenMP] In libomptarget, assume alignment at powers of two

This patch fixes a bug introduced by D142586, which landed as
434992c96ed1.  The fix was to only look for alignments that are powers
of 2.  See the new test case for details.

Reviewed By: jdoerfert, jhuber6

Differential Revision: https://reviews.llvm.org/D149490

Added: 
    openmp/libomptarget/test/mapping/power_of_two_alignment.c

Modified: 
    openmp/libomptarget/src/omptarget.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 3094e899d0de6..04201e8d7a601 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -108,8 +108,9 @@ static const int64_t MaxAlignment = 16;
 /// Return the alignment requirement of partially mapped structs, see
 /// MaxAlignment above.
 static uint64_t getPartialStructRequiredAlignment(void *HstPtrBase) {
-  auto BaseAlignment = reinterpret_cast<uintptr_t>(HstPtrBase) % MaxAlignment;
-  return BaseAlignment == 0 ? MaxAlignment : BaseAlignment;
+  int LowestOneBit = __builtin_ffsl(reinterpret_cast<uintptr_t>(HstPtrBase));
+  uint64_t BaseAlignment = 1 << (LowestOneBit - 1);
+  return MaxAlignment < BaseAlignment ? MaxAlignment : BaseAlignment;
 }
 
 /// Map global data and execute pending ctors

diff  --git a/openmp/libomptarget/test/mapping/power_of_two_alignment.c b/openmp/libomptarget/test/mapping/power_of_two_alignment.c
new file mode 100644
index 0000000000000..06b0e457983fb
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/power_of_two_alignment.c
@@ -0,0 +1,87 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// Assuming the stack is allocated on the host starting at high addresses, the
+// host memory layout for the following program looks like this:
+//
+//   low addr <----------------------------------------------------- high addr
+//              |   16 bytes  | 16 bytes  |  16 bytes  | ? bytes  |
+//              | collidePost |     s     | collidePre | stackPad |
+//              |             | x | y | z |            |          |
+//              `-------------'
+//                    ^  `--------'
+//                    |      ^
+//                    |      |
+//                    |      `-- too much padding (< 16 bytes) for s maps here
+//                    |
+//                    `------------------array extension error maps here
+//
+// libomptarget used to add too much padding to the device allocation of s and
+// map it back to the host at the location indicated above when all of the
+// following conditions were true:
+// - Multiple members (s.y and s.z below) were mapped.  In this case, initial
+//   padding might be needed to ensure later mapped members (s.z) are aligned
+//   properly on the device.  (If the first member in the struct, s.x, were also
+//   mapped, then the correct initial padding would always be zero.)
+// - mod16 = &s % 16 was not a power of 2 (e.g., 0x7ffcce2b584e % 16 = 14).
+//   libomptarget then incorrectly assumed mod16 was the existing host memory
+//   alignment of s.  (The fix was to only look for alignments that are powers
+//   of 2.)
+// - &s.y % mod16 was > 1 (e.g., 0x7ffcce2b584f % 14 = 11).  libomptarget added
+//   padding of that size for s, but at most 1 byte is ever actually needed.
+//
+// Below, we try many sizes of stackPad to try to produce those conditions.
+//
+// When collidePost was then mapped to the same host memory as the unnecessary
+// padding for s, libomptarget reported an array extension error.  collidePost
+// is never fully contained within that padding (which would avoid the extension
+// error) because collidePost is 16 bytes while the padding is always less than
+// 16 bytes due to the modulo operations.
+
+#include <stdint.h>
+#include <stdio.h>
+
+template <typename StackPad>
+void test() {
+  StackPad stackPad;
+  struct S { char x; char y[7]; char z[8]; };
+  struct S collidePre, s, collidePost;
+  uintptr_t mod16 = (uintptr_t)&s % 16;
+  fprintf(stderr, "&s = %p\n", &s);
+  fprintf(stderr, "&s %% 16 = %lu\n", mod16);
+  if (mod16) {
+    fprintf(stderr, "&s.y = %p\n", &s.y);
+    fprintf(stderr, "&s.y %% %lu = %lu\n", mod16, (uintptr_t)&s.y % mod16);
+  }
+  fprintf(stderr, "&collidePre = %p\n", &collidePre);
+  fprintf(stderr, "&collidePost = %p\n", &collidePost);
+  #pragma omp target data map(to:s.y, s.z)
+  #pragma omp target data map(to:collidePre, collidePost)
+  ;
+}
+
+#define TEST(StackPad)                                                         \
+  fprintf(stderr, "-------------------------------------\n");                  \
+  fprintf(stderr, "StackPad=%s\n", #StackPad);                                 \
+  test<StackPad>()
+
+int main() {
+  TEST(char[1]);
+  TEST(char[2]);
+  TEST(char[3]);
+  TEST(char[4]);
+  TEST(char[5]);
+  TEST(char[6]);
+  TEST(char[7]);
+  TEST(char[8]);
+  TEST(char[9]);
+  TEST(char[10]);
+  TEST(char[11]);
+  TEST(char[12]);
+  TEST(char[13]);
+  TEST(char[14]);
+  TEST(char[15]);
+  TEST(char[16]);
+  // CHECK: pass
+  printf("pass\n");
+  return 0;
+}


        


More information about the Openmp-commits mailing list