[Openmp-commits] [openmp] 6e127c6 - [OpenMP] libomptarget: Don't map alignment padding to host

Joel E. Denny via Openmp-commits openmp-commits at lists.llvm.org
Mon Jul 3 07:25:25 PDT 2023


Author: Joel E. Denny
Date: 2023-07-03T10:23:38-04:00
New Revision: 6e127c6f29470012361811902829cf9798166f27

URL: https://github.com/llvm/llvm-project/commit/6e127c6f29470012361811902829cf9798166f27
DIFF: https://github.com/llvm/llvm-project/commit/6e127c6f29470012361811902829cf9798166f27.diff

LOG: [OpenMP] libomptarget: Don't map alignment padding to host

In the case of partially mapped structs, libomptarget sometimes adds
padding to device allocations to ensure they are aligned properly.
However, without this patch, it considers that padding to be mapped to
the host, which can cause presence checks (e.g.,
`omp_target_is_present` or a `present` modifier) to misbehave for
unmapped parts of the struct.  This patch keeps the padding but treats
it as unmapped.  See the new test case for examples.

Reviewed By: grokos, jdoerfert

Differential Revision: https://reviews.llvm.org/D149685

Added: 
    openmp/libomptarget/test/mapping/padding_not_mapped.c

Modified: 
    openmp/libomptarget/include/device.h
    openmp/libomptarget/src/device.cpp
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/test/mapping/power_of_two_alignment.c

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h
index 7bc4629c6220e0..cd76d88618be4e 100644
--- a/openmp/libomptarget/include/device.h
+++ b/openmp/libomptarget/include/device.h
@@ -68,7 +68,8 @@ struct HostDataToTargetTy {
   const uintptr_t HstPtrEnd;       // non-inclusive.
   const map_var_info_t HstPtrName; // Optional source name of mapped variable.
 
-  const uintptr_t TgtPtrBegin; // target info.
+  const uintptr_t TgtAllocBegin; // allocated target memory
+  const uintptr_t TgtPtrBegin; // mapped target memory = TgtAllocBegin + padding
 
 private:
   static const uint64_t INFRefCount = ~(uint64_t)0;
@@ -120,16 +121,18 @@ struct HostDataToTargetTy {
   const std::unique_ptr<StatesTy> States;
 
 public:
-  HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB,
+  HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E,
+                     uintptr_t TgtAllocBegin, uintptr_t TgtPtrBegin,
                      bool UseHoldRefCount, map_var_info_t Name = nullptr,
                      bool IsINF = false)
       : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
-        TgtPtrBegin(TB), States(std::make_unique<StatesTy>(UseHoldRefCount ? 0
-                                                           : IsINF ? INFRefCount
-                                                                   : 1,
-                                                           !UseHoldRefCount ? 0
-                                                           : IsINF ? INFRefCount
-                                                                   : 1)) {}
+        TgtAllocBegin(TgtAllocBegin), TgtPtrBegin(TgtPtrBegin),
+        States(std::make_unique<StatesTy>(UseHoldRefCount ? 0
+                                          : IsINF         ? INFRefCount
+                                                          : 1,
+                                          !UseHoldRefCount ? 0
+                                          : IsINF          ? INFRefCount
+                                                           : 1)) {}
 
   /// Get the total reference count.  This is smarter than just getDynRefCount()
   /// + getHoldRefCount() because it handles the case where at least one is
@@ -446,8 +449,8 @@ struct DeviceTy {
   /// - Data transfer issue fails.
   TargetPointerResultTy getTargetPointer(
       HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
-      int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
-      bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
+      int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName,
+      bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
       bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
       AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
       bool ReleaseHDTTMap = true);

diff  --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 22ab7436f75b53..a5409e2b2b74e6 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -90,6 +90,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
                /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
                /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
                /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
+               /*TgtAllocBegin=*/(uintptr_t)TgtPtrBegin,
                /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin,
                /*UseHoldRefCount=*/false, /*Name=*/nullptr,
                /*IsRefCountINF=*/true))
@@ -216,10 +217,10 @@ LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
 
 TargetPointerResultTy DeviceTy::getTargetPointer(
     HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
-    int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
-    bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
-    bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo,
-    HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
+    int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
+    bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
+    bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
+    AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
 
   LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR);
   LR.TPR.Flags.IsPresent = true;
@@ -297,24 +298,28 @@ TargetPointerResultTy DeviceTy::getTargetPointer(
   } else if (Size) {
     // If it is not contained and Size > 0, we should create a new entry for it.
     LR.TPR.Flags.IsNewEntry = true;
-    uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
+    uintptr_t TgtAllocBegin =
+        (uintptr_t)allocData(TgtPadding + Size, HstPtrBegin);
+    uintptr_t TgtPtrBegin = TgtAllocBegin + TgtPadding;
     // Release the mapping table lock only after the entry is locked by
     // attaching it to TPR.
     LR.TPR.setEntry(HDTTMap
                         ->emplace(new HostDataToTargetTy(
                             (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
-                            (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
-                            HstPtrName))
+                            (uintptr_t)HstPtrBegin + Size, TgtAllocBegin,
+                            TgtPtrBegin, HasHoldModifier, HstPtrName))
                         .first->HDTT);
     INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
          "Creating new map entry with HstPtrBase=" DPxMOD
-         ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, "
-         "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
-         DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
+         ", HstPtrBegin=" DPxMOD ", TgtAllocBegin=" DPxMOD
+         ", TgtPtrBegin=" DPxMOD
+         ", Size=%ld, DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
+         DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(TgtAllocBegin),
+         DPxPTR(TgtPtrBegin), Size,
          LR.TPR.getEntry()->dynRefCountToStr().c_str(),
          LR.TPR.getEntry()->holdRefCountToStr().c_str(),
          (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
-    LR.TPR.TargetPointer = (void *)Ptr;
+    LR.TPR.TargetPointer = (void *)TgtPtrBegin;
 
     // Notify the plugin about the new mapping.
     if (notifyDataMapped(HstPtrBegin, Size))
@@ -490,8 +495,9 @@ int DeviceTy::eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
 int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
   assert(Entry && "Trying to deallocate a null entry.");
 
-  DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
-     DPxPTR(Entry->TgtPtrBegin), Size);
+  DP("Deleting tgt data " DPxMOD " of size %" PRId64 " by freeing allocation "
+     "starting at " DPxMOD "\n",
+     DPxPTR(Entry->TgtPtrBegin), Size, DPxPTR(Entry->TgtAllocBegin));
 
   void *Event = Entry->getEvent();
   if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
@@ -499,7 +505,7 @@ int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
     return OFFLOAD_FAIL;
   }
 
-  int Ret = deleteData((void *)Entry->TgtPtrBegin);
+  int Ret = deleteData((void *)Entry->TgtAllocBegin);
 
   // Notify the plugin about the unmapped memory.
   Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin);
@@ -551,8 +557,8 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
   return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
 }
 
-int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) {
-  return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind);
+int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
+  return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
 }
 
 static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin,

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 04201e8d7a6012..9426d8a0ca76f6 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -208,6 +208,7 @@ static int initLibrary(DeviceTy &Device) {
               (uintptr_t)CurrHostEntry->addr /*HstPtrBegin*/,
               (uintptr_t)CurrHostEntry->addr +
                   CurrHostEntry->size /*HstPtrEnd*/,
+              (uintptr_t)CurrDeviceEntry->addr /*TgtAllocBegin*/,
               (uintptr_t)CurrDeviceEntry->addr /*TgtPtrBegin*/,
               false /*UseHoldRefCount*/, CurrHostEntry->name,
               true /*IsRefCountINF*/));
@@ -602,18 +603,16 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     // Adjust for proper alignment if this is a combined entry (for structs).
     // Look at the next argument - if that is MEMBER_OF this one, then this one
     // is a combined entry.
-    int64_t Padding = 0;
+    int64_t TgtPadding = 0;
     const int NextI = I + 1;
     if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
         getParentIndex(ArgTypes[NextI]) == I) {
       int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
-      Padding = (int64_t)HstPtrBegin % Alignment;
-      if (Padding) {
+      TgtPadding = (int64_t)HstPtrBegin % Alignment;
+      if (TgtPadding) {
         DP("Using a padding of %" PRId64 " bytes for begin address " DPxMOD
            "\n",
-           Padding, DPxPTR(HstPtrBegin));
-        HstPtrBegin = (char *)HstPtrBegin - Padding;
-        DataSize += Padding;
+           TgtPadding, DPxPTR(HstPtrBegin));
       }
     }
 
@@ -653,7 +652,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
       // PTR_AND_OBJ entry is handled below, and so the allocation might fail
       // when HasPresentModifier.
       PointerTpr = Device.getTargetPointer(
-          HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *),
+          HDTTMap, HstPtrBase, HstPtrBase, /*TgtPadding=*/0, sizeof(void *),
           /*HstPtrName=*/nullptr,
           /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef,
           HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo,
@@ -683,8 +682,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS;
     // Note that HDTTMap will be released in getTargetPointer.
     auto TPR = Device.getTargetPointer(
-        HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo,
-        HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
+        HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName,
+        HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier,
         HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry());
     void *TgtPtrBegin = TPR.TargetPointer;
     IsHostPtr = TPR.Flags.IsHostPointer;
@@ -890,25 +889,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
     }
 
     void *HstPtrBegin = Args[I];
-    void *HstPtrBase = ArgBases[I];
     int64_t DataSize = ArgSizes[I];
-    // Adjust for proper alignment if this is a combined entry (for structs).
-    // Look at the next argument - if that is MEMBER_OF this one, then this one
-    // is a combined entry.
-    const int NextI = I + 1;
-    if (getParentIndex(ArgTypes[I]) < 0 && NextI < ArgNum &&
-        getParentIndex(ArgTypes[NextI]) == I) {
-      int64_t Alignment = getPartialStructRequiredAlignment(HstPtrBase);
-      int64_t Padding = (int64_t)HstPtrBegin % Alignment;
-      if (Padding) {
-        DP("Using a Padding of %" PRId64 " bytes for begin address " DPxMOD
-           "\n",
-           Padding, DPxPTR(HstPtrBegin));
-        HstPtrBegin = (char *)HstPtrBegin - Padding;
-        DataSize += Padding;
-      }
-    }
-
     bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT;
     bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
                       (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) &&

diff  --git a/openmp/libomptarget/test/mapping/padding_not_mapped.c b/openmp/libomptarget/test/mapping/padding_not_mapped.c
new file mode 100644
index 00000000000000..9d5ef212ffc246
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/padding_not_mapped.c
@@ -0,0 +1,43 @@
+// RUN: %libomptarget-compile-generic -fopenmp-version=51
+// RUN: %libomptarget-run-fail-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// The host memory layout for the following program looks like this:
+//
+//   | 4 bytes | 4 bytes | 8 bytes |
+//   |   s.x   |   s.y   |   s.z   |
+//   `-----------------------------'
+//
+// s is always at least 8-byte aligned in host memory due to s.z, so
+// libomptarget's device padding for map(s.y,s.z) always maps to host memory
+// that includes s.x.  At one time, s.x appeared to be mapped as a result, but
+// libomptarget has since been fixed not to consider device padding as mapped to
+// host memory.
+
+#include <omp.h>
+#include <stdio.h>
+
+int main() {
+  struct S { int x; int y; double z; } s = {1, 2, 3};
+
+  // CHECK: &s.x = 0x[[#%x,HOST_ADDR:]], size = [[#%u,SIZE:]]
+  fprintf(stderr, "&s = %p\n", &s);
+  fprintf(stderr, "&s.x = %p, size = %ld\n", &s.x, sizeof s.x);
+  fprintf(stderr, "&s.y = %p\n", &s.y);
+  fprintf(stderr, "&s.z = %p\n", &s.z);
+
+  // CHECK: s.x is present: 0
+  // CHECK: s.x = 1{{$}}
+  #pragma omp target enter data map(alloc: s.y, s.z)
+  int dev = omp_get_default_device();
+  fprintf(stderr, "s.x is present: %d\n", omp_target_is_present(&s.x, dev));
+  #pragma omp target update from(s.x) // should have no effect
+  fprintf(stderr, "s.x = %d\n", s.x);
+
+  // CHECK: Libomptarget message: device mapping required by 'present' map type modifier does not exist for host address 0x{{0*}}[[#HOST_ADDR]] ([[#SIZE]] bytes)
+  // CHECK: Libomptarget error: Call to getTargetPointer returned null pointer ('present' map type modifier).
+  // CHECK: Libomptarget fatal error 1: failure of target construct while offloading is mandatory
+  #pragma omp target enter data map(present, alloc: s.x)
+
+  return 0;
+}

diff  --git a/openmp/libomptarget/test/mapping/power_of_two_alignment.c b/openmp/libomptarget/test/mapping/power_of_two_alignment.c
index 06b0e457983fb3..faebe4f89fd9d8 100644
--- a/openmp/libomptarget/test/mapping/power_of_two_alignment.c
+++ b/openmp/libomptarget/test/mapping/power_of_two_alignment.c
@@ -35,7 +35,9 @@
 // padding for s, libomptarget reported an array extension error.  collidePost
 // is never fully contained within that padding (which would avoid the extension
 // error) because collidePost is 16 bytes while the padding is always less than
-// 16 bytes due to the modulo operations.
+// 16 bytes due to the modulo operations.  (Later, libomptarget was changed not
+// to consider padding to be mapped to the host, so it cannot be involved in
+// array extension errors.)
 
 #include <stdint.h>
 #include <stdio.h>


        


More information about the Openmp-commits mailing list