[Openmp-commits] [openmp] r368192 - [OpenMP][libomptarget] Add support for unified memory for regular maps

Gheorghe-Teodor Bercea via Openmp-commits openmp-commits at lists.llvm.org
Wed Aug 7 10:29:48 PDT 2019


Author: gbercea
Date: Wed Aug  7 10:29:45 2019
New Revision: 368192

URL: http://llvm.org/viewvc/llvm-project?rev=368192&view=rev
Log:
[OpenMP][libomptarget] Add support for unified memory for regular maps

Summary:
This patch adds support for using unified memory in the case of regular maps that happen when a target region is offloaded to the device.

For cases where only a single version of the data is required then the host address can be used. When variables need to be privatized in any way or globalized, then the copy to the device is still required for correctness.

Reviewers: ABataev, jdoerfert, Hahnfeld, AlexEichenberger, caomhin, grokos

Reviewed By: Hahnfeld

Subscribers: mgorny, guansong, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D65001

Added:
    openmp/trunk/libomptarget/test/unified_shared_memory/
    openmp/trunk/libomptarget/test/unified_shared_memory/api.c
    openmp/trunk/libomptarget/test/unified_shared_memory/shared_update.c
Modified:
    openmp/trunk/libomptarget/src/api.cpp
    openmp/trunk/libomptarget/src/device.cpp
    openmp/trunk/libomptarget/src/device.h
    openmp/trunk/libomptarget/src/omptarget.cpp
    openmp/trunk/libomptarget/test/offloading/requires.c

Modified: openmp/trunk/libomptarget/src/api.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/api.cpp?rev=368192&r1=368191&r2=368192&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/api.cpp (original)
+++ openmp/trunk/libomptarget/src/api.cpp Wed Aug  7 10:29:45 2019
@@ -113,7 +113,15 @@ EXTERN int omp_target_is_present(void *p
 
   DeviceTy& Device = Devices[device_num];
   bool IsLast; // not used
-  int rc = (Device.getTgtPtrBegin(ptr, 0, IsLast, false) != NULL);
+  bool IsHostPtr;
+  void *TgtPtr = Device.getTgtPtrBegin(ptr, 0, IsLast, false, IsHostPtr);
+  int rc = (TgtPtr != NULL);
+  // Under unified memory the host pointer can be returned by the
+  // getTgtPtrBegin() function which means that there is no device
+  // corresponding point for ptr. This function should return false
+  // in that situation.
+  if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
+    rc = !IsHostPtr;
   DP("Call to omp_target_is_present returns %d\n", rc);
   return rc;
 }

Modified: openmp/trunk/libomptarget/src/device.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/device.cpp?rev=368192&r1=368191&r2=368192&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/device.cpp (original)
+++ openmp/trunk/libomptarget/src/device.cpp Wed Aug  7 10:29:45 2019
@@ -157,12 +157,17 @@ LookupResult DeviceTy::lookupMapping(voi
 // If NULL is returned, then either data allocation failed or the user tried
 // to do an illegal mapping.
 void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
-    int64_t Size, bool &IsNew, bool IsImplicit, bool UpdateRefCount) {
+    int64_t Size, bool &IsNew, bool &IsHostPtr, bool IsImplicit,
+    bool UpdateRefCount) {
   void *rc = NULL;
+  IsHostPtr = false;
   DataMapMtx.lock();
   LookupResult lr = lookupMapping(HstPtrBegin, Size);
 
   // Check if the pointer is contained.
+  // If a variable is mapped to the device manually by the user - which would
+  // lead to the IsContained flag to be true - then we must ensure that the
+  // device address is returned even under unified memory conditions.
   if (lr.Flags.IsContained ||
       ((lr.Flags.ExtendsBefore || lr.Flags.ExtendsAfter) && IsImplicit)) {
     auto &HT = *lr.Entry;
@@ -183,15 +188,28 @@ void *DeviceTy::getOrAllocTgtPtr(void *H
     // Explicit extension of mapped data - not allowed.
     DP("Explicit extension of mapping is not allowed.\n");
   } else if (Size) {
-    // If it is not contained and Size > 0 we should create a new entry for it.
-    IsNew = true;
-    uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
-    DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
-        "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
-        DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
-    HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
-        (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
-    rc = (void *)tp;
+    // If unified shared memory is active, implicitly mapped variables that are not
+    // privatized use host address. Any explicitly mapped variables also use
+    // host address where correctness is not impeded. In all other cases
+    // maps are respected.
+    // TODO: In addition to the mapping rules above, when the close map
+    // modifier is implemented, foce the mapping of the variable to the device.
+    if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
+      DP("Return HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
+         DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
+      IsHostPtr = true;
+      rc = HstPtrBegin;
+    } else {
+      // If it is not contained and Size > 0 we should create a new entry for it.
+      IsNew = true;
+      uintptr_t tp = (uintptr_t)RTL->data_alloc(RTLDeviceID, Size, HstPtrBegin);
+      DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
+          "HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n", DPxPTR(HstPtrBase),
+          DPxPTR(HstPtrBegin), DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
+      HostDataToTargetMap.push_front(HostDataToTargetTy((uintptr_t)HstPtrBase,
+          (uintptr_t)HstPtrBegin, (uintptr_t)HstPtrBegin + Size, tp));
+      rc = (void *)tp;
+    }
   }
 
   DataMapMtx.unlock();
@@ -202,8 +220,10 @@ void *DeviceTy::getOrAllocTgtPtr(void *H
 // Return the target pointer begin (where the data will be moved).
 // Decrement the reference counter if called from target_data_end.
 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
-    bool UpdateRefCount) {
+    bool UpdateRefCount, bool &IsHostPtr) {
   void *rc = NULL;
+  IsHostPtr = false;
+  IsLast = false;
   DataMapMtx.lock();
   LookupResult lr = lookupMapping(HstPtrBegin, Size);
 
@@ -221,8 +241,14 @@ void *DeviceTy::getTgtPtrBegin(void *Hst
         (CONSIDERED_INF(HT.RefCount)) ? "INF" :
             std::to_string(HT.RefCount).c_str());
     rc = (void *)tp;
-  } else {
-    IsLast = false;
+  } else if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
+    // If the value isn't found in the mapping and unified shared memory
+    // is on then it means we have stumbled upon a value which we need to
+    // use directly from the host.
+    DP("Get HstPtrBegin " DPxMOD " Size=%ld RefCount=%s\n",
+       DPxPTR((uintptr_t)HstPtrBegin), Size, (UpdateRefCount ? " updated" : ""));
+    IsHostPtr = true;
+    rc = HstPtrBegin;
   }
 
   DataMapMtx.unlock();
@@ -244,6 +270,8 @@ void *DeviceTy::getTgtPtrBegin(void *Hst
 }
 
 int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete) {
+  if (RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)
+    return OFFLOAD_SUCCESS;
   // Check if the pointer is contained in any sub-nodes.
   int rc;
   DataMapMtx.lock();

Modified: openmp/trunk/libomptarget/src/device.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/device.h?rev=368192&r1=368191&r2=368192&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/device.h (original)
+++ openmp/trunk/libomptarget/src/device.h Wed Aug  7 10:29:45 2019
@@ -137,10 +137,10 @@ struct DeviceTy {
   long getMapEntryRefCnt(void *HstPtrBegin);
   LookupResult lookupMapping(void *HstPtrBegin, int64_t Size);
   void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size,
-      bool &IsNew, bool IsImplicit, bool UpdateRefCount = true);
+      bool &IsNew, bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount = true);
   void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size);
   void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
-      bool UpdateRefCount);
+      bool UpdateRefCount, bool &IsHostPtr);
   int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete);
   int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
   int disassociatePtr(void *HstPtrBegin);

Modified: openmp/trunk/libomptarget/src/omptarget.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/src/omptarget.cpp?rev=368192&r1=368191&r2=368192&view=diff
==============================================================================
--- openmp/trunk/libomptarget/src/omptarget.cpp (original)
+++ openmp/trunk/libomptarget/src/omptarget.cpp Wed Aug  7 10:29:45 2019
@@ -242,6 +242,7 @@ int target_data_begin(DeviceTy &Device,
     // Address of pointer on the host and device, respectively.
     void *Pointer_HstPtrBegin, *Pointer_TgtPtrBegin;
     bool IsNew, Pointer_IsNew;
+    bool IsHostPtr = false;
     bool IsImplicit = arg_types[i] & OMP_TGT_MAPTYPE_IMPLICIT;
     // UpdateRef is based on MEMBER_OF instead of TARGET_PARAM because if we
     // have reached this point via __tgt_target_data_begin and not __tgt_target
@@ -253,7 +254,7 @@ int target_data_begin(DeviceTy &Device,
       DP("Has a pointer entry: \n");
       // base is address of pointer.
       Pointer_TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBase, HstPtrBase,
-          sizeof(void *), Pointer_IsNew, IsImplicit, UpdateRef);
+          sizeof(void *), Pointer_IsNew, IsHostPtr, IsImplicit, UpdateRef);
       if (!Pointer_TgtPtrBegin) {
         DP("Call to getOrAllocTgtPtr returned null pointer (device failure or "
             "illegal mapping).\n");
@@ -269,7 +270,7 @@ int target_data_begin(DeviceTy &Device,
     }
 
     void *TgtPtrBegin = Device.getOrAllocTgtPtr(HstPtrBegin, HstPtrBase,
-        data_size, IsNew, IsImplicit, UpdateRef);
+        data_size, IsNew, IsHostPtr, IsImplicit, UpdateRef);
     if (!TgtPtrBegin && data_size) {
       // If data_size==0, then the argument could be a zero-length pointer to
       // NULL, so getOrAlloc() returning NULL is not an error.
@@ -289,19 +290,21 @@ int target_data_begin(DeviceTy &Device,
 
     if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
       bool copy = false;
-      if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
-        copy = true;
-      } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
-        // Copy data only if the "parent" struct has RefCount==1.
-        int32_t parent_idx = member_of(arg_types[i]);
-        long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
-        assert(parent_rc > 0 && "parent struct not found");
-        if (parent_rc == 1) {
+      if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
+        if (IsNew || (arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS)) {
           copy = true;
+        } else if (arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) {
+          // Copy data only if the "parent" struct has RefCount==1.
+          int32_t parent_idx = member_of(arg_types[i]);
+          long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
+          assert(parent_rc > 0 && "parent struct not found");
+          if (parent_rc == 1) {
+            copy = true;
+          }
         }
       }
 
-      if (copy) {
+      if (copy && !IsHostPtr) {
         DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
             data_size, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
         int rt = Device.data_submit(TgtPtrBegin, HstPtrBegin, data_size);
@@ -312,7 +315,7 @@ int target_data_begin(DeviceTy &Device,
       }
     }
 
-    if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
+    if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) {
       DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
           DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
       uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
@@ -363,14 +366,14 @@ int target_data_end(DeviceTy &Device, in
       }
     }
 
-    bool IsLast;
+    bool IsLast, IsHostPtr;
     bool UpdateRef = !(arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) ||
         (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ);
     bool ForceDelete = arg_types[i] & OMP_TGT_MAPTYPE_DELETE;
 
     // If PTR_AND_OBJ, HstPtrBegin is address of pointee
     void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, data_size, IsLast,
-        UpdateRef);
+        UpdateRef, IsHostPtr);
     DP("There are %" PRId64 " bytes allocated at target address " DPxMOD
         " - is%s last\n", data_size, DPxPTR(TgtPtrBegin),
         (IsLast ? "" : " not"));
@@ -387,18 +390,22 @@ int target_data_end(DeviceTy &Device, in
       if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
         bool Always = arg_types[i] & OMP_TGT_MAPTYPE_ALWAYS;
         bool CopyMember = false;
-        if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
-            !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
-          // Copy data only if the "parent" struct has RefCount==1.
-          int32_t parent_idx = member_of(arg_types[i]);
-          long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
-          assert(parent_rc > 0 && "parent struct not found");
-          if (parent_rc == 1) {
-            CopyMember = true;
+        if (!(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY)) {
+          if ((arg_types[i] & OMP_TGT_MAPTYPE_MEMBER_OF) &&
+              !(arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) {
+            // Copy data only if the "parent" struct has RefCount==1.
+            int32_t parent_idx = member_of(arg_types[i]);
+            long parent_rc = Device.getMapEntryRefCnt(args[parent_idx]);
+            assert(parent_rc > 0 && "parent struct not found");
+            if (parent_rc == 1) {
+              CopyMember = true;
+            }
           }
         }
 
-        if (DelEntry || Always || CopyMember) {
+        if ((DelEntry || Always || CopyMember) &&
+            !(Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+              TgtPtrBegin == HstPtrBegin)) {
           DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
               data_size, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
           int rt = Device.data_retrieve(HstPtrBegin, TgtPtrBegin, data_size);
@@ -471,14 +478,21 @@ int target_data_update(DeviceTy &Device,
 
     void *HstPtrBegin = args[i];
     int64_t MapSize = arg_sizes[i];
-    bool IsLast;
+    bool IsLast, IsHostPtr;
     void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, MapSize, IsLast,
-        false);
+        false, IsHostPtr);
     if (!TgtPtrBegin) {
       DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
       continue;
     }
 
+    if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+        TgtPtrBegin == HstPtrBegin) {
+      DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
+         DPxPTR(HstPtrBegin));
+      continue;
+    }
+
     if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
       DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
           arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
@@ -514,6 +528,7 @@ int target_data_update(DeviceTy &Device,
         DP("Copying data to device failed.\n");
         return OFFLOAD_FAIL;
       }
+
       uintptr_t lb = (uintptr_t) HstPtrBegin;
       uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
       Device.ShadowMtx.lock();
@@ -640,19 +655,26 @@ int target(int64_t device_id, void *host
         void *HstPtrVal = args[i];
         void *HstPtrBegin = args_base[i];
         void *HstPtrBase = args[idx];
-        bool IsLast; // unused.
+        bool IsLast, IsHostPtr; // unused.
         void *TgtPtrBase =
             (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
         DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
         uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
         void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
         void *Pointer_TgtPtrBegin =
-            Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false);
+            Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false,
+                                  IsHostPtr);
         if (!Pointer_TgtPtrBegin) {
           DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
              DPxPTR(HstPtrVal));
           continue;
         }
+        if (Device.RTLRequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+            TgtPtrBegin == HstPtrBegin) {
+          DP("Unified memory is active, no need to map lambda captured"
+             "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal));
+          continue;
+        }
         DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
            DPxPTR(Pointer_TgtPtrBegin), DPxPTR(TgtPtrBegin));
         int rt = Device.data_submit(TgtPtrBegin, &Pointer_TgtPtrBegin,
@@ -668,7 +690,7 @@ int target(int64_t device_id, void *host
     void *HstPtrBase = args_base[i];
     void *TgtPtrBegin;
     ptrdiff_t TgtBaseOffset;
-    bool IsLast; // unused.
+    bool IsLast, IsHostPtr; // unused.
     if (arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) {
       DP("Forwarding first-private value " DPxMOD " to the target construct\n",
           DPxPTR(HstPtrBase));
@@ -705,14 +727,14 @@ int target(int64_t device_id, void *host
       }
     } else if (arg_types[i] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) {
       TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBase, sizeof(void *), IsLast,
-          false);
+          false, IsHostPtr);
       TgtBaseOffset = 0; // no offset for ptrs.
       DP("Obtained target argument " DPxMOD " from host pointer " DPxMOD " to "
          "object " DPxMOD "\n", DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBase),
          DPxPTR(HstPtrBase));
     } else {
       TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, arg_sizes[i], IsLast,
-          false);
+          false, IsHostPtr);
       TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin;
 #ifdef OMPTARGET_DEBUG
       void *TgtPtrBase = (void *)((intptr_t)TgtPtrBegin + TgtBaseOffset);

Modified: openmp/trunk/libomptarget/test/offloading/requires.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/test/offloading/requires.c?rev=368192&r1=368191&r2=368192&view=diff
==============================================================================
--- openmp/trunk/libomptarget/test/offloading/requires.c (original)
+++ openmp/trunk/libomptarget/test/offloading/requires.c Wed Aug  7 10:29:45 2019
@@ -43,4 +43,4 @@ int main() {
   {}
 
   return 0;
-}
\ No newline at end of file
+}

Added: openmp/trunk/libomptarget/test/unified_shared_memory/api.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/test/unified_shared_memory/api.c?rev=368192&view=auto
==============================================================================
--- openmp/trunk/libomptarget/test/unified_shared_memory/api.c (added)
+++ openmp/trunk/libomptarget/test/unified_shared_memory/api.c Wed Aug  7 10:29:45 2019
@@ -0,0 +1,164 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <stdio.h>
+#include <omp.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+extern void __tgt_register_requires(int64_t);
+
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+
+#pragma omp requires unified_shared_memory
+
+#define N 1024
+
+void init(int A[], int B[], int C[]) {
+  for (int i = 0; i < N; ++i) {
+    A[i] = 0;
+    B[i] = 1;
+    C[i] = i;
+  }
+}
+
+int main(int argc, char *argv[]) {
+  const int device = omp_get_default_device();
+
+  // Manual registration of requires flags for Clang versions
+  // that do not support requires.
+  __tgt_register_requires(8);
+
+  // CHECK: Initial device: -10
+  printf("Initial device: %d\n", omp_get_initial_device());
+
+  //
+  // Target alloc & target memcpy
+  //
+  int A[N], B[N], C[N];
+
+  // Init
+  init(A, B, C);
+
+  int *pA, *pB, *pC;
+
+  // map ptrs
+  pA = &A[0];
+  pB = &B[0];
+  pC = &C[0];
+
+  int *d_A = (int *)omp_target_alloc(N * sizeof(int), device);
+  int *d_B = (int *)omp_target_alloc(N * sizeof(int), device);
+  int *d_C = (int *)omp_target_alloc(N * sizeof(int), device);
+
+  // CHECK: omp_target_alloc succeeded
+  printf("omp_target_alloc %s\n", d_A && d_B && d_C ? "succeeded" : "failed");
+
+  omp_target_memcpy(d_B, pB, N * sizeof(int), 0, 0, device,
+                    omp_get_initial_device());
+  omp_target_memcpy(d_C, pC, N * sizeof(int), 0, 0, device,
+                    omp_get_initial_device());
+
+#pragma omp target is_device_ptr(d_A, d_B, d_C) device(device)
+  {
+#pragma omp parallel for schedule(static, 1)
+    for (int i = 0; i < N; i++) {
+      d_A[i] = d_B[i] + d_C[i] + 1;
+    }
+  }
+
+  omp_target_memcpy(pA, d_A, N * sizeof(int), 0, 0, omp_get_initial_device(),
+                    device);
+
+  // CHECK: Test omp_target_memcpy: Succeeded
+  int fail = 0;
+  for (int i = 0; i < N; ++i) {
+    if (A[i] != i + 2)
+      fail++;
+  }
+  if (fail) {
+    printf("Test omp_target_memcpy: Failed\n");
+  } else {
+    printf("Test omp_target_memcpy: Succeeded\n");
+  }
+
+  //
+  // target_is_present and target_associate/disassociate_ptr
+  //
+  init(A, B, C);
+
+  // CHECK: B is not present, associating it...
+  // CHECK: omp_target_associate_ptr B succeeded
+  if (!omp_target_is_present(B, device)) {
+    printf("B is not present, associating it...\n");
+    int rc = omp_target_associate_ptr(B, d_B, N * sizeof(int), 0, device);
+    printf("omp_target_associate_ptr B %s\n", !rc ? "succeeded" : "failed");
+  }
+
+  // CHECK: C is not present, associating it...
+  // CHECK: omp_target_associate_ptr C succeeded
+  if (!omp_target_is_present(C, device)) {
+    printf("C is not present, associating it...\n");
+    int rc = omp_target_associate_ptr(C, d_C, N * sizeof(int), 0, device);
+    printf("omp_target_associate_ptr C %s\n", !rc ? "succeeded" : "failed");
+  }
+
+// CHECK: Inside target data: A is not present
+// CHECK: Inside target data: B is present
+// CHECK: Inside target data: C is present
+#pragma omp target data map(from : B, C) device(device)
+  {
+    printf("Inside target data: A is%s present\n",
+           omp_target_is_present(A, device) ? "" : " not");
+    printf("Inside target data: B is%s present\n",
+           omp_target_is_present(B, device) ? "" : " not");
+    printf("Inside target data: C is%s present\n",
+           omp_target_is_present(C, device) ? "" : " not");
+
+#pragma omp target map(from : A) device(device)
+    {
+#pragma omp parallel for schedule(static, 1)
+      for (int i = 0; i < N; i++)
+        A[i] = B[i] + C[i] + 1;
+    }
+  }
+
+  // CHECK: B is present, disassociating it...
+  // CHECK: omp_target_disassociate_ptr B succeeded
+  // CHECK: C is present, disassociating it...
+  // CHECK: omp_target_disassociate_ptr C succeeded
+  if (omp_target_is_present(B, device)) {
+    printf("B is present, disassociating it...\n");
+    int rc = omp_target_disassociate_ptr(B, device);
+    printf("omp_target_disassociate_ptr B %s\n", !rc ? "succeeded" : "failed");
+  }
+  if (omp_target_is_present(C, device)) {
+    printf("C is present, disassociating it...\n");
+    int rc = omp_target_disassociate_ptr(C, device);
+    printf("omp_target_disassociate_ptr C %s\n", !rc ? "succeeded" : "failed");
+  }
+
+  // CHECK: Test omp_target_associate_ptr: Succeeded
+  fail = 0;
+  for (int i = 0; i < N; ++i) {
+    if (A[i] != i + 2)
+      fail++;
+  }
+  if (fail) {
+    printf("Test omp_target_associate_ptr: Failed\n");
+  } else {
+    printf("Test omp_target_associate_ptr: Succeeded\n");
+  }
+
+  omp_target_free(d_A, device);
+  omp_target_free(d_B, device);
+  omp_target_free(d_C, device);
+
+  printf("Done!\n");
+
+  return 0;
+}

Added: openmp/trunk/libomptarget/test/unified_shared_memory/shared_update.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/test/unified_shared_memory/shared_update.c?rev=368192&view=auto
==============================================================================
--- openmp/trunk/libomptarget/test/unified_shared_memory/shared_update.c (added)
+++ openmp/trunk/libomptarget/test/unified_shared_memory/shared_update.c Wed Aug  7 10:29:45 2019
@@ -0,0 +1,114 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <stdio.h>
+#include <omp.h>
+
+// ---------------------------------------------------------------------------
+// Various definitions copied from OpenMP RTL
+
+extern void __tgt_register_requires(int64_t);
+
+// End of definitions copied from OpenMP RTL.
+// ---------------------------------------------------------------------------
+
+#pragma omp requires unified_shared_memory
+
+#define N 1024
+
+int main(int argc, char *argv[]) {
+  int fails;
+  void *host_alloc, *device_alloc;
+  void *host_data, *device_data;
+  int *alloc = (int *)malloc(N * sizeof(int));
+  int data[N];
+
+  // Manual registration of requires flags for Clang versions
+  // that do not support requires.
+  __tgt_register_requires(8);
+
+  for (int i = 0; i < N; ++i) {
+    alloc[i] = 10;
+    data[i] = 1;
+  }
+
+  host_data = &data[0];
+  host_alloc = &alloc[0];
+
+// implicit mapping of data
+#pragma omp target map(tofrom : device_data, device_alloc)
+  {
+    device_data = &data[0];
+    device_alloc = &alloc[0];
+
+    for (int i = 0; i < N; i++) {
+      alloc[i] += 1;
+      data[i] += 1;
+    }
+  }
+
+  // CHECK: Address of alloc on device matches host address.
+  if (device_alloc == host_alloc)
+    printf("Address of alloc on device matches host address.\n");
+
+  // CHECK: Address of data on device matches host address.
+  if (device_data == host_data)
+    printf("Address of data on device matches host address.\n");
+
+  // On the host, check that the arrays have been updated.
+  // CHECK: Alloc device values updated: Succeeded
+  fails = 0;
+  for (int i = 0; i < N; i++) {
+    if (alloc[i] != 11)
+      fails++;
+  }
+  printf("Alloc device values updated: %s\n",
+         (fails == 0) ? "Succeeded" : "Failed");
+
+  // CHECK: Data device values updated: Succeeded
+  fails = 0;
+  for (int i = 0; i < N; i++) {
+    if (data[i] != 2)
+      fails++;
+  }
+  printf("Data device values updated: %s\n",
+         (fails == 0) ? "Succeeded" : "Failed");
+
+  //
+  // Test that updates on the host snd on the device are both visible.
+  //
+
+  // Update on the host.
+  for (int i = 0; i < N; ++i) {
+    alloc[i] += 1;
+    data[i] += 1;
+  }
+
+#pragma omp target
+  {
+    // CHECK: Alloc host values updated: Succeeded
+    fails = 0;
+    for (int i = 0; i < N; i++) {
+      if (alloc[i] != 12)
+        fails++;
+    }
+    printf("Alloc host values updated: %s\n",
+           (fails == 0) ? "Succeeded" : "Failed");
+    // CHECK: Data host values updated: Succeeded
+    fails = 0;
+    for (int i = 0; i < N; i++) {
+      if (data[i] != 3)
+        fails++;
+    }
+    printf("Data host values updated: %s\n",
+           (fails == 0) ? "Succeeded" : "Failed");
+  }
+
+  free(alloc);
+
+  printf("Done!\n");
+
+  return 0;
+}




More information about the Openmp-commits mailing list