[Openmp-commits] [openmp] 7036fe8 - [libomptarget] Add support for target update non-contiguous

via Openmp-commits openmp-commits at lists.llvm.org
Thu Nov 19 09:33:38 PST 2020


Author: cchen
Date: 2020-11-19T11:33:27-06:00
New Revision: 7036fe8a0cffcefaa542f6dde756b7aa2f9c91b5

URL: https://github.com/llvm/llvm-project/commit/7036fe8a0cffcefaa542f6dde756b7aa2f9c91b5
DIFF: https://github.com/llvm/llvm-project/commit/7036fe8a0cffcefaa542f6dde756b7aa2f9c91b5.diff

LOG: [libomptarget] Add support for target update non-contiguous

This patch is the runtime support for https://reviews.llvm.org/D84192.

In order not to modify the tgt_target_data_update information but still be
able to pass the extra information for non-contiguous map item (offset,
count, and stride for each dimension), this patch overload arg when
the maptype is set as OMP_TGT_MAPTYPE_DESCRIPTOR. The origin arg is for
passing the pointer information, however, the overloaded arg is an
array of descriptor_dim:

```
struct descriptor_dim {
  int64_t offset;
  int64_t count;
  int64_t stride
};
```

and the array size is the dimension size. In addition, since we
have count and stride information in descriptor_dim, we can replace/overload the
arg_size parameter by using dimension size.

Reviewed By: grokos, tianshilei1992

Differential Revision: https://reviews.llvm.org/D82245

Added: 
    openmp/libomptarget/test/offloading/non_contiguous_update.cpp

Modified: 
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/src/device.cpp
    openmp/libomptarget/src/interface.cpp
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/src/private.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index fd6b3520b013..9c533944d135 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -52,6 +52,8 @@ enum tgt_map_type {
   OMP_TGT_MAPTYPE_CLOSE           = 0x400,
   // runtime error if not already allocated
   OMP_TGT_MAPTYPE_PRESENT         = 0x1000,
+  // descriptor for non-contiguous target-update
+  OMP_TGT_MAPTYPE_NON_CONTIG      = 0x100000000000,
   // member of struct, member given by [16 MSBs] - 1
   OMP_TGT_MAPTYPE_MEMBER_OF       = 0xffff000000000000
 };
@@ -123,6 +125,13 @@ struct __tgt_async_info {
   void *Queue = nullptr;
 };
 
+/// This struct is a record of non-contiguous information
+struct __tgt_target_non_contig {
+  uint64_t Offset;
+  uint64_t Count;
+  uint64_t Stride;
+};
+
 #ifdef __cplusplus
 extern "C" {
 #endif

diff  --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 9734d6c677ac..9d6f8bde1d0e 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -277,7 +277,7 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
   return rc;
 }
 
-// Used by targetDataBegin, targetDataEnd, target_data_update and target.
+// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
 // Return the target pointer begin (where the data will be moved).
 // Decrement the reference counter if called from targetDataEnd.
 void *DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,

diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 7fc51e274a11..a1ff650c193c 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -281,8 +281,8 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *loc, int64_t device_id,
   }
 
   DeviceTy &Device = PM->Devices[device_id];
-  int rc = target_data_update(Device, arg_num, args_base, args, arg_sizes,
-                              arg_types, arg_names, arg_mappers);
+  int rc = targetDataUpdate(Device, arg_num, args_base, args, arg_sizes,
+                            arg_types, arg_names, arg_mappers);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS, loc);
 }
 

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index 28860d1da6dd..0846e99461ff 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -639,122 +639,182 @@ int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
   return OFFLOAD_SUCCESS;
 }
 
-/// Internal function to pass data to/from the target.
-// async_info_ptr is currently unused, added here so target_data_update has the
-// same signature as targetDataBegin and targetDataEnd.
-int target_data_update(DeviceTy &Device, int32_t arg_num, void **args_base,
-                       void **args, int64_t *arg_sizes, int64_t *arg_types,
-                       map_var_info_t *arg_names, void **arg_mappers,
-                       __tgt_async_info *async_info_ptr) {
-  // process each input.
-  for (int32_t i = 0; i < arg_num; ++i) {
-    if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
-        (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
-      continue;
-
-    if (arg_mappers && arg_mappers[i]) {
-      // Instead of executing the regular path of target_data_update, call the
-      // targetDataMapper variant which will call target_data_update again
-      // with new arguments.
-      DP("Calling targetDataMapper for the %dth argument\n", i);
-
-      int rc =
-          targetDataMapper(Device, args_base[i], args[i], arg_sizes[i],
-                           arg_types[i], arg_mappers[i], target_data_update);
+static int targetDataContiguous(DeviceTy &Device, void *ArgsBase,
+                                void *HstPtrBegin, int64_t ArgSize,
+                                int64_t ArgType) {
+  bool IsLast, IsHostPtr;
+  void *TgtPtrBegin = Device.getTgtPtrBegin(HstPtrBegin, ArgSize, IsLast, false,
+                                            IsHostPtr, /*MustContain=*/true);
+  if (!TgtPtrBegin) {
+    DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
+    if (ArgType & OMP_TGT_MAPTYPE_PRESENT) {
+      MESSAGE("device mapping required by 'present' motion modifier does not "
+              "exist for host address " DPxMOD " (%" PRId64 " bytes)",
+              DPxPTR(HstPtrBegin), ArgSize);
+      return OFFLOAD_FAIL;
+    }
+    return OFFLOAD_SUCCESS;
+  }
 
-      if (rc != OFFLOAD_SUCCESS) {
-        REPORT(
-            "Call to target_data_update via targetDataMapper for custom mapper"
-            " failed.\n");
-        return OFFLOAD_FAIL;
-      }
+  if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
+      TgtPtrBegin == HstPtrBegin) {
+    DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
+       DPxPTR(HstPtrBegin));
+    return OFFLOAD_SUCCESS;
+  }
 
-      // Skip the rest of this function, continue to the next argument.
-      continue;
+  if (ArgType & OMP_TGT_MAPTYPE_FROM) {
+    DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
+       ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
+    int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, nullptr);
+    if (Ret != OFFLOAD_SUCCESS) {
+      REPORT("Copying data from device failed.\n");
+      return OFFLOAD_FAIL;
     }
 
-    void *HstPtrBegin = args[i];
-    int64_t MapSize = arg_sizes[i];
-    bool IsLast, IsHostPtr;
-    void *TgtPtrBegin = Device.getTgtPtrBegin(
-        HstPtrBegin, MapSize, IsLast, false, IsHostPtr, /*MustContain=*/true);
-    if (!TgtPtrBegin) {
-      DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin));
-      if (arg_types[i] & OMP_TGT_MAPTYPE_PRESENT) {
-        MESSAGE("device mapping required by 'present' motion modifier does not "
-                "exist for host address " DPxMOD " (%" PRId64 " bytes)",
-                DPxPTR(HstPtrBegin), MapSize);
-        return OFFLOAD_FAIL;
-      }
-      continue;
+    uintptr_t LB = (uintptr_t)HstPtrBegin;
+    uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
+    Device.ShadowMtx.lock();
+    for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
+         IT != Device.ShadowPtrMap.end(); ++IT) {
+      void **ShadowHstPtrAddr = (void **)IT->first;
+      if ((uintptr_t)ShadowHstPtrAddr < LB)
+        continue;
+      if ((uintptr_t)ShadowHstPtrAddr >= UB)
+        break;
+      DP("Restoring original host pointer value " DPxMOD
+         " for host pointer " DPxMOD "\n",
+         DPxPTR(IT->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr));
+      *ShadowHstPtrAddr = IT->second.HstPtrVal;
     }
+    Device.ShadowMtx.unlock();
+  }
 
-    if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
-        TgtPtrBegin == HstPtrBegin) {
-      DP("hst data:" DPxMOD " unified and shared, becomes a noop\n",
-         DPxPTR(HstPtrBegin));
-      continue;
+  if (ArgType & OMP_TGT_MAPTYPE_TO) {
+    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
+       ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
+    int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, nullptr);
+    if (Ret != OFFLOAD_SUCCESS) {
+      REPORT("Copying data to device failed.\n");
+      return OFFLOAD_FAIL;
     }
 
-    if (arg_types[i] & OMP_TGT_MAPTYPE_FROM) {
-      DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
-          arg_sizes[i], DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
-      int rt = Device.retrieveData(HstPtrBegin, TgtPtrBegin, MapSize, nullptr);
-      if (rt != OFFLOAD_SUCCESS) {
-        REPORT("Copying data from device failed.\n");
+    uintptr_t LB = (uintptr_t)HstPtrBegin;
+    uintptr_t UB = (uintptr_t)HstPtrBegin + ArgSize;
+    Device.ShadowMtx.lock();
+    for (ShadowPtrListTy::iterator IT = Device.ShadowPtrMap.begin();
+         IT != Device.ShadowPtrMap.end(); ++IT) {
+      void **ShadowHstPtrAddr = (void **)IT->first;
+      if ((uintptr_t)ShadowHstPtrAddr < LB)
+        continue;
+      if ((uintptr_t)ShadowHstPtrAddr >= UB)
+        break;
+      DP("Restoring original target pointer value " DPxMOD " for target "
+         "pointer " DPxMOD "\n",
+         DPxPTR(IT->second.TgtPtrVal), DPxPTR(IT->second.TgtPtrAddr));
+      Ret = Device.submitData(IT->second.TgtPtrAddr, &IT->second.TgtPtrVal,
+                              sizeof(void *), nullptr);
+      if (Ret != OFFLOAD_SUCCESS) {
+        REPORT("Copying data to device failed.\n");
+        Device.ShadowMtx.unlock();
         return OFFLOAD_FAIL;
       }
+    }
+    Device.ShadowMtx.unlock();
+  }
+  return OFFLOAD_SUCCESS;
+}
 
-      uintptr_t lb = (uintptr_t) HstPtrBegin;
-      uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
-      Device.ShadowMtx.lock();
-      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
-          it != Device.ShadowPtrMap.end(); ++it) {
-        void **ShadowHstPtrAddr = (void**) it->first;
-        if ((uintptr_t) ShadowHstPtrAddr < lb)
-          continue;
-        if ((uintptr_t) ShadowHstPtrAddr >= ub)
-          break;
-        DP("Restoring original host pointer value " DPxMOD " for host pointer "
-            DPxMOD "\n", DPxPTR(it->second.HstPtrVal),
-            DPxPTR(ShadowHstPtrAddr));
-        *ShadowHstPtrAddr = it->second.HstPtrVal;
+static int targetDataNonContiguous(DeviceTy &Device, void *ArgsBase,
+                                   __tgt_target_non_contig *NonContig,
+                                   uint64_t Size, int64_t ArgType,
+                                   int CurrentDim, int DimSize,
+                                   uint64_t Offset) {
+  int Ret = OFFLOAD_SUCCESS;
+  if (CurrentDim < DimSize) {
+    for (unsigned int I = 0; I < NonContig[CurrentDim].Count; ++I) {
+      uint64_t CurOffset =
+          (NonContig[CurrentDim].Offset + I) * NonContig[CurrentDim].Stride;
+      // we only need to transfer the first element for the last dimension
+      // since we've already got a contiguous piece.
+      if (CurrentDim != DimSize - 1 || I == 0) {
+        Ret = targetDataNonContiguous(Device, ArgsBase, NonContig, Size,
+                                      ArgType, CurrentDim + 1, DimSize,
+                                      Offset + CurOffset);
+        // Stop the whole process if any contiguous piece returns anything
+        // other than OFFLOAD_SUCCESS.
+        if (Ret != OFFLOAD_SUCCESS)
+          return Ret;
       }
-      Device.ShadowMtx.unlock();
     }
+  } else {
+    char *Ptr = (char *)ArgsBase + Offset;
+    DP("Transfer of non-contiguous : host ptr %lx offset %ld len %ld\n",
+       (uint64_t)Ptr, Offset, Size);
+    Ret = targetDataContiguous(Device, ArgsBase, Ptr, Size, ArgType);
+  }
+  return Ret;
+}
 
-    if (arg_types[i] & OMP_TGT_MAPTYPE_TO) {
-      DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n",
-          arg_sizes[i], DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin));
-      int rt = Device.submitData(TgtPtrBegin, HstPtrBegin, MapSize, nullptr);
-      if (rt != OFFLOAD_SUCCESS) {
-        REPORT("Copying data to device failed.\n");
+static int getNonContigMergedDimension(__tgt_target_non_contig *NonContig,
+                                       int32_t DimSize) {
+  int RemovedDim = 0;
+  for (int I = DimSize - 1; I > 0; --I) {
+    if (NonContig[I].Count * NonContig[I].Stride == NonContig[I - 1].Stride)
+      RemovedDim++;
+  }
+  return RemovedDim;
+}
+
+/// Internal function to pass data to/from the target.
+// async_info_ptr is currently unused, added here so targetDataUpdate has the
+// same signature as targetDataBegin and targetDataEnd.
+int targetDataUpdate(DeviceTy &Device, int32_t ArgNum, void **ArgsBase,
+                     void **Args, int64_t *ArgSizes, int64_t *ArgTypes,
+                     map_var_info_t *ArgNames, void **ArgMappers,
+                     __tgt_async_info *AsyncInfoPtr) {
+  // process each input.
+  for (int32_t I = 0; I < ArgNum; ++I) {
+    if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) ||
+        (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
+      continue;
+
+    if (ArgMappers && ArgMappers[I]) {
+      // Instead of executing the regular path of targetDataUpdate, call the
+      // targetDataMapper variant which will call targetDataUpdate again
+      // with new arguments.
+      DP("Calling targetDataMapper for the %dth argument\n", I);
+
+      int Ret = targetDataMapper(Device, ArgsBase[I], Args[I], ArgSizes[I],
+                                 ArgTypes[I], ArgMappers[I], targetDataUpdate);
+
+      if (Ret != OFFLOAD_SUCCESS) {
+        REPORT("Call to targetDataUpdate via targetDataMapper for custom mapper"
+               " failed.\n");
         return OFFLOAD_FAIL;
       }
 
-      uintptr_t lb = (uintptr_t) HstPtrBegin;
-      uintptr_t ub = (uintptr_t) HstPtrBegin + MapSize;
-      Device.ShadowMtx.lock();
-      for (ShadowPtrListTy::iterator it = Device.ShadowPtrMap.begin();
-          it != Device.ShadowPtrMap.end(); ++it) {
-        void **ShadowHstPtrAddr = (void **)it->first;
-        if ((uintptr_t)ShadowHstPtrAddr < lb)
-          continue;
-        if ((uintptr_t)ShadowHstPtrAddr >= ub)
-          break;
-        DP("Restoring original target pointer value " DPxMOD " for target "
-           "pointer " DPxMOD "\n",
-           DPxPTR(it->second.TgtPtrVal), DPxPTR(it->second.TgtPtrAddr));
-        rt = Device.submitData(it->second.TgtPtrAddr, &it->second.TgtPtrVal,
-                               sizeof(void *), nullptr);
-        if (rt != OFFLOAD_SUCCESS) {
-          REPORT("Copying data to device failed.\n");
-          Device.ShadowMtx.unlock();
-          return OFFLOAD_FAIL;
-        }
-      }
-      Device.ShadowMtx.unlock();
+      // Skip the rest of this function, continue to the next argument.
+      continue;
     }
+
+    int Ret = OFFLOAD_SUCCESS;
+
+    if (ArgTypes[I] & OMP_TGT_MAPTYPE_NON_CONTIG) {
+      __tgt_target_non_contig *NonContig = (__tgt_target_non_contig *)Args[I];
+      int32_t DimSize = ArgSizes[I];
+      uint64_t Size =
+          NonContig[DimSize - 1].Count * NonContig[DimSize - 1].Stride;
+      int32_t MergedDim = getNonContigMergedDimension(NonContig, DimSize);
+      Ret = targetDataNonContiguous(
+          Device, ArgsBase[I], NonContig, Size, ArgTypes[I],
+          /*current_dim=*/0, DimSize - MergedDim, /*offset=*/0);
+    } else {
+      Ret = targetDataContiguous(Device, ArgsBase[I], Args[I], ArgSizes[I],
+                                 ArgTypes[I]);
+    }
+    if (Ret == OFFLOAD_FAIL)
+      return OFFLOAD_FAIL;
   }
   return OFFLOAD_SUCCESS;
 }

diff  --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 251aa0188db9..640226a57c30 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -28,11 +28,10 @@ extern int targetDataEnd(DeviceTy &Device, int32_t ArgNum, void **ArgBases,
                          map_var_info_t *arg_names, void **ArgMappers,
                          __tgt_async_info *AsyncInfo);
 
-extern int target_data_update(DeviceTy &Device, int32_t arg_num,
-                              void **args_base, void **args, int64_t *arg_sizes,
-                              int64_t *arg_types, map_var_info_t *arg_names,
-                              void **arg_mappers,
-                              __tgt_async_info *async_info_ptr = nullptr);
+extern int targetDataUpdate(DeviceTy &Device, int32_t arg_num, void **args_base,
+                            void **args, int64_t *arg_sizes, int64_t *arg_types,
+                            map_var_info_t *arg_names, void **arg_mappers,
+                            __tgt_async_info *async_info_ptr = nullptr);
 
 extern int target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
                   void **ArgBases, void **Args, int64_t *ArgSizes,
@@ -68,7 +67,7 @@ struct MapperComponentsTy {
 typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t);
 
 // Function pointer type for target_data_* functions (targetDataBegin,
-// targetDataEnd and target_data_update).
+// targetDataEnd and targetDataUpdate).
 typedef int (*TargetDataFuncPtrTy)(DeviceTy &, int32_t, void **, void **,
                                    int64_t *, int64_t *, map_var_info_t *,
                                    void **, __tgt_async_info *);

diff  --git a/openmp/libomptarget/test/offloading/non_contiguous_update.cpp b/openmp/libomptarget/test/offloading/non_contiguous_update.cpp
new file mode 100644
index 000000000000..e2f1c4569fe7
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/non_contiguous_update.cpp
@@ -0,0 +1,101 @@
+// RUN: %libomptarget-compile-aarch64-unknown-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-aarch64-unknown-linux-gnu 2>&1 | %fcheck-aarch64-unknown-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-powerpc64-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64-ibm-linux-gnu 2>&1 | %fcheck-powerpc64-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-powerpc64le-ibm-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-powerpc64le-ibm-linux-gnu 2>&1 | %fcheck-powerpc64le-ibm-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-x86_64-pc-linux-gnu && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-x86_64-pc-linux-gnu 2>&1 | %fcheck-x86_64-pc-linux-gnu -allow-empty -check-prefix=DEBUG
+// RUN: %libomptarget-compile-nvptx64-nvidia-cuda && env LIBOMPTARGET_DEBUG=1 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=DEBUG
+// REQUIRES: libomptarget-debug
+
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+
+// Data structure definitions copied from OpenMP RTL.
+struct __tgt_target_non_contig {
+  int64_t offset;
+  int64_t width;
+  int64_t stride;
+};
+
+enum tgt_map_type {
+  OMP_TGT_MAPTYPE_NON_CONTIG      = 0x100000000000
+};
+
+// OpenMP RTL interfaces
+#ifdef __cplusplus
+extern "C" {
+#endif
+void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+                              void **args_base, void **args, int64_t *arg_sizes,
+                              int64_t *arg_types);
+#ifdef __cplusplus
+}
+#endif
+
+int main() {
+  // case 1
+  // int arr[3][4][5][6];
+  // #pragma omp target update to(arr[0:2][1:3][1:2][:])
+  // set up descriptor
+  __tgt_target_non_contig non_contig[5] = {
+      {0, 2, 480}, {1, 3, 120}, {1, 2, 24}, {0, 6, 4}, {0, 1, 4}};
+  int64_t size = 4, type = OMP_TGT_MAPTYPE_NON_CONTIG;
+
+  void *base;
+  void *begin = &non_contig;
+  int64_t *sizes = &size;
+  int64_t *types = &type;
+
+  // The below diagram is the visualization of the non-contiguous transfer after
+  // optimization. Note that each element represent the innermost dimension
+  // (unit size = 24) since the stride * count of last dimension is equal to the
+  // stride of second last dimension.
+  //
+  // OOOOO OOOOO OOOOO
+  // OXXOO OXXOO OOOOO
+  // OXXOO OXXOO OOOOO
+  // OXXOO OXXOO OOOOO
+  __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base, &begin,
+                           sizes, types);
+  // DEBUG: offset 144
+  // DEBUG: offset 264
+  // DEBUG: offset 384
+  // DEBUG: offset 624
+  // DEBUG: offset 744
+  // DEBUG: offset 864
+
+
+  // case 2
+  // double darr[3][4][5];
+  // #pragma omp target update to(darr[0:2:2][2:2][:2:2])
+  // set up descriptor
+  __tgt_target_non_contig non_contig_2[4] = {
+      {0, 2, 320}, {2, 2, 40}, {0, 2, 16}, {0, 1, 8}};
+  int64_t size_2 = 4, type_2 = OMP_TGT_MAPTYPE_NON_CONTIG;
+
+  void *base_2;
+  void *begin_2 = &non_contig_2;
+  int64_t *sizes_2 = &size_2;
+  int64_t *types_2 = &type_2;
+
+  // The below diagram is the visualization of the non-contiguous transfer after
+  // optimization. Note that each element represent the innermost dimension
+  // (unit size = 24) since the stride * count of last dimension is equal to the
+  // stride of second last dimension.
+  //
+  // OOOOO OOOOO OOOOO
+  // OOOOO OOOOO OOOOO
+  // XOXOO OOOOO XOXOO
+  // XOXOO OOOOO XOXOO
+  __tgt_target_data_update(/*device_id*/ -1, /*arg_num*/ 1, &base_2, &begin_2,
+                           sizes_2, types_2);
+  // DEBUG: offset 80
+  // DEBUG: offset 96
+  // DEBUG: offset 120
+  // DEBUG: offset 136
+  // DEBUG: offset 400
+  // DEBUG: offset 416
+  // DEBUG: offset 440
+  // DEBUG: offset 456
+  return 0;
+}
+


        


More information about the Openmp-commits mailing list