[Openmp-commits] [openmp] 140ab57 - [OpenMP][Offload] Declare mapper runtime implementation

George Rokos via Openmp-commits openmp-commits at lists.llvm.org
Wed Jul 15 19:11:08 PDT 2020


Author: George Rokos
Date: 2020-07-15T18:11:43-07:00
New Revision: 140ab574a1c81e0878b3238520302509457242d0

URL: https://github.com/llvm/llvm-project/commit/140ab574a1c81e0878b3238520302509457242d0
DIFF: https://github.com/llvm/llvm-project/commit/140ab574a1c81e0878b3238520302509457242d0.diff

LOG: [OpenMP][Offload] Declare mapper runtime implementation

Libomptarget patch adding runtime support for "declare mapper".
Patch co-developed by Lingda Li and George Rokos.

Differential revision: https://reviews.llvm.org/D68100

Added: 
    openmp/libomptarget/test/mapping/declare_mapper_target.cpp
    openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp
    openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
    openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp

Modified: 
    openmp/libomptarget/include/omptarget.h
    openmp/libomptarget/src/exports
    openmp/libomptarget/src/interface.cpp
    openmp/libomptarget/src/omptarget.cpp
    openmp/libomptarget/src/private.h
    openmp/libomptarget/src/rtl.cpp
    openmp/libomptarget/test/mapping/declare_mapper_api.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index de3afc36c7f2..95d7158969f3 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -160,6 +160,14 @@ void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
                                     int32_t depNum, void *depList,
                                     int32_t noAliasDepNum,
                                     void *noAliasDepList);
+void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
+                                    void **args_base, void **args,
+                                    int64_t *arg_sizes, int64_t *arg_types,
+                                    void **arg_mappers);
+void __tgt_target_data_begin_nowait_mapper(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
 
 // passes data from the target, release target memory and destroys the
 // host-target mapping (top entry from the stack of data maps) created by
@@ -171,6 +179,16 @@ void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
                                   int64_t *arg_sizes, int64_t *arg_types,
                                   int32_t depNum, void *depList,
                                   int32_t noAliasDepNum, void *noAliasDepList);
+void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
+                                  void **args_base, void **args,
+                                  int64_t *arg_sizes, int64_t *arg_types,
+                                  void **arg_mappers);
+void __tgt_target_data_end_nowait_mapper(int64_t device_id, int32_t arg_num,
+                                         void **args_base, void **args,
+                                         int64_t *arg_sizes, int64_t *arg_types,
+                                         void **arg_mappers, int32_t depNum,
+                                         void *depList, int32_t noAliasDepNum,
+                                         void *noAliasDepList);
 
 /// passes data to/from the target
 void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
@@ -182,6 +200,14 @@ void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
                                      int32_t depNum, void *depList,
                                      int32_t noAliasDepNum,
                                      void *noAliasDepList);
+void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
+                                     void **args_base, void **args,
+                                     int64_t *arg_sizes, int64_t *arg_types,
+                                     void **arg_mappers);
+void __tgt_target_data_update_nowait_mapper(
+    int64_t device_id, int32_t arg_num, void **args_base, void **args,
+    int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList);
 
 // Performs the same actions as data_begin in case arg_num is non-zero
 // and initiates run of offloaded region on target platform; if arg_num
@@ -196,6 +222,15 @@ int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num,
                         void **args_base, void **args, int64_t *arg_sizes,
                         int64_t *arg_types, int32_t depNum, void *depList,
                         int32_t noAliasDepNum, void *noAliasDepList);
+int __tgt_target_mapper(int64_t device_id, void *host_ptr, int32_t arg_num,
+                        void **args_base, void **args, int64_t *arg_sizes,
+                        int64_t *arg_types, void **arg_mappers);
+int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr,
+                               int32_t arg_num, void **args_base, void **args,
+                               int64_t *arg_sizes, int64_t *arg_types,
+                               void **arg_mappers, int32_t depNum,
+                               void *depList, int32_t noAliasDepNum,
+                               void *noAliasDepList);
 
 int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
                        void **args_base, void **args, int64_t *arg_sizes,
@@ -207,6 +242,17 @@ int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
                               int32_t num_teams, int32_t thread_limit,
                               int32_t depNum, void *depList,
                               int32_t noAliasDepNum, void *noAliasDepList);
+int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr,
+                              int32_t arg_num, void **args_base, void **args,
+                              int64_t *arg_sizes, int64_t *arg_types,
+                              void **arg_mappers, int32_t num_teams,
+                              int32_t thread_limit);
+int __tgt_target_teams_nowait_mapper(
+    int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base,
+    void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers,
+    int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList);
+
 void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
 
 #ifdef __cplusplus

diff  --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index e1fee4bbefce..5e09a088533d 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -13,6 +13,16 @@ VERS1.0 {
     __tgt_target_data_update_nowait;
     __tgt_target_nowait;
     __tgt_target_teams_nowait;
+    __tgt_target_data_begin_mapper;
+    __tgt_target_data_end_mapper;
+    __tgt_target_data_update_mapper;
+    __tgt_target_mapper;
+    __tgt_target_teams_mapper;
+    __tgt_target_data_begin_nowait_mapper;
+    __tgt_target_data_end_nowait_mapper;
+    __tgt_target_data_update_nowait_mapper;
+    __tgt_target_nowait_mapper;
+    __tgt_target_teams_nowait_mapper;
     __tgt_mapper_num_components;
     __tgt_push_mapper_component;
     omp_get_num_devices;

diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 924bc490b110..751641183437 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -91,6 +91,24 @@ EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
 /// and passes the data to the device.
 EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+      arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+      arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    void **arg_mappers) {
   if (IsOffloadDisabled()) return;
 
   DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
@@ -119,19 +137,19 @@ EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
 #endif
 
   int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
-                             arg_types, nullptr);
+      arg_types, arg_mappers, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t depNum, void *depList, int32_t noAliasDepNum,
-    void *noAliasDepList) {
+EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
-                          arg_types);
+  __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+      arg_sizes, arg_types, arg_mappers);
 }
 
 /// passes data from the target, releases target memory and destroys
@@ -139,6 +157,24 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
 /// created by the last __tgt_target_data_begin.
 EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+      arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+      arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    void **arg_mappers) {
   if (IsOffloadDisabled()) return;
   DP("Entering data end region with %d mappings\n", arg_num);
 
@@ -172,23 +208,41 @@ EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
 #endif
 
   int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes,
-                           arg_types, nullptr);
+      arg_types, arg_mappers, nullptr);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
+EXTERN void __tgt_target_data_end_nowait_mapper(int64_t device_id,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+      arg_types, arg_mappers);
+}
+
+EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+      arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
     int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
-                        arg_types);
+  __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+      arg_sizes, arg_types, nullptr);
 }
 
-EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    void **arg_mappers) {
   if (IsOffloadDisabled()) return;
   DP("Entering data update with %d mappings\n", arg_num);
 
@@ -205,23 +259,41 @@ EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
 
   DeviceTy& Device = Devices[device_id];
   int rc = target_data_update(Device, arg_num, args_base,
-      args, arg_sizes, arg_types);
+      args, arg_sizes, arg_types, arg_mappers);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 }
 
-EXTERN void __tgt_target_data_update_nowait(
-    int64_t device_id, int32_t arg_num, void **args_base, void **args,
-    int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
+EXTERN void __tgt_target_data_update_nowait_mapper(int64_t device_id,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
     int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
-                           arg_types);
+  __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+      arg_sizes, arg_types, arg_mappers);
 }
 
 EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+  return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+      arg_sizes, arg_types, nullptr);
+}
+
+EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+      arg_sizes, arg_types, nullptr);
+}
+
+EXTERN int __tgt_target_mapper(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers) {
   if (IsOffloadDisabled()) return OFFLOAD_FAIL;
   DP("Entering target region with entry point " DPxMOD " and device Id %"
       PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -245,25 +317,43 @@ EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
 #endif
 
   int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
-      arg_types, 0, 0, false /*team*/);
+      arg_types, arg_mappers, 0, 0, false /*team*/);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
   return rc;
 }
 
-EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
+EXTERN int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr,
     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
-    int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
-    void *noAliasDepList) {
+    int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+    int32_t noAliasDepNum, void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
-                      arg_types);
+  return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+      arg_sizes, arg_types, arg_mappers);
 }
 
 EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
     int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
+  return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+      args, arg_sizes, arg_types, nullptr, team_num, thread_limit);
+}
+
+EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
+    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
+  if (depNum + noAliasDepNum > 0)
+    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+  return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+      args, arg_sizes, arg_types, nullptr, team_num, thread_limit);
+}
+
+EXTERN int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr,
+    int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+    int64_t *arg_types, void **arg_mappers, int32_t team_num, int32_t thread_limit) {
   if (IsOffloadDisabled()) return OFFLOAD_FAIL;
   DP("Entering target region with entry point " DPxMOD " and device Id %"
       PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -287,21 +377,22 @@ EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
 #endif
 
   int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
-      arg_types, team_num, thread_limit, true /*team*/);
+      arg_types, arg_mappers, team_num, thread_limit, true /*team*/);
   HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
 
   return rc;
 }
 
-EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
+EXTERN int __tgt_target_teams_nowait_mapper(int64_t device_id, void *host_ptr,
     int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
-    int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
-    void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
+    int64_t *arg_types, void **arg_mappers, int32_t team_num,
+    int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
+    void *noAliasDepList) {
   if (depNum + noAliasDepNum > 0)
     __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
-  return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
-                            arg_sizes, arg_types, team_num, thread_limit);
+  return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+      args, arg_sizes, arg_types, arg_mappers, team_num, thread_limit);
 }
 
 // Get the current number of components for a user-defined mapper.

diff  --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index cce9dbd2fe15..6b4549be6ae1 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -166,8 +166,8 @@ static int InitLibrary(DeviceTy& Device) {
         DP("Has pending ctors... call now\n");
         for (auto &entry : lib.second.PendingCtors) {
           void *ctor = entry;
-          int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
-                          NULL, 1, 1, true /*team*/);
+          int rc = target(device_id, ctor, 0, NULL, NULL, NULL, NULL, NULL, 1,
+              1, true /*team*/);
           if (rc != OFFLOAD_SUCCESS) {
             DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
             Device.PendingGlobalsMtx.unlock();
@@ -214,10 +214,46 @@ static int32_t member_of(int64_t type) {
   return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
 }
 
+/// Call the user-defined mapper function followed by the appropriate
+// target_data_* function (target_data_{begin,end,update}).
+int target_data_mapper(DeviceTy &Device, void *arg_base,
+    void *arg, int64_t arg_size, int64_t arg_type, void *arg_mapper,
+    TargetDataFuncPtrTy target_data_function) {
+  DP("Calling the mapper function " DPxMOD "\n", DPxPTR(arg_mapper));
+
+  // The mapper function fills up Components.
+  MapperComponentsTy MapperComponents;
+  MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mapper);
+  (*MapperFuncPtr)((void *)&MapperComponents, arg_base, arg, arg_size,
+      arg_type);
+
+  // Construct new arrays for args_base, args, arg_sizes and arg_types
+  // using the information in MapperComponents and call the corresponding
+  // target_data_* function using these new arrays.
+  std::vector<void *> mapper_args_base;
+  std::vector<void *> mapper_args;
+  std::vector<int64_t> mapper_arg_sizes;
+  std::vector<int64_t> mapper_arg_types;
+
+  for (auto& C : MapperComponents.Components) {
+    mapper_args_base.push_back(C.Base);
+    mapper_args.push_back(C.Begin);
+    mapper_arg_sizes.push_back(C.Size);
+    mapper_arg_types.push_back(C.Type);
+  }
+
+  int rc = target_data_function(Device, MapperComponents.Components.size(),
+      mapper_args_base.data(), mapper_args.data(), mapper_arg_sizes.data(),
+      mapper_arg_types.data(), /*arg_mappers*/ nullptr,
+      /*__tgt_async_info*/ nullptr);
+
+  return rc;
+}
+
 /// Internal function to do the mapping and transfer the data to the device
 int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
                       void **args, int64_t *arg_sizes, int64_t *arg_types,
-                      __tgt_async_info *async_info_ptr) {
+                      void **arg_mappers, __tgt_async_info *async_info_ptr) {
   // process each input.
   for (int32_t i = 0; i < arg_num; ++i) {
     // Ignore private variables and arrays - there is no mapping for them.
@@ -225,6 +261,25 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
         (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
       continue;
 
+    if (arg_mappers && arg_mappers[i]) {
+      // Instead of executing the regular path of target_data_begin, call the
+      // target_data_mapper variant which will call target_data_begin again
+      // with new arguments.
+      DP("Calling target_data_mapper for the %dth argument\n", i);
+
+      int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i],
+          arg_types[i], arg_mappers[i], target_data_begin);
+
+      if (rc != OFFLOAD_SUCCESS) {
+        DP("Call to target_data_begin via target_data_mapper for custom mapper"
+            " failed.\n");
+        return OFFLOAD_FAIL;
+      }
+
+      // Skip the rest of this function, continue to the next argument.
+      continue;
+    }
+
     void *HstPtrBegin = args[i];
     void *HstPtrBase = args_base[i];
     int64_t data_size = arg_sizes[i];
@@ -353,7 +408,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
 /// Internal function to undo the mapping and retrieve the data from the device.
 int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
                     void **args, int64_t *arg_sizes, int64_t *arg_types,
-                    __tgt_async_info *async_info_ptr) {
+                    void **arg_mappers, __tgt_async_info *async_info_ptr) {
   // process each input.
   for (int32_t i = arg_num - 1; i >= 0; --i) {
     // Ignore private variables and arrays - there is no mapping for them.
@@ -362,6 +417,25 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
         (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
       continue;
 
+    if (arg_mappers && arg_mappers[i]) {
+      // Instead of executing the regular path of target_data_end, call the
+      // target_data_mapper variant which will call target_data_end again
+      // with new arguments.
+      DP("Calling target_data_mapper for the %dth argument\n", i);
+
+      int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i],
+          arg_types[i], arg_mappers[i], target_data_end);
+
+      if (rc != OFFLOAD_SUCCESS) {
+        DP("Call to target_data_end via target_data_mapper for custom mapper"
+            " failed.\n");
+        return OFFLOAD_FAIL;
+      }
+
+      // Skip the rest of this function, continue to the next argument.
+      continue;
+    }
+
     void *HstPtrBegin = args[i];
     int64_t data_size = arg_sizes[i];
     // Adjust for proper alignment if this is a combined entry (for structs).
@@ -486,14 +560,36 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
 }
 
 /// Internal function to pass data to/from the target.
+// async_info_ptr is currently unused, added here so target_data_update has the
+// same signature as target_data_begin and target_data_end.
 int target_data_update(DeviceTy &Device, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+    void **arg_mappers, __tgt_async_info *async_info_ptr) {
   // process each input.
   for (int32_t i = 0; i < arg_num; ++i) {
     if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
         (arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
       continue;
 
+    if (arg_mappers && arg_mappers[i]) {
+      // Instead of executing the regular path of target_data_update, call the
+      // target_data_mapper variant which will call target_data_update again
+      // with new arguments.
+      DP("Calling target_data_mapper for the %dth argument\n", i);
+
+      int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i],
+          arg_types[i], arg_mappers[i], target_data_update);
+
+      if (rc != OFFLOAD_SUCCESS) {
+        DP("Call to target_data_update via target_data_mapper for custom mapper"
+            " failed.\n");
+        return OFFLOAD_FAIL;
+      }
+
+      // Skip the rest of this function, continue to the next argument.
+      continue;
+    }
+
     void *HstPtrBegin = args[i];
     int64_t MapSize = arg_sizes[i];
     bool IsLast, IsHostPtr;
@@ -589,7 +685,8 @@ static bool isLambdaMapping(int64_t Mapping) {
 /// integer 
diff erent from zero otherwise.
 int target(int64_t device_id, void *host_ptr, int32_t arg_num,
     void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
+    void **arg_mappers, int32_t team_num, int32_t thread_limit,
+    int IsTeamConstruct) {
   DeviceTy &Device = Devices[device_id];
 
   // Find the table information in the map or look it up in the translation
@@ -647,7 +744,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
 
   // Move data to device.
   int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
-                             arg_types, &AsyncInfo);
+                             arg_types, arg_mappers, &AsyncInfo);
   if (rc != OFFLOAD_SUCCESS) {
     DP("Call to target_data_begin failed, abort target.\n");
     return OFFLOAD_FAIL;
@@ -811,7 +908,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
 
   // Move data from device.
   int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
-                           arg_types, &AsyncInfo);
+                           arg_types, arg_mappers, &AsyncInfo);
   if (rt != OFFLOAD_SUCCESS) {
     DP("Call to target_data_end failed, abort targe.\n");
     return OFFLOAD_FAIL;

diff  --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index dbc5bafbab5b..cb20d8cdc790 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -19,19 +19,24 @@
 
 extern int target_data_begin(DeviceTy &Device, int32_t arg_num,
                              void **args_base, void **args, int64_t *arg_sizes,
-                             int64_t *arg_types,
+                             int64_t *arg_types, void **arg_mappers,
                              __tgt_async_info *async_info_ptr);
 
 extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
                            void **args, int64_t *arg_sizes, int64_t *arg_types,
+                           void **arg_mappers,
                            __tgt_async_info *async_info_ptr);
 
 extern int target_data_update(DeviceTy &Device, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
+                              void **args_base, void **args,
+                              int64_t *arg_sizes, int64_t *arg_types,
+                              void **arg_mappers,
+                              __tgt_async_info *async_info_ptr = nullptr);
 
 extern int target(int64_t device_id, void *host_ptr, int32_t arg_num,
-    void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
-    int32_t team_num, int32_t thread_limit, int IsTeamConstruct);
+                  void **args_base, void **args, int64_t *arg_sizes,
+                  int64_t *arg_types, void **arg_mappers, int32_t team_num,
+                  int32_t thread_limit, int IsTeamConstruct);
 
 extern int CheckDeviceAndCtors(int64_t device_id);
 
@@ -60,8 +65,20 @@ struct MapComponentInfoTy {
 // implementation here.
 struct MapperComponentsTy {
   std::vector<MapComponentInfoTy> Components;
+  int32_t size() { return Components.size(); }
 };
 
+// The mapper function pointer type. It follows the signature below:
+// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
+//                                           void *base, void *begin,
+//                                           size_t size, int64_t type);
+typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t);
+
+// Function pointer type for target_data_* functions (target_data_begin,
+// target_data_end and target_data_update).
+typedef int (*TargetDataFuncPtrTy)(DeviceTy &, int32_t, void **, void **,
+    int64_t *, int64_t *, void **, __tgt_async_info *);
+
 ////////////////////////////////////////////////////////////////////////////////
 // implementation for fatal messages
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 7c344ca6ee66..4bab4c6da063 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -387,8 +387,8 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
         Device.PendingGlobalsMtx.lock();
         if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
           for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
-            int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1,
-                1, true /*team*/);
+            int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL,
+                NULL, 1, 1, true /*team*/);
             if (rc != OFFLOAD_SUCCESS) {
               DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
             }

diff  --git a/openmp/libomptarget/test/mapping/declare_mapper_api.cpp b/openmp/libomptarget/test/mapping/declare_mapper_api.cpp
index 275b6c3c5702..45bc076d4110 100644
--- a/openmp/libomptarget/test/mapping/declare_mapper_api.cpp
+++ b/openmp/libomptarget/test/mapping/declare_mapper_api.cpp
@@ -6,6 +6,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <vector>
+#include <cinttypes>
 
 // Data structure definitions copied from OpenMP RTL.
 struct MapComponentInfoTy {
@@ -42,6 +43,6 @@ int main(int argc, char *argv[]) {
   __tgt_push_mapper_component((void *)&MC, base, begin, size, type);
   int64_t num = __tgt_mapper_num_components((void *)&MC);
   // CHECK: num=2
-  printf("num=%lld\n", num);
+  printf("num=%" PRId64 "\n", num);
   return 0;
 }

diff  --git a/openmp/libomptarget/test/mapping/declare_mapper_target.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target.cpp
new file mode 100644
index 000000000000..624628528339
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target.cpp
@@ -0,0 +1,37 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target teams distribute parallel for map(mapper(id),tofrom: c)
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  int sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum = 2048
+  printf("Sum = %d\n", sum);
+  return 0;
+}
+

diff  --git a/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp
new file mode 100644
index 000000000000..b457048a2e58
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp
@@ -0,0 +1,40 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target data map(mapper(id),tofrom: c)
+  {
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  }
+  int sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum = 2048
+  printf("Sum = %d\n", sum);
+  return 0;
+}
+

diff  --git a/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
new file mode 100644
index 000000000000..ac915a08ea8e
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
@@ -0,0 +1,39 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target enter data map(mapper(id),to: c)
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  #pragma omp target exit data map(mapper(id),from: c)
+  int sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum = 2048
+  printf("Sum = %d\n", sum);
+  return 0;
+}
+

diff  --git a/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp
new file mode 100644
index 000000000000..689275962f2c
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp
@@ -0,0 +1,61 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+  int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+  C c;
+  int sum = 0;
+  c.a = (int*) malloc(sizeof(int)*NUM);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target enter data map(mapper(id),alloc: c)
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 0;
+  }
+  #pragma omp target update from(mapper(id): c)
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum (after first update from) = 0
+  printf("Sum (after first update from) = %d\n", sum);
+  for (int i = 0; i < NUM; i++) {
+    c.a[i] = 1;
+  }
+  #pragma omp target update to(mapper(id): c)
+  #pragma omp target teams distribute parallel for
+  for (int i = 0; i < NUM; i++) {
+    ++c.a[i];
+  }
+  sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum (after update to) = 1024
+  printf("Sum (after update to) = %d\n", sum);
+  #pragma omp target update from(mapper(id): c)
+  sum = 0;
+  for (int i = 0; i < NUM; i++) {
+    sum += c.a[i];
+  }
+  // CHECK: Sum (after second update from) = 2048
+  printf("Sum (after second update from) = %d\n", sum);
+  #pragma omp target exit data map(mapper(id),delete: c)
+  return 0;
+}
+


        


More information about the Openmp-commits mailing list