[Openmp-commits] [openmp] 140ab57 - [OpenMP][Offload] Declare mapper runtime implementation
George Rokos via Openmp-commits
openmp-commits at lists.llvm.org
Wed Jul 15 19:11:08 PDT 2020
Author: George Rokos
Date: 2020-07-15T18:11:43-07:00
New Revision: 140ab574a1c81e0878b3238520302509457242d0
URL: https://github.com/llvm/llvm-project/commit/140ab574a1c81e0878b3238520302509457242d0
DIFF: https://github.com/llvm/llvm-project/commit/140ab574a1c81e0878b3238520302509457242d0.diff
LOG: [OpenMP][Offload] Declare mapper runtime implementation
Libomptarget patch adding runtime support for "declare mapper".
Patch co-developed by Lingda Li and George Rokos.
Differential revision: https://reviews.llvm.org/D68100
Added:
openmp/libomptarget/test/mapping/declare_mapper_target.cpp
openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp
openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp
Modified:
openmp/libomptarget/include/omptarget.h
openmp/libomptarget/src/exports
openmp/libomptarget/src/interface.cpp
openmp/libomptarget/src/omptarget.cpp
openmp/libomptarget/src/private.h
openmp/libomptarget/src/rtl.cpp
openmp/libomptarget/test/mapping/declare_mapper_api.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/omptarget.h b/openmp/libomptarget/include/omptarget.h
index de3afc36c7f2..95d7158969f3 100644
--- a/openmp/libomptarget/include/omptarget.h
+++ b/openmp/libomptarget/include/omptarget.h
@@ -160,6 +160,14 @@ void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
int32_t depNum, void *depList,
int32_t noAliasDepNum,
void *noAliasDepList);
+void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers);
+void __tgt_target_data_begin_nowait_mapper(
+ int64_t device_id, int32_t arg_num, void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+ void *depList, int32_t noAliasDepNum, void *noAliasDepList);
// passes data from the target, release target memory and destroys the
// host-target mapping (top entry from the stack of data maps) created by
@@ -171,6 +179,16 @@ void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
int64_t *arg_sizes, int64_t *arg_types,
int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList);
+void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers);
+void __tgt_target_data_end_nowait_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers, int32_t depNum,
+ void *depList, int32_t noAliasDepNum,
+ void *noAliasDepList);
/// passes data to/from the target
void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
@@ -182,6 +200,14 @@ void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
int32_t depNum, void *depList,
int32_t noAliasDepNum,
void *noAliasDepList);
+void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers);
+void __tgt_target_data_update_nowait_mapper(
+ int64_t device_id, int32_t arg_num, void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers, int32_t depNum,
+ void *depList, int32_t noAliasDepNum, void *noAliasDepList);
// Performs the same actions as data_begin in case arg_num is non-zero
// and initiates run of offloaded region on target platform; if arg_num
@@ -196,6 +222,15 @@ int __tgt_target_nowait(int64_t device_id, void *host_ptr, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes,
int64_t *arg_types, int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList);
+int __tgt_target_mapper(int64_t device_id, void *host_ptr, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers);
+int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr,
+ int32_t arg_num, void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers, int32_t depNum,
+ void *depList, int32_t noAliasDepNum,
+ void *noAliasDepList);
int __tgt_target_teams(int64_t device_id, void *host_ptr, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes,
@@ -207,6 +242,17 @@ int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
int32_t num_teams, int32_t thread_limit,
int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList);
+int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr,
+ int32_t arg_num, void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers, int32_t num_teams,
+ int32_t thread_limit);
+int __tgt_target_teams_nowait_mapper(
+ int64_t device_id, void *host_ptr, int32_t arg_num, void **args_base,
+ void **args, int64_t *arg_sizes, int64_t *arg_types, void **arg_mappers,
+ int32_t num_teams, int32_t thread_limit, int32_t depNum, void *depList,
+ int32_t noAliasDepNum, void *noAliasDepList);
+
void __kmpc_push_target_tripcount(int64_t device_id, uint64_t loop_tripcount);
#ifdef __cplusplus
diff --git a/openmp/libomptarget/src/exports b/openmp/libomptarget/src/exports
index e1fee4bbefce..5e09a088533d 100644
--- a/openmp/libomptarget/src/exports
+++ b/openmp/libomptarget/src/exports
@@ -13,6 +13,16 @@ VERS1.0 {
__tgt_target_data_update_nowait;
__tgt_target_nowait;
__tgt_target_teams_nowait;
+ __tgt_target_data_begin_mapper;
+ __tgt_target_data_end_mapper;
+ __tgt_target_data_update_mapper;
+ __tgt_target_mapper;
+ __tgt_target_teams_mapper;
+ __tgt_target_data_begin_nowait_mapper;
+ __tgt_target_data_end_nowait_mapper;
+ __tgt_target_data_update_nowait_mapper;
+ __tgt_target_nowait_mapper;
+ __tgt_target_teams_nowait_mapper;
__tgt_mapper_num_components;
__tgt_push_mapper_component;
omp_get_num_devices;
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 924bc490b110..751641183437 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -91,6 +91,24 @@ EXTERN void __tgt_unregister_lib(__tgt_bin_desc *desc) {
/// and passes the data to the device.
EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+ __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+ arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+ int32_t depNum, void *depList, int32_t noAliasDepNum,
+ void *noAliasDepList) {
+ if (depNum + noAliasDepNum > 0)
+ __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+ __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+ arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_begin_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers) {
if (IsOffloadDisabled()) return;
DP("Entering data begin region for device %" PRId64 " with %d mappings\n",
@@ -119,19 +137,19 @@ EXTERN void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
#endif
int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
- arg_types, nullptr);
+ arg_types, arg_mappers, nullptr);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
}
-EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
- void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
- int32_t depNum, void *depList, int32_t noAliasDepNum,
- void *noAliasDepList) {
+EXTERN void __tgt_target_data_begin_nowait_mapper(int64_t device_id,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+ int32_t noAliasDepNum, void *noAliasDepList) {
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
- __tgt_target_data_begin(device_id, arg_num, args_base, args, arg_sizes,
- arg_types);
+ __tgt_target_data_begin_mapper(device_id, arg_num, args_base, args,
+ arg_sizes, arg_types, arg_mappers);
}
/// passes data from the target, releases target memory and destroys
@@ -139,6 +157,24 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
/// created by the last __tgt_target_data_begin.
EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+ __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+ arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+ int32_t depNum, void *depList, int32_t noAliasDepNum,
+ void *noAliasDepList) {
+ if (depNum + noAliasDepNum > 0)
+ __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+ __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+ arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_end_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers) {
if (IsOffloadDisabled()) return;
DP("Entering data end region with %d mappings\n", arg_num);
@@ -172,23 +208,41 @@ EXTERN void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
#endif
int rc = target_data_end(Device, arg_num, args_base, args, arg_sizes,
- arg_types, nullptr);
+ arg_types, arg_mappers, nullptr);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
}
-EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
+EXTERN void __tgt_target_data_end_nowait_mapper(int64_t device_id,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+ int32_t noAliasDepNum, void *noAliasDepList) {
+ if (depNum + noAliasDepNum > 0)
+ __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+ __tgt_target_data_end_mapper(device_id, arg_num, args_base, args, arg_sizes,
+ arg_types, arg_mappers);
+}
+
+EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+ __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+ arg_sizes, arg_types, nullptr);
+}
+
+EXTERN void __tgt_target_data_update_nowait(int64_t device_id, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
int32_t depNum, void *depList, int32_t noAliasDepNum,
void *noAliasDepList) {
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
- __tgt_target_data_end(device_id, arg_num, args_base, args, arg_sizes,
- arg_types);
+ __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+ arg_sizes, arg_types, nullptr);
}
-EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
- void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+EXTERN void __tgt_target_data_update_mapper(int64_t device_id, int32_t arg_num,
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers) {
if (IsOffloadDisabled()) return;
DP("Entering data update with %d mappings\n", arg_num);
@@ -205,23 +259,41 @@ EXTERN void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
DeviceTy& Device = Devices[device_id];
int rc = target_data_update(Device, arg_num, args_base,
- args, arg_sizes, arg_types);
+ args, arg_sizes, arg_types, arg_mappers);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
}
-EXTERN void __tgt_target_data_update_nowait(
- int64_t device_id, int32_t arg_num, void **args_base, void **args,
- int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
+EXTERN void __tgt_target_data_update_nowait_mapper(int64_t device_id,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList) {
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
- __tgt_target_data_update(device_id, arg_num, args_base, args, arg_sizes,
- arg_types);
+ __tgt_target_data_update_mapper(device_id, arg_num, args_base, args,
+ arg_sizes, arg_types, arg_mappers);
}
EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+ return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+ arg_sizes, arg_types, nullptr);
+}
+
+EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
+ void *noAliasDepList) {
+ if (depNum + noAliasDepNum > 0)
+ __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+ return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+ arg_sizes, arg_types, nullptr);
+}
+
+EXTERN int __tgt_target_mapper(int64_t device_id, void *host_ptr,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers) {
if (IsOffloadDisabled()) return OFFLOAD_FAIL;
DP("Entering target region with entry point " DPxMOD " and device Id %"
PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -245,25 +317,43 @@ EXTERN int __tgt_target(int64_t device_id, void *host_ptr, int32_t arg_num,
#endif
int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
- arg_types, 0, 0, false /*team*/);
+ arg_types, arg_mappers, 0, 0, false /*team*/);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
return rc;
}
-EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
+EXTERN int __tgt_target_nowait_mapper(int64_t device_id, void *host_ptr,
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
- int64_t *arg_types, int32_t depNum, void *depList, int32_t noAliasDepNum,
- void *noAliasDepList) {
+ int64_t *arg_types, void **arg_mappers, int32_t depNum, void *depList,
+ int32_t noAliasDepNum, void *noAliasDepList) {
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
- return __tgt_target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
- arg_types);
+ return __tgt_target_mapper(device_id, host_ptr, arg_num, args_base, args,
+ arg_sizes, arg_types, arg_mappers);
}
EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
int64_t *arg_types, int32_t team_num, int32_t thread_limit) {
+ return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+ args, arg_sizes, arg_types, nullptr, team_num, thread_limit);
+}
+
+EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
+ void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
+ if (depNum + noAliasDepNum > 0)
+ __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
+
+ return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+ args, arg_sizes, arg_types, nullptr, team_num, thread_limit);
+}
+
+EXTERN int __tgt_target_teams_mapper(int64_t device_id, void *host_ptr,
+ int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers, int32_t team_num, int32_t thread_limit) {
if (IsOffloadDisabled()) return OFFLOAD_FAIL;
DP("Entering target region with entry point " DPxMOD " and device Id %"
PRId64 "\n", DPxPTR(host_ptr), device_id);
@@ -287,21 +377,22 @@ EXTERN int __tgt_target_teams(int64_t device_id, void *host_ptr,
#endif
int rc = target(device_id, host_ptr, arg_num, args_base, args, arg_sizes,
- arg_types, team_num, thread_limit, true /*team*/);
+ arg_types, arg_mappers, team_num, thread_limit, true /*team*/);
HandleTargetOutcome(rc == OFFLOAD_SUCCESS);
return rc;
}
-EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
+EXTERN int __tgt_target_teams_nowait_mapper(int64_t device_id, void *host_ptr,
int32_t arg_num, void **args_base, void **args, int64_t *arg_sizes,
- int64_t *arg_types, int32_t team_num, int32_t thread_limit, int32_t depNum,
- void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
+ int64_t *arg_types, void **arg_mappers, int32_t team_num,
+ int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
+ void *noAliasDepList) {
if (depNum + noAliasDepNum > 0)
__kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
- return __tgt_target_teams(device_id, host_ptr, arg_num, args_base, args,
- arg_sizes, arg_types, team_num, thread_limit);
+ return __tgt_target_teams_mapper(device_id, host_ptr, arg_num, args_base,
+ args, arg_sizes, arg_types, arg_mappers, team_num, thread_limit);
}
// Get the current number of components for a user-defined mapper.
diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp
index cce9dbd2fe15..6b4549be6ae1 100644
--- a/openmp/libomptarget/src/omptarget.cpp
+++ b/openmp/libomptarget/src/omptarget.cpp
@@ -166,8 +166,8 @@ static int InitLibrary(DeviceTy& Device) {
DP("Has pending ctors... call now\n");
for (auto &entry : lib.second.PendingCtors) {
void *ctor = entry;
- int rc = target(device_id, ctor, 0, NULL, NULL, NULL,
- NULL, 1, 1, true /*team*/);
+ int rc = target(device_id, ctor, 0, NULL, NULL, NULL, NULL, NULL, 1,
+ 1, true /*team*/);
if (rc != OFFLOAD_SUCCESS) {
DP("Running ctor " DPxMOD " failed.\n", DPxPTR(ctor));
Device.PendingGlobalsMtx.unlock();
@@ -214,10 +214,46 @@ static int32_t member_of(int64_t type) {
return ((type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
}
+/// Call the user-defined mapper function followed by the appropriate
+// target_data_* function (target_data_{begin,end,update}).
+int target_data_mapper(DeviceTy &Device, void *arg_base,
+ void *arg, int64_t arg_size, int64_t arg_type, void *arg_mapper,
+ TargetDataFuncPtrTy target_data_function) {
+ DP("Calling the mapper function " DPxMOD "\n", DPxPTR(arg_mapper));
+
+ // The mapper function fills up Components.
+ MapperComponentsTy MapperComponents;
+ MapperFuncPtrTy MapperFuncPtr = (MapperFuncPtrTy)(arg_mapper);
+ (*MapperFuncPtr)((void *)&MapperComponents, arg_base, arg, arg_size,
+ arg_type);
+
+ // Construct new arrays for args_base, args, arg_sizes and arg_types
+ // using the information in MapperComponents and call the corresponding
+ // target_data_* function using these new arrays.
+ std::vector<void *> mapper_args_base;
+ std::vector<void *> mapper_args;
+ std::vector<int64_t> mapper_arg_sizes;
+ std::vector<int64_t> mapper_arg_types;
+
+ for (auto& C : MapperComponents.Components) {
+ mapper_args_base.push_back(C.Base);
+ mapper_args.push_back(C.Begin);
+ mapper_arg_sizes.push_back(C.Size);
+ mapper_arg_types.push_back(C.Type);
+ }
+
+ int rc = target_data_function(Device, MapperComponents.Components.size(),
+ mapper_args_base.data(), mapper_args.data(), mapper_arg_sizes.data(),
+ mapper_arg_types.data(), /*arg_mappers*/ nullptr,
+ /*__tgt_async_info*/ nullptr);
+
+ return rc;
+}
+
/// Internal function to do the mapping and transfer the data to the device
int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
void **args, int64_t *arg_sizes, int64_t *arg_types,
- __tgt_async_info *async_info_ptr) {
+ void **arg_mappers, __tgt_async_info *async_info_ptr) {
// process each input.
for (int32_t i = 0; i < arg_num; ++i) {
// Ignore private variables and arrays - there is no mapping for them.
@@ -225,6 +261,25 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
continue;
+ if (arg_mappers && arg_mappers[i]) {
+ // Instead of executing the regular path of target_data_begin, call the
+ // target_data_mapper variant which will call target_data_begin again
+ // with new arguments.
+ DP("Calling target_data_mapper for the %dth argument\n", i);
+
+ int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i],
+ arg_types[i], arg_mappers[i], target_data_begin);
+
+ if (rc != OFFLOAD_SUCCESS) {
+ DP("Call to target_data_begin via target_data_mapper for custom mapper"
+ " failed.\n");
+ return OFFLOAD_FAIL;
+ }
+
+ // Skip the rest of this function, continue to the next argument.
+ continue;
+ }
+
void *HstPtrBegin = args[i];
void *HstPtrBase = args_base[i];
int64_t data_size = arg_sizes[i];
@@ -353,7 +408,7 @@ int target_data_begin(DeviceTy &Device, int32_t arg_num, void **args_base,
/// Internal function to undo the mapping and retrieve the data from the device.
int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
void **args, int64_t *arg_sizes, int64_t *arg_types,
- __tgt_async_info *async_info_ptr) {
+ void **arg_mappers, __tgt_async_info *async_info_ptr) {
// process each input.
for (int32_t i = arg_num - 1; i >= 0; --i) {
// Ignore private variables and arrays - there is no mapping for them.
@@ -362,6 +417,25 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
continue;
+ if (arg_mappers && arg_mappers[i]) {
+ // Instead of executing the regular path of target_data_end, call the
+ // target_data_mapper variant which will call target_data_end again
+ // with new arguments.
+ DP("Calling target_data_mapper for the %dth argument\n", i);
+
+ int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i],
+ arg_types[i], arg_mappers[i], target_data_end);
+
+ if (rc != OFFLOAD_SUCCESS) {
+ DP("Call to target_data_end via target_data_mapper for custom mapper"
+ " failed.\n");
+ return OFFLOAD_FAIL;
+ }
+
+ // Skip the rest of this function, continue to the next argument.
+ continue;
+ }
+
void *HstPtrBegin = args[i];
int64_t data_size = arg_sizes[i];
// Adjust for proper alignment if this is a combined entry (for structs).
@@ -486,14 +560,36 @@ int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
}
/// Internal function to pass data to/from the target.
+// async_info_ptr is currently unused, added here so target_data_update has the
+// same signature as target_data_begin and target_data_end.
int target_data_update(DeviceTy &Device, int32_t arg_num,
- void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types) {
+ void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers, __tgt_async_info *async_info_ptr) {
// process each input.
for (int32_t i = 0; i < arg_num; ++i) {
if ((arg_types[i] & OMP_TGT_MAPTYPE_LITERAL) ||
(arg_types[i] & OMP_TGT_MAPTYPE_PRIVATE))
continue;
+ if (arg_mappers && arg_mappers[i]) {
+ // Instead of executing the regular path of target_data_update, call the
+ // target_data_mapper variant which will call target_data_update again
+ // with new arguments.
+ DP("Calling target_data_mapper for the %dth argument\n", i);
+
+ int rc = target_data_mapper(Device, args_base[i], args[i], arg_sizes[i],
+ arg_types[i], arg_mappers[i], target_data_update);
+
+ if (rc != OFFLOAD_SUCCESS) {
+ DP("Call to target_data_update via target_data_mapper for custom mapper"
+ " failed.\n");
+ return OFFLOAD_FAIL;
+ }
+
+ // Skip the rest of this function, continue to the next argument.
+ continue;
+ }
+
void *HstPtrBegin = args[i];
int64_t MapSize = arg_sizes[i];
bool IsLast, IsHostPtr;
@@ -589,7 +685,8 @@ static bool isLambdaMapping(int64_t Mapping) {
/// integer
diff erent from zero otherwise.
int target(int64_t device_id, void *host_ptr, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
- int32_t team_num, int32_t thread_limit, int IsTeamConstruct) {
+ void **arg_mappers, int32_t team_num, int32_t thread_limit,
+ int IsTeamConstruct) {
DeviceTy &Device = Devices[device_id];
// Find the table information in the map or look it up in the translation
@@ -647,7 +744,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
// Move data to device.
int rc = target_data_begin(Device, arg_num, args_base, args, arg_sizes,
- arg_types, &AsyncInfo);
+ arg_types, arg_mappers, &AsyncInfo);
if (rc != OFFLOAD_SUCCESS) {
DP("Call to target_data_begin failed, abort target.\n");
return OFFLOAD_FAIL;
@@ -811,7 +908,7 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
// Move data from device.
int rt = target_data_end(Device, arg_num, args_base, args, arg_sizes,
- arg_types, &AsyncInfo);
+ arg_types, arg_mappers, &AsyncInfo);
if (rt != OFFLOAD_SUCCESS) {
DP("Call to target_data_end failed, abort targe.\n");
return OFFLOAD_FAIL;
diff --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index dbc5bafbab5b..cb20d8cdc790 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -19,19 +19,24 @@
extern int target_data_begin(DeviceTy &Device, int32_t arg_num,
void **args_base, void **args, int64_t *arg_sizes,
- int64_t *arg_types,
+ int64_t *arg_types, void **arg_mappers,
__tgt_async_info *async_info_ptr);
extern int target_data_end(DeviceTy &Device, int32_t arg_num, void **args_base,
void **args, int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers,
__tgt_async_info *async_info_ptr);
extern int target_data_update(DeviceTy &Device, int32_t arg_num,
- void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
+ void **args_base, void **args,
+ int64_t *arg_sizes, int64_t *arg_types,
+ void **arg_mappers,
+ __tgt_async_info *async_info_ptr = nullptr);
extern int target(int64_t device_id, void *host_ptr, int32_t arg_num,
- void **args_base, void **args, int64_t *arg_sizes, int64_t *arg_types,
- int32_t team_num, int32_t thread_limit, int IsTeamConstruct);
+ void **args_base, void **args, int64_t *arg_sizes,
+ int64_t *arg_types, void **arg_mappers, int32_t team_num,
+ int32_t thread_limit, int IsTeamConstruct);
extern int CheckDeviceAndCtors(int64_t device_id);
@@ -60,8 +65,20 @@ struct MapComponentInfoTy {
// implementation here.
struct MapperComponentsTy {
std::vector<MapComponentInfoTy> Components;
+ int32_t size() { return Components.size(); }
};
+// The mapper function pointer type. It follows the signature below:
+// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
+// void *base, void *begin,
+// size_t size, int64_t type);
+typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t);
+
+// Function pointer type for target_data_* functions (target_data_begin,
+// target_data_end and target_data_update).
+typedef int (*TargetDataFuncPtrTy)(DeviceTy &, int32_t, void **, void **,
+ int64_t *, int64_t *, void **, __tgt_async_info *);
+
////////////////////////////////////////////////////////////////////////////////
// implementation for fatal messages
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 7c344ca6ee66..4bab4c6da063 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -387,8 +387,8 @@ void RTLsTy::UnregisterLib(__tgt_bin_desc *desc) {
Device.PendingGlobalsMtx.lock();
if (Device.PendingCtorsDtors[desc].PendingCtors.empty()) {
for (auto &dtor : Device.PendingCtorsDtors[desc].PendingDtors) {
- int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL, 1,
- 1, true /*team*/);
+ int rc = target(Device.DeviceID, dtor, 0, NULL, NULL, NULL, NULL,
+ NULL, 1, 1, true /*team*/);
if (rc != OFFLOAD_SUCCESS) {
DP("Running destructor " DPxMOD " failed.\n", DPxPTR(dtor));
}
diff --git a/openmp/libomptarget/test/mapping/declare_mapper_api.cpp b/openmp/libomptarget/test/mapping/declare_mapper_api.cpp
index 275b6c3c5702..45bc076d4110 100644
--- a/openmp/libomptarget/test/mapping/declare_mapper_api.cpp
+++ b/openmp/libomptarget/test/mapping/declare_mapper_api.cpp
@@ -6,6 +6,7 @@
#include <cstdio>
#include <cstdlib>
#include <vector>
+#include <cinttypes>
// Data structure definitions copied from OpenMP RTL.
struct MapComponentInfoTy {
@@ -42,6 +43,6 @@ int main(int argc, char *argv[]) {
__tgt_push_mapper_component((void *)&MC, base, begin, size, type);
int64_t num = __tgt_mapper_num_components((void *)&MC);
// CHECK: num=2
- printf("num=%lld\n", num);
+ printf("num=%" PRId64 "\n", num);
return 0;
}
diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target.cpp
new file mode 100644
index 000000000000..624628528339
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target.cpp
@@ -0,0 +1,37 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+ int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+ C c;
+ c.a = (int*) malloc(sizeof(int)*NUM);
+ for (int i = 0; i < NUM; i++) {
+ c.a[i] = 1;
+ }
+ #pragma omp target teams distribute parallel for map(mapper(id),tofrom: c)
+ for (int i = 0; i < NUM; i++) {
+ ++c.a[i];
+ }
+ int sum = 0;
+ for (int i = 0; i < NUM; i++) {
+ sum += c.a[i];
+ }
+ // CHECK: Sum = 2048
+ printf("Sum = %d\n", sum);
+ return 0;
+}
+
diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp
new file mode 100644
index 000000000000..b457048a2e58
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target_data.cpp
@@ -0,0 +1,40 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+ int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+ C c;
+ c.a = (int*) malloc(sizeof(int)*NUM);
+ for (int i = 0; i < NUM; i++) {
+ c.a[i] = 1;
+ }
+ #pragma omp target data map(mapper(id),tofrom: c)
+ {
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < NUM; i++) {
+ ++c.a[i];
+ }
+ }
+ int sum = 0;
+ for (int i = 0; i < NUM; i++) {
+ sum += c.a[i];
+ }
+ // CHECK: Sum = 2048
+ printf("Sum = %d\n", sum);
+ return 0;
+}
+
diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
new file mode 100644
index 000000000000..ac915a08ea8e
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target_data_enter_exit.cpp
@@ -0,0 +1,39 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+ int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+ C c;
+ c.a = (int*) malloc(sizeof(int)*NUM);
+ for (int i = 0; i < NUM; i++) {
+ c.a[i] = 1;
+ }
+ #pragma omp target enter data map(mapper(id),to: c)
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < NUM; i++) {
+ ++c.a[i];
+ }
+ #pragma omp target exit data map(mapper(id),from: c)
+ int sum = 0;
+ for (int i = 0; i < NUM; i++) {
+ sum += c.a[i];
+ }
+ // CHECK: Sum = 2048
+ printf("Sum = %d\n", sum);
+ return 0;
+}
+
diff --git a/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp b/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp
new file mode 100644
index 000000000000..689275962f2c
--- /dev/null
+++ b/openmp/libomptarget/test/mapping/declare_mapper_target_update.cpp
@@ -0,0 +1,61 @@
+// RUN: %libomptarget-compile-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-nvptx64-nvidia-cuda
+// RUN: %libomptarget-compile-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compile-run-and-check-x86_64-pc-linux-gnu
+
+#include <cstdio>
+#include <cstdlib>
+
+#define NUM 1024
+
+class C {
+public:
+ int *a;
+};
+
+#pragma omp declare mapper(id: C s) map(s.a[0:NUM])
+
+int main() {
+ C c;
+ int sum = 0;
+ c.a = (int*) malloc(sizeof(int)*NUM);
+ for (int i = 0; i < NUM; i++) {
+ c.a[i] = 1;
+ }
+ #pragma omp target enter data map(mapper(id),alloc: c)
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < NUM; i++) {
+ c.a[i] = 0;
+ }
+ #pragma omp target update from(mapper(id): c)
+ for (int i = 0; i < NUM; i++) {
+ sum += c.a[i];
+ }
+ // CHECK: Sum (after first update from) = 0
+ printf("Sum (after first update from) = %d\n", sum);
+ for (int i = 0; i < NUM; i++) {
+ c.a[i] = 1;
+ }
+ #pragma omp target update to(mapper(id): c)
+ #pragma omp target teams distribute parallel for
+ for (int i = 0; i < NUM; i++) {
+ ++c.a[i];
+ }
+ sum = 0;
+ for (int i = 0; i < NUM; i++) {
+ sum += c.a[i];
+ }
+ // CHECK: Sum (after update to) = 1024
+ printf("Sum (after update to) = %d\n", sum);
+ #pragma omp target update from(mapper(id): c)
+ sum = 0;
+ for (int i = 0; i < NUM; i++) {
+ sum += c.a[i];
+ }
+ // CHECK: Sum (after second update from) = 2048
+ printf("Sum (after second update from) = %d\n", sum);
+ #pragma omp target exit data map(mapper(id),delete: c)
+ return 0;
+}
+
More information about the Openmp-commits
mailing list