[Openmp-commits] [openmp] 36d53af - [OpenMP][Offloading] Remove task wait in nowait interfaces

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Fri Jul 30 08:39:51 PDT 2021


Author: Shilei Tian
Date: 2021-07-30T11:39:46-04:00
New Revision: 36d53af4a9c812e023fb6096072b906fcebf1abb

URL: https://github.com/llvm/llvm-project/commit/36d53af4a9c812e023fb6096072b906fcebf1abb
DIFF: https://github.com/llvm/llvm-project/commit/36d53af4a9c812e023fb6096072b906fcebf1abb.diff

LOG: [OpenMP][Offloading] Remove task wait in nowait interfaces

All of the `nowait` interfaces in `libomptarget` accept four more arguments (`int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList`) than their counterparts without `nowait`. These extra arguments were intended for dependence resolution, potentially lowered to the device side. The current implementation calls the `libomp` function `__kmpc_omp_taskwait` whenever `depNum + noAliasDepNum > 0`. However, the front end simply ignores these four arguments and does not emit them at all. As a consequence, `depNum` and `noAliasDepNum` contain garbage, which could lead to an unnecessary task wait. This patch removes those `__kmpc_omp_taskwait` calls from the `nowait` interfaces, along with the now-unused weak declaration in `private.h`.

Reviewed By: grokos

Differential Revision: https://reviews.llvm.org/D107164

Added: 
    

Modified: 
    openmp/libomptarget/src/interface.cpp
    openmp/libomptarget/src/private.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 793bfe0f92913..6f75cce421505 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -79,8 +79,6 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t device_id, int32_t arg_num,
                                            void *depList, int32_t noAliasDepNum,
                                            void *noAliasDepList) {
   TIMESCOPE();
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   __tgt_target_data_begin_mapper(nullptr, device_id, arg_num, args_base, args,
                                  arg_sizes, arg_types, nullptr, nullptr);
@@ -128,8 +126,6 @@ EXTERN void __tgt_target_data_begin_nowait_mapper(
     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   TIMESCOPE_WITH_IDENT(loc);
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
 
   __tgt_target_data_begin_mapper(loc, device_id, arg_num, args_base, args,
                                  arg_sizes, arg_types, arg_names, arg_mappers);
@@ -153,8 +149,6 @@ EXTERN void __tgt_target_data_end_nowait(int64_t device_id, int32_t arg_num,
                                          int32_t noAliasDepNum,
                                          void *noAliasDepList) {
   TIMESCOPE();
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   __tgt_target_data_end_mapper(nullptr, device_id, arg_num, args_base, args,
                                arg_sizes, arg_types, nullptr, nullptr);
@@ -201,8 +195,6 @@ EXTERN void __tgt_target_data_end_nowait_mapper(
     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   TIMESCOPE_WITH_IDENT(loc);
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
 
   __tgt_target_data_end_mapper(loc, device_id, arg_num, args_base, args,
                                arg_sizes, arg_types, arg_names, arg_mappers);
@@ -221,8 +213,6 @@ EXTERN void __tgt_target_data_update_nowait(
     int64_t *arg_sizes, int64_t *arg_types, int32_t depNum, void *depList,
     int32_t noAliasDepNum, void *noAliasDepList) {
   TIMESCOPE();
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   __tgt_target_data_update_mapper(nullptr, device_id, arg_num, args_base, args,
                                   arg_sizes, arg_types, nullptr, nullptr);
@@ -260,8 +250,6 @@ EXTERN void __tgt_target_data_update_nowait_mapper(
     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   TIMESCOPE_WITH_IDENT(loc);
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
 
   __tgt_target_data_update_mapper(loc, device_id, arg_num, args_base, args,
                                   arg_sizes, arg_types, arg_names, arg_mappers);
@@ -281,8 +269,6 @@ EXTERN int __tgt_target_nowait(int64_t device_id, void *host_ptr,
                                int32_t depNum, void *depList,
                                int32_t noAliasDepNum, void *noAliasDepList) {
   TIMESCOPE();
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   return __tgt_target_mapper(nullptr, device_id, host_ptr, arg_num, args_base,
                              args, arg_sizes, arg_types, nullptr, nullptr);
@@ -330,8 +316,6 @@ EXTERN int __tgt_target_nowait_mapper(
     map_var_info_t *arg_names, void **arg_mappers, int32_t depNum,
     void *depList, int32_t noAliasDepNum, void *noAliasDepList) {
   TIMESCOPE_WITH_IDENT(loc);
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
 
   return __tgt_target_mapper(loc, device_id, host_ptr, arg_num, args_base, args,
                              arg_sizes, arg_types, arg_names, arg_mappers);
@@ -355,8 +339,6 @@ EXTERN int __tgt_target_teams_nowait(int64_t device_id, void *host_ptr,
                                      void *depList, int32_t noAliasDepNum,
                                      void *noAliasDepList) {
   TIMESCOPE();
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(NULL, __kmpc_global_thread_num(NULL));
 
   return __tgt_target_teams_mapper(nullptr, device_id, host_ptr, arg_num,
                                    args_base, args, arg_sizes, arg_types,
@@ -408,8 +390,6 @@ EXTERN int __tgt_target_teams_nowait_mapper(
     int32_t thread_limit, int32_t depNum, void *depList, int32_t noAliasDepNum,
     void *noAliasDepList) {
   TIMESCOPE_WITH_IDENT(loc);
-  if (depNum + noAliasDepNum > 0)
-    __kmpc_omp_taskwait(loc, __kmpc_global_thread_num(loc));
 
   return __tgt_target_teams_mapper(loc, device_id, host_ptr, arg_num, args_base,
                                    args, arg_sizes, arg_types, arg_names,

diff  --git a/openmp/libomptarget/src/private.h b/openmp/libomptarget/src/private.h
index 8b6fabe75971b..05996eb5cdf39 100644
--- a/openmp/libomptarget/src/private.h
+++ b/openmp/libomptarget/src/private.h
@@ -91,7 +91,6 @@ extern "C" {
 #endif
 // functions that extract info from libomp; keep in sync
 int omp_get_default_device(void) __attribute__((weak));
-int32_t __kmpc_omp_taskwait(void *loc_ref, int32_t gtid) __attribute__((weak));
 int32_t __kmpc_global_thread_num(void *) __attribute__((weak));
 int __kmpc_get_target_offload(void) __attribute__((weak));
 #ifdef __cplusplus


        


More information about the Openmp-commits mailing list