[Openmp-commits] [openmp] 4692bb4 - [OpenMP] Add lower and upper bound in num_teams clause

Nawrin Sultana via Openmp-commits openmp-commits at lists.llvm.org
Wed Feb 10 11:59:18 PST 2021


Author: Nawrin Sultana
Date: 2021-02-10T13:58:50-06:00
New Revision: 4692bb4a8a6f78ee166ac8d2b1ec9082bd6e2be5

URL: https://github.com/llvm/llvm-project/commit/4692bb4a8a6f78ee166ac8d2b1ec9082bd6e2be5
DIFF: https://github.com/llvm/llvm-project/commit/4692bb4a8a6f78ee166ac8d2b1ec9082bd6e2be5.diff

LOG: [OpenMP] Add lower and upper bound in num_teams clause

This patch adds lower-bound and upper-bound to the num_teams clause
according to the OpenMP 5.1 specification. The initial number of teams
created is implementation defined, but it will be greater than or
equal to lower-bound and less than or equal to upper-bound. If the
num_teams clause is not specified, the number of teams created is
implementation defined, but it will be greater than or equal to 1.

Differential Revision: https://reviews.llvm.org/D95820

Added: 
    openmp/runtime/test/teams/kmp_num_teams.c

Modified: 
    openmp/runtime/src/dllexports
    openmp/runtime/src/i18n/en_US.txt
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_csupport.cpp
    openmp/runtime/src/kmp_runtime.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 8f9c73f637d0..362550d047a5 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -360,6 +360,7 @@ kmpc_set_defaults                           224
         __kmpc_team_static_init_4u          256
         __kmpc_team_static_init_8           257
         __kmpc_team_static_init_8u          258
+        __kmpc_push_num_teams_51            284
 %endif
 
 %ifndef stub

diff  --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 26a164db465b..c19165f53e5d 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -455,6 +455,7 @@ AffHWSubsetManyDies          "KMP_HW_SUBSET ignored: too many Dies requested."
 AffUseGlobCpuidL             "%1$s: Affinity capable, using global cpuid leaf %2$d info"
 AffNotCapableUseLocCpuidL    "%1$s: Affinity not capable, using local cpuid leaf %2$d info"
 AffNotUsingHwloc             "%1$s: Affinity not capable, using hwloc."
+FailedToCreateTeam           "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)."
 
 # --------------------------------------------------------------------------------------------------
 -*- HINTS -*-
@@ -512,7 +513,7 @@ BadExeFormat                 "System error #193 is \"Bad format of EXE or DLL fi
                              "a file for another architecture. "
                              "Check whether \"%1$s\" is a file for %2$s architecture."
 SystemLimitOnThreads         "System-related limit on the number of threads."
-
+SetNewBound                  "Try setting new bounds (preferably less than or equal to %1$d) for num_teams clause."
 
 
 # --------------------------------------------------------------------------------------------------

diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 1c84def4c038..17ca2bf4738b 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3363,6 +3363,8 @@ extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
                                  kmp_proc_bind_t proc_bind);
 extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
                                  int num_threads);
+extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
+                                    int num_teams_ub, int num_threads);
 
 extern void __kmp_yield();
 
@@ -3921,6 +3923,11 @@ KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
 KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                                       kmp_int32 num_teams,
                                       kmp_int32 num_threads);
+/* Function for OpenMP 5.1 num_teams clause */
+KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
+                                         kmp_int32 num_teams_lb,
+                                         kmp_int32 num_teams_ub,
+                                         kmp_int32 num_threads);
 KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
                                   kmpc_micro microtask, ...);
 struct kmp_dim { // loop bounds info casted to kmp_int64

diff  --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 4b188763a58a..b2388627e2b9 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -351,6 +351,33 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
   __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
 }
 
+/*!
+@ingroup PARALLEL
+@param loc source location information
+@param global_tid global thread number
+@param num_teams_lb lower bound on number of teams requested for the teams
+construct
+@param num_teams_ub upper bound on number of teams requested for the teams
+construct
+@param num_threads number of threads per team requested for the teams construct
+
+Set the number of teams to be used by the teams construct. The number of initial
+teams created will be greater than or equal to the lower bound and less than or
+equal to the upper bound.
+This call is only required if the teams construct has a `num_teams` clause
+or a `thread_limit` clause (or both).
+*/
+void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
+                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
+                              kmp_int32 num_threads) {
+  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
+                " num_teams_ub=%d num_threads=%d\n",
+                global_tid, num_teams_lb, num_teams_ub, num_threads));
+  __kmp_assert_valid_gtid(global_tid);
+  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
+                          num_threads);
+}
+
 /*!
 @ingroup PARALLEL
 @param loc  source location information

diff  --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 34e2c223eb0f..8799d72bca6d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -7425,39 +7425,15 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
     thr->th.th_set_nproc = num_threads;
 }
 
-/* this sets the requested number of teams for the teams region and/or
-   the number of threads for the next parallel region encountered  */
-void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
-                          int num_threads) {
-  kmp_info_t *thr = __kmp_threads[gtid];
-  KMP_DEBUG_ASSERT(num_teams >= 0);
-  KMP_DEBUG_ASSERT(num_threads >= 0);
-
-  if (num_teams == 0) {
-    if (__kmp_nteams > 0) {
-      num_teams = __kmp_nteams;
-    } else {
-      num_teams = 1; // default number of teams is 1.
-    }
-  }
-  if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
-    if (!__kmp_reserve_warn) {
-      __kmp_reserve_warn = 1;
-      __kmp_msg(kmp_ms_warning,
-                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
-                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
-    }
-    num_teams = __kmp_teams_max_nth;
-  }
-  // Set number of teams (number of threads in the outer "parallel" of the
-  // teams)
-  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
-
+static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
+                                    int num_threads) {
+  KMP_DEBUG_ASSERT(thr);
   // Remember the number of threads for inner parallel regions
   if (!TCR_4(__kmp_init_middle))
     __kmp_middle_initialize(); // get internal globals calculated
   KMP_DEBUG_ASSERT(__kmp_avail_proc);
   KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
+
   if (num_threads == 0) {
     if (__kmp_teams_thread_limit > 0) {
       num_threads = __kmp_teams_thread_limit;
@@ -7476,6 +7452,9 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
     if (num_teams * num_threads > __kmp_teams_max_nth) {
       num_threads = __kmp_teams_max_nth / num_teams;
     }
+    if (num_threads == 0) {
+      num_threads = 1;
+    }
   } else {
     // This thread will be the master of the league masters
     // Store new thread limit; old limit is saved in th_cg_roots list
@@ -7486,11 +7465,16 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
     }
     if (num_teams * num_threads > __kmp_teams_max_nth) {
       int new_threads = __kmp_teams_max_nth / num_teams;
-      if (!__kmp_reserve_warn) { // user asked for too many threads
-        __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
-        __kmp_msg(kmp_ms_warning,
-                  KMP_MSG(CantFormThrTeam, num_threads, new_threads),
-                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+      if (new_threads == 0) {
+        new_threads = 1;
+      }
+      if (new_threads != num_threads) {
+        if (!__kmp_reserve_warn) { // user asked for too many threads
+          __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
+          __kmp_msg(kmp_ms_warning,
+                    KMP_MSG(CantFormThrTeam, num_threads, new_threads),
+                    KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+        }
       }
       num_threads = new_threads;
     }
@@ -7498,6 +7482,94 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
   thr->th.th_teams_size.nth = num_threads;
 }
 
+/* this sets the requested number of teams for the teams region and/or
+   the number of threads for the next parallel region encountered  */
+void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
+                          int num_threads) {
+  kmp_info_t *thr = __kmp_threads[gtid];
+  KMP_DEBUG_ASSERT(num_teams >= 0);
+  KMP_DEBUG_ASSERT(num_threads >= 0);
+
+  if (num_teams == 0) {
+    if (__kmp_nteams > 0) {
+      num_teams = __kmp_nteams;
+    } else {
+      num_teams = 1; // default number of teams is 1.
+    }
+  }
+  if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
+    if (!__kmp_reserve_warn) {
+      __kmp_reserve_warn = 1;
+      __kmp_msg(kmp_ms_warning,
+                KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
+                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+    }
+    num_teams = __kmp_teams_max_nth;
+  }
+  // Set number of teams (number of threads in the outer "parallel" of the
+  // teams)
+  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
+
+  __kmp_push_thread_limit(thr, num_teams, num_threads);
+}
+
+/* This sets the requested number of teams for the teams region and/or
+   the number of threads for the next parallel region encountered  */
+void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
+                             int num_teams_ub, int num_threads) {
+  kmp_info_t *thr = __kmp_threads[gtid];
+  KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
+  KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
+  KMP_DEBUG_ASSERT(num_threads >= 0);
+
+  if (num_teams_lb > num_teams_ub) {
+    __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
+                KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
+  }
+
+  int num_teams = 1; // default number of teams is 1.
+
+  if (num_teams_lb == 0 && num_teams_ub > 0)
+    num_teams_lb = num_teams_ub;
+
+  if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
+    num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
+    if (num_teams > __kmp_teams_max_nth) {
+      if (!__kmp_reserve_warn) {
+        __kmp_reserve_warn = 1;
+        __kmp_msg(kmp_ms_warning,
+                  KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
+                  KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+      }
+      num_teams = __kmp_teams_max_nth;
+    }
+  } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
+    num_teams = num_teams_ub;
+  } else { // num_teams_lb <= num_teams <= num_teams_ub
+    if (num_threads == 0) {
+      if (num_teams_ub > __kmp_teams_max_nth) {
+        num_teams = num_teams_lb;
+      } else {
+        num_teams = num_teams_ub;
+      }
+    } else {
+      num_teams = (num_threads > __kmp_teams_max_nth)
+                      ? num_teams
+                      : __kmp_teams_max_nth / num_threads;
+      if (num_teams < num_teams_lb) {
+        num_teams = num_teams_lb;
+      } else if (num_teams > num_teams_ub) {
+        num_teams = num_teams_ub;
+      }
+    }
+  }
+  // Set number of teams (number of threads in the outer "parallel" of the
+  // teams)
+  thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
+
+  __kmp_push_thread_limit(thr, num_teams, num_threads);
+}
+
 // Set the proc_bind var to use in the following parallel region.
 void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
   kmp_info_t *thr = __kmp_threads[gtid];

diff  --git a/openmp/runtime/test/teams/kmp_num_teams.c b/openmp/runtime/test/teams/kmp_num_teams.c
new file mode 100644
index 000000000000..8012f1f9ecd3
--- /dev/null
+++ b/openmp/runtime/test/teams/kmp_num_teams.c
@@ -0,0 +1,93 @@
+// RUN: %libomp-compile-and-run
+// UNSUPPORTED: gcc
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#define NT 8
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef int kmp_int32;
+typedef struct ident {
+  kmp_int32 reserved_1;
+  kmp_int32 flags;
+  kmp_int32 reserved_2;
+  kmp_int32 reserved_3;
+  char const *psource;
+} ident_t;
+extern int __kmpc_global_thread_num(ident_t *);
+extern void __kmpc_push_num_teams_51(ident_t *, kmp_int32, kmp_int32, kmp_int32,
+                                     kmp_int32);
+#ifdef __cplusplus
+}
+#endif
+
+void check_num_teams(int num_teams_lb, int num_teams_ub, int thread_limit) {
+  int nteams, nthreads;
+  int a = 0;
+
+  int gtid = __kmpc_global_thread_num(NULL);
+  __kmpc_push_num_teams_51(NULL, gtid, num_teams_lb, num_teams_ub,
+                           thread_limit);
+
+#pragma omp target teams
+  {
+    int priv_nteams;
+    int team_num = omp_get_team_num();
+    if (team_num == 0)
+      nteams = omp_get_num_teams();
+    priv_nteams = omp_get_num_teams();
+#pragma omp parallel
+    {
+      int priv_nthreads;
+      int thread_num = omp_get_thread_num();
+      int teams_ub, teams_lb, thr_limit;
+      if (team_num == 0 && thread_num == 0)
+        nthreads = omp_get_num_threads();
+      priv_nthreads = omp_get_num_threads();
+
+      teams_ub = (num_teams_ub ? num_teams_ub : priv_nteams);
+      teams_lb = (num_teams_lb ? num_teams_lb : teams_ub);
+      thr_limit = (thread_limit ? thread_limit : priv_nthreads);
+
+      if (priv_nteams < teams_lb || priv_nteams > teams_ub) {
+        fprintf(stderr, "error: invalid number of teams=%d\n", priv_nteams);
+        exit(1);
+      }
+      if (priv_nthreads > thr_limit) {
+        fprintf(stderr, "error: invalid number of threads=%d\n", priv_nthreads);
+        exit(1);
+      }
+#pragma omp atomic
+      a++;
+    }
+  }
+  if (a != nteams * nthreads) {
+    fprintf(stderr, "error: a (%d) != nteams * nthreads (%d)\n", a,
+            nteams * nthreads);
+    exit(1);
+  } else {
+    printf("#teams %d, #threads %d: Hello!\n", nteams, nthreads);
+  }
+}
+
+int main(int argc, char *argv[]) {
+  omp_set_num_threads(NT);
+
+  check_num_teams(1, 8, 2);
+  check_num_teams(2, 2, 2);
+  check_num_teams(2, 2, 0);
+  check_num_teams(8, 16, 2);
+  check_num_teams(9, 16, 0);
+  check_num_teams(9, 16, 2);
+  check_num_teams(2, 3, 0);
+  check_num_teams(0, 0, 2);
+  check_num_teams(0, 4, 0);
+  check_num_teams(0, 2, 2);
+
+  printf("Test Passed\n");
+  return 0;
+}


        


More information about the Openmp-commits mailing list