[Openmp-commits] [openmp] 4692bb4 - [OpenMP] Add lower and upper bound in num_teams clause
Nawrin Sultana via Openmp-commits
openmp-commits at lists.llvm.org
Wed Feb 10 11:59:18 PST 2021
Author: Nawrin Sultana
Date: 2021-02-10T13:58:50-06:00
New Revision: 4692bb4a8a6f78ee166ac8d2b1ec9082bd6e2be5
URL: https://github.com/llvm/llvm-project/commit/4692bb4a8a6f78ee166ac8d2b1ec9082bd6e2be5
DIFF: https://github.com/llvm/llvm-project/commit/4692bb4a8a6f78ee166ac8d2b1ec9082bd6e2be5.diff
LOG: [OpenMP] Add lower and upper bound in num_teams clause
This patch adds lower-bound and upper-bound to num_teams clause
according to OpenMP 5.1 specification. The initial number of teams
created is implementation defined, but it will be greater than or
equal to lower-bound and less than or equal to upper-bound. If
num_teams clause is not specified, the number of teams created is
implementation defined, but it will be greater or equal to 1.
Differential Revision: https://reviews.llvm.org/D95820
Added:
openmp/runtime/test/teams/kmp_num_teams.c
Modified:
openmp/runtime/src/dllexports
openmp/runtime/src/i18n/en_US.txt
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_runtime.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 8f9c73f637d0..362550d047a5 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -360,6 +360,7 @@ kmpc_set_defaults 224
__kmpc_team_static_init_4u 256
__kmpc_team_static_init_8 257
__kmpc_team_static_init_8u 258
+ __kmpc_push_num_teams_51 284
%endif
%ifndef stub
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 26a164db465b..c19165f53e5d 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -455,6 +455,7 @@ AffHWSubsetManyDies "KMP_HW_SUBSET ignored: too many Dies requested."
AffUseGlobCpuidL "%1$s: Affinity capable, using global cpuid leaf %2$d info"
AffNotCapableUseLocCpuidL "%1$s: Affinity not capable, using local cpuid leaf %2$d info"
AffNotUsingHwloc "%1$s: Affinity not capable, using hwloc."
+FailedToCreateTeam "Failed to create teams between lower bound (%1$d) and upper bound (%2$d)."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
@@ -512,7 +513,7 @@ BadExeFormat "System error #193 is \"Bad format of EXE or DLL fi
"a file for another architecture. "
"Check whether \"%1$s\" is a file for %2$s architecture."
SystemLimitOnThreads "System-related limit on the number of threads."
-
+SetNewBound "Try setting new bounds (preferably less than or equal to %1$d) for num_teams clause."
# --------------------------------------------------------------------------------------------------
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 1c84def4c038..17ca2bf4738b 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -3363,6 +3363,8 @@ extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
kmp_proc_bind_t proc_bind);
extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,
int num_threads);
+extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,
+ int num_teams_ub, int num_threads);
extern void __kmp_yield();
@@ -3921,6 +3923,11 @@ KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_teams,
kmp_int32 num_threads);
+/* Function for OpenMP 5.1 num_teams clause */
+KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_teams_lb,
+ kmp_int32 num_teams_ub,
+ kmp_int32 num_threads);
KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
kmpc_micro microtask, ...);
struct kmp_dim { // loop bounds info casted to kmp_int64
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 4b188763a58a..b2388627e2b9 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -351,6 +351,33 @@ void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}
+/*!
+ at ingroup PARALLEL
+ at param loc source location information
+ at param global_tid global thread number
+ at param num_teams_lo lower bound on number of teams requested for the teams
+construct
+ at param num_teams_up upper bound on number of teams requested for the teams
+construct
+ at param num_threads number of threads per team requested for the teams construct
+
+Set the number of teams to be used by the teams construct. The number of initial
+teams cretaed will be greater than or equal to the lower bound and less than or
+equal to the upper bound.
+This call is only required if the teams construct has a `num_teams` clause
+or a `thread_limit` clause (or both).
+*/
+void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
+ kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
+ kmp_int32 num_threads) {
+ KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
+ " num_teams_ub=%d num_threads=%d\n",
+ global_tid, num_teams_lb, num_teams_ub, num_threads));
+ __kmp_assert_valid_gtid(global_tid);
+ __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
+ num_threads);
+}
+
/*!
@ingroup PARALLEL
@param loc source location information
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 34e2c223eb0f..8799d72bca6d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -7425,39 +7425,15 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
thr->th.th_set_nproc = num_threads;
}
-/* this sets the requested number of teams for the teams region and/or
- the number of threads for the next parallel region encountered */
-void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
- int num_threads) {
- kmp_info_t *thr = __kmp_threads[gtid];
- KMP_DEBUG_ASSERT(num_teams >= 0);
- KMP_DEBUG_ASSERT(num_threads >= 0);
-
- if (num_teams == 0) {
- if (__kmp_nteams > 0) {
- num_teams = __kmp_nteams;
- } else {
- num_teams = 1; // default number of teams is 1.
- }
- }
- if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
- if (!__kmp_reserve_warn) {
- __kmp_reserve_warn = 1;
- __kmp_msg(kmp_ms_warning,
- KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
- KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
- }
- num_teams = __kmp_teams_max_nth;
- }
- // Set number of teams (number of threads in the outer "parallel" of the
- // teams)
- thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
-
+static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
+ int num_threads) {
+ KMP_DEBUG_ASSERT(thr);
// Remember the number of threads for inner parallel regions
if (!TCR_4(__kmp_init_middle))
__kmp_middle_initialize(); // get internal globals calculated
KMP_DEBUG_ASSERT(__kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
+
if (num_threads == 0) {
if (__kmp_teams_thread_limit > 0) {
num_threads = __kmp_teams_thread_limit;
@@ -7476,6 +7452,9 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
if (num_teams * num_threads > __kmp_teams_max_nth) {
num_threads = __kmp_teams_max_nth / num_teams;
}
+ if (num_threads == 0) {
+ num_threads = 1;
+ }
} else {
// This thread will be the master of the league masters
// Store new thread limit; old limit is saved in th_cg_roots list
@@ -7486,11 +7465,16 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
}
if (num_teams * num_threads > __kmp_teams_max_nth) {
int new_threads = __kmp_teams_max_nth / num_teams;
- if (!__kmp_reserve_warn) { // user asked for too many threads
- __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
- __kmp_msg(kmp_ms_warning,
- KMP_MSG(CantFormThrTeam, num_threads, new_threads),
- KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+ if (new_threads == 0) {
+ new_threads = 1;
+ }
+ if (new_threads != num_threads) {
+ if (!__kmp_reserve_warn) { // user asked for too many threads
+ __kmp_reserve_warn = 1; // conflicts with KMP_TEAMS_THREAD_LIMIT
+ __kmp_msg(kmp_ms_warning,
+ KMP_MSG(CantFormThrTeam, num_threads, new_threads),
+ KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+ }
}
num_threads = new_threads;
}
@@ -7498,6 +7482,94 @@ void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
thr->th.th_teams_size.nth = num_threads;
}
+/* this sets the requested number of teams for the teams region and/or
+ the number of threads for the next parallel region encountered */
+void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,
+ int num_threads) {
+ kmp_info_t *thr = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(num_teams >= 0);
+ KMP_DEBUG_ASSERT(num_threads >= 0);
+
+ if (num_teams == 0) {
+ if (__kmp_nteams > 0) {
+ num_teams = __kmp_nteams;
+ } else {
+ num_teams = 1; // default number of teams is 1.
+ }
+ }
+ if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?
+ if (!__kmp_reserve_warn) {
+ __kmp_reserve_warn = 1;
+ __kmp_msg(kmp_ms_warning,
+ KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
+ KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+ }
+ num_teams = __kmp_teams_max_nth;
+ }
+ // Set number of teams (number of threads in the outer "parallel" of the
+ // teams)
+ thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
+
+ __kmp_push_thread_limit(thr, num_teams, num_threads);
+}
+
+/* This sets the requested number of teams for the teams region and/or
+ the number of threads for the next parallel region encountered */
+void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,
+ int num_teams_ub, int num_threads) {
+ kmp_info_t *thr = __kmp_threads[gtid];
+ KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);
+ KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);
+ KMP_DEBUG_ASSERT(num_threads >= 0);
+
+ if (num_teams_lb > num_teams_ub) {
+ __kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),
+ KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);
+ }
+
+ int num_teams = 1; // defalt number of teams is 1.
+
+ if (num_teams_lb == 0 && num_teams_ub > 0)
+ num_teams_lb = num_teams_ub;
+
+ if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause
+ num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;
+ if (num_teams > __kmp_teams_max_nth) {
+ if (!__kmp_reserve_warn) {
+ __kmp_reserve_warn = 1;
+ __kmp_msg(kmp_ms_warning,
+ KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
+ KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+ }
+ num_teams = __kmp_teams_max_nth;
+ }
+ } else if (num_teams_lb == num_teams_ub) { // requires exact number of teams
+ num_teams = num_teams_ub;
+ } else { // num_teams_lb <= num_teams <= num_teams_ub
+ if (num_threads == 0) {
+ if (num_teams_ub > __kmp_teams_max_nth) {
+ num_teams = num_teams_lb;
+ } else {
+ num_teams = num_teams_ub;
+ }
+ } else {
+ num_teams = (num_threads > __kmp_teams_max_nth)
+ ? num_teams
+ : __kmp_teams_max_nth / num_threads;
+ if (num_teams < num_teams_lb) {
+ num_teams = num_teams_lb;
+ } else if (num_teams > num_teams_ub) {
+ num_teams = num_teams_ub;
+ }
+ }
+ }
+ // Set number of teams (number of threads in the outer "parallel" of the
+ // teams)
+ thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
+
+ __kmp_push_thread_limit(thr, num_teams, num_threads);
+}
+
// Set the proc_bind var to use in the following parallel region.
void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {
kmp_info_t *thr = __kmp_threads[gtid];
diff --git a/openmp/runtime/test/teams/kmp_num_teams.c b/openmp/runtime/test/teams/kmp_num_teams.c
new file mode 100644
index 000000000000..8012f1f9ecd3
--- /dev/null
+++ b/openmp/runtime/test/teams/kmp_num_teams.c
@@ -0,0 +1,93 @@
+// RUN: %libomp-compile-and-run
+// UNSUPPORTED: gcc
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#define NT 8
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+typedef int kmp_int32;
+typedef struct ident {
+ kmp_int32 reserved_1;
+ kmp_int32 flags;
+ kmp_int32 reserved_2;
+ kmp_int32 reserved_3;
+ char const *psource;
+} ident_t;
+extern int __kmpc_global_thread_num(ident_t *);
+extern void __kmpc_push_num_teams_51(ident_t *, kmp_int32, kmp_int32, kmp_int32,
+ kmp_int32);
+#ifdef __cplusplus
+}
+#endif
+
+void check_num_teams(int num_teams_lb, int num_teams_ub, int thread_limit) {
+ int nteams, nthreads;
+ int a = 0;
+
+ int gtid = __kmpc_global_thread_num(NULL);
+ __kmpc_push_num_teams_51(NULL, gtid, num_teams_lb, num_teams_ub,
+ thread_limit);
+
+#pragma omp target teams
+ {
+ int priv_nteams;
+ int team_num = omp_get_team_num();
+ if (team_num == 0)
+ nteams = omp_get_num_teams();
+ priv_nteams = omp_get_num_teams();
+#pragma omp parallel
+ {
+ int priv_nthreads;
+ int thread_num = omp_get_thread_num();
+ int teams_ub, teams_lb, thr_limit;
+ if (team_num == 0 && thread_num == 0)
+ nthreads = omp_get_num_threads();
+ priv_nthreads = omp_get_num_threads();
+
+ teams_ub = (num_teams_ub ? num_teams_ub : priv_nteams);
+ teams_lb = (num_teams_lb ? num_teams_lb : teams_ub);
+ thr_limit = (thread_limit ? thread_limit : priv_nthreads);
+
+ if (priv_nteams < teams_lb || priv_nteams > teams_ub) {
+ fprintf(stderr, "error: invalid number of teams=%d\n", priv_nteams);
+ exit(1);
+ }
+ if (priv_nthreads > thr_limit) {
+ fprintf(stderr, "error: invalid number of threads=%d\n", priv_nthreads);
+ exit(1);
+ }
+#pragma omp atomic
+ a++;
+ }
+ }
+ if (a != nteams * nthreads) {
+ fprintf(stderr, "error: a (%d) != nteams * nthreads (%d)\n", a,
+ nteams * nthreads);
+ exit(1);
+ } else {
+ printf("#teams %d, #threads %d: Hello!\n", nteams, nthreads);
+ }
+}
+
+int main(int argc, char *argv[]) {
+ omp_set_num_threads(NT);
+
+ check_num_teams(1, 8, 2);
+ check_num_teams(2, 2, 2);
+ check_num_teams(2, 2, 0);
+ check_num_teams(8, 16, 2);
+ check_num_teams(9, 16, 0);
+ check_num_teams(9, 16, 2);
+ check_num_teams(2, 3, 0);
+ check_num_teams(0, 0, 2);
+ check_num_teams(0, 4, 0);
+ check_num_teams(0, 2, 2);
+
+ printf("Test Passed\n");
+ return 0;
+}
More information about the Openmp-commits
mailing list