[Openmp-commits] [openmp] 50b68a3 - [OpenMP][host runtime] Add support for teams affinity
via Openmp-commits
openmp-commits at lists.llvm.org
Thu Oct 14 14:32:18 PDT 2021
Author: Peyton, Jonathan L
Date: 2021-10-14T16:30:28-05:00
New Revision: 50b68a3d030543daf97794d68682cc698964ca26
URL: https://github.com/llvm/llvm-project/commit/50b68a3d030543daf97794d68682cc698964ca26
DIFF: https://github.com/llvm/llvm-project/commit/50b68a3d030543daf97794d68682cc698964ca26.diff
LOG: [OpenMP][host runtime] Add support for teams affinity
This patch implements teams affinity on the host.
The default is spread. A user can specify spread, close, or
primary using the KMP_TEAMS_PROC_BIND environment variable. Unlike
OMP_PROC_BIND, KMP_TEAMS_PROC_BIND takes a single value, not a
list of values. The values follow the same semantics that the OpenMP
specification defines for parallel regions, except that T is the number of
teams in a league instead of the number of threads in a parallel region.
Differential Revision: https://reviews.llvm.org/D109921
Added:
openmp/runtime/test/affinity/teams-affinity.c
Modified:
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/test/affinity/libomp_test_topology.h
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index a4d8ece9025eb..8537dcae7821a 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -849,6 +849,7 @@ typedef struct kmp_nested_proc_bind_t {
} kmp_nested_proc_bind_t;
extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
+extern kmp_proc_bind_t __kmp_teams_proc_bind;
extern int __kmp_display_affinity;
extern char *__kmp_affinity_format;
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index b6babbe0e97e3..4aea5a2d86632 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -280,6 +280,7 @@ char *__kmp_cpuinfo_file = NULL;
#endif /* KMP_AFFINITY_SUPPORTED */
kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0};
+kmp_proc_bind_t __kmp_teams_proc_bind = proc_bind_spread;
int __kmp_affinity_num_places = 0;
int __kmp_display_affinity = FALSE;
char *__kmp_affinity_format = NULL;
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 05272a92e9b93..4505d269c2b6b 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -914,7 +914,8 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
assured that there are enough threads available, because we checked on that
earlier within critical section forkjoin */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
- kmp_info_t *master_th, int master_gtid) {
+ kmp_info_t *master_th, int master_gtid,
+ int fork_teams_workers) {
int i;
int use_hot_team;
@@ -1003,7 +1004,12 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
}
#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(team);
+ // Do not partition the places list for teams construct workers who
+ // haven't actually been forked to do real work yet. This partitioning
+ // will take place in the parallel region nested within the teams construct.
+ if (!fork_teams_workers) {
+ __kmp_partition_places(team);
+ }
#endif
}
@@ -1597,6 +1603,41 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
+ // Figure out the proc_bind policy for the nested parallel within teams
+ kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
+ // proc_bind_default means don't update
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default;
+ if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
+ proc_bind = proc_bind_false;
+ } else {
+ // No proc_bind clause specified; use current proc-bind-var
+ if (proc_bind == proc_bind_default) {
+ proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
+ }
+ /* else: The proc_bind policy was specified explicitly on parallel
+ clause.
+ This overrides proc-bind-var for this parallel region, but does not
+ change proc-bind-var. */
+ // Figure the value of proc-bind-var for the child threads.
+ if ((level + 1 < __kmp_nested_proc_bind.used) &&
+ (__kmp_nested_proc_bind.bind_types[level + 1] !=
+ master_th->th.th_current_task->td_icvs.proc_bind)) {
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
+ }
+ }
+ KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
+ // Need to change the bind-var ICV to correct value for each implicit task
+ if (proc_bind_icv != proc_bind_default &&
+ master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
+ kmp_info_t **other_threads = parent_team->t.t_threads;
+ for (i = 0; i < master_th->th.th_team_nproc; ++i) {
+ other_threads[i]->th.th_current_task->td_icvs.proc_bind =
+ proc_bind_icv;
+ }
+ }
+ // Reset for next parallel region
+ master_th->th.th_set_proc_bind = proc_bind_default;
+
#if USE_ITT_BUILD && USE_ITT_NOTIFY
if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
KMP_ITT_DEBUG) &&
@@ -1613,6 +1654,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
}
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
+#if KMP_AFFINITY_SUPPORTED
+ __kmp_partition_places(parent_team);
+#endif
KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
"master_th=%p, gtid=%d\n",
@@ -1953,16 +1997,21 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// Figure out the proc_bind_policy for the new team.
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
- kmp_proc_bind_t proc_bind_icv =
- proc_bind_default; // proc_bind_default means don't update
+ // proc_bind_default means don't update
+ kmp_proc_bind_t proc_bind_icv = proc_bind_default;
if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
proc_bind = proc_bind_false;
} else {
+ // No proc_bind clause specified; use current proc-bind-var for this
+ // parallel region
if (proc_bind == proc_bind_default) {
- // No proc_bind clause specified; use current proc-bind-var for this
- // parallel region
proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
}
+ // Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND
+ if (master_th->th.th_teams_microtask &&
+ microtask == (microtask_t)__kmp_teams_master) {
+ proc_bind = __kmp_teams_proc_bind;
+ }
/* else: The proc_bind policy was specified explicitly on parallel clause.
This overrides proc-bind-var for this parallel region, but does not
change proc-bind-var. */
@@ -1970,7 +2019,11 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if ((level + 1 < __kmp_nested_proc_bind.used) &&
(__kmp_nested_proc_bind.bind_types[level + 1] !=
master_th->th.th_current_task->td_icvs.proc_bind)) {
- proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
+ // Do not modify the proc bind icv for the two teams construct forks
+ // They just let the proc bind icv pass through
+ if (!master_th->th.th_teams_microtask ||
+ !(microtask == (microtask_t)__kmp_teams_master || ap == NULL))
+ proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
}
}
@@ -2142,7 +2195,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
root->r.r_active = TRUE;
- __kmp_fork_team_threads(root, team, master_th, gtid);
+ __kmp_fork_team_threads(root, team, master_th, gtid, !ap);
__kmp_setup_icv_copy(team, nthreads,
&master_th->th.th_current_task->td_icvs, loc);
@@ -2411,6 +2464,14 @@ void __kmp_join_call(ident_t *loc, int gtid
} // active_level == 1
#endif /* USE_ITT_BUILD */
+#if KMP_AFFINITY_SUPPORTED
+ if (!exit_teams) {
+ // Restore master thread's partition.
+ master_th->th.th_first_place = team->t.t_first_place;
+ master_th->th.th_last_place = team->t.t_last_place;
+ }
+#endif // KMP_AFFINITY_SUPPORTED
+
if (master_th->th.th_teams_microtask && !exit_teams &&
team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
team->t.t_level == master_th->th.th_teams_level + 1) {
@@ -2518,11 +2579,6 @@ void __kmp_join_call(ident_t *loc, int gtid
master_th, team));
__kmp_pop_current_task_from_thread(master_th);
-#if KMP_AFFINITY_SUPPORTED
- // Restore master thread's partition.
- master_th->th.th_first_place = team->t.t_first_place;
- master_th->th.th_last_place = team->t.t_last_place;
-#endif // KMP_AFFINITY_SUPPORTED
master_th->th.th_def_allocator = team->t.t_def_allocator;
#if OMPD_SUPPORT
@@ -5016,6 +5072,7 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
kmp_team_t *team;
int use_hot_team = !root->r.r_active;
int level = 0;
+ int do_place_partition = 1;
KA_TRACE(20, ("__kmp_allocate_team: called\n"));
KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
@@ -5037,6 +5094,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
++level; // not increment if #teams==1, or for outer fork of the teams;
// increment otherwise
}
+ // Do not perform the place partition if inner fork of the teams
+ // Wait until nested parallel region encountered inside teams construct
+ if ((master->th.th_teams_size.nteams == 1 &&
+ master->th.th_teams_level >= team->t.t_level) ||
+ (team->t.t_pkfn == (microtask_t)__kmp_teams_master))
+ do_place_partition = 0;
}
hot_teams = master->th.th_hot_teams;
if (level < __kmp_hot_teams_max_level && hot_teams &&
@@ -5074,6 +5137,10 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
__kmp_resize_dist_barrier(team, old_nthr, new_nproc);
}
+ // If not doing the place partition, then reset the team's proc bind
+ // to indicate that partitioning of all threads still needs to take place
+ if (do_place_partition == 0)
+ team->t.t_proc_bind = proc_bind_default;
// Has the number of threads changed?
/* Let's assume the most common case is that the number of threads is
unchanged, and put that case first. */
@@ -5103,16 +5170,20 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
if ((team->t.t_size_changed == 0) &&
(team->t.t_proc_bind == new_proc_bind)) {
if (new_proc_bind == proc_bind_spread) {
- __kmp_partition_places(
- team, 1); // add flag to update only master for spread
+ if (do_place_partition) {
+ // add flag to update only master for spread
+ __kmp_partition_places(team, 1);
+ }
}
KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "
"proc_bind = %d, partition = [%d,%d]\n",
team->t.t_id, new_proc_bind, team->t.t_first_place,
team->t.t_last_place));
} else {
- KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
- __kmp_partition_places(team);
+ if (do_place_partition) {
+ KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+ __kmp_partition_places(team);
+ }
}
#else
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
@@ -5189,10 +5260,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
- KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+ if (do_place_partition) {
+ KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(team);
+ __kmp_partition_places(team);
#endif
+ }
} else { // team->t.t_nproc < new_nproc
#if (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
kmp_affin_mask_t *old_mask;
@@ -5328,10 +5401,12 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
}
#endif
- KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
+ if (do_place_partition) {
+ KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
#if KMP_AFFINITY_SUPPORTED
- __kmp_partition_places(team);
+ __kmp_partition_places(team);
#endif
+ }
} // Check changes in number of threads
kmp_info_t *master = team->t.t_threads[0];
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 00830b57e3b8d..aa0641f7f70f8 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3207,6 +3207,47 @@ static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer,
}
} // __kmp_stg_print_topology_method
+// KMP_TEAMS_PROC_BIND
+// One accepted spelling of a teams proc-bind value and the policy it selects.
+struct kmp_proc_bind_info_t {
+  const char *name;
+  kmp_proc_bind_t proc_bind;
+};
+// Lookup table shared by the KMP_TEAMS_PROC_BIND parser and printer.
+// The printer reports the first entry whose policy matches the current
+// setting, so the canonical spelling of each policy ("spread", "close",
+// "primary") is listed before its alias ("true", "false").
+static kmp_proc_bind_info_t proc_bind_table[] = {
+    {"spread", proc_bind_spread},
+    {"true", proc_bind_spread},
+    {"close", proc_bind_close},
+    {"primary", proc_bind_primary},
+    // teams-bind = false means "replicate the primary thread's affinity"
+    {"false", proc_bind_primary}};
+// Parser for the KMP_TEAMS_PROC_BIND setting.
+// Walks proc_bind_table in order and, for the first entry that
+// __kmp_match_str accepts for `value`, stores that entry's policy in
+// __kmp_teams_proc_bind. If no entry matches, a StgInvalidValue warning is
+// issued for (name, value) and __kmp_teams_proc_bind keeps its previous value.
+// `data` is not used.
+static void __kmp_stg_parse_teams_proc_bind(char const *name, char const *value,
+ void *data) {
+ int valid;
+ // Out-parameter required by __kmp_match_str; its result is not read here.
+ // NOTE(review): whether trailing characters after a match are rejected
+ // depends on __kmp_match_str, which is not visible in this diff -- confirm.
+ const char *end;
+ valid = 0;
+ for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]);
+ ++i) {
+ if (__kmp_match_str(proc_bind_table[i].name, value, &end)) {
+ __kmp_teams_proc_bind = proc_bind_table[i].proc_bind;
+ valid = 1;
+ break;
+ }
+ }
+ if (!valid) {
+ KMP_WARNING(StgInvalidValue, name, value);
+ }
+}
+// Printer for the KMP_TEAMS_PROC_BIND setting.
+// Appends `name` and the textual form of __kmp_teams_proc_bind to `buffer`
+// via __kmp_stg_print_str. The text is the name of the first proc_bind_table
+// entry (in table order) whose policy equals the current setting; if no entry
+// matches, the localized "NotDefined" string is printed. `data` is not used.
+static void __kmp_stg_print_teams_proc_bind(kmp_str_buf_t *buffer,
+ char const *name, void *data) {
+ const char *value = KMP_I18N_STR(NotDefined);
+ for (size_t i = 0; i < sizeof(proc_bind_table) / sizeof(proc_bind_table[0]);
+ ++i) {
+ if (__kmp_teams_proc_bind == proc_bind_table[i].proc_bind) {
+ value = proc_bind_table[i].name;
+ break;
+ }
+ }
+ __kmp_stg_print_str(buffer, name, value);
+}
#endif /* KMP_AFFINITY_SUPPORTED */
// OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X*
@@ -5312,6 +5353,8 @@ static kmp_setting_t __kmp_stg_table[] = {
#endif /* KMP_GOMP_COMPAT */
{"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind,
NULL, 0, 0},
+ {"KMP_TEAMS_PROC_BIND", __kmp_stg_parse_teams_proc_bind,
+ __kmp_stg_print_teams_proc_bind, NULL, 0, 0},
{"OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0},
{"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method,
__kmp_stg_print_topology_method, NULL, 0, 0},
diff --git a/openmp/runtime/test/affinity/libomp_test_topology.h b/openmp/runtime/test/affinity/libomp_test_topology.h
index 4a84742825331..410103d809249 100644
--- a/openmp/runtime/test/affinity/libomp_test_topology.h
+++ b/openmp/runtime/test/affinity/libomp_test_topology.h
@@ -8,6 +8,7 @@
#include <errno.h>
#include <ctype.h>
#include <omp.h>
+#include <stdarg.h>
typedef enum topology_obj_type_t {
TOPOLOGY_OBJ_THREAD,
@@ -18,6 +19,8 @@ typedef enum topology_obj_type_t {
+// A list of places, each described by an affinity mask.
typedef struct place_list_t {
+ // Number of entries in both place_nums and masks.
int num_places;
+ // Place number of the thread that built the list (omp_get_place_num()), or
+ // -1 for lists built by topology_alloc_type_places().
+ int current_place;
+ // Place number for each entry; place_nums[i] corresponds to masks[i].
+ int *place_nums;
+ // One affinity mask per entry.
affinity_mask_t **masks;
} place_list_t;
@@ -147,6 +150,7 @@ static int topology_using_full_mask() {
static place_list_t *topology_alloc_type_places(topology_obj_type_t type) {
char buf[1024];
int i, cpu, num_places, num_unique;
+ int *place_nums;
int num_cpus = topology_get_num_cpus();
place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
affinity_mask_t **masks =
@@ -184,8 +188,13 @@ static place_list_t *topology_alloc_type_places(topology_obj_type_t type) {
if (mask)
masks[num_unique++] = mask;
}
+ place_nums = (int *)malloc(sizeof(int) * num_unique);
+ for (i = 0; i < num_unique; ++i)
+ place_nums[i] = i;
places->num_places = num_unique;
places->masks = masks;
+ places->place_nums = place_nums;
+ places->current_place = -1;
return places;
}
@@ -195,6 +204,7 @@ static place_list_t *topology_alloc_openmp_places() {
place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
affinity_mask_t **masks =
(affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places);
+ int *place_nums = (int *)malloc(sizeof(int) * num_places);
for (place = 0; place < num_places; ++place) {
int num_procs = omp_get_place_num_procs(place);
int *ids = (int *)malloc(sizeof(int) * num_procs);
@@ -203,9 +213,45 @@ static place_list_t *topology_alloc_openmp_places() {
for (i = 0; i < num_procs; ++i)
affinity_mask_set(mask, ids[i]);
masks[place] = mask;
+ place_nums[place] = place;
}
places->num_places = num_places;
+ places->place_nums = place_nums;
places->masks = masks;
+ places->current_place = omp_get_place_num();
+ return places;
+}
+
+// Build a place list describing the calling thread's place partition.
+// The list holds one entry per place reported by
+// omp_get_partition_place_nums(): place_nums[p] is the place number and
+// masks[p] has a bit set for every proc id of that place. current_place is
+// the caller's omp_get_place_num(). Exits with EXIT_FAILURE if a place
+// reports zero procs or yields an empty mask.
+// The returned list is heap-allocated; release it with topology_free_places().
+static place_list_t *topology_alloc_openmp_partition() {
+  int p, i;
+  int num_places = omp_get_partition_num_places();
+  place_list_t *places = (place_list_t *)malloc(sizeof(place_list_t));
+  int *place_nums = (int *)malloc(sizeof(int) * num_places);
+  affinity_mask_t **masks =
+      (affinity_mask_t **)malloc(sizeof(affinity_mask_t *) * num_places);
+  omp_get_partition_place_nums(place_nums);
+  for (p = 0; p < num_places; ++p) {
+    int place = place_nums[p];
+    int num_procs = omp_get_place_num_procs(place);
+    int *ids;
+    affinity_mask_t *mask;
+    // Validate before allocating so no zero-sized buffer is requested
+    if (num_procs == 0) {
+      fprintf(stderr, "place %d has 0 procs?\n", place);
+      exit(EXIT_FAILURE);
+    }
+    ids = (int *)malloc(sizeof(int) * num_procs);
+    omp_get_place_proc_ids(place, ids);
+    mask = affinity_mask_alloc();
+    for (i = 0; i < num_procs; ++i)
+      affinity_mask_set(mask, ids[i]);
+    // The ids are only needed to populate the mask; release them per place
+    free(ids);
+    if (affinity_mask_count(mask) == 0) {
+      fprintf(stderr, "place %d has 0 procs set?\n", place);
+      exit(EXIT_FAILURE);
+    }
+    masks[p] = mask;
+  }
+  places->num_places = num_places;
+  places->place_nums = place_nums;
+  places->masks = masks;
+  places->current_place = omp_get_place_num();
   return places;
 }
@@ -216,6 +262,7 @@ static void topology_free_places(place_list_t *places) {
for (i = 0; i < places->num_places; ++i)
affinity_mask_free(places->masks[i]);
free(places->masks);
+ free(places->place_nums);
free(places);
}
@@ -224,8 +271,306 @@ static void topology_print_places(const place_list_t *p) {
char buf[1024];
for (i = 0; i < p->num_places; ++i) {
affinity_mask_snprintf(buf, sizeof(buf), p->masks[i]);
- printf("Place %d: %s\n", i, buf);
+ printf("Place %d: %s\n", p->place_nums[i], buf);
+ }
+}
+
+// Print an error message to stderr and exit with EXIT_FAILURE.
+// The message starts with the proc-bind policy name and the T (threads/teams)
+// and P (places) values, followed by the printf-style `format` and its
+// variadic arguments. For spread and close the prefix also states whether
+// T <= P or T > P. A NULL `format` prints only the prefix, since handing a
+// null format string to vfprintf is undefined behaviour.
+// This function does not return.
+static void proc_bind_die(omp_proc_bind_t proc_bind, int T, int P,
+                          const char *format, ...) {
+  va_list args;
+  va_start(args, format);
+  const char *pb;
+  switch (proc_bind) {
+  case omp_proc_bind_false:
+    pb = "False";
+    break;
+  case omp_proc_bind_true:
+    pb = "True";
+    break;
+  case omp_proc_bind_master:
+    pb = "Master (Primary)";
+    break;
+  case omp_proc_bind_close:
+    pb = "Close";
+    break;
+  case omp_proc_bind_spread:
+    pb = "Spread";
+    break;
+  default:
+    pb = "(Unknown Proc Bind Type)";
+    break;
+  }
+  if (proc_bind == omp_proc_bind_spread || proc_bind == omp_proc_bind_close) {
+    if (T <= P) {
+      fprintf(stderr, "%s : (T(%d) <= P(%d)) : ", pb, T, P);
+    } else {
+      fprintf(stderr, "%s : (T(%d) > P(%d)) : ", pb, T, P);
+    }
+  } else {
+    fprintf(stderr, "%s : T = %d, P = %d : ", pb, T, P);
+  }
+  if (format) {
+    vfprintf(stderr, format, args);
+  } else {
+    // No detail message supplied; just terminate the prefix line
+    fputc('\n', stderr);
+  }
+  va_end(args);
+
+  exit(EXIT_FAILURE);
+}
+
+// Check that the place partitions gathered from the children are consistent
+// with the given proc_bind policy relative to the parent place list.
+//   proc_bind - policy to verify; only spread, close and master (primary)
+//               can be checked
+//   parent    - place list of the encountering (parent) thread
+//   children  - array of nchildren partitions, one per child; NULL entries
+//               are skipped
+//   nchildren - number of slots in children
+// Nothing is returned: the first violation found is reported through
+// proc_bind_die(), which exits the process. T below is the number of non-NULL
+// children and P the number of places in the parent list.
+static void proc_bind_check(omp_proc_bind_t proc_bind,
+                            const place_list_t *parent, place_list_t **children,
+                            int nchildren) {
+  place_list_t *partition;
+  int T, i, j, low, high, first, last, count, current_place, num_places;
+  const int *place_nums;
+  int P = parent->num_places;
+
+  // Find the correct T (there could be null entries in children)
+  place_list_t **partitions =
+      (place_list_t **)malloc(sizeof(place_list_t *) * nchildren);
+  T = 0;
+  for (i = 0; i < nchildren; ++i)
+    if (children[i])
+      partitions[T++] = children[i];
+  // Only able to check spread, close, master (primary)
+  if (proc_bind != omp_proc_bind_spread && proc_bind != omp_proc_bind_close &&
+      proc_bind != omp_proc_bind_master)
+    proc_bind_die(proc_bind, T, P, "Cannot check this proc bind type\n");
+
+  if (proc_bind == omp_proc_bind_spread) {
+    if (T <= P) {
+      // Run through each subpartition
+      for (i = 0; i < T; ++i) {
+        partition = partitions[i];
+        place_nums = partition->place_nums;
+        num_places = partition->num_places;
+        current_place = partition->current_place;
+        // Correct count?
+        low = P / T;
+        high = P / T + (P % T ? 1 : 0);
+        if (num_places != low && num_places != high) {
+          proc_bind_die(proc_bind, T, P,
+                        "Incorrect number of places for thread %d: %d. "
+                        "Expecting between %d and %d\n",
+                        i, num_places, low, high);
+        }
+        // Consecutive places?
+        for (j = 1; j < num_places; ++j) {
+          if (place_nums[j] != (place_nums[j - 1] + 1) % P) {
+            proc_bind_die(proc_bind, T, P,
+                          "Not consecutive places: %d, %d in partition\n",
+                          place_nums[j - 1], place_nums[j]);
+          }
+        }
+        first = place_nums[0];
+        last = place_nums[num_places - 1];
+        // Primary thread executes on place of the parent thread?
+        if (i == 0) {
+          if (current_place != parent->current_place) {
+            proc_bind_die(
+                proc_bind, T, P,
+                "Primary thread not on same place (%d) as parent thread (%d)\n",
+                current_place, parent->current_place);
+          }
+        } else {
+          // Thread's current place is first place within its partition?
+          if (current_place != first) {
+            proc_bind_die(proc_bind, T, P,
+                          "Thread's current place (%d) is not the first place "
+                          "in its partition [%d, %d]\n",
+                          current_place, first, last);
+          }
+        }
+        // Partitions don't have intersections?
+        // A partition with first > last wraps around the end of the place
+        // list, so each wrap/no-wrap combination is tested separately.
+        int f1 = first;
+        int l1 = last;
+        for (j = 0; j < i; ++j) {
+          int f2 = partitions[j]->place_nums[0];
+          int l2 = partitions[j]->place_nums[partitions[j]->num_places - 1];
+          if (f1 > l1 && f2 > l2) {
+            proc_bind_die(proc_bind, T, P,
+                          "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
+                          l1, f2, l2);
+          }
+          if (f1 > l1 && f2 <= l2)
+            if (f1 < l2 || l1 > f2) {
+              proc_bind_die(proc_bind, T, P,
+                            "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
+                            l1, f2, l2);
+            }
+          if (f1 <= l1 && f2 > l2)
+            if (f2 < l1 || l2 > f1) {
+              proc_bind_die(proc_bind, T, P,
+                            "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
+                            l1, f2, l2);
+            }
+          if (f1 <= l1 && f2 <= l2)
+            if (!(f2 > l1 || l2 < f1)) {
+              proc_bind_die(proc_bind, T, P,
+                            "partitions intersect. [%d, %d] and [%d, %d]\n", f1,
+                            l1, f2, l2);
+            }
+        }
+      }
+    } else {
+      // T > P
+      // Each partition has only one place?
+      for (i = 0; i < T; ++i) {
+        if (partitions[i]->num_places != 1) {
+          proc_bind_die(
+              proc_bind, T, P,
+              "Incorrect number of places for thread %d: %d. Expecting 1\n", i,
+              partitions[i]->num_places);
+        }
+      }
+      // Correct number of consecutive threads per partition?
+      low = T / P;
+      high = T / P + (T % P ? 1 : 0);
+      for (i = 1, count = 1; i < T; ++i) {
+        if (partitions[i]->place_nums[0] == partitions[i - 1]->place_nums[0]) {
+          count++;
+          if (count > high) {
+            proc_bind_die(
+                proc_bind, T, P,
+                "Too many threads have place %d for their partition\n",
+                partitions[i]->place_nums[0]);
+          }
+        } else {
+          // count describes the group that just ended, i.e. the place of
+          // thread i - 1, so that is the place to report
+          if (count < low) {
+            proc_bind_die(
+                proc_bind, T, P,
+                "Not enough threads have place %d for their partition\n",
+                partitions[i - 1]->place_nums[0]);
+          }
+          count = 1;
+        }
+      }
+      // Primary thread executes on place of the parent thread?
+      current_place = partitions[0]->place_nums[0];
+      if (parent->current_place != -1 &&
+          current_place != parent->current_place) {
+        proc_bind_die(
+            proc_bind, T, P,
+            "Primary thread not on same place (%d) as parent thread (%d)\n",
+            current_place, parent->current_place);
+      }
+    }
+  } else if (proc_bind == omp_proc_bind_close ||
+             proc_bind == omp_proc_bind_master) {
+    // Check that each subpartition is the same as the parent
+    for (i = 0; i < T; ++i) {
+      partition = partitions[i];
+      place_nums = partition->place_nums;
+      num_places = partition->num_places;
+      current_place = partition->current_place;
+      if (parent->num_places != num_places) {
+        proc_bind_die(proc_bind, T, P,
+                      "Number of places in subpartition (%d) does not match "
+                      "parent (%d)\n",
+                      num_places, parent->num_places);
+      }
+      for (j = 0; j < num_places; ++j) {
+        if (parent->place_nums[j] != place_nums[j]) {
+          proc_bind_die(proc_bind, T, P,
+                        "Subpartition place (%d) does not match "
+                        "parent partition place (%d)\n",
+                        place_nums[j], parent->place_nums[j]);
+        }
+      }
+    }
+    // Find index into place_nums of current place for parent
+    for (j = 0; j < parent->num_places; ++j)
+      if (parent->place_nums[j] == parent->current_place)
+        break;
+    if (proc_bind == omp_proc_bind_close) {
+      // The close checks index parent->place_nums with j, so the parent's
+      // current place must actually be present in its own place list
+      if (j == parent->num_places) {
+        proc_bind_die(proc_bind, T, P,
+                      "Parent thread's current place (%d) is not within the "
+                      "parent place list\n",
+                      parent->current_place);
+      }
+      if (T <= P) {
+        // close T <= P
+        // check place assignment for each thread
+        for (i = 0; i < T; ++i) {
+          partition = partitions[i];
+          current_place = partition->current_place;
+          if (current_place != parent->place_nums[j]) {
+            proc_bind_die(
+                proc_bind, T, P,
+                "Thread %d's current place (%d) is incorrect. expected %d\n", i,
+                current_place, parent->place_nums[j]);
+          }
+          j = (j + 1) % parent->num_places;
+        }
+      } else {
+        // close T > P
+        // check place assignment for each thread
+        low = T / P;
+        high = T / P + (T % P ? 1 : 0);
+        count = 1;
+        if (partitions[0]->current_place != parent->current_place) {
+          proc_bind_die(
+              proc_bind, T, P,
+              "Primary thread's place (%d) is not parent thread's place (%d)\n",
+              partitions[0]->current_place, parent->current_place);
+        }
+        for (i = 1; i < T; ++i) {
+          current_place = partitions[i]->current_place;
+          if (current_place == parent->place_nums[j]) {
+            count++;
+            if (count > high) {
+              proc_bind_die(
+                  proc_bind, T, P,
+                  "Too many threads have place %d for their current place\n",
+                  current_place);
+            }
+          } else {
+            if (count < low) {
+              proc_bind_die(
+                  proc_bind, T, P,
+                  "Not enough threads have place %d for their current place\n",
+                  parent->place_nums[j]);
+            }
+            j = (j + 1) % parent->num_places;
+            if (current_place != parent->place_nums[j]) {
+              proc_bind_die(
+                  proc_bind, T, P,
+                  "Thread %d's place (%d) is not correct. Expected %d\n", i,
+                  partitions[i]->current_place, parent->place_nums[j]);
+            }
+            count = 1;
+          }
+        }
+      }
+    } else {
+      // proc_bind_primary
+      // Every thread should be assigned to the primary thread's place
+      for (i = 0; i < T; ++i) {
+        if (partitions[i]->current_place != parent->current_place) {
+          proc_bind_die(
+              proc_bind, T, P,
+              "Thread %d's place (%d) is not the primary thread's place (%d)\n",
+              i, partitions[i]->current_place, parent->current_place);
+        }
+      }
+    }
   }
+
+  // Check that each partition's current place is within the partition
+  for (i = 0; i < T; ++i) {
+    current_place = partitions[i]->current_place;
+    num_places = partitions[i]->num_places;
+    first = partitions[i]->place_nums[0];
+    last = partitions[i]->place_nums[num_places - 1];
+    for (j = 0; j < num_places; ++j)
+      if (partitions[i]->place_nums[j] == current_place)
+        break;
+    if (j == num_places) {
+      proc_bind_die(proc_bind, T, P,
+                    "Thread %d's current place (%d) is not within its "
+                    "partition [%d, %d]\n",
+                    i, current_place, first, last);
+    }
+  }
+
+  free(partitions);
 }
#endif
diff --git a/openmp/runtime/test/affinity/teams-affinity.c b/openmp/runtime/test/affinity/teams-affinity.c
new file mode 100644
index 0000000000000..0ca74758d3e71
--- /dev/null
+++ b/openmp/runtime/test/affinity/teams-affinity.c
@@ -0,0 +1,119 @@
+// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 %libomp-run
+// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
+// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=close %libomp-run
+// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=close KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
+// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=primary %libomp-run
+// RUN: %libomp-compile && env OMP_PLACES=cores OMP_TEAMS_THREAD_LIMIT=1 KMP_TEAMS_THREAD_LIMIT=256 KMP_TEAMS_PROC_BIND=primary KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run
+// REQUIRES: linux
+// UNSUPPORTED: clang-5, clang-6, clang-7, clang-8, clang-9, clang-10
+// UNSUPPORTED: gcc-5, gcc-6, gcc-7, gcc-8
+// UNSUPPORTED: icc
+//
+// KMP_TEAMS_THREAD_LIMIT limits the number of total teams
+// OMP_TEAMS_THREAD_LIMIT limits the number of threads per team
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "libomp_test_affinity.h"
+#include "libomp_test_topology.h"
+
+#define _STR(X) #X
+#define STR(X) _STR(X)
+
+#ifndef MAX_NTEAMS
+#define MAX_NTEAMS 256
+#endif
+
+// Give KMP_TEAMS_THREAD_LIMIT a default of MAX_NTEAMS (stringified through
+// STR) when the variable is not already present in the environment.
+static void set_default_max_nteams() {
+ // Do not overwrite if already in environment
+ setenv("KMP_TEAMS_THREAD_LIMIT", STR(MAX_NTEAMS), 0);
+}
+
+// Return the maximum number of teams to test, taken from the
+// KMP_TEAMS_THREAD_LIMIT environment variable and clamped to the range
+// [1, MAX_NTEAMS]. Exits with EXIT_FAILURE if the variable is not set.
+static int get_max_nteams() {
+ int max_nteams;
+ const char *value = getenv("KMP_TEAMS_THREAD_LIMIT");
+ if (!value) {
+ fprintf(stderr, "KMP_TEAMS_THREAD_LIMIT must be set!\n");
+ exit(EXIT_FAILURE);
+ }
+ // atoi yields 0 for text with no leading number; the clamp below then
+ // turns that into 1
+ max_nteams = atoi(value);
+ if (max_nteams <= 0)
+ max_nteams = 1;
+ if (max_nteams > MAX_NTEAMS)
+ max_nteams = MAX_NTEAMS;
+ return max_nteams;
+}
+
+// Return the proc-bind policy named by the KMP_TEAMS_PROC_BIND environment
+// variable: "spread", "close", or "primary"/"master" (both map to
+// omp_proc_bind_master). Returns omp_proc_bind_spread when the variable is
+// unset. Any other value prints an error and exits with EXIT_FAILURE.
+// NOTE(review): the comparison here is an exact strcmp; confirm that the
+// spellings accepted here agree with the ones the runtime itself accepts.
+static omp_proc_bind_t get_teams_proc_bind() {
+  // defaults to spread
+  omp_proc_bind_t proc_bind = omp_proc_bind_spread;
+  const char *value = getenv("KMP_TEAMS_PROC_BIND");
+  if (value) {
+    if (strcmp(value, "spread") == 0) {
+      proc_bind = omp_proc_bind_spread;
+    } else if (strcmp(value, "close") == 0) {
+      proc_bind = omp_proc_bind_close;
+    } else if (strcmp(value, "primary") == 0 || strcmp(value, "master") == 0) {
+      proc_bind = omp_proc_bind_master;
+    } else {
+      // Terminate the line so the diagnostic is not glued to later output
+      fprintf(stderr,
+              "KMP_TEAMS_PROC_BIND should be one of spread, close, primary\n");
+      exit(EXIT_FAILURE);
+    }
+  }
+  return proc_bind;
+}
+
+// Test driver: for a growing number of teams, launch a teams construct, record
+// each team's place partition, and verify the partitions against the full
+// OpenMP place list using the policy named by KMP_TEAMS_PROC_BIND.
+// argc and argv are unused.
+int main(int argc, char **argv) {
+  int i, nteams, max_nteams, factor;
+  place_list_t **teams_places;
+  place_list_t *place_list;
+  omp_proc_bind_t teams_proc_bind;
+
+  // Set a default for the max number of teams if it is not already set
+  set_default_max_nteams();
+  place_list = topology_alloc_openmp_places();
+  max_nteams = get_max_nteams();
+  // Further limit the number of teams to twice the number of OMP_PLACES
+  if (max_nteams > 2 * place_list->num_places)
+    max_nteams = 2 * place_list->num_places;
+  teams_places = (place_list_t **)malloc(sizeof(place_list_t *) * max_nteams);
+  for (i = 0; i < max_nteams; ++i)
+    teams_places[i] = NULL;
+  teams_proc_bind = get_teams_proc_bind();
+
+  // factor controls the number of test cases: nteams grows by roughly
+  // factor / (factor - 1) per step, so the larger the factor, the more test
+  // cases will be performed.
+  if (teams_proc_bind == omp_proc_bind_master) {
+    factor = 2;
+  } else {
+    factor = 8;
+  }
+
+  for (nteams = 1; nteams <= max_nteams;
+       nteams = nteams * factor / (factor - 1) + 1) {
+    // Check the same value twice to make sure hot teams are ok
+    int j;
+    for (j = 0; j < 2; ++j) {
+      // Gather the proc bind partitions from each team
+      #pragma omp teams num_teams(nteams)
+      teams_places[omp_get_team_num()] = topology_alloc_openmp_partition();
+
+      // Check all the partitions with the parent partition
+      proc_bind_check(teams_proc_bind, place_list, teams_places, nteams);
+
+      // Free the proc bind partitions. Slots are cleared so that a league
+      // delivering fewer teams than requested never leaves a stale pointer
+      // to be checked or freed again; unfilled (NULL) slots are skipped.
+      for (i = 0; i < nteams; ++i) {
+        if (teams_places[i]) {
+          topology_free_places(teams_places[i]);
+          teams_places[i] = NULL;
+        }
+      }
+    }
+  }
+
+  free(teams_places);
+  topology_free_places(place_list);
+  return EXIT_SUCCESS;
+}
More information about the Openmp-commits
mailing list