[Openmp-commits] [openmp] 0ddde4d - [OpenMP] Lazily assign root affinity
via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jun 15 14:21:17 PDT 2021
Author: Peyton, Jonathan L
Date: 2021-06-15T16:21:06-05:00
New Revision: 0ddde4d86518de1154b6e850dc49010c829b717c
URL: https://github.com/llvm/llvm-project/commit/0ddde4d86518de1154b6e850dc49010c829b717c
DIFF: https://github.com/llvm/llvm-project/commit/0ddde4d86518de1154b6e850dc49010c829b717c.diff
LOG: [OpenMP] Lazily assign root affinity
Lazily set affinity for root threads. Previously, the root thread
executing middle initialization would attempt to assign affinity
to other existing root threads. This was not working properly as the
set_system_affinity() function wasn't setting the affinity for the
target thread. Instead, the middle init thread was resetting
its own affinity using the target thread's affinity mask.
Differential Revision: https://reviews.llvm.org/D103625
Added:
openmp/runtime/test/affinity/root-threads-affinity.c
Modified:
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_ftn_entry.h
openmp/runtime/src/kmp_runtime.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 9c3fbf67db67..d1f0da8ae356 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2874,6 +2874,9 @@ typedef struct kmp_base_root {
kmp_lock_t r_begin_lock;
volatile int r_begin;
int r_blocktime; /* blocktime for this root and descendants */
+#if KMP_AFFINITY_SUPPORTED
+ int r_affinity_assigned;
+#endif // KMP_AFFINITY_SUPPORTED
} kmp_base_root_t;
typedef union KMP_ALIGN_CACHE kmp_root {
@@ -3495,6 +3498,16 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
#if KMP_OS_LINUX || KMP_OS_FREEBSD
extern int kmp_set_thread_affinity_mask_initial(void);
#endif
+// Give the calling thread its initial affinity mask the first time it is
+// seen here as the uber thread of its root; every later call is a no-op.
+static inline void __kmp_assign_root_init_mask() {
+  const int gtid = __kmp_entry_gtid();
+  kmp_info_t *self = __kmp_threads[gtid];
+  kmp_root_t *root = self->th.th_root;
+  // Only the uber thread of a root gets its mask assigned here.
+  if (root->r.r_uber_thread != self)
+    return;
+  // r_affinity_assigned records that the assignment already happened.
+  if (root->r.r_affinity_assigned)
+    return;
+  __kmp_affinity_set_init_mask(gtid, TRUE);
+  root->r.r_affinity_assigned = TRUE;
+}
+#else /* KMP_AFFINITY_SUPPORTED */
+#define __kmp_assign_root_init_mask() /* Nothing */
#endif /* KMP_AFFINITY_SUPPORTED */
// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
// format string is for affinity, so platforms that do not support
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 98d0cdf7af54..1189db1a70e5 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -39,6 +39,7 @@ void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
__kmp_str_match_true(env)) {
__kmp_middle_initialize();
+ __kmp_assign_root_init_mask();
KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
} else if (__kmp_ignore_mppbeg() == FALSE) {
// By default __kmp_ignore_mppbeg() returns TRUE.
@@ -2023,6 +2024,7 @@ void ompc_display_affinity(char const *format) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
__kmp_aux_display_affinity(gtid, format);
}
@@ -2035,6 +2037,7 @@ size_t ompc_capture_affinity(char *buffer, size_t buf_size,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
__kmp_str_buf_init(&capture_buf);
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
@@ -2093,6 +2096,7 @@ int kmpc_set_affinity_mask_proc(int proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_set_affinity_mask_proc(proc, mask);
#endif
}
@@ -2104,6 +2108,7 @@ int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_unset_affinity_mask_proc(proc, mask);
#endif
}
@@ -2115,6 +2120,7 @@ int kmpc_get_affinity_mask_proc(int proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_mask_proc(proc, mask);
#endif
}
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 5d8398d3e7f8..d88bf1e68aef 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -217,6 +217,7 @@ int FTN_STDCALL FTN_SET_AFFINITY(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_set_affinity(mask);
#endif
}
@@ -228,6 +229,7 @@ int FTN_STDCALL FTN_GET_AFFINITY(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity(mask);
#endif
}
@@ -240,6 +242,7 @@ int FTN_STDCALL FTN_GET_AFFINITY_MAX_PROC(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_max_proc();
#endif
}
@@ -253,6 +256,7 @@ void FTN_STDCALL FTN_CREATE_AFFINITY_MASK(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
mask_internals = __kmp_affinity_dispatch->allocate_mask();
KMP_CPU_ZERO(mask_internals);
*mask = mask_internals;
@@ -268,6 +272,7 @@ void FTN_STDCALL FTN_DESTROY_AFFINITY_MASK(void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (__kmp_env_consistency_check) {
if (*mask == NULL) {
KMP_FATAL(AffinityInvalidMask, "kmp_destroy_affinity_mask");
@@ -286,6 +291,7 @@ int FTN_STDCALL FTN_SET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_set_affinity_mask_proc(KMP_DEREF proc, mask);
#endif
}
@@ -297,6 +303,7 @@ int FTN_STDCALL FTN_UNSET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_unset_affinity_mask_proc(KMP_DEREF proc, mask);
#endif
}
@@ -308,6 +315,7 @@ int FTN_STDCALL FTN_GET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_aux_get_affinity_mask_proc(KMP_DEREF proc, mask);
#endif
}
@@ -342,6 +350,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_entry_gtid();
thread = __kmp_threads[gtid];
// return thread -> th.th_team -> t.t_current_task[
@@ -487,6 +496,7 @@ void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
ConvertedString cformat(format, size);
__kmp_aux_display_affinity(gtid, cformat.get());
@@ -514,6 +524,7 @@ size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
__kmp_str_buf_init(&capture_buf);
ConvertedString cformat(format, for_size);
@@ -590,6 +601,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
return __kmp_avail_proc;
#endif
}
@@ -779,6 +791,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
return __kmp_affinity_num_masks;
@@ -794,6 +807,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
@@ -819,6 +833,7 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
@@ -844,6 +859,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return -1;
gtid = __kmp_entry_gtid();
@@ -863,6 +879,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
gtid = __kmp_entry_gtid();
@@ -889,6 +906,7 @@ KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
+ __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
gtid = __kmp_entry_gtid();
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 34f986998132..72929d833230 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -1412,6 +1412,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
#endif
+ // Assign affinity to root thread if it hasn't happened yet
+ __kmp_assign_root_init_mask();
+
// Nested level will be an index in the nested nthreads array
level = parent_team->t.t_level;
// used to launch non-serial teams even if nested is not allowed
@@ -3171,6 +3174,9 @@ static void __kmp_initialize_root(kmp_root_t *root) {
root->r.r_active = FALSE;
root->r.r_in_parallel = 0;
root->r.r_blocktime = __kmp_dflt_blocktime;
+#if KMP_AFFINITY_SUPPORTED
+ root->r.r_affinity_assigned = FALSE;
+#endif
/* setup the root team for this task */
/* allocate the root team structure */
@@ -3816,9 +3822,6 @@ int __kmp_register_root(int initial_thread) {
root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
- if (TCR_4(__kmp_init_middle)) {
- __kmp_affinity_set_init_mask(gtid, TRUE);
- }
#endif /* KMP_AFFINITY_SUPPORTED */
root_thread->th.th_def_allocator = __kmp_def_allocator;
root_thread->th.th_prev_level = 0;
@@ -7037,13 +7040,6 @@ static void __kmp_do_middle_initialize(void) {
// number of cores on the machine.
__kmp_affinity_initialize();
- // Run through the __kmp_threads array and set the affinity mask
- // for each root thread that is currently registered with the RTL.
- for (i = 0; i < __kmp_threads_capacity; i++) {
- if (TCR_PTR(__kmp_threads[i]) != NULL) {
- __kmp_affinity_set_init_mask(i, TRUE);
- }
- }
#endif /* KMP_AFFINITY_SUPPORTED */
KMP_ASSERT(__kmp_xproc > 0);
@@ -7165,6 +7161,7 @@ void __kmp_parallel_initialize(void) {
if (!__kmp_init_middle) {
__kmp_do_middle_initialize();
}
+ __kmp_assign_root_init_mask();
__kmp_resume_if_hard_paused();
/* begin initialization */
@@ -7471,6 +7468,7 @@ static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
// Remember the number of threads for inner parallel regions
if (!TCR_4(__kmp_init_middle))
__kmp_middle_initialize(); // get internal globals calculated
+ __kmp_assign_root_init_mask();
KMP_DEBUG_ASSERT(__kmp_avail_proc);
KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);
diff --git a/openmp/runtime/test/affinity/root-threads-affinity.c b/openmp/runtime/test/affinity/root-threads-affinity.c
new file mode 100644
index 000000000000..b8ed5b291631
--- /dev/null
+++ b/openmp/runtime/test/affinity/root-threads-affinity.c
@@ -0,0 +1,197 @@
+// RUN: %libomp-compile && env LIBOMP_NUM_HIDDEN_HELPER_THREADS=0 OMP_PROC_BIND=close OMP_PLACES=cores KMP_AFFINITY=verbose %libomp-run 8 1 4
+// REQUIRES: linux
+//
+// This test pthread_creates 8 root threads before any OpenMP
+// runtime entry is ever called. We have all the root threads
+// register with the runtime by calling omp_set_num_threads(),
+// but this does not initialize their affinity. The fourth root thread
+// then calls a parallel region and we make sure its affinity
+// is correct. We also make sure all the other root threads are
+// free-floating since they have not called into a parallel region.
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include "libomp_test_affinity.h"
+
+volatile int entry_flag = 0; // turn counter: a root proceeds once this equals its root_id, then increments it
+volatile int flag = 0; // set to 1 by the spawner after its parallel region; worker roots spin on it
+volatile int num_roots_arrived = 0; // number of worker roots that have checked in (incremented under lock)
+int num_roots; // total number of root threads, parsed from argv[1]
+int spawner = 0; // root_id of the root that runs the parallel region (argv[3], default 0)
+pthread_mutex_t lock; // guards the increment of num_roots_arrived
+int register_workers = 0; // boolean (argv[2]): whether worker roots call omp_set_num_threads()
+affinity_mask_t *full_mask; // affinity mask captured in main() before any root thread is spawned
+
+int __kmpc_global_thread_num(void*); // not defined in this file; called in thread_func to obtain the caller's gtid
+
+// Return the caller's kernel thread id as reported by the gettid system call.
+int get_os_thread_id() {
+  long os_tid = syscall(SYS_gettid);
+  return (int)os_tid;
+}
+
+// Check that the calling thread's affinity mask holds exactly the processor
+// ids of the OpenMP place reported by omp_get_place_num().
+// Prints the place number and the mask, then returns 1 when both sets of
+// processor ids are identical and 0 otherwise (also 0 when the id buffer
+// cannot be allocated). The temporary mask and the id buffer are released on
+// every return path.
+int place_and_affinity_match() {
+  int i, max_cpu;
+  int match = 1;
+  char buf[512];
+  affinity_mask_t *mask = affinity_mask_alloc();
+  int place = omp_get_place_num();
+  int num_procs = omp_get_place_num_procs(place);
+  int *ids = (int*)malloc(sizeof(int) * num_procs);
+  // malloc(0) may legitimately return NULL, so only treat NULL as a failure
+  // when ids were actually requested.
+  if (ids == NULL && num_procs > 0) {
+    fprintf(stderr, "error: could not allocate place proc id buffer\n");
+    affinity_mask_free(mask);
+    return 0;
+  }
+  omp_get_place_proc_ids(place, ids);
+  get_thread_affinity(mask);
+  affinity_mask_snprintf(buf, sizeof(buf), mask);
+  printf("Primary Thread Place: %d\n", place);
+  printf("Primary Thread mask: %s\n", buf);
+
+  // Every processor of the place must be set in the mask
+  for (i = 0; i < num_procs; ++i) {
+    int cpu = ids[i];
+    if (!affinity_mask_isset(mask, cpu)) {
+      match = 0;
+      break;
+    }
+  }
+
+  // Every processor set in the mask must belong to the place
+  max_cpu = AFFINITY_MAX_CPUS;
+  for (i = 0; match && i < max_cpu; ++i) {
+    int cpu = i;
+    if (affinity_mask_isset(mask, cpu)) {
+      int j, found = 0;
+      for (j = 0; j < num_procs; ++j) {
+        if (ids[j] == cpu) {
+          found = 1;
+          break;
+        }
+      }
+      if (!found)
+        match = 0;
+    }
+  }
+
+  affinity_mask_free(mask);
+  free(ids);
+  return match;
+}
+
+// Body executed by every root thread; main() calls it directly for root 0
+// and passes it to pthread_create() for the others.
+// arg points to an int holding this thread's root id. Always returns NULL.
+// The root whose id equals `spawner` runs a parallel region and then checks
+// that its place and affinity mask agree. Every other root checks that its
+// affinity mask still equals full_mask. Either check exits the process with
+// EXIT_FAILURE when it fails.
+void* thread_func(void *arg) {
+  int root_id = *((int*)arg);
+  int pid = getpid();
+  int tid = get_os_thread_id();
+
+  // Order how the root threads are assigned a gtid in the runtime
+  // i.e., root_id = gtid
+  while (1) {
+    int v = entry_flag;
+    if (v == root_id)
+      break;
+  }
+
+  // If main root thread
+  if (root_id == spawner) {
+    printf("Initial application thread (pid=%d, tid=%d, spawner=%d) reached thread_func (will call OpenMP)\n", pid, tid, spawner);
+    omp_set_num_threads(4);
+    #pragma omp atomic
+    entry_flag++;
+    // Wait for the workers to signal their arrival before #pragma omp parallel
+    while (num_roots_arrived < num_roots - 1) {}
+    // This will trigger the output for KMP_AFFINITY in this case
+    #pragma omp parallel
+    {
+      int gtid = __kmpc_global_thread_num(NULL);
+      #pragma omp single
+      {
+        printf("Exactly %d threads in the #pragma omp parallel\n",
+               omp_get_num_threads());
+      }
+      #pragma omp critical
+      {
+        printf("OpenMP thread %d: gtid=%d\n", omp_get_thread_num(), gtid);
+      }
+    }
+    // Release the worker roots so they can check their own masks
+    flag = 1;
+    if (!place_and_affinity_match()) {
+      fprintf(stderr, "error: place and affinity mask do not match for primary thread\n");
+      exit (EXIT_FAILURE);
+    }
+
+  } else { // If worker root thread
+    // Worker root threads, register with OpenMP through omp_set_num_threads()
+    // if designated to, signal their arrival and then wait for the main root
+    // thread to signal them to exit.
+    printf("New root pthread (pid=%d, tid=%d) reached thread_func\n", pid, tid);
+    if (register_workers)
+      omp_set_num_threads(4);
+    #pragma omp atomic
+    entry_flag++;
+
+    pthread_mutex_lock(&lock);
+    num_roots_arrived++;
+    pthread_mutex_unlock(&lock);
+    while (flag == 0) {}
+
+    // Main check whether root threads' mask is equal to the
+    // initial affinity mask
+    affinity_mask_t *mask = affinity_mask_alloc();
+    get_thread_affinity(mask);
+    if (!affinity_mask_equal(mask, full_mask)) {
+      char buf[1024];
+      // Report both masks, each under its own label, so the mismatch can be
+      // read directly from the output.
+      affinity_mask_snprintf(buf, sizeof(buf), mask);
+      printf("root thread %d mask: %s\n", root_id, buf);
+      affinity_mask_snprintf(buf, sizeof(buf), full_mask);
+      printf("initial affinity mask: %s\n", buf);
+      fprintf(stderr, "error: root thread %d affinity mask not equal"
+                      " to initial full mask\n", root_id);
+      affinity_mask_free(mask);
+      exit(EXIT_FAILURE);
+    }
+    affinity_mask_free(mask);
+  }
+  return NULL;
+}
+
+// Test driver.
+// usage: <num_roots> <register_workers_bool> [<spawn_root_number>]
+// Records the initial affinity mask, starts num_roots - 1 root pthreads
+// running thread_func, runs thread_func itself as root 0, then joins the
+// pthreads and releases all resources. Exits with EXIT_FAILURE on bad
+// arguments or when a resource cannot be obtained.
+int main(int argc, char** argv) {
+  int i;
+  if (argc != 3 && argc != 4) {
+    fprintf(stderr, "usage: %s <num_roots> <register_workers_bool> [<spawn_root_number>]\n", argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  // Get the number of root pthreads to create
+  num_roots = atoi(argv[1]);
+  if (num_roots < 1) {
+    // root 0 always runs in this thread, so at least one root is required
+    fprintf(stderr, "error: <num_roots> must be at least 1\n");
+    exit(EXIT_FAILURE);
+  }
+
+  // Get the flag indicating whether to have root pthreads call omp_set_num_threads() or not
+  register_workers = atoi(argv[2]);
+
+  if (argc == 4)
+    spawner = atoi(argv[3]);
+  if (spawner < 0 || spawner >= num_roots) {
+    // Without a valid spawner no root would ever set `flag` and every
+    // thread would spin forever.
+    fprintf(stderr, "error: <spawn_root_number> must be in [0, %d)\n", num_roots);
+    exit(EXIT_FAILURE);
+  }
+
+  // Initialize pthread mutex
+  pthread_mutex_init(&lock, NULL);
+
+  // Get initial full mask
+  full_mask = affinity_mask_alloc();
+  get_thread_affinity(full_mask);
+
+  // Allocate resources for the root pthreads
+  pthread_t *roots = (pthread_t*)malloc(sizeof(pthread_t) * num_roots);
+  int *root_ids = (int*)malloc(sizeof(int) * num_roots);
+  if (roots == NULL || root_ids == NULL) {
+    fprintf(stderr, "error: could not allocate root thread bookkeeping\n");
+    exit(EXIT_FAILURE);
+  }
+
+  // Spawn worker root threads
+  for (i = 1; i < num_roots; ++i) {
+    *(root_ids + i) = i;
+    if (pthread_create(roots + i, NULL, thread_func, root_ids + i) != 0) {
+      // A missing root would leave the others waiting on entry_flag forever
+      fprintf(stderr, "error: could not create root thread %d\n", i);
+      exit(EXIT_FAILURE);
+    }
+  }
+  // Have main root thread (root 0) go into thread_func
+  *root_ids = 0;
+  thread_func(root_ids);
+
+  // Cleanup all resources
+  for (i = 1; i < num_roots; ++i) {
+    void *status;
+    pthread_join(roots[i], &status);
+  }
+  free(roots);
+  free(root_ids);
+  affinity_mask_free(full_mask);
+  pthread_mutex_destroy(&lock);
+  return EXIT_SUCCESS;
+}
More information about the Openmp-commits
mailing list