[Openmp-commits] [openmp] 17dcde5 - [OpenMP][libomp] Allow reset affinity mask after parallel
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Tue Jul 19 11:10:12 PDT 2022
Author: AndreyChurbanov
Date: 2022-07-19T13:05:05-05:00
New Revision: 17dcde5f1bfefb8acdf2458158cac2666d638d66
URL: https://github.com/llvm/llvm-project/commit/17dcde5f1bfefb8acdf2458158cac2666d638d66
DIFF: https://github.com/llvm/llvm-project/commit/17dcde5f1bfefb8acdf2458158cac2666d638d66.diff
LOG: [OpenMP][libomp] Allow reset affinity mask after parallel
Added a control to reset the primary thread's affinity, after an outermost parallel
region, to the initial affinity that was in effect before the OpenMP runtime was initialized.
A reset/noreset modifier was introduced for the KMP_AFFINITY environment variable.
The default behavior (noreset) is unchanged.
Differential Revision: https://reviews.llvm.org/D125993
Added:
openmp/runtime/test/affinity/kmp-affinity-reset.c
Modified:
openmp/docs/design/Runtimes.rst
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_affinity.cpp
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_ftn_entry.h
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
Removed:
################################################################################
diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index c04da946abc92..532b76f72c41d 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -299,6 +299,7 @@ and any affinity API calls.
* ``respect`` (default) and ``norespect`` - determine whether to respect the original process affinity mask.
* ``verbose`` and ``noverbose`` (default) - determine whether to display affinity information.
* ``warnings`` (default) and ``nowarnings`` - determine whether to display warnings during affinity detection.
+* ``reset`` and ``noreset`` (default) - determine whether to reset primary thread's affinity after outermost parallel region(s)
* ``granularity=<specifier>`` - takes the following specifiers ``thread``, ``core`` (default), ``tile``,
``socket``, ``die``, ``group`` (Windows only).
The granularity describes the lowest topology levels that OpenMP threads are allowed to float within a topology map.
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 2efe12024284f..6aeb495d44f24 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -841,7 +841,9 @@ extern unsigned __kmp_affinity_num_masks;
extern void __kmp_affinity_bind_thread(int which);
extern kmp_affin_mask_t *__kmp_affin_fullMask;
+extern kmp_affin_mask_t *__kmp_affin_origMask;
extern char *__kmp_cpuinfo_file;
+extern bool __kmp_affin_reset;
#endif /* KMP_AFFINITY_SUPPORTED */
@@ -3627,8 +3629,18 @@ static inline void __kmp_assign_root_init_mask() {
r->r.r_affinity_assigned = TRUE;
}
}
+static inline void __kmp_reset_root_init_mask(int gtid) { // Rebinds thread `gtid` to __kmp_affin_origMask when it is a root (uber) thread whose affinity is currently marked assigned.
+ kmp_info_t *th = __kmp_threads[gtid];
+ kmp_root_t *r = th->th.th_root;
+ if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) { // act only for the root's own uber thread, and only while the assigned flag is set
+ __kmp_set_system_affinity(__kmp_affin_origMask, FALSE); // set system affinity to the saved original mask (meaning of the FALSE argument is not visible in this hunk)
+ KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask); // mirror the same mask into the thread's th_affin_mask field
+ r->r.r_affinity_assigned = FALSE; // clear the flag so the condition above fails until the flag is set to TRUE again
+ }
+}
#else /* KMP_AFFINITY_SUPPORTED */
#define __kmp_assign_root_init_mask() /* Nothing */
+static inline void __kmp_reset_root_init_mask(int gtid) {} // empty variant used in the #else branch, i.e. when KMP_AFFINITY_SUPPORTED is off
#endif /* KMP_AFFINITY_SUPPORTED */
// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
// format string is for affinity, so platforms that do not support
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index d3c989e4d0c5f..edc12b7d08d15 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -1536,6 +1536,8 @@ int __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
// internal topology object and set the layer ids for it. Each routine
// returns a boolean on whether it was successful at doing so.
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;
+// Original mask is a subset of full mask in multiple processor groups topology
+kmp_affin_mask_t *__kmp_affin_origMask = NULL;
#if KMP_USE_HWLOC
static inline bool __kmp_hwloc_is_cache_type(hwloc_obj_t obj) {
@@ -4072,8 +4074,13 @@ static void __kmp_aux_affinity_initialize(void) {
if (__kmp_affin_fullMask == NULL) {
KMP_CPU_ALLOC(__kmp_affin_fullMask);
}
+ if (__kmp_affin_origMask == NULL) {
+ KMP_CPU_ALLOC(__kmp_affin_origMask);
+ }
if (KMP_AFFINITY_CAPABLE()) {
__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
+ // Make a copy before possible expanding to the entire machine mask
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
if (__kmp_affinity_respect_mask) {
// Count the number of available processors.
unsigned i;
@@ -4111,6 +4118,10 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_avail_proc =
__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
#if KMP_OS_WINDOWS
+ if (__kmp_num_proc_groups <= 1) {
+ // Copy expanded full mask if topology has single processor group
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
+ }
// Set the process affinity mask since threads' affinity
// masks must be subset of process mask in Windows* OS
__kmp_affin_fullMask->set_process_affinity(true);
@@ -4283,6 +4294,13 @@ static void __kmp_aux_affinity_initialize(void) {
if (__kmp_affinity_verbose)
__kmp_topology->print("KMP_AFFINITY");
bool filtered = __kmp_topology->filter_hw_subset();
+ if (filtered) {
+#if KMP_OS_WINDOWS
+ // Copy filtered full mask if topology has single processor group
+ if (__kmp_num_proc_groups <= 1)
+#endif
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
+ }
if (filtered && __kmp_affinity_verbose)
__kmp_topology->print("KMP_HW_SUBSET");
machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
@@ -4506,6 +4524,10 @@ void __kmp_affinity_uninitialize(void) {
KMP_CPU_FREE(__kmp_affin_fullMask);
__kmp_affin_fullMask = NULL;
}
+ if (__kmp_affin_origMask != NULL) {
+ KMP_CPU_FREE(__kmp_affin_origMask);
+ __kmp_affin_origMask = NULL;
+ }
__kmp_affinity_num_masks = 0;
__kmp_affinity_type = affinity_default;
__kmp_affinity_num_places = 0;
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index b7bcc4c94148e..c932d450c84e9 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -632,6 +632,11 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
"team %p\n",
global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
}
+#if KMP_AFFINITY_SUPPORTED
+ if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(global_tid);
+ }
+#endif
} else {
if (__kmp_tasking_mode != tskm_immediate_exec) {
KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
@@ -2021,6 +2026,11 @@ void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
__kmp_aux_display_affinity(gtid, format);
}
@@ -2034,6 +2044,11 @@ size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
__kmp_str_buf_init(&capture_buf);
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
if (buffer && buf_size) {
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 048fcf9d03974..8dbd3ce976902 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -238,6 +238,10 @@ int FTN_STDCALL FTN_GET_AFFINITY(void **mask) {
__kmp_middle_initialize();
}
__kmp_assign_root_init_mask();
+ int gtid = __kmp_get_gtid();
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
return __kmp_aux_get_affinity(mask);
#endif
}
@@ -358,9 +362,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
gtid = __kmp_entry_gtid();
thread = __kmp_threads[gtid];
+#if KMP_AFFINITY_SUPPORTED
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
+#endif
// return thread -> th.th_team -> t.t_current_task[
// thread->th.th_info.ds.ds_tid ] -> icvs.nproc;
return thread->th.th_current_task->td_icvs.nproc;
@@ -509,6 +517,11 @@ void FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_DISPLAY_AFFINITY)(
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
ConvertedString cformat(format, size);
__kmp_aux_display_affinity(gtid, cformat.get());
#endif
@@ -537,6 +550,11 @@ size_t FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_CAPTURE_AFFINITY)(
}
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
+#if KMP_AFFINITY_SUPPORTED
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
__kmp_str_buf_init(&capture_buf);
ConvertedString cformat(format, for_size);
num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf);
@@ -612,7 +630,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
+#if KMP_AFFINITY_SUPPORTED
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
+#endif
return __kmp_avail_proc;
#endif
}
@@ -802,9 +829,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
return __kmp_affinity_num_masks;
#endif
}
@@ -818,9 +852,16 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
return 0;
kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
@@ -844,9 +885,16 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num,
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
+ if (!__kmp_affin_reset) {
+ // only bind root here if its affinity reset is not requested
+ int gtid = __kmp_entry_gtid();
+ kmp_info_t *thread = __kmp_threads[gtid];
+ if (thread->th.th_team->t.t_level == 0) {
+ __kmp_assign_root_init_mask();
+ }
+ }
if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
return;
kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
@@ -870,11 +918,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return -1;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
if (thread->th.th_current_place < 0)
return -1;
return thread->th.th_current_place;
@@ -890,11 +940,13 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return 0;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
first_place = thread->th.th_first_place;
last_place = thread->th.th_last_place;
if (first_place < 0 || last_place < 0)
@@ -917,11 +969,13 @@ KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) {
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
- __kmp_assign_root_init_mask();
if (!KMP_AFFINITY_CAPABLE())
return;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ __kmp_assign_root_init_mask();
+ }
first_place = thread->th.th_first_place;
last_place = thread->th.th_last_place;
if (first_place < 0 || last_place < 0)
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 7a3e3b09c983c..3fd536416de7e 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -285,6 +285,7 @@ kmp_affin_mask_t *__kmp_affinity_masks = NULL;
unsigned __kmp_affinity_num_masks = 0;
char *__kmp_cpuinfo_file = NULL;
+bool __kmp_affin_reset = 0;
#endif /* KMP_AFFINITY_SUPPORTED */
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 744a241f50a11..b8d470528798d 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -2641,6 +2641,11 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
+#if KMP_AFFINITY_SUPPORTED
+ if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ __kmp_reset_root_init_mask(gtid);
+ }
+#endif
#if OMPT_SUPPORT
int flags =
OMPT_INVOKER(fork_context) |
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 49b23a866a583..97b20d4f86642 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -2169,6 +2169,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
int respect = 0;
int gran = 0;
int dups = 0;
+ int reset = 0;
bool set = false;
KMP_ASSERT(value != NULL);
@@ -2224,6 +2225,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
#define set_respect(val) _set_param(respect, *out_respect, val)
#define set_dups(val) _set_param(dups, *out_dups, val)
#define set_proclist(val) _set_param(proclist, *out_proclist, val)
+#define set_reset(val) _set_param(reset, __kmp_affin_reset, val)
#define set_gran(val, levels) \
{ \
@@ -2293,6 +2295,12 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
} else if (__kmp_match_str("norespect", buf, CCAST(const char **, &next))) {
set_respect(FALSE);
buf = next;
+ } else if (__kmp_match_str("reset", buf, CCAST(const char **, &next))) {
+ set_reset(TRUE);
+ buf = next;
+ } else if (__kmp_match_str("noreset", buf, CCAST(const char **, &next))) {
+ set_reset(FALSE);
+ buf = next;
} else if (__kmp_match_str("duplicates", buf,
CCAST(const char **, &next)) ||
__kmp_match_str("dups", buf, CCAST(const char **, &next))) {
@@ -2433,6 +2441,7 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
#undef set_warnings
#undef set_respect
#undef set_granularity
+#undef set_reset
__kmp_str_free(&buffer);
@@ -2564,6 +2573,11 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
} else {
__kmp_str_buf_print(buffer, "%s,", "norespect");
}
+ if (__kmp_affin_reset) {
+ __kmp_str_buf_print(buffer, "%s,", "reset");
+ } else {
+ __kmp_str_buf_print(buffer, "%s,", "noreset");
+ }
__kmp_str_buf_print(buffer, "granularity=%s,",
__kmp_hw_get_keyword(__kmp_affinity_gran, false));
}
diff --git a/openmp/runtime/test/affinity/kmp-affinity-reset.c b/openmp/runtime/test/affinity/kmp-affinity-reset.c
new file mode 100644
index 0000000000000..dac75f70a65e3
--- /dev/null
+++ b/openmp/runtime/test/affinity/kmp-affinity-reset.c
@@ -0,0 +1,66 @@
+// RUN: %libomp-compile -D_GNU_SOURCE
+// RUN: env OMP_NUM_THREADS=2,2 KMP_AFFINITY=reset,granularity=thread,compact %libomp-run
+// REQUIRES: linux
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+#include "libomp_test_affinity.h"
+
+#define CHECK_EQUAL 0
+#define CHECK_NOT_EQUAL 1
+
+void check_primary_thread_affinity(int line, affinity_mask_t *other_aff, // Compares the current thread's affinity mask with other_aff; `line` is only used in the error message.
+ int type) { // type selects the expectation: CHECK_EQUAL or CHECK_NOT_EQUAL
+ #pragma omp master
+ { // this block is restricted by the `omp master` directive on the previous line
+ affinity_mask_t *primary_aff = affinity_mask_alloc();
+ get_thread_affinity(primary_aff); // presumably fills primary_aff with the calling thread's mask (helper from libomp_test_affinity.h, not shown here)
+ if (type == CHECK_EQUAL && !affinity_mask_equal(primary_aff, other_aff)) { // expected equal masks but they differ
+ fprintf(stderr, "error: line %d: primary affinity was not equal\n", line);
+ exit(EXIT_FAILURE); // the first failed expectation terminates the test
+ } else if (type == CHECK_NOT_EQUAL &&
+ affinity_mask_equal(primary_aff, other_aff)) { // expected different masks but they are equal
+ fprintf(stderr, "error: line %d: primary affinity was equal\n", line);
+ exit(EXIT_FAILURE);
+ }
+ affinity_mask_free(primary_aff); // reached only when the expectation held
+ }
+}
+
+#define CHECK_PRIMARY_THREAD_AFFINITY_EQUAL(other_aff) \
+ check_primary_thread_affinity(__LINE__, other_aff, CHECK_EQUAL)
+#define CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(other_aff) \
+ check_primary_thread_affinity(__LINE__, other_aff, CHECK_NOT_EQUAL)
+
+int main() { // Test driver: the mask must differ from the initial one inside parallel regions and match it again after each outermost region.
+ int i;
+ affinity_mask_t *initial_mask = affinity_mask_alloc();
+ get_thread_affinity(initial_mask); // reference mask, captured before the first parallel region below
+
+ for (i = 0; i < 10; ++i) { // non-nested case, repeated 10 times
+ #pragma omp parallel
+ {
+ CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask); // inside the region: must differ from the reference mask
+ }
+ CHECK_PRIMARY_THREAD_AFFINITY_EQUAL(initial_mask); // after the region: must equal the reference mask again
+ }
+
+ omp_set_max_active_levels(2); // permit the nested parallel region used in the second loop to be active
+ for (i = 0; i < 10; ++i) { // nested case, repeated 10 times
+ #pragma omp parallel
+ {
+ CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask); // outer region, before the nested region
+
+ #pragma omp parallel
+ CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask); // inside the nested region
+
+ CHECK_PRIMARY_THREAD_AFFINITY_NOT_EQUAL(initial_mask); // outer region, after the nested region ended: still expected to differ
+ }
+ CHECK_PRIMARY_THREAD_AFFINITY_EQUAL(initial_mask); // only after the outermost region must the reference mask be back
+ }
+
+ affinity_mask_free(initial_mask);
+ return EXIT_SUCCESS; // reached only if no check called exit(EXIT_FAILURE)
+}
More information about the Openmp-commits mailing list