[Openmp-commits] [openmp] 174502f - [OpenMP][libomp] Parameterize affinity functions
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Fri Oct 28 13:21:39 PDT 2022
Author: Jonathan Peyton
Date: 2022-10-28T15:21:06-05:00
New Revision: 174502fc14d08ad4aeee81a2ff7e551138b77152
URL: https://github.com/llvm/llvm-project/commit/174502fc14d08ad4aeee81a2ff7e551138b77152
DIFF: https://github.com/llvm/llvm-project/commit/174502fc14d08ad4aeee81a2ff7e551138b77152.diff
LOG: [OpenMP][libomp] Parameterize affinity functions
This patch parameterizes the affinity initialization code to allow multiple
affinity settings. Almost all global affinity settings are consolidated
and put into a structure kmp_affinity_t. This is in anticipation of the
addition of hidden helper affinity, which will have the same syntax and
semantics as KMP_AFFINITY but apply only to the hidden helper team.
Differential Revision: https://reviews.llvm.org/D135109
Added:
Modified:
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_affinity.cpp
openmp/runtime/src/kmp_affinity.h
openmp/runtime/src/kmp_barrier.cpp
openmp/runtime/src/kmp_csupport.cpp
openmp/runtime/src/kmp_ftn_entry.h
openmp/runtime/src/kmp_global.cpp
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/src/kmp_version.cpp
openmp/runtime/src/ompt-general.cpp
openmp/runtime/src/z_Linux_util.cpp
openmp/runtime/src/z_Windows_NT_util.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index df02f40c8df04..ec4f4f4b8f31f 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -823,27 +823,49 @@ enum affinity_top_method {
affinity_top_method_default
};
-#define affinity_respect_mask_default (-1)
+#define affinity_respect_mask_default (2)
+
+typedef struct kmp_affinity_flags_t {
+ unsigned dups : 1;
+ unsigned verbose : 1;
+ unsigned warnings : 1;
+ unsigned respect : 2;
+ unsigned reset : 1;
+ unsigned initialized : 1;
+ unsigned reserved : 25;
+} kmp_affinity_flags_t;
+KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
+
+typedef struct kmp_affinity_t {
+ char *proclist;
+ enum affinity_type type;
+ kmp_hw_t gran;
+ int gran_levels;
+ int compact;
+ int offset;
+ kmp_affinity_flags_t flags;
+ unsigned num_masks;
+ kmp_affin_mask_t *masks;
+ unsigned num_os_id_masks;
+ kmp_affin_mask_t *os_id_masks;
+ const char *env_var;
+} kmp_affinity_t;
+
+#define KMP_AFFINITY_INIT(env) \
+ { \
+ nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
+ {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
+ nullptr, 0, nullptr, env \
+ }
-extern enum affinity_type __kmp_affinity_type; /* Affinity type */
-extern kmp_hw_t __kmp_affinity_gran; /* Affinity granularity */
-extern int __kmp_affinity_gran_levels; /* corresponding int value */
-extern int __kmp_affinity_dups; /* Affinity duplicate masks */
extern enum affinity_top_method __kmp_affinity_top_method;
-extern int __kmp_affinity_compact; /* Affinity 'compact' value */
-extern int __kmp_affinity_offset; /* Affinity offset value */
-extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */
-extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */
-extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask?
-extern char *__kmp_affinity_proclist; /* proc ID list */
-extern kmp_affin_mask_t *__kmp_affinity_masks;
-extern unsigned __kmp_affinity_num_masks;
+extern kmp_affinity_t __kmp_affinity;
+
extern void __kmp_affinity_bind_thread(int which);
extern kmp_affin_mask_t *__kmp_affin_fullMask;
extern kmp_affin_mask_t *__kmp_affin_origMask;
extern char *__kmp_cpuinfo_file;
-extern bool __kmp_affin_reset;
#endif /* KMP_AFFINITY_SUPPORTED */
@@ -882,7 +904,7 @@ extern char *__kmp_tool_libraries;
#define KMP_AFFINITY_NON_PROC_BIND \
((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \
__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \
- (__kmp_affinity_num_masks > 0 || __kmp_affinity_type == affinity_balanced))
+ (__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))
#endif /* KMP_AFFINITY_SUPPORTED */
extern int __kmp_affinity_num_places;
@@ -3606,7 +3628,7 @@ extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
kmp_affin_mask_t *mask);
extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
kmp_affin_mask_t *mask);
-extern void __kmp_affinity_initialize(void);
+extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
extern void __kmp_affinity_uninitialize(void);
extern void __kmp_affinity_set_init_mask(
int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index f9d3578c634f8..2309d0a9dc443 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -142,8 +142,9 @@ const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type) {
// If affinity is supported, check the affinity
// verbose and warning flags before printing warning
#define KMP_AFF_WARNING(...) \
- if (__kmp_affinity_verbose || \
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { \
+ if (__kmp_affinity.flags.verbose || \
+ (__kmp_affinity.flags.warnings && \
+ (__kmp_affinity.type != affinity_none))) { \
KMP_WARNING(__VA_ARGS__); \
}
#else
@@ -175,9 +176,9 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a;
const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b;
int depth = __kmp_topology->get_depth();
- KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
- KMP_DEBUG_ASSERT(__kmp_affinity_compact <= depth);
- for (i = 0; i < __kmp_affinity_compact; i++) {
+ KMP_DEBUG_ASSERT(__kmp_affinity.compact >= 0);
+ KMP_DEBUG_ASSERT(__kmp_affinity.compact <= depth);
+ for (i = 0; i < __kmp_affinity.compact; i++) {
int j = depth - i - 1;
if (aa->sub_ids[j] < bb->sub_ids[j])
return -1;
@@ -185,7 +186,7 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
return 1;
}
for (; i < depth; i++) {
- int j = i - __kmp_affinity_compact;
+ int j = i - __kmp_affinity.compact;
if (aa->sub_ids[j] < bb->sub_ids[j])
return -1;
if (aa->sub_ids[j] > bb->sub_ids[j])
@@ -790,41 +791,12 @@ void kmp_topology_t::print(const char *env_var) const {
__kmp_str_buf_free(&buf);
}
-void kmp_topology_t::canonicalize() {
-#if KMP_GROUP_AFFINITY
- _insert_windows_proc_groups();
-#endif
- _remove_radix1_layers();
- _gather_enumeration_information();
- _discover_uniformity();
- _set_sub_ids();
- _set_globals();
- _set_last_level_cache();
-
-#if KMP_MIC_SUPPORTED
- // Manually Add L2 = Tile equivalence
- if (__kmp_mic_type == mic3) {
- if (get_level(KMP_HW_L2) != -1)
- set_equivalent_type(KMP_HW_TILE, KMP_HW_L2);
- else if (get_level(KMP_HW_TILE) != -1)
- set_equivalent_type(KMP_HW_L2, KMP_HW_TILE);
- }
-#endif
-
- // Perform post canonicalization checking
- KMP_ASSERT(depth > 0);
- for (int level = 0; level < depth; ++level) {
- // All counts, ratios, and types must be valid
- KMP_ASSERT(count[level] > 0 && ratio[level] > 0);
- KMP_ASSERT_VALID_HW_TYPE(types[level]);
- // Detected types must point to themselves
- KMP_ASSERT(equivalent[types[level]] == types[level]);
- }
-
#if KMP_AFFINITY_SUPPORTED
+void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
+ const char *env_var = affinity.env_var;
// Set the number of affinity granularity levels
- if (__kmp_affinity_gran_levels < 0) {
- kmp_hw_t gran_type = get_equivalent_type(__kmp_affinity_gran);
+ if (affinity.gran_levels < 0) {
+ kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
// Check if user's granularity request is valid
if (gran_type == KMP_HW_UNKNOWN) {
// First try core, then thread, then package
@@ -837,10 +809,10 @@ void kmp_topology_t::canonicalize() {
}
KMP_ASSERT(gran_type != KMP_HW_UNKNOWN);
// Warn user what granularity setting will be used instead
- KMP_AFF_WARNING(AffGranularityBad, "KMP_AFFINITY",
- __kmp_hw_get_catalog_string(__kmp_affinity_gran),
+ KMP_AFF_WARNING(AffGranularityBad, env_var,
+ __kmp_hw_get_catalog_string(affinity.gran),
__kmp_hw_get_catalog_string(gran_type));
- __kmp_affinity_gran = gran_type;
+ affinity.gran = gran_type;
}
#if KMP_GROUP_AFFINITY
// If more than one processor group exists, and the level of
@@ -855,17 +827,49 @@ void kmp_topology_t::canonicalize() {
int proc_group_depth = get_level(KMP_HW_PROC_GROUP);
if (gran_depth >= 0 && proc_group_depth >= 0 &&
gran_depth < proc_group_depth) {
- KMP_AFF_WARNING(AffGranTooCoarseProcGroup, "KMP_AFFINITY",
- __kmp_hw_get_catalog_string(__kmp_affinity_gran));
- __kmp_affinity_gran = gran_type = KMP_HW_PROC_GROUP;
+ KMP_AFF_WARNING(AffGranTooCoarseProcGroup, env_var,
+ __kmp_hw_get_catalog_string(affinity.gran));
+ affinity.gran = gran_type = KMP_HW_PROC_GROUP;
}
}
#endif
- __kmp_affinity_gran_levels = 0;
+ affinity.gran_levels = 0;
for (int i = depth - 1; i >= 0 && get_type(i) != gran_type; --i)
- __kmp_affinity_gran_levels++;
+ affinity.gran_levels++;
+ }
+}
+#endif
+
+void kmp_topology_t::canonicalize() {
+#if KMP_GROUP_AFFINITY
+ _insert_windows_proc_groups();
+#endif
+ _remove_radix1_layers();
+ _gather_enumeration_information();
+ _discover_uniformity();
+ _set_sub_ids();
+ _set_globals();
+ _set_last_level_cache();
+
+#if KMP_MIC_SUPPORTED
+ // Manually Add L2 = Tile equivalence
+ if (__kmp_mic_type == mic3) {
+ if (get_level(KMP_HW_L2) != -1)
+ set_equivalent_type(KMP_HW_TILE, KMP_HW_L2);
+ else if (get_level(KMP_HW_TILE) != -1)
+ set_equivalent_type(KMP_HW_L2, KMP_HW_TILE);
+ }
+#endif
+
+ // Perform post canonicalization checking
+ KMP_ASSERT(depth > 0);
+ for (int level = 0; level < depth; ++level) {
+ // All counts, ratios, and types must be valid
+ KMP_ASSERT(count[level] > 0 && ratio[level] > 0);
+ KMP_ASSERT_VALID_HW_TYPE(types[level]);
+ // Detected types must point to themselves
+ KMP_ASSERT(equivalent[types[level]] == types[level]);
}
-#endif // KMP_AFFINITY_SUPPORTED
}
// Canonicalize an explicit packages X cores/pkg X threads/core topology
@@ -1301,7 +1305,7 @@ void KMPAffinity::pick_api() {
// Only use Hwloc if affinity isn't explicitly disabled and
// user requests Hwloc topology method
if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
- __kmp_affinity_type != affinity_disabled) {
+ __kmp_affinity.type != affinity_disabled) {
affinity_dispatch = new KMPHwlocAffinity();
} else
#endif
@@ -1663,14 +1667,14 @@ static bool __kmp_affinity_create_hwloc_map(kmp_i18n_id_t *const msg_id) {
hwloc_topology_t tp = __kmp_hwloc_topology;
*msg_id = kmp_i18n_null;
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
}
if (!KMP_AFFINITY_CAPABLE()) {
// Hack to try and infer the machine topology using only the data
// available from hwloc on the current thread, and __kmp_xproc.
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(__kmp_affinity.type == affinity_none);
// hwloc only guarantees existance of PU object, so check PACKAGE and CORE
hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0);
if (o != NULL)
@@ -1864,15 +1868,15 @@ static bool __kmp_affinity_create_flat_map(kmp_i18n_id_t *const msg_id) {
int depth = 3;
kmp_hw_t types[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD};
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(UsingFlatOS, "KMP_AFFINITY");
}
- // Even if __kmp_affinity_type == affinity_none, this routine might still
- // called to set __kmp_ncores, as well as
+ // Even if __kmp_affinity.type == affinity_none, this routine might still
+ // be called to set __kmp_ncores, as well as
// __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
if (!KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(__kmp_affinity.type == affinity_none);
__kmp_ncores = nPackages = __kmp_xproc;
__kmp_nThreadsPerCore = nCoresPerPkg = 1;
return true;
@@ -1902,7 +1906,7 @@ static bool __kmp_affinity_create_flat_map(kmp_i18n_id_t *const msg_id) {
hw_thread.ids[2] = 0;
avail_ct++;
}
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
}
return true;
@@ -1919,13 +1923,13 @@ static bool __kmp_affinity_create_proc_group_map(kmp_i18n_id_t *const msg_id) {
kmp_hw_t types[] = {KMP_HW_PROC_GROUP, KMP_HW_CORE, KMP_HW_THREAD};
const static size_t BITS_PER_GROUP = CHAR_BIT * sizeof(DWORD_PTR);
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
}
// If we aren't affinity capable, then use flat topology
if (!KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(__kmp_affinity.type == affinity_none);
nPackages = __kmp_num_proc_groups;
__kmp_nThreadsPerCore = 1;
__kmp_ncores = __kmp_xproc;
@@ -2065,7 +2069,7 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
kmp_cpuid buf;
*msg_id = kmp_i18n_null;
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
}
@@ -2084,7 +2088,7 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
if (!KMP_AFFINITY_CAPABLE()) {
// Hack to try and infer the machine topology using only the data
// available from cpuid on the current thread, and __kmp_xproc.
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(__kmp_affinity.type == affinity_none);
// Get an upper bound on the number of threads per package using cpuid(1).
// On some OS/chps combinations where HT is supported by the chip but is
@@ -2136,7 +2140,7 @@ static bool __kmp_affinity_create_apicid_map(kmp_i18n_id_t *const msg_id) {
// From here on, we can assume that it is safe to call
// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
- // __kmp_affinity_type = affinity_none.
+ // __kmp_affinity.type = affinity_none.
// Save the affinity mask for the current thread.
kmp_affinity_raii_t previous_affinity;
@@ -2521,7 +2525,7 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
KMP_BUILD_ASSERT(sizeof(known_levels) * CHAR_BIT > KMP_HW_LAST);
*msg_id = kmp_i18n_null;
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
}
@@ -2585,7 +2589,7 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
if (!KMP_AFFINITY_CAPABLE()) {
// Hack to try and infer the machine topology using only the data
// available from cpuid on the current thread, and __kmp_xproc.
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(__kmp_affinity.type == affinity_none);
for (unsigned i = 0; i < levels_index; ++i) {
if (levels[i].level_type == INTEL_LEVEL_TYPE_SMT) {
__kmp_nThreadsPerCore = levels[i].nitems;
@@ -2624,7 +2628,7 @@ static bool __kmp_affinity_create_x2apicid_map(kmp_i18n_id_t *const msg_id) {
// From here on, we can assume that it is safe to call
// __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
- // __kmp_affinity_type = affinity_none.
+ // __kmp_affinity.type = affinity_none.
// Save the affinity mask for the current thread.
kmp_affinity_raii_t previous_affinity;
@@ -2808,7 +2812,7 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
const char *envvar = __kmp_cpuinfo_get_envvar();
*msg_id = kmp_i18n_null;
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
}
@@ -3239,7 +3243,7 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
// not enabled.
__kmp_ncores = totals[coreIdIndex];
if (!KMP_AFFINITY_CAPABLE()) {
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
+ KMP_ASSERT(__kmp_affinity.type == affinity_none);
return true;
}
@@ -3339,13 +3343,14 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
-static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
- unsigned *numUnique) {
+static void __kmp_create_os_id_masks(unsigned *numUnique,
+ kmp_affinity_t &affinity) {
// First form a table of affinity masks in order of OS thread id.
int maxOsId;
int i;
int numAddrs = __kmp_topology->get_num_hw_threads();
int depth = __kmp_topology->get_depth();
+ const char *env_var = affinity.env_var;
KMP_ASSERT(numAddrs);
KMP_ASSERT(depth);
@@ -3358,13 +3363,13 @@ static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
if (i == 0)
break;
}
- kmp_affin_mask_t *osId2Mask;
- KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));
- KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
- if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
- KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
+ affinity.num_os_id_masks = maxOsId + 1;
+ KMP_CPU_ALLOC_ARRAY(affinity.os_id_masks, affinity.num_os_id_masks);
+ KMP_ASSERT(affinity.gran_levels >= 0);
+ if (affinity.flags.verbose && (affinity.gran_levels > 0)) {
+ KMP_INFORM(ThreadsMigrate, env_var, affinity.gran_levels);
}
- if (__kmp_affinity_gran_levels >= (int)depth) {
+ if (affinity.gran_levels >= (int)depth) {
KMP_AFF_WARNING(AffThreadsMayMigrate);
}
@@ -3383,17 +3388,17 @@ static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
// If this thread is sufficiently close to the leader (within the
// granularity setting), then set the bit for this os thread in the
// affinity mask for this group, and go on to the next thread.
- if (__kmp_topology->is_close(leader, i, __kmp_affinity_gran_levels)) {
+ if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) {
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
continue;
}
// For every thread in this group, copy the mask to the thread's entry in
- // the osId2Mask table. Mark the first address as a leader.
+ // the OS Id mask table. Mark the first address as a leader.
for (; j < i; j++) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
KMP_CPU_COPY(mask, sum);
__kmp_topology->at(j).leader = (j == leader);
}
@@ -3406,20 +3411,18 @@ static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
}
// For every thread in last group, copy the mask to the thread's
- // entry in the osId2Mask table.
+ // entry in the OS Id mask table.
for (; j < i; j++) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
KMP_CPU_COPY(mask, sum);
__kmp_topology->at(j).leader = (j == leader);
}
unique++;
KMP_CPU_FREE_FROM_STACK(sum);
- *maxIndex = maxOsId;
*numUnique = unique;
- return osId2Mask;
}
// Stuff for the affinity proclist parsers. It's easier to declare these vars
@@ -3460,12 +3463,13 @@ static int nextNewMask;
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
-static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
- unsigned int *out_numMasks,
- const char *proclist,
- kmp_affin_mask_t *osId2Mask,
- int maxOsId) {
+static void __kmp_affinity_process_proclist(kmp_affinity_t &affinity) {
int i;
+ kmp_affin_mask_t **out_masks = &affinity.masks;
+ unsigned *out_numMasks = &affinity.num_masks;
+ const char *proclist = affinity.proclist;
+ kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
+ int maxOsId = affinity.num_os_id_masks - 1;
const char *scan = proclist;
const char *next = proclist;
@@ -3670,10 +3674,11 @@ signed := + signed
signed := - signed
-----------------------------------------------------------------------------*/
static void __kmp_process_subplace_list(const char **scan,
- kmp_affin_mask_t *osId2Mask,
- int maxOsId, kmp_affin_mask_t *tempMask,
+ kmp_affinity_t &affinity, int maxOsId,
+ kmp_affin_mask_t *tempMask,
int *setSize) {
const char *next;
+ kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
for (;;) {
int start, count, stride, i;
@@ -3787,21 +3792,22 @@ static void __kmp_process_subplace_list(const char **scan,
}
}
-static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
+static void __kmp_process_place(const char **scan, kmp_affinity_t &affinity,
int maxOsId, kmp_affin_mask_t *tempMask,
int *setSize) {
const char *next;
+ kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
// valid follow sets are '{' '!' and num
SKIP_WS(*scan);
if (**scan == '{') {
(*scan)++; // skip '{'
- __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
+ __kmp_process_subplace_list(scan, affinity, maxOsId, tempMask, setSize);
KMP_ASSERT2(**scan == '}', "bad explicit places list");
(*scan)++; // skip '}'
} else if (**scan == '!') {
(*scan)++; // skip '!'
- __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
+ __kmp_process_place(scan, affinity, maxOsId, tempMask, setSize);
KMP_CPU_COMPLEMENT(maxOsId, tempMask);
} else if ((**scan >= '0') && (**scan <= '9')) {
next = *scan;
@@ -3822,12 +3828,13 @@ static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
}
// static void
-void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
- unsigned int *out_numMasks,
- const char *placelist,
- kmp_affin_mask_t *osId2Mask,
- int maxOsId) {
+void __kmp_affinity_process_placelist(kmp_affinity_t &affinity) {
int i, j, count, stride, sign;
+ kmp_affin_mask_t **out_masks = &affinity.masks;
+ unsigned *out_numMasks = &affinity.num_masks;
+ const char *placelist = affinity.proclist;
+ kmp_affin_mask_t *osId2Mask = affinity.os_id_masks;
+ int maxOsId = affinity.num_os_id_masks - 1;
const char *scan = placelist;
const char *next = placelist;
@@ -3847,7 +3854,7 @@ void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
int setSize = 0;
for (;;) {
- __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);
+ __kmp_process_place(&scan, affinity, maxOsId, tempMask, &setSize);
// valid follow sets are ',' ':' and EOL
SKIP_WS(scan);
@@ -4029,25 +4036,27 @@ static int __kmp_aff_depth = 0;
// Create a one element mask array (set of places) which only contains the
// initial process's affinity mask
-static void __kmp_create_affinity_none_places() {
+static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
- KMP_ASSERT(__kmp_affinity_type == affinity_none);
- __kmp_affinity_num_masks = 1;
- KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
- kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, 0);
+ KMP_ASSERT(affinity.type == affinity_none);
+ affinity.num_masks = 1;
+ KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
+ kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
}
-static void __kmp_aux_affinity_initialize(void) {
- if (__kmp_affinity_masks != NULL) {
- KMP_ASSERT(__kmp_affin_fullMask != NULL);
- return;
- }
-
+static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
// Create the "full" mask - this defines all of the processors that we
// consider to be in the machine model. If respect is set, then it is the
// initialization thread's affinity mask. Otherwise, it is all processors that
// we know about on the machine.
+ int verbose = affinity.flags.verbose;
+ const char *env_var = affinity.env_var;
+
+ // Already initialized
+ if (__kmp_affin_fullMask && __kmp_affin_origMask)
+ return;
+
if (__kmp_affin_fullMask == NULL) {
KMP_CPU_ALLOC(__kmp_affin_fullMask);
}
@@ -4058,7 +4067,7 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
// Make a copy before possible expanding to the entire machine mask
__kmp_affin_origMask->copy(__kmp_affin_fullMask);
- if (__kmp_affinity_respect_mask) {
+ if (affinity.flags.respect) {
// Count the number of available processors.
unsigned i;
__kmp_avail_proc = 0;
@@ -4070,23 +4079,23 @@ static void __kmp_aux_affinity_initialize(void) {
}
if (__kmp_avail_proc > __kmp_xproc) {
KMP_AFF_WARNING(ErrorInitializeAffinity);
- __kmp_affinity_type = affinity_none;
+ affinity.type = affinity_none;
KMP_AFFINITY_DISABLE();
return;
}
- if (__kmp_affinity_verbose) {
+ if (verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
__kmp_affin_fullMask);
- KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
+ KMP_INFORM(InitOSProcSetRespect, env_var, buf);
}
} else {
- if (__kmp_affinity_verbose) {
+ if (verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
__kmp_affin_fullMask);
- KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
+ KMP_INFORM(InitOSProcSetNotRespect, env_var, buf);
}
__kmp_avail_proc =
__kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
@@ -4101,8 +4110,13 @@ static void __kmp_aux_affinity_initialize(void) {
#endif
}
}
+}
+static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
+ bool success = false;
+ const char *env_var = affinity.env_var;
kmp_i18n_id_t msg_id = kmp_i18n_null;
+ int verbose = affinity.flags.verbose;
// For backward compatibility, setting KMP_CPUINFO_FILE =>
// KMP_TOPOLOGY_METHOD=cpuinfo
@@ -4111,7 +4125,6 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_affinity_top_method = affinity_top_method_cpuinfo;
}
- bool success = false;
if (__kmp_affinity_top_method == affinity_top_method_all) {
// In the default code path, errors are not fatal - we just try using
// another method. We only emit a warning message if affinity is on, or the
@@ -4121,11 +4134,11 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
if (!__kmp_hwloc_error) {
success = __kmp_affinity_create_hwloc_map(&msg_id);
- if (!success && __kmp_affinity_verbose) {
- KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+ if (!success && verbose) {
+ KMP_INFORM(AffIgnoringHwloc, env_var);
}
- } else if (__kmp_affinity_verbose) {
- KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY");
+ } else if (verbose) {
+ KMP_INFORM(AffIgnoringHwloc, env_var);
}
}
#endif
@@ -4133,14 +4146,14 @@ static void __kmp_aux_affinity_initialize(void) {
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
if (!success) {
success = __kmp_affinity_create_x2apicid_map(&msg_id);
- if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
+ if (!success && verbose && msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
}
}
if (!success) {
success = __kmp_affinity_create_apicid_map(&msg_id);
- if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
+ if (!success && verbose && msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
}
}
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
@@ -4149,8 +4162,8 @@ static void __kmp_aux_affinity_initialize(void) {
if (!success) {
int line = 0;
success = __kmp_affinity_create_cpuinfo_map(&line, &msg_id);
- if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
+ if (!success && verbose && msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
}
}
#endif /* KMP_OS_LINUX */
@@ -4158,16 +4171,16 @@ static void __kmp_aux_affinity_initialize(void) {
#if KMP_GROUP_AFFINITY
if (!success && (__kmp_num_proc_groups > 1)) {
success = __kmp_affinity_create_proc_group_map(&msg_id);
- if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
+ if (!success && verbose && msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
}
}
#endif /* KMP_GROUP_AFFINITY */
if (!success) {
success = __kmp_affinity_create_flat_map(&msg_id);
- if (!success && __kmp_affinity_verbose && msg_id != kmp_i18n_null) {
- KMP_INFORM(AffInfoStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id));
+ if (!success && verbose && msg_id != kmp_i18n_null) {
+ KMP_INFORM(AffInfoStr, env_var, __kmp_i18n_catgets(msg_id));
}
KMP_ASSERT(success);
}
@@ -4246,24 +4259,17 @@ static void __kmp_aux_affinity_initialize(void) {
__kmp_topology = kmp_topology_t::allocate(0, 0, NULL);
__kmp_topology->canonicalize(nPackages, nCoresPerPkg,
__kmp_nThreadsPerCore, __kmp_ncores);
- if (__kmp_affinity_verbose) {
- __kmp_topology->print("KMP_AFFINITY");
+ if (verbose) {
+ __kmp_topology->print(env_var);
}
}
- __kmp_affinity_type = affinity_none;
- __kmp_create_affinity_none_places();
-#if KMP_USE_HIER_SCHED
- __kmp_dispatch_set_hierarchy_values();
-#endif
- KMP_AFFINITY_DISABLE();
- return;
+ return false;
}
- // Canonicalize, print (if requested), apply KMP_HW_SUBSET, and
- // initialize other data structures which depend on the topology
+ // Canonicalize, print (if requested), apply KMP_HW_SUBSET
__kmp_topology->canonicalize();
- if (__kmp_affinity_verbose)
- __kmp_topology->print("KMP_AFFINITY");
+ if (verbose)
+ __kmp_topology->print(env_var);
bool filtered = __kmp_topology->filter_hw_subset();
if (filtered) {
#if KMP_OS_WINDOWS
@@ -4272,97 +4278,122 @@ static void __kmp_aux_affinity_initialize(void) {
#endif
__kmp_affin_origMask->copy(__kmp_affin_fullMask);
}
- if (filtered && __kmp_affinity_verbose)
+ if (filtered && verbose)
__kmp_topology->print("KMP_HW_SUBSET");
- machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
- KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
+ return success;
+}
+
+static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
+ bool is_regular_affinity = (&affinity == &__kmp_affinity);
+ const char *env_var = affinity.env_var;
+
+ if (affinity.flags.initialized) {
+ KMP_ASSERT(__kmp_affin_fullMask != NULL);
+ return;
+ }
+
+ if (is_regular_affinity && (!__kmp_affin_fullMask || !__kmp_affin_origMask))
+ __kmp_aux_affinity_initialize_masks(affinity);
+
+ if (is_regular_affinity && !__kmp_topology) {
+ bool success = __kmp_aux_affinity_initialize_topology(affinity);
+ if (success) {
+ // Initialize other data structures which depend on the topology
+ machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
+ KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
+ } else {
+ affinity.type = affinity_none;
+ KMP_AFFINITY_DISABLE();
+ }
+ }
+
// If KMP_AFFINITY=none, then only create the single "none" place
// which is the process's initial affinity mask or the number of
// hardware threads depending on respect,norespect
- if (__kmp_affinity_type == affinity_none) {
- __kmp_create_affinity_none_places();
+ if (affinity.type == affinity_none) {
+ __kmp_create_affinity_none_places(affinity);
#if KMP_USE_HIER_SCHED
__kmp_dispatch_set_hierarchy_values();
#endif
+ affinity.flags.initialized = TRUE;
return;
}
+
+ __kmp_topology->set_granularity(affinity);
int depth = __kmp_topology->get_depth();
// Create the table of masks, indexed by thread Id.
- unsigned maxIndex;
unsigned numUnique;
- kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique);
- if (__kmp_affinity_gran_levels == 0) {
+ __kmp_create_os_id_masks(&numUnique, affinity);
+ if (affinity.gran_levels == 0) {
KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
}
- switch (__kmp_affinity_type) {
+ switch (affinity.type) {
case affinity_explicit:
- KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
+ KMP_DEBUG_ASSERT(affinity.proclist != NULL);
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
- __kmp_affinity_process_proclist(
- &__kmp_affinity_masks, &__kmp_affinity_num_masks,
- __kmp_affinity_proclist, osId2Mask, maxIndex);
+ __kmp_affinity_process_proclist(affinity);
} else {
- __kmp_affinity_process_placelist(
- &__kmp_affinity_masks, &__kmp_affinity_num_masks,
- __kmp_affinity_proclist, osId2Mask, maxIndex);
+ __kmp_affinity_process_placelist(affinity);
}
- if (__kmp_affinity_num_masks == 0) {
+ if (affinity.num_masks == 0) {
KMP_AFF_WARNING(AffNoValidProcID);
- __kmp_affinity_type = affinity_none;
- __kmp_create_affinity_none_places();
+ affinity.type = affinity_none;
+ __kmp_create_affinity_none_places(affinity);
+ affinity.flags.initialized = TRUE;
return;
}
break;
// The other affinity types rely on sorting the hardware threads according to
- // some permutation of the machine topology tree. Set __kmp_affinity_compact
- // and __kmp_affinity_offset appropriately, then jump to a common code
+ // some permutation of the machine topology tree. Set affinity.compact
+ // and affinity.offset appropriately, then jump to a common code
// fragment to do the sort and create the array of affinity masks.
case affinity_logical:
- __kmp_affinity_compact = 0;
- if (__kmp_affinity_offset) {
- __kmp_affinity_offset =
- __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
+ affinity.compact = 0;
+ if (affinity.offset) {
+ affinity.offset =
+ __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
}
goto sortTopology;
case affinity_physical:
if (__kmp_nThreadsPerCore > 1) {
- __kmp_affinity_compact = 1;
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = 0;
+ affinity.compact = 1;
+ if (affinity.compact >= depth) {
+ affinity.compact = 0;
}
} else {
- __kmp_affinity_compact = 0;
+ affinity.compact = 0;
}
- if (__kmp_affinity_offset) {
- __kmp_affinity_offset =
- __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
+ if (affinity.offset) {
+ affinity.offset =
+ __kmp_nThreadsPerCore * affinity.offset % __kmp_avail_proc;
}
goto sortTopology;
case affinity_scatter:
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = 0;
+ if (affinity.compact >= depth) {
+ affinity.compact = 0;
} else {
- __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
+ affinity.compact = depth - 1 - affinity.compact;
}
goto sortTopology;
case affinity_compact:
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = depth - 1;
+ if (affinity.compact >= depth) {
+ affinity.compact = depth - 1;
}
goto sortTopology;
case affinity_balanced:
if (depth <= 1) {
- KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
- __kmp_affinity_type = affinity_none;
- __kmp_create_affinity_none_places();
+ KMP_AFF_WARNING(AffBalancedNotAvail, env_var);
+ affinity.type = affinity_none;
+ __kmp_create_affinity_none_places(affinity);
+ affinity.flags.initialized = TRUE;
return;
} else if (!__kmp_topology->is_uniform()) {
// Save the depth for further usage
@@ -4377,8 +4408,9 @@ static void __kmp_aux_affinity_initialize(void) {
int nproc = ncores * maxprocpercore;
if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
- KMP_AFF_WARNING(AffBalancedNotAvail, "KMP_AFFINITY");
- __kmp_affinity_type = affinity_none;
+ KMP_AFF_WARNING(AffBalancedNotAvail, env_var);
+ affinity.type = affinity_none;
+ affinity.flags.initialized = TRUE;
return;
}
@@ -4403,48 +4435,48 @@ static void __kmp_aux_affinity_initialize(void) {
procarr[core * maxprocpercore + inlastcore] = proc;
}
}
- if (__kmp_affinity_compact >= depth) {
- __kmp_affinity_compact = depth - 1;
+ if (affinity.compact >= depth) {
+ affinity.compact = depth - 1;
}
sortTopology:
// Allocate the gtid->affinity mask table.
- if (__kmp_affinity_dups) {
- __kmp_affinity_num_masks = __kmp_avail_proc;
+ if (affinity.flags.dups) {
+ affinity.num_masks = __kmp_avail_proc;
} else {
- __kmp_affinity_num_masks = numUnique;
+ affinity.num_masks = numUnique;
}
if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
(__kmp_affinity_num_places > 0) &&
- ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
- __kmp_affinity_num_masks = __kmp_affinity_num_places;
+ ((unsigned)__kmp_affinity_num_places < affinity.num_masks)) {
+ affinity.num_masks = __kmp_affinity_num_places;
}
- KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
+ KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
// Sort the topology table according to the current setting of
- // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
+ // affinity.compact, then fill out affinity.masks.
__kmp_topology->sort_compact();
{
int i;
unsigned j;
int num_hw_threads = __kmp_topology->get_num_hw_threads();
for (i = 0, j = 0; i < num_hw_threads; i++) {
- if ((!__kmp_affinity_dups) && (!__kmp_topology->at(i).leader)) {
+ if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
continue;
}
int osId = __kmp_topology->at(i).os_id;
- kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
- kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
+ kmp_affin_mask_t *src = KMP_CPU_INDEX(affinity.os_id_masks, osId);
+ kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
KMP_ASSERT(KMP_CPU_ISSET(osId, src));
KMP_CPU_COPY(dest, src);
- if (++j >= __kmp_affinity_num_masks) {
+ if (++j >= affinity.num_masks) {
break;
}
}
- KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
+ KMP_DEBUG_ASSERT(j == affinity.num_masks);
}
// Sort the topology back using ids
__kmp_topology->sort_ids();
@@ -4453,52 +4485,44 @@ static void __kmp_aux_affinity_initialize(void) {
default:
KMP_ASSERT2(0, "Unexpected affinity setting");
}
-
- KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
+ affinity.flags.initialized = TRUE;
}
-void __kmp_affinity_initialize(void) {
+void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
// Much of the code above was written assuming that if a machine was not
- // affinity capable, then __kmp_affinity_type == affinity_none. We now
- // explicitly represent this as __kmp_affinity_type == affinity_disabled.
- // There are too many checks for __kmp_affinity_type == affinity_none
- // in this code. Instead of trying to change them all, check if
- // __kmp_affinity_type == affinity_disabled, and if so, slam it with
- // affinity_none, call the real initialization routine, then restore
- // __kmp_affinity_type to affinity_disabled.
- int disabled = (__kmp_affinity_type == affinity_disabled);
- if (!KMP_AFFINITY_CAPABLE()) {
+ // affinity capable, then affinity type == affinity_none.
+ // We now explicitly represent this as affinity type == affinity_disabled.
+ // There are too many checks for affinity type == affinity_none in this code.
+ // Instead of trying to change them all, check if
+ // affinity type == affinity_disabled, and if so, slam it with affinity_none,
+ // call the real initialization routine, then restore affinity type to
+ // affinity_disabled.
+ int disabled = (affinity.type == affinity_disabled);
+ if (!KMP_AFFINITY_CAPABLE())
KMP_ASSERT(disabled);
- }
- if (disabled) {
- __kmp_affinity_type = affinity_none;
- }
- __kmp_aux_affinity_initialize();
- if (disabled) {
- __kmp_affinity_type = affinity_disabled;
- }
+ if (disabled)
+ affinity.type = affinity_none;
+ __kmp_aux_affinity_initialize(affinity);
+ if (disabled)
+ affinity.type = affinity_disabled;
}
void __kmp_affinity_uninitialize(void) {
- if (__kmp_affinity_masks != NULL) {
- KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
- __kmp_affinity_masks = NULL;
- }
- if (__kmp_affin_fullMask != NULL) {
- KMP_CPU_FREE(__kmp_affin_fullMask);
- __kmp_affin_fullMask = NULL;
+ {
+ kmp_affinity_t *affinity = &__kmp_affinity;
+ if (affinity->masks != NULL)
+ KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
+ if (affinity->os_id_masks != NULL)
+ KMP_CPU_FREE_ARRAY(affinity->os_id_masks, affinity->num_os_id_masks);
+ if (affinity->proclist != NULL)
+ __kmp_free(affinity->proclist);
+ *affinity = KMP_AFFINITY_INIT(affinity->env_var);
}
if (__kmp_affin_origMask != NULL) {
KMP_CPU_FREE(__kmp_affin_origMask);
__kmp_affin_origMask = NULL;
}
- __kmp_affinity_num_masks = 0;
- __kmp_affinity_type = affinity_default;
__kmp_affinity_num_places = 0;
- if (__kmp_affinity_proclist != NULL) {
- __kmp_free(__kmp_affinity_proclist);
- __kmp_affinity_proclist = NULL;
- }
if (procarr != NULL) {
__kmp_free(procarr);
procarr = NULL;
@@ -4533,15 +4557,18 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
}
// Copy the thread mask to the kmp_info_t structure. If
- // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that
- // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set,
- // then the full mask is the same as the mask of the initialization thread.
+ // __kmp_affinity.type == affinity_none, copy the "full" mask, i.e.
+ // one that has all of the OS proc ids set, or if
+ // __kmp_affinity.flags.respect is set, then the full mask is the
+ // same as the mask of the initialization thread.
kmp_affin_mask_t *mask;
int i;
+ const kmp_affinity_t *affinity = &__kmp_affinity;
+ const char *env_var = affinity->env_var;
if (KMP_AFFINITY_NON_PROC_BIND) {
- if ((__kmp_affinity_type == affinity_none) ||
- (__kmp_affinity_type == affinity_balanced) ||
+ if ((affinity->type == affinity_none) ||
+ (affinity->type == affinity_balanced) ||
KMP_HIDDEN_HELPER_THREAD(gtid)) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1) {
@@ -4553,9 +4580,9 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
mask = __kmp_affin_fullMask;
} else {
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
- KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
- i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
- mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
+ KMP_DEBUG_ASSERT(affinity->num_masks > 0);
+ i = (mask_idx + affinity->offset) % affinity->num_masks;
+ mask = KMP_CPU_INDEX(affinity->masks, i);
}
} else {
if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
@@ -4572,9 +4599,9 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
// int i = some hash function or just a counter that doesn't
// always start at 0. Use adjusted gtid for now.
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
- KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
- i = (mask_idx + __kmp_affinity_offset) % __kmp_affinity_num_masks;
- mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
+ KMP_DEBUG_ASSERT(affinity->num_masks > 0);
+ i = (mask_idx + affinity->offset) % affinity->num_masks;
+ mask = KMP_CPU_INDEX(affinity->masks, i);
}
}
@@ -4582,12 +4609,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
th->th.th_new_place = i;
th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
+ th->th.th_last_place = affinity->num_masks - 1;
} else if (KMP_AFFINITY_NON_PROC_BIND) {
// When using a Non-OMP_PROC_BIND affinity method,
// set all threads' place-partition-var to the entire place list
th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
+ th->th.th_last_place = affinity->num_masks - 1;
}
if (i == KMP_PLACE_ALL) {
@@ -4600,20 +4627,20 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
KMP_CPU_COPY(th->th.th_affin_mask, mask);
- if (__kmp_affinity_verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
+ if (affinity->flags.verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
/* to avoid duplicate printing (will be correctly printed on barrier) */
- && (__kmp_affinity_type == affinity_none ||
- (i != KMP_PLACE_ALL && __kmp_affinity_type != affinity_balanced))) {
+ && (affinity->type == affinity_none ||
+ (i != KMP_PLACE_ALL && affinity->type != affinity_balanced))) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- __kmp_gettid(), gtid, buf);
+ KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
+ gtid, buf);
}
#if KMP_DEBUG
// Hidden helper thread affinity only printed for debug builds
- if (__kmp_affinity_verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
+ if (affinity->flags.verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
@@ -4626,7 +4653,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
// On Windows* OS, the process affinity mask might have changed. If the user
// didn't request affinity and this call fails, just continue silently.
// See CQ171393.
- if (__kmp_affinity_type == affinity_none) {
+ if (affinity->type == affinity_none) {
__kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
} else
#endif
@@ -4647,7 +4674,7 @@ void __kmp_affinity_set_place(int gtid) {
// Check that the new place is within this thread's partition.
KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
KMP_ASSERT(th->th.th_new_place >= 0);
- KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
+ KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity.num_masks);
if (th->th.th_first_place <= th->th.th_last_place) {
KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
(th->th.th_new_place <= th->th.th_last_place));
@@ -4659,11 +4686,11 @@ void __kmp_affinity_set_place(int gtid) {
// Copy the thread mask to the kmp_info_t structure,
// and set this thread's affinity.
kmp_affin_mask_t *mask =
- KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
+ KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
KMP_CPU_COPY(th->th.th_affin_mask, mask);
th->th.th_current_place = th->th.th_new_place;
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
th->th.th_affin_mask);
@@ -4731,7 +4758,7 @@ int __kmp_aux_set_affinity(void **mask) {
th->th.th_current_place = KMP_PLACE_UNDEFINED;
th->th.th_new_place = KMP_PLACE_UNDEFINED;
th->th.th_first_place = 0;
- th->th.th_last_place = __kmp_affinity_num_masks - 1;
+ th->th.th_last_place = __kmp_affinity.num_masks - 1;
// Turn off 4.0 affinity for the current tread at this parallel level.
th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
@@ -4911,12 +4938,13 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
KMP_DEBUG_ASSERT(th);
bool fine_gran = true;
int tid = th->th.th_info.ds.ds_tid;
+ const char *env_var = "KMP_AFFINITY";
// Do not perform balanced affinity for the hidden helper threads
if (KMP_HIDDEN_HELPER_THREAD(__kmp_gtid_from_thread(th)))
return;
- switch (__kmp_affinity_gran) {
+ switch (__kmp_affinity.gran) {
case KMP_HW_THREAD:
break;
case KMP_HW_CORE:
@@ -4974,11 +5002,11 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
KMP_CPU_SET(osID, mask);
}
}
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- __kmp_gettid(), tid, buf);
+ KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
+ tid, buf);
}
__kmp_set_system_affinity(mask, TRUE);
} else { // Non-uniform topology
@@ -5140,11 +5168,11 @@ void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
__kmp_free(newarr);
}
- if (__kmp_affinity_verbose) {
+ if (__kmp_affinity.flags.verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
- KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
- __kmp_gettid(), tid, buf);
+ KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
+ tid, buf);
}
__kmp_set_system_affinity(mask, TRUE);
}
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index 2e0480f65f67b..e1305f406b0df 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -128,13 +128,15 @@ class KMPHwlocAffinity : public KMPAffinity {
if (__kmp_hwloc_topology == NULL) {
if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
__kmp_hwloc_error = TRUE;
- if (__kmp_affinity_verbose)
+ if (__kmp_affinity.flags.verbose) {
KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
+ }
}
if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
__kmp_hwloc_error = TRUE;
- if (__kmp_affinity_verbose)
+ if (__kmp_affinity.flags.verbose) {
KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
+ }
}
}
topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
@@ -792,7 +794,12 @@ class kmp_topology_t {
void canonicalize();
void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
- // Functions used after canonicalize() called
+// Functions used after canonicalize() called
+
+#if KMP_AFFINITY_SUPPORTED
+ // Set the granularity for affinity settings
+ void set_granularity(kmp_affinity_t &stgs) const;
+#endif
bool filter_hw_subset();
bool is_close(int hwt1, int hwt2, int level) const;
bool is_uniform() const { return flags.uniform; }
diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp
index 1a718b45ffffc..bf56c7884970f 100644
--- a/openmp/runtime/src/kmp_barrier.cpp
+++ b/openmp/runtime/src/kmp_barrier.cpp
@@ -2582,7 +2582,7 @@ void __kmp_fork_barrier(int gtid, int tid) {
kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
if (proc_bind == proc_bind_intel) {
// Call dynamic affinity settings
- if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) {
+ if (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed) {
__kmp_balanced_affinity(this_thr, team->t.t_nproc);
}
} else if (proc_bind != proc_bind_false) {
@@ -2599,7 +2599,7 @@ void __kmp_fork_barrier(int gtid, int tid) {
if (__kmp_display_affinity) {
if (team->t.t_display_affinity
#if KMP_AFFINITY_SUPPORTED
- || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed)
+ || (__kmp_affinity.type == affinity_balanced && team->t.t_size_changed)
#endif
) {
// NULL means use the affinity-format-var ICV
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 72031a2ba4405..97b15be967f0c 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -633,7 +633,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
}
#if KMP_AFFINITY_SUPPORTED
- if (this_thr->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(global_tid);
}
#endif
@@ -1989,7 +1989,8 @@ void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
+ __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
@@ -2007,7 +2008,8 @@ size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
+ __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index 8dbd3ce976902..bfd582ab0d456 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -239,7 +239,8 @@ int FTN_STDCALL FTN_GET_AFFINITY(void **mask) {
}
__kmp_assign_root_init_mask();
int gtid = __kmp_get_gtid();
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
+ __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
return __kmp_aux_get_affinity(mask);
@@ -365,7 +366,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_THREADS)(void) {
gtid = __kmp_entry_gtid();
thread = __kmp_threads[gtid];
#if KMP_AFFINITY_SUPPORTED
- if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) {
__kmp_assign_root_init_mask();
}
#endif
@@ -518,7 +519,8 @@ void FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_DISPLAY_AFFINITY)(
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
+ __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
@@ -551,7 +553,8 @@ size_t FTN_STDCALL KMP_EXPAND_NAME_IF_APPEND(FTN_CAPTURE_AFFINITY)(
__kmp_assign_root_init_mask();
gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
- if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
+ __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
@@ -631,7 +634,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PROCS)(void) {
__kmp_middle_initialize();
}
#if KMP_AFFINITY_SUPPORTED
- if (!__kmp_affin_reset) {
+ if (!__kmp_affinity.flags.reset) {
// only bind root here if its affinity reset is not requested
int gtid = __kmp_entry_gtid();
kmp_info_t *thread = __kmp_threads[gtid];
@@ -831,7 +834,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) {
}
if (!KMP_AFFINITY_CAPABLE())
return 0;
- if (!__kmp_affin_reset) {
+ if (!__kmp_affinity.flags.reset) {
// only bind root here if its affinity reset is not requested
int gtid = __kmp_entry_gtid();
kmp_info_t *thread = __kmp_threads[gtid];
@@ -839,7 +842,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_PLACES)(void) {
__kmp_assign_root_init_mask();
}
}
- return __kmp_affinity_num_masks;
+ return __kmp_affinity.num_masks;
#endif
}
@@ -854,7 +857,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) {
}
if (!KMP_AFFINITY_CAPABLE())
return 0;
- if (!__kmp_affin_reset) {
+ if (!__kmp_affinity.flags.reset) {
// only bind root here if its affinity reset is not requested
int gtid = __kmp_entry_gtid();
kmp_info_t *thread = __kmp_threads[gtid];
@@ -862,9 +865,9 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM_PROCS)(int place_num) {
__kmp_assign_root_init_mask();
}
}
- if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
+ if (place_num < 0 || place_num >= (int)__kmp_affinity.num_masks)
return 0;
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity.masks, place_num);
KMP_CPU_SET_ITERATE(i, mask) {
if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
(!KMP_CPU_ISSET(i, mask))) {
@@ -887,7 +890,7 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num,
}
if (!KMP_AFFINITY_CAPABLE())
return;
- if (!__kmp_affin_reset) {
+ if (!__kmp_affinity.flags.reset) {
// only bind root here if its affinity reset is not requested
int gtid = __kmp_entry_gtid();
kmp_info_t *thread = __kmp_threads[gtid];
@@ -895,9 +898,9 @@ void FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_PROC_IDS)(int place_num,
__kmp_assign_root_init_mask();
}
}
- if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
+ if (place_num < 0 || place_num >= (int)__kmp_affinity.num_masks)
return;
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity.masks, place_num);
j = 0;
KMP_CPU_SET_ITERATE(i, mask) {
if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
@@ -922,7 +925,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PLACE_NUM)(void) {
return -1;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
- if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) {
__kmp_assign_root_init_mask();
}
if (thread->th.th_current_place < 0)
@@ -944,7 +947,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) {
return 0;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
- if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) {
__kmp_assign_root_init_mask();
}
first_place = thread->th.th_first_place;
@@ -954,7 +957,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_PARTITION_NUM_PLACES)(void) {
if (first_place <= last_place)
num_places = last_place - first_place + 1;
else
- num_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ num_places = __kmp_affinity.num_masks - first_place + last_place + 1;
return num_places;
#endif
}
@@ -973,7 +976,7 @@ KMP_EXPAND_NAME(FTN_GET_PARTITION_PLACE_NUMS)(int *place_nums) {
return;
gtid = __kmp_entry_gtid();
thread = __kmp_thread_from_gtid(gtid);
- if (thread->th.th_team->t.t_level == 0 && !__kmp_affin_reset) {
+ if (thread->th.th_team->t.t_level == 0 && !__kmp_affinity.flags.reset) {
__kmp_assign_root_init_mask();
}
first_place = thread->th.th_first_place;
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 3fd536416de7e..8b41bfde339b3 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -269,23 +269,13 @@ kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL;
#endif /* KMP_OS_WINDOWS */
size_t __kmp_affin_mask_size = 0;
-enum affinity_type __kmp_affinity_type = affinity_default;
-kmp_hw_t __kmp_affinity_gran = KMP_HW_UNKNOWN;
-int __kmp_affinity_gran_levels = -1;
-int __kmp_affinity_dups = TRUE;
enum affinity_top_method __kmp_affinity_top_method =
affinity_top_method_default;
-int __kmp_affinity_compact = 0;
-int __kmp_affinity_offset = 0;
-int __kmp_affinity_verbose = FALSE;
-int __kmp_affinity_warnings = TRUE;
-int __kmp_affinity_respect_mask = affinity_respect_mask_default;
-char *__kmp_affinity_proclist = NULL;
-kmp_affin_mask_t *__kmp_affinity_masks = NULL;
-unsigned __kmp_affinity_num_masks = 0;
+
+// Regular thread affinity settings from KMP_AFFINITY
+kmp_affinity_t __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
char *__kmp_cpuinfo_file = NULL;
-bool __kmp_affin_reset = 0;
#endif /* KMP_AFFINITY_SUPPORTED */
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index dd4ec28a9764c..a1051c752e312 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -2647,7 +2647,7 @@ void __kmp_join_call(ident_t *loc, int gtid
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_AFFINITY_SUPPORTED
- if (master_th->th.th_team->t.t_level == 0 && __kmp_affin_reset) {
+ if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
__kmp_reset_root_init_mask(gtid);
}
#endif
@@ -4736,6 +4736,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
int first_place = master_th->th.th_first_place;
int last_place = master_th->th.th_last_place;
int masters_place = master_th->th.th_current_place;
+ int num_masks = __kmp_affinity.num_masks;
team->t.t_first_place = first_place;
team->t.t_last_place = last_place;
@@ -4780,7 +4781,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (first_place <= last_place) {
n_places = last_place - first_place + 1;
} else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ n_places = num_masks - first_place + last_place + 1;
}
if (n_th <= n_places) {
int place = masters_place;
@@ -4790,7 +4791,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4835,7 +4836,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
// we added an extra thread to this place; move to next place
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4846,7 +4847,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
} else if (s_count == S) { // place full; don't add extra
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4873,12 +4874,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (first_place <= last_place) {
n_places = last_place - first_place + 1;
} else {
- n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
+ n_places = num_masks - first_place + last_place + 1;
}
if (n_th <= n_places) {
int place = -1;
- if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
+ if (n_places != num_masks) {
int S = n_places / n_th;
int s_count, rem, gap, gap_ct;
@@ -4903,7 +4904,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
while (s_count < S) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4913,7 +4914,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (rem && (gap_ct == gap)) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4926,7 +4927,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -4934,10 +4935,10 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
KA_TRACE(100,
("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
- "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
+ "partition = [%d,%d], num_masks: %u\n",
__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
f, th->th.th_new_place, th->th.th_first_place,
- th->th.th_last_place, __kmp_affinity_num_masks));
+ th->th.th_last_place, num_masks));
}
} else {
/* Having uniform space of available computation places I can create
@@ -5034,7 +5035,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
// we added an extra thread to this place; move on to next place
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -5045,7 +5046,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
} else if (s_count == S) { // place is full; don't add extra thread
if (place == last_place) {
place = first_place;
- } else if (place == (int)(__kmp_affinity_num_masks - 1)) {
+ } else if (place == (num_masks - 1)) {
place = 0;
} else {
place++;
@@ -7280,7 +7281,7 @@ static void __kmp_do_middle_initialize(void) {
#if KMP_AFFINITY_SUPPORTED
// __kmp_affinity_initialize() will try to set __kmp_ncores to the
// number of cores on the machine.
- __kmp_affinity_initialize();
+ __kmp_affinity_initialize(__kmp_affinity);
#endif /* KMP_AFFINITY_SUPPORTED */
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 97b20d4f86642..7de6871c6eb10 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -2148,12 +2148,7 @@ static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env,
static kmp_setting_t *__kmp_affinity_notype = NULL;
static void __kmp_parse_affinity_env(char const *name, char const *value,
- enum affinity_type *out_type,
- char **out_proclist, int *out_verbose,
- int *out_warn, int *out_respect,
- kmp_hw_t *out_gran, int *out_gran_levels,
- int *out_dups, int *out_compact,
- int *out_offset) {
+ kmp_affinity_t *out_affinity) {
char *buffer = NULL; // Copy of env var value.
char *buf = NULL; // Buffer for strtok_r() function.
char *next = NULL; // end of token / start of next.
@@ -2219,19 +2214,20 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
++_guard; \
}
-#define set_type(val) _set_param(type, *out_type, val)
-#define set_verbose(val) _set_param(verbose, *out_verbose, val)
-#define set_warnings(val) _set_param(warnings, *out_warn, val)
-#define set_respect(val) _set_param(respect, *out_respect, val)
-#define set_dups(val) _set_param(dups, *out_dups, val)
-#define set_proclist(val) _set_param(proclist, *out_proclist, val)
-#define set_reset(val) _set_param(reset, __kmp_affin_reset, val)
+#define set_type(val) _set_param(type, out_affinity->type, val)
+#define set_verbose(val) _set_param(verbose, out_affinity->flags.verbose, val)
+#define set_warnings(val) \
+ _set_param(warnings, out_affinity->flags.warnings, val)
+#define set_respect(val) _set_param(respect, out_affinity->flags.respect, val)
+#define set_dups(val) _set_param(dups, out_affinity->flags.dups, val)
+#define set_proclist(val) _set_param(proclist, out_affinity->proclist, val)
+#define set_reset(val) _set_param(reset, out_affinity->flags.reset, val)
#define set_gran(val, levels) \
{ \
if (gran == 0) { \
- *out_gran = val; \
- *out_gran_levels = levels; \
+ out_affinity->gran = val; \
+ out_affinity->gran_levels = levels; \
} else { \
EMIT_WARN(FALSE, (AffParamDefined, name, start)); \
} \
@@ -2448,20 +2444,20 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
if (proclist) {
if (!type) {
KMP_WARNING(AffProcListNoType, name);
- *out_type = affinity_explicit;
+ out_affinity->type = affinity_explicit;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
- } else if (*out_type != affinity_explicit) {
+ } else if (out_affinity->type != affinity_explicit) {
KMP_WARNING(AffProcListNotExplicit, name);
- KMP_ASSERT(*out_proclist != NULL);
- KMP_INTERNAL_FREE(*out_proclist);
- *out_proclist = NULL;
+ KMP_ASSERT(out_affinity->proclist != NULL);
+ KMP_INTERNAL_FREE(out_affinity->proclist);
+ out_affinity->proclist = NULL;
}
}
- switch (*out_type) {
+ switch (out_affinity->type) {
case affinity_logical:
case affinity_physical: {
if (count > 0) {
- *out_offset = number[0];
+ out_affinity->offset = number[0];
}
if (count > 1) {
KMP_WARNING(AffManyParamsForLogic, name, number[1]);
@@ -2469,42 +2465,44 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
} break;
case affinity_balanced: {
if (count > 0) {
- *out_compact = number[0];
+ out_affinity->compact = number[0];
}
if (count > 1) {
- *out_offset = number[1];
+ out_affinity->offset = number[1];
}
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN) {
+ if (__kmp_affinity.gran == KMP_HW_UNKNOWN) {
+ int verbose = out_affinity->flags.verbose;
+ int warnings = out_affinity->flags.warnings;
#if KMP_MIC_SUPPORTED
if (__kmp_mic_type != non_mic) {
- if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
- KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "fine");
+ if (verbose || warnings) {
+ KMP_WARNING(AffGranUsing, out_affinity->env_var, "fine");
}
- __kmp_affinity_gran = KMP_HW_THREAD;
+ out_affinity->gran = KMP_HW_THREAD;
} else
#endif
{
- if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
- KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "core");
+ if (verbose || warnings) {
+ KMP_WARNING(AffGranUsing, out_affinity->env_var, "core");
}
- __kmp_affinity_gran = KMP_HW_CORE;
+ out_affinity->gran = KMP_HW_CORE;
}
}
} break;
case affinity_scatter:
case affinity_compact: {
if (count > 0) {
- *out_compact = number[0];
+ out_affinity->compact = number[0];
}
if (count > 1) {
- *out_offset = number[1];
+ out_affinity->offset = number[1];
}
} break;
case affinity_explicit: {
- if (*out_proclist == NULL) {
+ if (out_affinity->proclist == NULL) {
KMP_WARNING(AffNoProcList, name);
- __kmp_affinity_type = affinity_none;
+ out_affinity->type = affinity_none;
}
if (count > 0) {
KMP_WARNING(AffNoParam, name, "explicit");
@@ -2541,74 +2539,68 @@ static void __kmp_stg_parse_affinity(char const *name, char const *value,
return;
}
- __kmp_parse_affinity_env(name, value, &__kmp_affinity_type,
- &__kmp_affinity_proclist, &__kmp_affinity_verbose,
- &__kmp_affinity_warnings,
- &__kmp_affinity_respect_mask, &__kmp_affinity_gran,
- &__kmp_affinity_gran_levels, &__kmp_affinity_dups,
- &__kmp_affinity_compact, &__kmp_affinity_offset);
+ __kmp_parse_affinity_env(name, value, &__kmp_affinity);
} // __kmp_stg_parse_affinity
-static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
- void *data) {
+static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
+ const kmp_affinity_t &affinity) {
if (__kmp_env_format) {
KMP_STR_BUF_PRINT_NAME_EX(name);
} else {
__kmp_str_buf_print(buffer, " %s='", name);
}
- if (__kmp_affinity_verbose) {
+ if (affinity.flags.verbose) {
__kmp_str_buf_print(buffer, "%s,", "verbose");
} else {
__kmp_str_buf_print(buffer, "%s,", "noverbose");
}
- if (__kmp_affinity_warnings) {
+ if (affinity.flags.warnings) {
__kmp_str_buf_print(buffer, "%s,", "warnings");
} else {
__kmp_str_buf_print(buffer, "%s,", "nowarnings");
}
if (KMP_AFFINITY_CAPABLE()) {
- if (__kmp_affinity_respect_mask) {
+ if (affinity.flags.respect) {
__kmp_str_buf_print(buffer, "%s,", "respect");
} else {
__kmp_str_buf_print(buffer, "%s,", "norespect");
}
- if (__kmp_affin_reset) {
+ if (affinity.flags.reset) {
__kmp_str_buf_print(buffer, "%s,", "reset");
} else {
__kmp_str_buf_print(buffer, "%s,", "noreset");
}
__kmp_str_buf_print(buffer, "granularity=%s,",
- __kmp_hw_get_keyword(__kmp_affinity_gran, false));
+ __kmp_hw_get_keyword(affinity.gran, false));
}
if (!KMP_AFFINITY_CAPABLE()) {
__kmp_str_buf_print(buffer, "%s", "disabled");
- } else
- switch (__kmp_affinity_type) {
+ } else {
+ int compact = affinity.compact;
+ int offset = affinity.offset;
+ switch (affinity.type) {
case affinity_none:
__kmp_str_buf_print(buffer, "%s", "none");
break;
case affinity_physical:
- __kmp_str_buf_print(buffer, "%s,%d", "physical", __kmp_affinity_offset);
+ __kmp_str_buf_print(buffer, "%s,%d", "physical", offset);
break;
case affinity_logical:
- __kmp_str_buf_print(buffer, "%s,%d", "logical", __kmp_affinity_offset);
+ __kmp_str_buf_print(buffer, "%s,%d", "logical", offset);
break;
case affinity_compact:
- __kmp_str_buf_print(buffer, "%s,%d,%d", "compact", __kmp_affinity_compact,
- __kmp_affinity_offset);
+ __kmp_str_buf_print(buffer, "%s,%d,%d", "compact", compact, offset);
break;
case affinity_scatter:
- __kmp_str_buf_print(buffer, "%s,%d,%d", "scatter", __kmp_affinity_compact,
- __kmp_affinity_offset);
+ __kmp_str_buf_print(buffer, "%s,%d,%d", "scatter", compact, offset);
break;
case affinity_explicit:
- __kmp_str_buf_print(buffer, "%s=[%s],%s", "proclist",
- __kmp_affinity_proclist, "explicit");
+ __kmp_str_buf_print(buffer, "%s=[%s],%s", "proclist", affinity.proclist,
+ "explicit");
break;
case affinity_balanced:
- __kmp_str_buf_print(buffer, "%s,%d,%d", "balanced",
- __kmp_affinity_compact, __kmp_affinity_offset);
+ __kmp_str_buf_print(buffer, "%s,%d,%d", "balanced", compact, offset);
break;
case affinity_disabled:
__kmp_str_buf_print(buffer, "%s", "disabled");
@@ -2620,9 +2612,15 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
__kmp_str_buf_print(buffer, "%s", "<unknown>");
break;
}
+ }
__kmp_str_buf_print(buffer, "'\n");
} //__kmp_stg_print_affinity
+static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
+ void *data) {
+ __kmp_print_affinity_env(buffer, name, __kmp_affinity);
+}
+
#ifdef KMP_GOMP_COMPAT
static void __kmp_stg_parse_gomp_cpu_affinity(char const *name,
@@ -2649,9 +2647,9 @@ static void __kmp_stg_parse_gomp_cpu_affinity(char const *name,
SKIP_WS(next);
if (*next == '\0') {
// GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=...
- __kmp_affinity_proclist = temp_proclist;
- __kmp_affinity_type = affinity_explicit;
- __kmp_affinity_gran = KMP_HW_THREAD;
+ __kmp_affinity.proclist = temp_proclist;
+ __kmp_affinity.type = affinity_explicit;
+ __kmp_affinity.gran = KMP_HW_THREAD;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
} else {
KMP_WARNING(AffSyntaxError, name);
@@ -2661,7 +2659,7 @@ static void __kmp_stg_parse_gomp_cpu_affinity(char const *name,
}
} else {
// Warning already emitted
- __kmp_affinity_type = affinity_none;
+ __kmp_affinity.type = affinity_none;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
}
} // __kmp_stg_parse_gomp_cpu_affinity
@@ -2963,9 +2961,9 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
const kmp_place_t &place = std_places[i];
if (__kmp_match_str(place.name, scan, &next)) {
scan = next;
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_gran = place.type;
- __kmp_affinity_dups = FALSE;
+ __kmp_affinity.type = affinity_compact;
+ __kmp_affinity.gran = place.type;
+ __kmp_affinity.flags.dups = FALSE;
set = true;
break;
}
@@ -2978,36 +2976,36 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
continue;
if (__kmp_match_str(name, scan, &next)) {
scan = next;
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_gran = type;
- __kmp_affinity_dups = FALSE;
+ __kmp_affinity.type = affinity_compact;
+ __kmp_affinity.gran = type;
+ __kmp_affinity.flags.dups = FALSE;
set = true;
break;
}
}
}
if (!set) {
- if (__kmp_affinity_proclist != NULL) {
- KMP_INTERNAL_FREE((void *)__kmp_affinity_proclist);
- __kmp_affinity_proclist = NULL;
- }
- if (__kmp_parse_place_list(name, value, &__kmp_affinity_proclist)) {
- __kmp_affinity_type = affinity_explicit;
- __kmp_affinity_gran = KMP_HW_THREAD;
- __kmp_affinity_dups = FALSE;
+ if (__kmp_affinity.proclist != NULL) {
+ KMP_INTERNAL_FREE((void *)__kmp_affinity.proclist);
+ __kmp_affinity.proclist = NULL;
+ }
+ if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) {
+ __kmp_affinity.type = affinity_explicit;
+ __kmp_affinity.gran = KMP_HW_THREAD;
+ __kmp_affinity.flags.dups = FALSE;
} else {
// Syntax error fallback
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_gran = KMP_HW_CORE;
- __kmp_affinity_dups = FALSE;
+ __kmp_affinity.type = affinity_compact;
+ __kmp_affinity.gran = KMP_HW_CORE;
+ __kmp_affinity.flags.dups = FALSE;
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
return;
}
- if (__kmp_affinity_gran != KMP_HW_UNKNOWN) {
- kind = __kmp_hw_get_keyword(__kmp_affinity_gran);
+ if (__kmp_affinity.gran != KMP_HW_UNKNOWN) {
+ kind = __kmp_hw_get_keyword(__kmp_affinity.gran);
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
@@ -3049,6 +3047,10 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
void *data) {
+ enum affinity_type type = __kmp_affinity.type;
+ const char *proclist = __kmp_affinity.proclist;
+ kmp_hw_t gran = __kmp_affinity.gran;
+
if (__kmp_env_format) {
KMP_STR_BUF_PRINT_NAME;
} else {
@@ -3058,23 +3060,23 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
(__kmp_nested_proc_bind.bind_types == NULL) ||
(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
- } else if (__kmp_affinity_type == affinity_explicit) {
- if (__kmp_affinity_proclist != NULL) {
- __kmp_str_buf_print(buffer, "='%s'\n", __kmp_affinity_proclist);
+ } else if (type == affinity_explicit) {
+ if (proclist != NULL) {
+ __kmp_str_buf_print(buffer, "='%s'\n", proclist);
} else {
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
}
- } else if (__kmp_affinity_type == affinity_compact) {
+ } else if (type == affinity_compact) {
int num;
- if (__kmp_affinity_num_masks > 0) {
- num = __kmp_affinity_num_masks;
+ if (__kmp_affinity.num_masks > 0) {
+ num = __kmp_affinity.num_masks;
} else if (__kmp_affinity_num_places > 0) {
num = __kmp_affinity_num_places;
} else {
num = 0;
}
- if (__kmp_affinity_gran != KMP_HW_UNKNOWN) {
- const char *name = __kmp_hw_get_keyword(__kmp_affinity_gran, true);
+ if (gran != KMP_HW_UNKNOWN) {
+ const char *name = __kmp_hw_get_keyword(gran, true);
if (num > 0) {
__kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num);
} else {
@@ -3306,7 +3308,7 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value,
buf = next;
SKIP_WS(buf);
#if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_type = affinity_disabled;
+ __kmp_affinity.type = affinity_disabled;
#endif /* KMP_AFFINITY_SUPPORTED */
__kmp_nested_proc_bind.used = 1;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
@@ -3315,7 +3317,7 @@ static void __kmp_stg_parse_proc_bind(char const *name, char const *value,
buf = next;
SKIP_WS(buf);
#if KMP_AFFINITY_SUPPORTED
- __kmp_affinity_type = affinity_none;
+ __kmp_affinity.type = affinity_none;
#endif /* KMP_AFFINITY_SUPPORTED */
__kmp_nested_proc_bind.used = 1;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
@@ -5887,6 +5889,22 @@ static int __kmp_env_toPrint(char const *name, int flag) {
return rc;
}
+#if defined(KMP_DEBUG) && KMP_AFFINITY_SUPPORTED
+static void __kmp_print_affinity_settings(const kmp_affinity_t *affinity) {
+ K_DIAG(1, ("%s:\n", affinity->env_var));
+ K_DIAG(1, (" type : %d\n", affinity->type));
+ K_DIAG(1, (" compact : %d\n", affinity->compact));
+ K_DIAG(1, (" offset : %d\n", affinity->offset));
+ K_DIAG(1, (" verbose : %u\n", affinity->flags.verbose));
+ K_DIAG(1, (" warnings : %u\n", affinity->flags.warnings));
+ K_DIAG(1, (" respect : %u\n", affinity->flags.respect));
+ K_DIAG(1, (" reset : %u\n", affinity->flags.reset));
+ K_DIAG(1, (" dups : %u\n", affinity->flags.dups));
+ K_DIAG(1, (" gran : %d\n", (int)affinity->gran));
+ KMP_DEBUG_ASSERT(affinity->type != affinity_default);
+}
+#endif
+
static void __kmp_aux_env_initialize(kmp_env_blk_t *block) {
char const *value;
@@ -5994,20 +6012,20 @@ void __kmp_env_initialize(char const *string) {
// A new affinity type is specified.
// Reset the affinity flags to their default values,
// in case this is called from kmp_set_defaults().
- __kmp_affinity_type = affinity_default;
- __kmp_affinity_gran = KMP_HW_UNKNOWN;
+ __kmp_affinity.type = affinity_default;
+ __kmp_affinity.gran = KMP_HW_UNKNOWN;
__kmp_affinity_top_method = affinity_top_method_default;
- __kmp_affinity_respect_mask = affinity_respect_mask_default;
+ __kmp_affinity.flags.respect = affinity_respect_mask_default;
}
#undef FIND
// Also reset the affinity flags if OMP_PROC_BIND is specified.
aff_str = __kmp_env_blk_var(&block, "OMP_PROC_BIND");
if (aff_str != NULL) {
- __kmp_affinity_type = affinity_default;
- __kmp_affinity_gran = KMP_HW_UNKNOWN;
+ __kmp_affinity.type = affinity_default;
+ __kmp_affinity.gran = KMP_HW_UNKNOWN;
__kmp_affinity_top_method = affinity_top_method_default;
- __kmp_affinity_respect_mask = affinity_respect_mask_default;
+ __kmp_affinity.flags.respect = affinity_respect_mask_default;
}
}
@@ -6083,12 +6101,12 @@ void __kmp_env_initialize(char const *string) {
__kmp_affinity_top_method == affinity_top_method_default)
if (__kmp_hw_subset->specified(KMP_HW_NUMA) ||
__kmp_hw_subset->specified(KMP_HW_TILE) ||
- __kmp_affinity_gran == KMP_HW_TILE ||
- __kmp_affinity_gran == KMP_HW_NUMA)
+ __kmp_affinity.gran == KMP_HW_TILE ||
+ __kmp_affinity.gran == KMP_HW_NUMA)
__kmp_affinity_top_method = affinity_top_method_hwloc;
// Force using hwloc when tiles or numa nodes requested for OMP_PLACES
- if (__kmp_affinity_gran == KMP_HW_NUMA ||
- __kmp_affinity_gran == KMP_HW_TILE)
+ if (__kmp_affinity.gran == KMP_HW_NUMA ||
+ __kmp_affinity.gran == KMP_HW_TILE)
__kmp_affinity_top_method = affinity_top_method_hwloc;
#endif
// Determine if the machine/OS is actually capable of supporting
@@ -6105,25 +6123,25 @@ void __kmp_env_initialize(char const *string) {
__kmp_affinity_top_method = affinity_top_method_all;
}
#endif
- if (__kmp_affinity_type == affinity_disabled) {
+ if (__kmp_affinity.type == affinity_disabled) {
KMP_AFFINITY_DISABLE();
} else if (!KMP_AFFINITY_CAPABLE()) {
__kmp_affinity_dispatch->determine_capable(var);
if (!KMP_AFFINITY_CAPABLE()) {
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_default) &&
- (__kmp_affinity_type != affinity_none) &&
- (__kmp_affinity_type != affinity_disabled))) {
+ if (__kmp_affinity.flags.verbose ||
+ (__kmp_affinity.flags.warnings &&
+ (__kmp_affinity.type != affinity_default) &&
+ (__kmp_affinity.type != affinity_none) &&
+ (__kmp_affinity.type != affinity_disabled))) {
KMP_WARNING(AffNotSupported, var);
}
- __kmp_affinity_type = affinity_disabled;
- __kmp_affinity_respect_mask = 0;
- __kmp_affinity_gran = KMP_HW_THREAD;
+ __kmp_affinity.type = affinity_disabled;
+ __kmp_affinity.flags.respect = FALSE;
+ __kmp_affinity.gran = KMP_HW_THREAD;
}
}
- if (__kmp_affinity_type == affinity_disabled) {
+ if (__kmp_affinity.type == affinity_disabled) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
} else if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_true) {
// OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread.
@@ -6165,48 +6183,48 @@ void __kmp_env_initialize(char const *string) {
// processor groups, or if the user requested it, and OMP 4.0
// affinity is not in effect.
if (__kmp_num_proc_groups > 1 &&
- __kmp_affinity_type == affinity_default &&
+ __kmp_affinity.type == affinity_default &&
__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
// Do not respect the initial processor affinity mask if it is assigned
// exactly one Windows Processor Group since this is interpreted as the
// default OS assignment. Not respecting the mask allows the runtime to
// use all the logical processors in all groups.
- if (__kmp_affinity_respect_mask == affinity_respect_mask_default &&
+ if (__kmp_affinity.flags.respect == affinity_respect_mask_default &&
exactly_one_group) {
- __kmp_affinity_respect_mask = FALSE;
+ __kmp_affinity.flags.respect = FALSE;
}
// Use compact affinity with anticipation of pinning to at least the
// group granularity since threads can only be bound to one group.
- if (__kmp_affinity_type == affinity_default) {
- __kmp_affinity_type = affinity_compact;
+ if (__kmp_affinity.type == affinity_default) {
+ __kmp_affinity.type = affinity_compact;
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
}
if (__kmp_affinity_top_method == affinity_top_method_default)
__kmp_affinity_top_method = affinity_top_method_all;
- if (__kmp_affinity_gran == KMP_HW_UNKNOWN)
- __kmp_affinity_gran = KMP_HW_PROC_GROUP;
+ if (__kmp_affinity.gran == KMP_HW_UNKNOWN)
+ __kmp_affinity.gran = KMP_HW_PROC_GROUP;
} else
#endif /* KMP_GROUP_AFFINITY */
{
- if (__kmp_affinity_respect_mask == affinity_respect_mask_default) {
+ if (__kmp_affinity.flags.respect == affinity_respect_mask_default) {
#if KMP_GROUP_AFFINITY
if (__kmp_num_proc_groups > 1 && exactly_one_group) {
- __kmp_affinity_respect_mask = FALSE;
+ __kmp_affinity.flags.respect = FALSE;
} else
#endif /* KMP_GROUP_AFFINITY */
{
- __kmp_affinity_respect_mask = TRUE;
+ __kmp_affinity.flags.respect = TRUE;
}
}
if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default)) {
- if (__kmp_affinity_type == affinity_default) {
- __kmp_affinity_type = affinity_compact;
- __kmp_affinity_dups = FALSE;
+ if (__kmp_affinity.type == affinity_default) {
+ __kmp_affinity.type = affinity_compact;
+ __kmp_affinity.flags.dups = FALSE;
}
- } else if (__kmp_affinity_type == affinity_default) {
+ } else if (__kmp_affinity.type == affinity_default) {
#if KMP_MIC_SUPPORTED
if (__kmp_mic_type != non_mic) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
@@ -6217,22 +6235,22 @@ void __kmp_env_initialize(char const *string) {
}
#if KMP_MIC_SUPPORTED
if (__kmp_mic_type != non_mic) {
- __kmp_affinity_type = affinity_scatter;
+ __kmp_affinity.type = affinity_scatter;
} else
#endif
{
- __kmp_affinity_type = affinity_none;
+ __kmp_affinity.type = affinity_none;
}
}
- if ((__kmp_affinity_gran == KMP_HW_UNKNOWN) &&
- (__kmp_affinity_gran_levels < 0)) {
+ if ((__kmp_affinity.gran == KMP_HW_UNKNOWN) &&
+ (__kmp_affinity.gran_levels < 0)) {
#if KMP_MIC_SUPPORTED
if (__kmp_mic_type != non_mic) {
- __kmp_affinity_gran = KMP_HW_THREAD;
+ __kmp_affinity.gran = KMP_HW_THREAD;
} else
#endif
{
- __kmp_affinity_gran = KMP_HW_CORE;
+ __kmp_affinity.gran = KMP_HW_CORE;
}
}
if (__kmp_affinity_top_method == affinity_top_method_default) {
@@ -6241,19 +6259,12 @@ void __kmp_env_initialize(char const *string) {
}
}
- K_DIAG(1, ("__kmp_affinity_type == %d\n", __kmp_affinity_type));
- K_DIAG(1, ("__kmp_affinity_compact == %d\n", __kmp_affinity_compact));
- K_DIAG(1, ("__kmp_affinity_offset == %d\n", __kmp_affinity_offset));
- K_DIAG(1, ("__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose));
- K_DIAG(1, ("__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings));
- K_DIAG(1, ("__kmp_affinity_respect_mask == %d\n",
- __kmp_affinity_respect_mask));
- K_DIAG(1, ("__kmp_affinity_gran == %d\n", __kmp_affinity_gran));
-
- KMP_DEBUG_ASSERT(__kmp_affinity_type != affinity_default);
+#ifdef KMP_DEBUG
+ __kmp_print_affinity_settings(&__kmp_affinity);
KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default);
K_DIAG(1, ("__kmp_nested_proc_bind.bind_types[0] == %d\n",
__kmp_nested_proc_bind.bind_types[0]));
+#endif
}
#endif /* KMP_AFFINITY_SUPPORTED */
diff --git a/openmp/runtime/src/kmp_version.cpp b/openmp/runtime/src/kmp_version.cpp
index bb600c120dd6c..39d0f6084badc 100644
--- a/openmp/runtime/src/kmp_version.cpp
+++ b/openmp/runtime/src/kmp_version.cpp
@@ -179,7 +179,7 @@ void __kmp_print_version_1(void) {
&buffer, "%sthread affinity support: %s\n", KMP_VERSION_PREF_STR,
#if KMP_AFFINITY_SUPPORTED
(KMP_AFFINITY_CAPABLE()
- ? (__kmp_affinity_type == affinity_none ? "not used" : "yes")
+ ? (__kmp_affinity.type == affinity_none ? "not used" : "yes")
: "no")
#else
"no"
diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
index 0bee7e77c8174..cdafa81a18b13 100644
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -687,7 +687,7 @@ OMPT_API_ROUTINE int ompt_get_num_places(void) {
#else
if (!KMP_AFFINITY_CAPABLE())
return 0;
- return __kmp_affinity_num_masks;
+ return __kmp_affinity.num_masks;
#endif
}
@@ -703,11 +703,11 @@ OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size,
tmp_ids[j] = 0;
if (!KMP_AFFINITY_CAPABLE())
return 0;
- if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
+ if (place_num < 0 || place_num >= (int)__kmp_affinity.num_masks)
return 0;
/* TODO: Is this safe for asynchronous call from signal handler during runtime
* shutdown? */
- kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
+ kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity.masks, place_num);
count = 0;
KMP_CPU_SET_ITERATE(i, mask) {
if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 6fd2f34a68da9..7b027a238d635 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -135,6 +135,9 @@ void __kmp_affinity_determine_capable(const char *env_var) {
long gCode;
unsigned char *buf;
buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
+ int verbose = __kmp_affinity.flags.verbose;
+ int warnings = __kmp_affinity.flags.warnings;
+ enum affinity_type type = __kmp_affinity.type;
// If the syscall returns a suggestion for the size,
// then we don't have to search for an appropriate size.
@@ -145,10 +148,9 @@ void __kmp_affinity_determine_capable(const char *env_var) {
if (gCode < 0 && errno != EINVAL) {
// System call not supported
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
- (__kmp_affinity_type != affinity_default) &&
- (__kmp_affinity_type != affinity_disabled))) {
+ if (verbose ||
+ (warnings && (type != affinity_none) && (type != affinity_default) &&
+ (type != affinity_disabled))) {
int error = errno;
kmp_msg_t err_code = KMP_ERR(error);
__kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
@@ -188,11 +190,9 @@ void __kmp_affinity_determine_capable(const char *env_var) {
"inconsistent OS call behavior: errno == ENOSYS for mask "
"size %d\n",
size));
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings &&
- (__kmp_affinity_type != affinity_none) &&
- (__kmp_affinity_type != affinity_default) &&
- (__kmp_affinity_type != affinity_disabled))) {
+ if (verbose ||
+ (warnings && (type != affinity_none) &&
+ (type != affinity_default) && (type != affinity_disabled))) {
int error = errno;
kmp_msg_t err_code = KMP_ERR(error);
__kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
@@ -239,10 +239,8 @@ void __kmp_affinity_determine_capable(const char *env_var) {
KMP_AFFINITY_DISABLE();
KA_TRACE(10, ("__kmp_affinity_determine_capable: "
"cannot determine mask size - affinity not supported\n"));
- if (__kmp_affinity_verbose ||
- (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
- (__kmp_affinity_type != affinity_default) &&
- (__kmp_affinity_type != affinity_disabled))) {
+ if (verbose || (warnings && (type != affinity_none) &&
+ (type != affinity_default) && (type != affinity_disabled))) {
KMP_WARNING(AffCantGetMaskSize, env_var);
}
}
@@ -1230,12 +1228,12 @@ static void __kmp_atfork_child(void) {
// Set default not to bind threads tightly in the child (we're expecting
// over-subscription after the fork and this can improve things for
// scripting languages that use OpenMP inside process-parallel code).
- __kmp_affinity_type = affinity_none;
if (__kmp_nested_proc_bind.bind_types != NULL) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
}
- __kmp_affinity_masks = NULL;
- __kmp_affinity_num_masks = 0;
+ __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
+ __kmp_affin_fullMask = nullptr;
+ __kmp_affin_origMask = nullptr;
#endif // KMP_AFFINITY_SUPPORTED
#if KMP_USE_MONITOR
diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp
index 9c77e287cc5a2..d6ec80e9c7b4e 100644
--- a/openmp/runtime/src/z_Windows_NT_util.cpp
+++ b/openmp/runtime/src/z_Windows_NT_util.cpp
@@ -608,7 +608,8 @@ void __kmp_affinity_bind_thread(int proc) {
KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
DWORD error = GetLastError();
- if (__kmp_affinity_verbose) { // AC: continue silently if not verbose
+ // AC: continue silently if not verbose
+ if (__kmp_affinity.flags.verbose) {
kmp_msg_t err_code = KMP_ERR(error);
__kmp_msg(kmp_ms_warning, KMP_MSG(CantSetThreadAffMask), err_code,
__kmp_msg_null);
More information about the Openmp-commits
mailing list