[Openmp-commits] [openmp] b34c7d8 - [OpenMP] Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jul 31 11:56:32 PDT 2023
Author: Jonathan Peyton
Date: 2023-07-31T13:55:32-05:00
New Revision: b34c7d8c8e1a36bdd432210247511aee4423a316
URL: https://github.com/llvm/llvm-project/commit/b34c7d8c8e1a36bdd432210247511aee4423a316
DIFF: https://github.com/llvm/llvm-project/commit/b34c7d8c8e1a36bdd432210247511aee4423a316.diff
LOG: [OpenMP] Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY
* Add KMP_CPU_EQUAL and KMP_CPU_ISEMPTY to affinity mask API
* Add printout of leader to hardware thread dump
* Allow OMP_PLACES to restrict fullMask
This change fixes an issue with the OMP_PLACES=resource(#) syntax.
Before this change, specifying the number of resources did NOT change
the default number of threads created by the runtime. e.g.,
OMP_PLACES=cores(2) would still create __kmp_avail_proc number of
threads. After this change, the fullMask and __kmp_avail_proc are
modified if necessary so that the final place list dictates which
resources are available and, thus, how many threads are created by
default.
* Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY
For OMP_PLACES, two new features are added:
1) OMP_PLACES=cores:<attribute> where <attribute> is either
intel_atom, intel_core, or eff# where # ranges from 0 to
(number of core efficiencies - 1). This syntax also supports the
optional (#) number selection of resources.
2) OMP_PLACES=core_types|core_effs where this setting will create
the number of core_types (or core_effs|core_efficiencies).
For KMP_AFFINITY, the granularity setting is expanded to include two new
keywords: core_type, and core_eff (or core_efficiency). This will set
the granularity to include all cores with a particular core type (or
efficiency). e.g., KMP_AFFINITY=granularity=core_type,compact will
create threads which can float across a single core type.
Differential Revision: https://reviews.llvm.org/D154547
Added:
Modified:
openmp/runtime/src/i18n/en_US.txt
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_affinity.cpp
openmp/runtime/src/kmp_affinity.h
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/src/z_Linux_util.cpp
openmp/runtime/test/affinity/omp-places-invalid-syntax.c
Removed:
################################################################################
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 228bcdb25a8ea7..08e837d3dea11e 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -480,6 +480,8 @@ AffHWSubsetAllFiltered "KMP_HW_SUBSET ignored: all hardware resources woul
AffHWSubsetAttrsNonHybrid "KMP_HW_SUBSET ignored: Too many attributes specified. This machine is not a hybrid architecutre."
AffHWSubsetIgnoringAttr "KMP_HW_SUBSET: ignoring %1$s attribute. This machine is not a hybrid architecutre."
TargetMemNotAvailable "Target memory not available, will use default allocator."
+AffIgnoringNonHybrid "%1$s ignored: This machine is not a hybrid architecutre. Using \"%2$s\" instead."
+AffIgnoringNotAvailable "%1$s ignored: %2$s is not available. Using \"%3$s\" instead."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 641d32357ce873..a9308ed938a1a6 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -690,10 +690,12 @@ extern size_t __kmp_affin_mask_size;
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
+#define KMP_CPU_ISEMPTY(mask) (mask)->empty()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
+#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
@@ -730,6 +732,8 @@ class KMPAffinity {
virtual void clear(int i) {}
// Zero out entire mask
virtual void zero() {}
+ // Check whether mask is empty
+ virtual bool empty() const { return true; }
// Copy src into this mask
virtual void copy(const Mask *src) {}
// this &= rhs
@@ -738,6 +742,8 @@ class KMPAffinity {
virtual void bitwise_or(const Mask *rhs) {}
// this = ~this
virtual void bitwise_not() {}
+ // this == rhs
+ virtual bool is_equal(const Mask *rhs) const { return false; }
// API for iterating over an affinity mask
// for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
virtual int begin() const { return 0; }
@@ -866,7 +872,10 @@ typedef struct kmp_affinity_flags_t {
unsigned respect : 2;
unsigned reset : 1;
unsigned initialized : 1;
- unsigned reserved : 25;
+ unsigned core_types_gran : 1;
+ unsigned core_effs_gran : 1;
+ unsigned omp_places : 1;
+ unsigned reserved : 22;
} kmp_affinity_flags_t;
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
@@ -895,6 +904,7 @@ typedef struct kmp_affinity_t {
enum affinity_type type;
kmp_hw_t gran;
int gran_levels;
+ kmp_affinity_attrs_t core_attr_gran;
int compact;
int offset;
kmp_affinity_flags_t flags;
@@ -909,9 +919,11 @@ typedef struct kmp_affinity_t {
#define KMP_AFFINITY_INIT(env) \
{ \
- nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
- {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
- nullptr, nullptr, nullptr, 0, nullptr, env \
+ nullptr, affinity_default, KMP_HW_UNKNOWN, -1, KMP_AFFINITY_ATTRS_UNKNOWN, \
+ 0, 0, \
+ {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE, \
+ FALSE, FALSE, FALSE}, \
+ 0, nullptr, nullptr, nullptr, 0, nullptr, env \
}
extern enum affinity_top_method __kmp_affinity_top_method;
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index cbb80bf3a8485d..ce7a41811d4a14 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -38,6 +38,43 @@ static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
+#if KMP_AFFINITY_SUPPORTED
+// Helper class to see if place lists further restrict the fullMask
+class kmp_full_mask_modifier_t {
+ kmp_affin_mask_t *mask;
+
+public:
+ kmp_full_mask_modifier_t() {
+ KMP_CPU_ALLOC(mask);
+ KMP_CPU_ZERO(mask);
+ }
+ ~kmp_full_mask_modifier_t() {
+ KMP_CPU_FREE(mask);
+ mask = nullptr;
+ }
+ void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, other); }
+ // If the new full mask is different from the current full mask,
+ // then switch them. Returns true if full mask was affected, false otherwise.
+ bool restrict_to_mask() {
+ // See if the new mask further restricts or changes the full mask
+ if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask))
+ return false;
+ return __kmp_topology->restrict_to_mask(mask);
+ }
+};
+
+static inline const char *
+__kmp_get_affinity_env_var(const kmp_affinity_t &affinity,
+ bool for_binding = false) {
+ if (affinity.flags.omp_places) {
+ if (for_binding)
+ return "OMP_PROC_BIND";
+ return "OMP_PLACES";
+ }
+ return affinity.env_var;
+}
+#endif // KMP_AFFINITY_SUPPORTED
+
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
kmp_uint32 depth;
// The test below is true if affinity is available, but set to "none". Need to
@@ -207,6 +244,8 @@ void kmp_hw_thread_t::print() const {
if (attrs.is_core_eff_valid())
printf(" (eff=%d)", attrs.get_core_eff());
}
+ if (leader)
+ printf(" (leader)");
printf("\n");
}
@@ -797,7 +836,40 @@ void kmp_topology_t::print(const char *env_var) const {
#if KMP_AFFINITY_SUPPORTED
void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
- const char *env_var = affinity.env_var;
+ const char *env_var = __kmp_get_affinity_env_var(affinity);
+ // If requested hybrid CPU attributes for granularity (either OMP_PLACES or
+ // KMP_AFFINITY), but none exist, then reset granularity and have below method
+ // select a granularity and warn user.
+ if (!__kmp_is_hybrid_cpu()) {
+ if (affinity.core_attr_gran.valid) {
+ // OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
+ // instead
+ KMP_AFF_WARNING(
+ affinity, AffIgnoringNonHybrid, env_var,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
+ affinity.gran = KMP_HW_CORE;
+ affinity.gran_levels = -1;
+ affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
+ affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
+ } else if (affinity.flags.core_types_gran ||
+ affinity.flags.core_effs_gran) {
+ // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
+ if (affinity.flags.omp_places) {
+ KMP_AFF_WARNING(
+ affinity, AffIgnoringNonHybrid, env_var,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
+ } else {
+ // KMP_AFFINITY=granularity=core_type|core_eff,...
+ KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
+ "Intel(R) Hybrid Technology core attribute",
+ __kmp_hw_get_catalog_string(KMP_HW_CORE));
+ }
+ affinity.gran = KMP_HW_CORE;
+ affinity.gran_levels = -1;
+ affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
+ affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
+ }
+ }
// Set the number of affinity granularity levels
if (affinity.gran_levels < 0) {
kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
@@ -937,6 +1009,7 @@ template <size_t SIZE, typename IndexFunc> struct kmp_sub_ids_t {
}
};
+#if KMP_AFFINITY_SUPPORTED
static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
bool plural) {
@@ -952,6 +1025,41 @@ __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
return buf;
}
+bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) {
+ // Apply the filter
+ bool affected;
+ int new_index = 0;
+ for (int i = 0; i < num_hw_threads; ++i) {
+ int os_id = hw_threads[i].os_id;
+ if (KMP_CPU_ISSET(os_id, mask)) {
+ if (i != new_index)
+ hw_threads[new_index] = hw_threads[i];
+ new_index++;
+ } else {
+ KMP_CPU_CLR(os_id, __kmp_affin_fullMask);
+ __kmp_avail_proc--;
+ }
+ }
+
+ KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
+ affected = (num_hw_threads != new_index);
+ num_hw_threads = new_index;
+
+ // Post hardware subset canonicalization
+ if (affected) {
+ _gather_enumeration_information();
+ _discover_uniformity();
+ _set_globals();
+ _set_last_level_cache();
+#if KMP_OS_WINDOWS
+ // Copy filtered full mask if topology has single processor group
+ if (__kmp_num_proc_groups <= 1)
+#endif
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
+ }
+ return affected;
+}
+
// Apply the KMP_HW_SUBSET envirable to the topology
// Returns true if KMP_HW_SUBSET filtered any processors
// otherwise, returns false
@@ -1156,7 +1264,9 @@ bool kmp_topology_t::filter_hw_subset() {
// Determine which hardware threads should be filtered.
int num_filtered = 0;
- bool *filtered = (bool *)__kmp_allocate(sizeof(bool) * num_hw_threads);
+ kmp_affin_mask_t *filtered_mask;
+ KMP_CPU_ALLOC(filtered_mask);
+ KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask);
for (int i = 0; i < num_hw_threads; ++i) {
kmp_hw_thread_t &hw_thread = hw_threads[i];
// Update type_sub_id
@@ -1218,51 +1328,35 @@ bool kmp_topology_t::filter_hw_subset() {
}
}
// Collect filtering information
- filtered[i] = should_be_filtered;
- if (should_be_filtered)
+ if (should_be_filtered) {
+ KMP_CPU_CLR(hw_thread.os_id, filtered_mask);
num_filtered++;
+ }
}
// One last check that we shouldn't allow filtering entire machine
if (num_filtered == num_hw_threads) {
KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered);
- __kmp_free(filtered);
return false;
}
// Apply the filter
- int new_index = 0;
- for (int i = 0; i < num_hw_threads; ++i) {
- if (!filtered[i]) {
- if (i != new_index)
- hw_threads[new_index] = hw_threads[i];
- new_index++;
- } else {
-#if KMP_AFFINITY_SUPPORTED
- KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask);
-#endif
- __kmp_avail_proc--;
- }
- }
-
- KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
- num_hw_threads = new_index;
-
- // Post hardware subset canonicalization
- _gather_enumeration_information();
- _discover_uniformity();
- _set_globals();
- _set_last_level_cache();
- __kmp_free(filtered);
+ restrict_to_mask(filtered_mask);
return true;
}
-bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
+bool kmp_topology_t::is_close(int hwt1, int hwt2,
+ const kmp_affinity_t &stgs) const {
+ int hw_level = stgs.gran_levels;
if (hw_level >= depth)
return true;
bool retval = true;
const kmp_hw_thread_t &t1 = hw_threads[hwt1];
const kmp_hw_thread_t &t2 = hw_threads[hwt2];
+ if (stgs.flags.core_types_gran)
+ return t1.attrs.get_core_type() == t2.attrs.get_core_type();
+ if (stgs.flags.core_effs_gran)
+ return t1.attrs.get_core_eff() == t2.attrs.get_core_eff();
for (int i = 0; i < (depth - hw_level); ++i) {
if (t1.ids[i] != t2.ids[i])
return false;
@@ -1272,8 +1366,6 @@ bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
////////////////////////////////////////////////////////////////////////////////
-#if KMP_AFFINITY_SUPPORTED
-
bool KMPAffinity::picked_api = false;
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
@@ -3353,17 +3445,25 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
+template <typename FindNextFunctionType>
static void __kmp_create_os_id_masks(unsigned *numUnique,
- kmp_affinity_t &affinity) {
+ kmp_affinity_t &affinity,
+ FindNextFunctionType find_next) {
// First form a table of affinity masks in order of OS thread id.
int maxOsId;
int i;
int numAddrs = __kmp_topology->get_num_hw_threads();
int depth = __kmp_topology->get_depth();
- const char *env_var = affinity.env_var;
+ const char *env_var = __kmp_get_affinity_env_var(affinity);
KMP_ASSERT(numAddrs);
KMP_ASSERT(depth);
+ i = find_next(-1);
+ // If could not find HW thread location with attributes, then return and
+ // fallback to increment find_next and disregard core attributes.
+ if (i >= numAddrs)
+ return;
+
maxOsId = 0;
for (i = numAddrs - 1;; --i) {
int osId = __kmp_topology->at(i).os_id;
@@ -3393,19 +3493,22 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
kmp_affin_mask_t *sum;
KMP_CPU_ALLOC_ON_STACK(sum);
KMP_CPU_ZERO(sum);
- KMP_CPU_SET(__kmp_topology->at(0).os_id, sum);
- for (i = 1; i < numAddrs; i++) {
+
+ i = j = leader = find_next(-1);
+ KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
+ kmp_full_mask_modifier_t full_mask;
+ for (i = find_next(i); i < numAddrs; i = find_next(i)) {
// If this thread is sufficiently close to the leader (within the
// granularity setting), then set the bit for this os thread in the
// affinity mask for this group, and go on to the next thread.
- if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) {
+ if (__kmp_topology->is_close(leader, i, affinity)) {
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
continue;
}
// For every thread in this group, copy the mask to the thread's entry in
// the OS Id mask table. Mark the first address as a leader.
- for (; j < i; j++) {
+ for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
@@ -3416,22 +3519,29 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
// Start a new mask.
leader = i;
+ full_mask.include(sum);
KMP_CPU_ZERO(sum);
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
}
// For every thread in last group, copy the mask to the thread's
// entry in the OS Id mask table.
- for (; j < i; j++) {
+ for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
KMP_CPU_COPY(mask, sum);
__kmp_topology->at(j).leader = (j == leader);
}
+ full_mask.include(sum);
unique++;
KMP_CPU_FREE_FROM_STACK(sum);
+ // See if the OS Id mask table further restricts or changes the full mask
+ if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
+ __kmp_topology->print(env_var);
+ }
+
*numUnique = unique;
}
@@ -4134,8 +4244,11 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
// Create the OS proc to hardware thread map
- for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread)
- __kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread;
+ for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
+ int os_id = __kmp_topology->at(hw_thread).os_id;
+ if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask))
+ __kmp_osid_to_hwthread_map[os_id] = hw_thread;
+ }
for (unsigned i = 0; i < affinity.num_masks; ++i) {
kmp_affinity_ids_t &ids = affinity.ids[i];
@@ -4145,16 +4258,26 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
}
+// Called when __kmp_topology is ready
+static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
+ // Initialize data dependent on __kmp_topology
+ if (__kmp_topology) {
+ machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
+ __kmp_affinity_get_topology_info(affinity);
+ }
+}
+
// Create a one element mask array (set of places) which only contains the
// initial process's affinity mask
static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
KMP_ASSERT(affinity.type == affinity_none);
+ KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
affinity.num_masks = 1;
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
- __kmp_affinity_get_topology_info(affinity);
+ __kmp_aux_affinity_initialize_other_data(affinity);
}
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
@@ -4383,13 +4506,6 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
if (verbose)
__kmp_topology->print(env_var);
bool filtered = __kmp_topology->filter_hw_subset();
- if (filtered) {
-#if KMP_OS_WINDOWS
- // Copy filtered full mask if topology has single processor group
- if (__kmp_num_proc_groups <= 1)
-#endif
- __kmp_affin_origMask->copy(__kmp_affin_fullMask);
- }
if (filtered && verbose)
__kmp_topology->print("KMP_HW_SUBSET");
return success;
@@ -4398,7 +4514,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
bool is_regular_affinity = (&affinity == &__kmp_affinity);
bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
- const char *env_var = affinity.env_var;
+ const char *env_var = __kmp_get_affinity_env_var(affinity);
if (affinity.flags.initialized) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
@@ -4437,7 +4553,36 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
// Create the table of masks, indexed by thread Id.
unsigned numUnique;
- __kmp_create_os_id_masks(&numUnique, affinity);
+ int numAddrs = __kmp_topology->get_num_hw_threads();
+ // If OMP_PLACES=cores:<attribute> specified, then attempt
+ // to make OS Id mask table using those attributes
+ if (affinity.core_attr_gran.valid) {
+ __kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) {
+ KMP_ASSERT(idx >= -1);
+ for (int i = idx + 1; i < numAddrs; ++i)
+ if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
+ return i;
+ return numAddrs;
+ });
+ if (!affinity.os_id_masks) {
+ const char *core_attribute;
+ if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
+ core_attribute = "core_efficiency";
+ else
+ core_attribute = "core_type";
+ KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var,
+ core_attribute,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true))
+ }
+ }
+ // If core attributes did not work, or none were specified,
+ // then make OS Id mask table using typical incremental way.
+ if (!affinity.os_id_masks) {
+ __kmp_create_os_id_masks(&numUnique, affinity, [](int idx) {
+ KMP_ASSERT(idx >= -1);
+ return idx + 1;
+ });
+ }
if (affinity.gran_levels == 0) {
KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
}
@@ -4578,6 +4723,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
int i;
unsigned j;
int num_hw_threads = __kmp_topology->get_num_hw_threads();
+ kmp_full_mask_modifier_t full_mask;
for (i = 0, j = 0; i < num_hw_threads; i++) {
if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
continue;
@@ -4588,11 +4734,16 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
KMP_ASSERT(KMP_CPU_ISSET(osId, src));
KMP_CPU_COPY(dest, src);
+ full_mask.include(src);
if (++j >= affinity.num_masks) {
break;
}
}
KMP_DEBUG_ASSERT(j == affinity.num_masks);
+ // See if the places list further restricts or changes the full mask
+ if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
+ __kmp_topology->print(env_var);
+ }
}
// Sort the topology back using ids
__kmp_topology->sort_ids();
@@ -4601,7 +4752,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
default:
KMP_ASSERT2(0, "Unexpected affinity setting");
}
- __kmp_affinity_get_topology_info(affinity);
+ __kmp_aux_affinity_initialize_other_data(affinity);
affinity.flags.initialized = TRUE;
}
@@ -4722,7 +4873,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
- env_var = affinity->env_var;
+ env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
if ((affinity->type == affinity_none) ||
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index f27dd9a5339e86..fbc0d400de772e 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -34,6 +34,7 @@ class KMPHwlocAffinity : public KMPAffinity {
bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
void clear(int i) override { hwloc_bitmap_clr(mask, i); }
void zero() override { hwloc_bitmap_zero(mask); }
+ bool empty() const override { return hwloc_bitmap_iszero(mask); }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
hwloc_bitmap_copy(mask, convert->mask);
@@ -47,6 +48,10 @@ class KMPHwlocAffinity : public KMPAffinity {
hwloc_bitmap_or(mask, mask, convert->mask);
}
void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
+ bool is_equal(const KMPAffinity::Mask *rhs) const override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ return hwloc_bitmap_isequal(mask, convert->mask);
+ }
int begin() const override { return hwloc_bitmap_first(mask); }
int end() const override { return -1; }
int next(int previous) const override {
@@ -319,6 +324,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = (mask_t)0;
}
+ bool empty() const override {
+ mask_size_type e = get_num_mask_types();
+ for (mask_size_type i = 0; i < e; ++i)
+ if (mask[i] != (mask_t)0)
+ return false;
+ return true;
+ }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
mask_size_type e = get_num_mask_types();
@@ -342,6 +354,14 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = ~(mask[i]);
}
+ bool is_equal(const KMPAffinity::Mask *rhs) const override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ mask_size_type e = get_num_mask_types();
+ for (mask_size_type i = 0; i < e; ++i)
+ if (mask[i] != convert->mask[i])
+ return false;
+ return true;
+ }
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -459,6 +479,12 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = 0;
}
+ bool empty() const override {
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ if (mask[i])
+ return false;
+ return true;
+ }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
for (int i = 0; i < __kmp_num_proc_groups; ++i)
@@ -478,6 +504,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = ~(mask[i]);
}
+ bool is_equal(const KMPAffinity::Mask *rhs) const override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ if (mask[i] != convert->mask[i])
+ return false;
+ return true;
+ }
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -679,6 +712,21 @@ struct kmp_hw_attr_t {
}
return false;
}
+#if KMP_AFFINITY_SUPPORTED
+ bool contains(const kmp_affinity_attrs_t &attr) const {
+ if (!valid && !attr.valid)
+ return true;
+ if (valid && attr.valid) {
+ if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
+ return (is_core_type_valid() &&
+ (get_core_type() == (kmp_hw_core_type_t)attr.core_type));
+ if (attr.core_eff != UNKNOWN_CORE_EFF)
+ return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
+ return true;
+ }
+ return false;
+ }
+#endif // KMP_AFFINITY_SUPPORTED
bool operator==(const kmp_hw_attr_t &rhs) const {
return (rhs.valid == valid && rhs.core_eff == core_eff &&
rhs.core_type == core_type);
@@ -834,13 +882,18 @@ class kmp_topology_t {
#if KMP_AFFINITY_SUPPORTED
// Set the granularity for affinity settings
void set_granularity(kmp_affinity_t &stgs) const;
-#endif
+ bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
+ bool restrict_to_mask(const kmp_affin_mask_t *mask);
bool filter_hw_subset();
- bool is_close(int hwt1, int hwt2, int level) const;
+#endif
bool is_uniform() const { return flags.uniform; }
// Tell whether a type is a valid type in the topology
// returns KMP_HW_UNKNOWN when there is no equivalent type
- kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
+ kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
+ if (type == KMP_HW_UNKNOWN)
+ return KMP_HW_UNKNOWN;
+ return equivalent[type];
+ }
// Set type1 = type2
void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b81376d1632bad..647cf22d3d0aee 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -2005,6 +2005,21 @@ static void __kmp_stg_print_foreign_threads_threadprivate(kmp_str_buf_t *buffer,
// -----------------------------------------------------------------------------
// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD
+static inline const char *
+__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
+ switch (type) {
+ case KMP_HW_CORE_TYPE_UNKNOWN:
+ return "unknown";
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ case KMP_HW_CORE_TYPE_ATOM:
+ return "intel_atom";
+ case KMP_HW_CORE_TYPE_CORE:
+ return "intel_core";
+#endif
+ }
+ return "unknown";
+}
+
#if KMP_AFFINITY_SUPPORTED
// Parse the proc id list. Return TRUE if successful, FALSE otherwise.
static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env,
@@ -2359,14 +2374,32 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
buf = next;
- // Try any hardware topology type for granularity
- KMP_FOREACH_HW_TYPE(type) {
- const char *name = __kmp_hw_get_keyword(type);
- if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
- set_gran(type, -1);
- buf = next;
- set = true;
- break;
+ // Have to try core_type and core_efficiency matches first since "core"
+ // will register as core granularity with "extra chars"
+ if (__kmp_match_str("core_type", buf, CCAST(const char **, &next))) {
+ set_gran(KMP_HW_CORE, -1);
+ out_affinity->flags.core_types_gran = 1;
+ buf = next;
+ set = true;
+ } else if (__kmp_match_str("core_efficiency", buf,
+ CCAST(const char **, &next)) ||
+ __kmp_match_str("core_eff", buf,
+ CCAST(const char **, &next))) {
+ set_gran(KMP_HW_CORE, -1);
+ out_affinity->flags.core_effs_gran = 1;
+ buf = next;
+ set = true;
+ }
+ if (!set) {
+ // Try any hardware topology type for granularity
+ KMP_FOREACH_HW_TYPE(type) {
+ const char *name = __kmp_hw_get_keyword(type);
+ if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
+ set_gran(type, -1);
+ buf = next;
+ set = true;
+ break;
+ }
}
}
if (!set) {
@@ -2626,8 +2659,15 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
__kmp_str_buf_print(buffer, "%s,", "noreset");
}
}
- __kmp_str_buf_print(buffer, "granularity=%s,",
- __kmp_hw_get_keyword(affinity.gran, false));
+ __kmp_str_buf_print(buffer, "granularity=");
+ if (affinity.flags.core_types_gran)
+ __kmp_str_buf_print(buffer, "core_type,");
+ else if (affinity.flags.core_effs_gran) {
+ __kmp_str_buf_print(buffer, "core_eff,");
+ } else {
+ __kmp_str_buf_print(
+ buffer, "%s,", __kmp_hw_get_keyword(affinity.gran, /*plural=*/false));
+ }
}
if (!KMP_AFFINITY_CAPABLE()) {
__kmp_str_buf_print(buffer, "%s", "disabled");
@@ -2745,11 +2785,7 @@ signed := + signed
signed := - signed
-----------------------------------------------------------------------------*/
-// Warning to issue for syntax error during parsing of OMP_PLACES
-static inline void __kmp_omp_places_syntax_warn(const char *var) {
- KMP_WARNING(SyntaxErrorUsing, var, "\"cores\"");
-}
-
+// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_subplace_list(const char *var, const char **scan) {
const char *next;
@@ -2761,7 +2797,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
//
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2780,7 +2815,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2788,7 +2822,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
// Read count parameter
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2807,7 +2840,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2829,7 +2861,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
}
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2848,13 +2879,12 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
(*scan)++; // skip ','
continue;
}
-
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
+// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place(const char *var, const char **scan) {
const char *next;
@@ -2866,7 +2896,6 @@ static int __kmp_parse_place(const char *var, const char **scan) {
return FALSE;
}
if (**scan != '}') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip '}'
@@ -2880,12 +2909,12 @@ static int __kmp_parse_place(const char *var, const char **scan) {
KMP_ASSERT(proc >= 0);
*scan = next;
} else {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
+// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place_list(const char *var, const char *env,
char **place_list) {
const char *scan = env;
@@ -2908,7 +2937,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2916,7 +2944,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
// Read count parameter
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2935,7 +2962,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2957,7 +2983,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
}
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2977,7 +3002,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
@@ -2991,6 +3015,22 @@ static int __kmp_parse_place_list(const char *var, const char *env,
return TRUE;
}
+static inline void __kmp_places_set(enum affinity_type type, kmp_hw_t kind) {
+ __kmp_affinity.type = type;
+ __kmp_affinity.gran = kind;
+ __kmp_affinity.flags.dups = FALSE;
+ __kmp_affinity.flags.omp_places = TRUE;
+}
+
+static void __kmp_places_syntax_error_fallback(char const *name,
+ kmp_hw_t kind) {
+ const char *str = __kmp_hw_get_catalog_string(kind, /*plural=*/true);
+ KMP_WARNING(SyntaxErrorUsing, name, str);
+ __kmp_places_set(affinity_compact, kind);
+ if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
+}
+
static void __kmp_stg_parse_places(char const *name, char const *value,
void *data) {
struct kmp_place_t {
@@ -3001,7 +3041,6 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
bool set = false;
const char *scan = value;
const char *next = scan;
- const char *kind = "\"threads\"";
kmp_place_t std_places[] = {{"threads", KMP_HW_THREAD},
{"cores", KMP_HW_CORE},
{"numa_domains", KMP_HW_NUMA},
@@ -3020,10 +3059,54 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
const kmp_place_t &place = std_places[i];
if (__kmp_match_str(place.name, scan, &next)) {
scan = next;
- __kmp_affinity.type = affinity_compact;
- __kmp_affinity.gran = place.type;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_set(affinity_compact, place.type);
set = true;
+ // Parse core attribute if it exists
+ if (KMP_HW_MAX_NUM_CORE_TYPES > 1) {
+ SKIP_WS(scan);
+ if (*scan == ':') {
+ if (place.type != KMP_HW_CORE) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ scan++; // skip ':'
+ SKIP_WS(scan);
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ if (__kmp_match_str("intel_core", scan, &next)) {
+ __kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_CORE;
+ __kmp_affinity.core_attr_gran.valid = 1;
+ scan = next;
+ } else if (__kmp_match_str("intel_atom", scan, &next)) {
+ __kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_ATOM;
+ __kmp_affinity.core_attr_gran.valid = 1;
+ scan = next;
+ } else
+#endif
+ if (__kmp_match_str("eff", scan, &next)) {
+ int eff;
+ if (!isdigit(*next)) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ scan = next;
+ SKIP_DIGITS(next);
+ eff = __kmp_str_to_int(scan, *next);
+ if (eff < 0) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ if (eff >= KMP_HW_MAX_NUM_CORE_EFFS)
+ eff = KMP_HW_MAX_NUM_CORE_EFFS - 1;
+ __kmp_affinity.core_attr_gran.core_eff = eff;
+ __kmp_affinity.core_attr_gran.valid = 1;
+ scan = next;
+ }
+ if (!__kmp_affinity.core_attr_gran.valid) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ }
+ }
break;
}
}
@@ -3035,36 +3118,56 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
continue;
if (__kmp_match_str(name, scan, &next)) {
scan = next;
- __kmp_affinity.type = affinity_compact;
- __kmp_affinity.gran = type;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_set(affinity_compact, type);
set = true;
break;
}
}
}
+ // Implementation choices for OMP_PLACES based on core attributes
+ if (!set) {
+ if (__kmp_match_str("core_types", scan, &next)) {
+ scan = next;
+ if (*scan != '\0') {
+ KMP_WARNING(ParseExtraCharsWarn, name, scan);
+ }
+ __kmp_places_set(affinity_compact, KMP_HW_CORE);
+ __kmp_affinity.flags.core_types_gran = 1;
+ set = true;
+ } else if (__kmp_match_str("core_effs", scan, &next) ||
+ __kmp_match_str("core_efficiencies", scan, &next)) {
+ scan = next;
+ if (*scan != '\0') {
+ KMP_WARNING(ParseExtraCharsWarn, name, scan);
+ }
+ __kmp_places_set(affinity_compact, KMP_HW_CORE);
+ __kmp_affinity.flags.core_effs_gran = 1;
+ set = true;
+ }
+ }
+ // Explicit place list
if (!set) {
if (__kmp_affinity.proclist != NULL) {
KMP_INTERNAL_FREE((void *)__kmp_affinity.proclist);
__kmp_affinity.proclist = NULL;
}
if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) {
- __kmp_affinity.type = affinity_explicit;
- __kmp_affinity.gran = KMP_HW_THREAD;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_set(affinity_explicit, KMP_HW_THREAD);
} else {
// Syntax error fallback
- __kmp_affinity.type = affinity_compact;
- __kmp_affinity.gran = KMP_HW_CORE;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_syntax_error_fallback(name, KMP_HW_CORE);
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
return;
}
+
+ kmp_hw_t gran = __kmp_affinity.gran;
if (__kmp_affinity.gran != KMP_HW_UNKNOWN) {
- kind = __kmp_hw_get_keyword(__kmp_affinity.gran);
+ gran = __kmp_affinity.gran;
+ } else {
+ gran = KMP_HW_CORE;
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
@@ -3078,7 +3181,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
// Parse option count parameter in parentheses
if (*scan != '(') {
- KMP_WARNING(SyntaxErrorUsing, name, kind);
+ __kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip '('
@@ -3092,7 +3195,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
SKIP_WS(scan);
if (*scan != ')') {
- KMP_WARNING(SyntaxErrorUsing, name, kind);
+ __kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip ')'
@@ -3135,12 +3238,37 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
num = 0;
}
if (gran != KMP_HW_UNKNOWN) {
+ // If core_types or core_effs, just print and return
+ if (__kmp_affinity.flags.core_types_gran) {
+ __kmp_str_buf_print(buffer, "='%s'\n", "core_types");
+ return;
+ }
+ if (__kmp_affinity.flags.core_effs_gran) {
+ __kmp_str_buf_print(buffer, "='%s'\n", "core_effs");
+ return;
+ }
+
+ // threads, cores, sockets, cores:<attribute>, etc.
const char *name = __kmp_hw_get_keyword(gran, true);
- if (num > 0) {
- __kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num);
- } else {
- __kmp_str_buf_print(buffer, "='%s'\n", name);
+ __kmp_str_buf_print(buffer, "='%s", name);
+
+    // Add the core attribute (":intel_core", ":intel_atom", or ":eff#") if one was set
+ if (__kmp_affinity.core_attr_gran.valid) {
+ kmp_hw_core_type_t ct =
+ (kmp_hw_core_type_t)__kmp_affinity.core_attr_gran.core_type;
+ int eff = __kmp_affinity.core_attr_gran.core_eff;
+ if (ct != KMP_HW_CORE_TYPE_UNKNOWN) {
+ const char *ct_name = __kmp_hw_get_core_type_keyword(ct);
+        __kmp_str_buf_print(buffer, ":%s", ct_name);
+      } else if (eff >= 0 && eff < KMP_HW_MAX_NUM_CORE_EFFS) {
+        __kmp_str_buf_print(buffer, ":eff%d", eff);
+ }
}
+
+ // Add the '(#)' part if it exists
+ if (num > 0)
+ __kmp_str_buf_print(buffer, "(%d)", num);
+ __kmp_str_buf_print(buffer, "'\n");
} else {
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
}
@@ -5139,21 +5267,6 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
return;
}
-static inline const char *
-__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
- switch (type) {
- case KMP_HW_CORE_TYPE_UNKNOWN:
- return "unknown";
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- case KMP_HW_CORE_TYPE_ATOM:
- return "intel_atom";
- case KMP_HW_CORE_TYPE_CORE:
- return "intel_core";
-#endif
- }
- return "unknown";
-}
-
static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
void *data) {
kmp_str_buf_t buf;
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 260b982af200e5..56022e19695d40 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1242,6 +1242,7 @@ static void __kmp_atfork_child(void) {
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
__kmp_affin_fullMask = nullptr;
__kmp_affin_origMask = nullptr;
+ __kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED
#if KMP_USE_MONITOR
diff --git a/openmp/runtime/test/affinity/omp-places-invalid-syntax.c b/openmp/runtime/test/affinity/omp-places-invalid-syntax.c
index 1157bb61d9f320..c2edcef38f7f8d 100644
--- a/openmp/runtime/test/affinity/omp-places-invalid-syntax.c
+++ b/openmp/runtime/test/affinity/omp-places-invalid-syntax.c
@@ -1,7 +1,20 @@
-// RUN: %libomp-compile && env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck %s
-// CHECK-DAG: Effective settings
-// CHECK: OMP_PLACES=
-// CHECK-SAME: cores
+// RUN: %libomp-compile
+// RUN: env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck --check-prefix=INVALID %s
+// RUN: env KMP_SETTINGS=1 OMP_PLACES='sockets(' %libomp-run 2>&1 | FileCheck --check-prefix=SOCKETS %s
+// RUN: env KMP_SETTINGS=1 OMP_PLACES='threads()' %libomp-run 2>&1 | FileCheck --check-prefix=THREADS %s
+//
+// INVALID-DAG: Effective settings
+// INVALID: OMP_PLACES=
+// INVALID-SAME: cores
+//
+// SOCKETS-DAG: Effective settings
+// SOCKETS: OMP_PLACES=
+// SOCKETS-SAME: sockets
+//
+// THREADS-DAG: Effective settings
+// THREADS: OMP_PLACES=
+// THREADS-SAME: threads
+//
// REQUIRES: affinity
#include "omp_testsuite.h"
More information about the Openmp-commits
mailing list