[Openmp-commits] [openmp] b34c7d8 - [OpenMP] Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY
Jonathan Peyton via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jul 31 11:56:32 PDT 2023
Author: Jonathan Peyton
Date: 2023-07-31T13:55:32-05:00
New Revision: b34c7d8c8e1a36bdd432210247511aee4423a316
URL: https://github.com/llvm/llvm-project/commit/b34c7d8c8e1a36bdd432210247511aee4423a316
DIFF: https://github.com/llvm/llvm-project/commit/b34c7d8c8e1a36bdd432210247511aee4423a316.diff
LOG: [OpenMP] Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY
* Add KMP_CPU_EQUAL and KMP_CPU_ISEMPTY to affinity mask API
* Add printout of leader to hardware thread dump
* Allow OMP_PLACES to restrict fullMask
This change fixes an issue with the OMP_PLACES=resource(#) syntax.
Before this change, specifying the number of resources did NOT change
the default number of threads created by the runtime. e.g.,
OMP_PLACES=cores(2) would still create __kmp_avail_proc number of
threads. After this change, the fullMask and __kmp_avail_proc are
modified if necessary so that the final place list dictates which
resources are available and, thus, how many threads are created by
default.
* Introduce hybrid core attributes to OMP_PLACES and KMP_AFFINITY
For OMP_PLACES, two new features are added:
1) OMP_PLACES=cores:<attribute> where <attribute> is either
intel_atom, intel_core, or eff# where # ranges from 0 to
(number of core efficiencies - 1). This syntax also supports the
optional (#) number selection of resources.
2) OMP_PLACES=core_types|core_effs where this setting will create
the number of core_types (or core_effs|core_efficiencies).
For KMP_AFFINITY, the granularity setting is expanded to include two new
keywords: core_type, and core_eff (or core_efficiency). This will set
the granularity to include all cores with a particular core type (or
efficiency). e.g., KMP_AFFINITY=granularity=core_type,compact will
create threads which can float across a single core type.
Differential Revision: https://reviews.llvm.org/D154547
Added:
Modified:
openmp/runtime/src/i18n/en_US.txt
openmp/runtime/src/kmp.h
openmp/runtime/src/kmp_affinity.cpp
openmp/runtime/src/kmp_affinity.h
openmp/runtime/src/kmp_settings.cpp
openmp/runtime/src/z_Linux_util.cpp
openmp/runtime/test/affinity/omp-places-invalid-syntax.c
Removed:
################################################################################
diff --git a/openmp/runtime/src/i18n/en_US.txt b/openmp/runtime/src/i18n/en_US.txt
index 228bcdb25a8ea7..08e837d3dea11e 100644
--- a/openmp/runtime/src/i18n/en_US.txt
+++ b/openmp/runtime/src/i18n/en_US.txt
@@ -480,6 +480,8 @@ AffHWSubsetAllFiltered "KMP_HW_SUBSET ignored: all hardware resources woul
AffHWSubsetAttrsNonHybrid "KMP_HW_SUBSET ignored: Too many attributes specified. This machine is not a hybrid architecutre."
AffHWSubsetIgnoringAttr "KMP_HW_SUBSET: ignoring %1$s attribute. This machine is not a hybrid architecutre."
TargetMemNotAvailable "Target memory not available, will use default allocator."
+AffIgnoringNonHybrid "%1$s ignored: This machine is not a hybrid architecutre. Using \"%2$s\" instead."
+AffIgnoringNotAvailable "%1$s ignored: %2$s is not available. Using \"%3$s\" instead."
# --------------------------------------------------------------------------------------------------
-*- HINTS -*-
diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 641d32357ce873..a9308ed938a1a6 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -690,10 +690,12 @@ extern size_t __kmp_affin_mask_size;
#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)
#define KMP_CPU_CLR(i, mask) (mask)->clear(i)
#define KMP_CPU_ZERO(mask) (mask)->zero()
+#define KMP_CPU_ISEMPTY(mask) (mask)->empty()
#define KMP_CPU_COPY(dest, src) (dest)->copy(src)
#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)
#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()
#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)
+#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src)
#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())
#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)
#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)
@@ -730,6 +732,8 @@ class KMPAffinity {
virtual void clear(int i) {}
// Zero out entire mask
virtual void zero() {}
+ // Check whether mask is empty
+ virtual bool empty() const { return true; }
// Copy src into this mask
virtual void copy(const Mask *src) {}
// this &= rhs
@@ -738,6 +742,8 @@ class KMPAffinity {
virtual void bitwise_or(const Mask *rhs) {}
// this = ~this
virtual void bitwise_not() {}
+ // this == rhs
+ virtual bool is_equal(const Mask *rhs) const { return false; }
// API for iterating over an affinity mask
// for (int i = mask->begin(); i != mask->end(); i = mask->next(i))
virtual int begin() const { return 0; }
@@ -866,7 +872,10 @@ typedef struct kmp_affinity_flags_t {
unsigned respect : 2;
unsigned reset : 1;
unsigned initialized : 1;
- unsigned reserved : 25;
+ unsigned core_types_gran : 1;
+ unsigned core_effs_gran : 1;
+ unsigned omp_places : 1;
+ unsigned reserved : 22;
} kmp_affinity_flags_t;
KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
@@ -895,6 +904,7 @@ typedef struct kmp_affinity_t {
enum affinity_type type;
kmp_hw_t gran;
int gran_levels;
+ kmp_affinity_attrs_t core_attr_gran;
int compact;
int offset;
kmp_affinity_flags_t flags;
@@ -909,9 +919,11 @@ typedef struct kmp_affinity_t {
#define KMP_AFFINITY_INIT(env) \
{ \
- nullptr, affinity_default, KMP_HW_UNKNOWN, -1, 0, 0, \
- {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE}, 0, \
- nullptr, nullptr, nullptr, 0, nullptr, env \
+ nullptr, affinity_default, KMP_HW_UNKNOWN, -1, KMP_AFFINITY_ATTRS_UNKNOWN, \
+ 0, 0, \
+ {TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE, \
+ FALSE, FALSE, FALSE}, \
+ 0, nullptr, nullptr, nullptr, 0, nullptr, env \
}
extern enum affinity_top_method __kmp_affinity_top_method;
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index cbb80bf3a8485d..ce7a41811d4a14 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -38,6 +38,43 @@ static hierarchy_info machine_hierarchy;
void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
+#if KMP_AFFINITY_SUPPORTED
+// Helper class to see if place lists further restrict the fullMask
+class kmp_full_mask_modifier_t {
+ kmp_affin_mask_t *mask;
+
+public:
+ kmp_full_mask_modifier_t() {
+ KMP_CPU_ALLOC(mask);
+ KMP_CPU_ZERO(mask);
+ }
+ ~kmp_full_mask_modifier_t() {
+ KMP_CPU_FREE(mask);
+ mask = nullptr;
+ }
+ void include(const kmp_affin_mask_t *other) { KMP_CPU_UNION(mask, other); }
+ // If the new full mask is different from the current full mask,
+ // then switch them. Returns true if full mask was affected, false otherwise.
+ bool restrict_to_mask() {
+ // See if the new mask further restricts or changes the full mask
+ if (KMP_CPU_EQUAL(__kmp_affin_fullMask, mask) || KMP_CPU_ISEMPTY(mask))
+ return false;
+ return __kmp_topology->restrict_to_mask(mask);
+ }
+};
+
+static inline const char *
+__kmp_get_affinity_env_var(const kmp_affinity_t &affinity,
+ bool for_binding = false) {
+ if (affinity.flags.omp_places) {
+ if (for_binding)
+ return "OMP_PROC_BIND";
+ return "OMP_PLACES";
+ }
+ return affinity.env_var;
+}
+#endif // KMP_AFFINITY_SUPPORTED
+
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
kmp_uint32 depth;
// The test below is true if affinity is available, but set to "none". Need to
@@ -207,6 +244,8 @@ void kmp_hw_thread_t::print() const {
if (attrs.is_core_eff_valid())
printf(" (eff=%d)", attrs.get_core_eff());
}
+ if (leader)
+ printf(" (leader)");
printf("\n");
}
@@ -797,7 +836,40 @@ void kmp_topology_t::print(const char *env_var) const {
#if KMP_AFFINITY_SUPPORTED
void kmp_topology_t::set_granularity(kmp_affinity_t &affinity) const {
- const char *env_var = affinity.env_var;
+ const char *env_var = __kmp_get_affinity_env_var(affinity);
+ // If requested hybrid CPU attributes for granularity (either OMP_PLACES or
+ // KMP_AFFINITY), but none exist, then reset granularity and have below method
+ // select a granularity and warn user.
+ if (!__kmp_is_hybrid_cpu()) {
+ if (affinity.core_attr_gran.valid) {
+ // OMP_PLACES with cores:<attribute> but non-hybrid arch, use cores
+ // instead
+ KMP_AFF_WARNING(
+ affinity, AffIgnoringNonHybrid, env_var,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
+ affinity.gran = KMP_HW_CORE;
+ affinity.gran_levels = -1;
+ affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
+ affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
+ } else if (affinity.flags.core_types_gran ||
+ affinity.flags.core_effs_gran) {
+ // OMP_PLACES=core_types|core_effs but non-hybrid, use cores instead
+ if (affinity.flags.omp_places) {
+ KMP_AFF_WARNING(
+ affinity, AffIgnoringNonHybrid, env_var,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true));
+ } else {
+ // KMP_AFFINITY=granularity=core_type|core_eff,...
+ KMP_AFF_WARNING(affinity, AffGranularityBad, env_var,
+ "Intel(R) Hybrid Technology core attribute",
+ __kmp_hw_get_catalog_string(KMP_HW_CORE));
+ }
+ affinity.gran = KMP_HW_CORE;
+ affinity.gran_levels = -1;
+ affinity.core_attr_gran = KMP_AFFINITY_ATTRS_UNKNOWN;
+ affinity.flags.core_types_gran = affinity.flags.core_effs_gran = 0;
+ }
+ }
// Set the number of affinity granularity levels
if (affinity.gran_levels < 0) {
kmp_hw_t gran_type = get_equivalent_type(affinity.gran);
@@ -937,6 +1009,7 @@ template <size_t SIZE, typename IndexFunc> struct kmp_sub_ids_t {
}
};
+#if KMP_AFFINITY_SUPPORTED
static kmp_str_buf_t *
__kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
bool plural) {
@@ -952,6 +1025,41 @@ __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf,
return buf;
}
+bool kmp_topology_t::restrict_to_mask(const kmp_affin_mask_t *mask) {
+ // Apply the filter
+ bool affected;
+ int new_index = 0;
+ for (int i = 0; i < num_hw_threads; ++i) {
+ int os_id = hw_threads[i].os_id;
+ if (KMP_CPU_ISSET(os_id, mask)) {
+ if (i != new_index)
+ hw_threads[new_index] = hw_threads[i];
+ new_index++;
+ } else {
+ KMP_CPU_CLR(os_id, __kmp_affin_fullMask);
+ __kmp_avail_proc--;
+ }
+ }
+
+ KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
+ affected = (num_hw_threads != new_index);
+ num_hw_threads = new_index;
+
+ // Post hardware subset canonicalization
+ if (affected) {
+ _gather_enumeration_information();
+ _discover_uniformity();
+ _set_globals();
+ _set_last_level_cache();
+#if KMP_OS_WINDOWS
+ // Copy filtered full mask if topology has single processor group
+ if (__kmp_num_proc_groups <= 1)
+#endif
+ __kmp_affin_origMask->copy(__kmp_affin_fullMask);
+ }
+ return affected;
+}
+
// Apply the KMP_HW_SUBSET envirable to the topology
// Returns true if KMP_HW_SUBSET filtered any processors
// otherwise, returns false
@@ -1156,7 +1264,9 @@ bool kmp_topology_t::filter_hw_subset() {
// Determine which hardware threads should be filtered.
int num_filtered = 0;
- bool *filtered = (bool *)__kmp_allocate(sizeof(bool) * num_hw_threads);
+ kmp_affin_mask_t *filtered_mask;
+ KMP_CPU_ALLOC(filtered_mask);
+ KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask);
for (int i = 0; i < num_hw_threads; ++i) {
kmp_hw_thread_t &hw_thread = hw_threads[i];
// Update type_sub_id
@@ -1218,51 +1328,35 @@ bool kmp_topology_t::filter_hw_subset() {
}
}
// Collect filtering information
- filtered[i] = should_be_filtered;
- if (should_be_filtered)
+ if (should_be_filtered) {
+ KMP_CPU_CLR(hw_thread.os_id, filtered_mask);
num_filtered++;
+ }
}
// One last check that we shouldn't allow filtering entire machine
if (num_filtered == num_hw_threads) {
KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetAllFiltered);
- __kmp_free(filtered);
return false;
}
// Apply the filter
- int new_index = 0;
- for (int i = 0; i < num_hw_threads; ++i) {
- if (!filtered[i]) {
- if (i != new_index)
- hw_threads[new_index] = hw_threads[i];
- new_index++;
- } else {
-#if KMP_AFFINITY_SUPPORTED
- KMP_CPU_CLR(hw_threads[i].os_id, __kmp_affin_fullMask);
-#endif
- __kmp_avail_proc--;
- }
- }
-
- KMP_DEBUG_ASSERT(new_index <= num_hw_threads);
- num_hw_threads = new_index;
-
- // Post hardware subset canonicalization
- _gather_enumeration_information();
- _discover_uniformity();
- _set_globals();
- _set_last_level_cache();
- __kmp_free(filtered);
+ restrict_to_mask(filtered_mask);
return true;
}
-bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
+bool kmp_topology_t::is_close(int hwt1, int hwt2,
+ const kmp_affinity_t &stgs) const {
+ int hw_level = stgs.gran_levels;
if (hw_level >= depth)
return true;
bool retval = true;
const kmp_hw_thread_t &t1 = hw_threads[hwt1];
const kmp_hw_thread_t &t2 = hw_threads[hwt2];
+ if (stgs.flags.core_types_gran)
+ return t1.attrs.get_core_type() == t2.attrs.get_core_type();
+ if (stgs.flags.core_effs_gran)
+ return t1.attrs.get_core_eff() == t2.attrs.get_core_eff();
for (int i = 0; i < (depth - hw_level); ++i) {
if (t1.ids[i] != t2.ids[i])
return false;
@@ -1272,8 +1366,6 @@ bool kmp_topology_t::is_close(int hwt1, int hwt2, int hw_level) const {
////////////////////////////////////////////////////////////////////////////////
-#if KMP_AFFINITY_SUPPORTED
-
bool KMPAffinity::picked_api = false;
void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
@@ -3353,17 +3445,25 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
+template <typename FindNextFunctionType>
static void __kmp_create_os_id_masks(unsigned *numUnique,
- kmp_affinity_t &affinity) {
+ kmp_affinity_t &affinity,
+ FindNextFunctionType find_next) {
// First form a table of affinity masks in order of OS thread id.
int maxOsId;
int i;
int numAddrs = __kmp_topology->get_num_hw_threads();
int depth = __kmp_topology->get_depth();
- const char *env_var = affinity.env_var;
+ const char *env_var = __kmp_get_affinity_env_var(affinity);
KMP_ASSERT(numAddrs);
KMP_ASSERT(depth);
+ i = find_next(-1);
+ // If could not find HW thread location with attributes, then return and
+ // fallback to increment find_next and disregard core attributes.
+ if (i >= numAddrs)
+ return;
+
maxOsId = 0;
for (i = numAddrs - 1;; --i) {
int osId = __kmp_topology->at(i).os_id;
@@ -3393,19 +3493,22 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
kmp_affin_mask_t *sum;
KMP_CPU_ALLOC_ON_STACK(sum);
KMP_CPU_ZERO(sum);
- KMP_CPU_SET(__kmp_topology->at(0).os_id, sum);
- for (i = 1; i < numAddrs; i++) {
+
+ i = j = leader = find_next(-1);
+ KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
+ kmp_full_mask_modifier_t full_mask;
+ for (i = find_next(i); i < numAddrs; i = find_next(i)) {
// If this thread is sufficiently close to the leader (within the
// granularity setting), then set the bit for this os thread in the
// affinity mask for this group, and go on to the next thread.
- if (__kmp_topology->is_close(leader, i, affinity.gran_levels)) {
+ if (__kmp_topology->is_close(leader, i, affinity)) {
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
continue;
}
// For every thread in this group, copy the mask to the thread's entry in
// the OS Id mask table. Mark the first address as a leader.
- for (; j < i; j++) {
+ for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
@@ -3416,22 +3519,29 @@ static void __kmp_create_os_id_masks(unsigned *numUnique,
// Start a new mask.
leader = i;
+ full_mask.include(sum);
KMP_CPU_ZERO(sum);
KMP_CPU_SET(__kmp_topology->at(i).os_id, sum);
}
// For every thread in last group, copy the mask to the thread's
// entry in the OS Id mask table.
- for (; j < i; j++) {
+ for (; j < i; j = find_next(j)) {
int osId = __kmp_topology->at(j).os_id;
KMP_DEBUG_ASSERT(osId <= maxOsId);
kmp_affin_mask_t *mask = KMP_CPU_INDEX(affinity.os_id_masks, osId);
KMP_CPU_COPY(mask, sum);
__kmp_topology->at(j).leader = (j == leader);
}
+ full_mask.include(sum);
unique++;
KMP_CPU_FREE_FROM_STACK(sum);
+ // See if the OS Id mask table further restricts or changes the full mask
+ if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
+ __kmp_topology->print(env_var);
+ }
+
*numUnique = unique;
}
@@ -4134,8 +4244,11 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
// Create the OS proc to hardware thread map
- for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread)
- __kmp_osid_to_hwthread_map[__kmp_topology->at(hw_thread).os_id] = hw_thread;
+ for (int hw_thread = 0; hw_thread < num_hw_threads; ++hw_thread) {
+ int os_id = __kmp_topology->at(hw_thread).os_id;
+ if (KMP_CPU_ISSET(os_id, __kmp_affin_fullMask))
+ __kmp_osid_to_hwthread_map[os_id] = hw_thread;
+ }
for (unsigned i = 0; i < affinity.num_masks; ++i) {
kmp_affinity_ids_t &ids = affinity.ids[i];
@@ -4145,16 +4258,26 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {
}
}
+// Called when __kmp_topology is ready
+static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
+ // Initialize data dependent on __kmp_topology
+ if (__kmp_topology) {
+ machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
+ __kmp_affinity_get_topology_info(affinity);
+ }
+}
+
// Create a one element mask array (set of places) which only contains the
// initial process's affinity mask
static void __kmp_create_affinity_none_places(kmp_affinity_t &affinity) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
KMP_ASSERT(affinity.type == affinity_none);
+ KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
affinity.num_masks = 1;
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, 0);
KMP_CPU_COPY(dest, __kmp_affin_fullMask);
- __kmp_affinity_get_topology_info(affinity);
+ __kmp_aux_affinity_initialize_other_data(affinity);
}
static void __kmp_aux_affinity_initialize_masks(kmp_affinity_t &affinity) {
@@ -4383,13 +4506,6 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
if (verbose)
__kmp_topology->print(env_var);
bool filtered = __kmp_topology->filter_hw_subset();
- if (filtered) {
-#if KMP_OS_WINDOWS
- // Copy filtered full mask if topology has single processor group
- if (__kmp_num_proc_groups <= 1)
-#endif
- __kmp_affin_origMask->copy(__kmp_affin_fullMask);
- }
if (filtered && verbose)
__kmp_topology->print("KMP_HW_SUBSET");
return success;
@@ -4398,7 +4514,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
bool is_regular_affinity = (&affinity == &__kmp_affinity);
bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
- const char *env_var = affinity.env_var;
+ const char *env_var = __kmp_get_affinity_env_var(affinity);
if (affinity.flags.initialized) {
KMP_ASSERT(__kmp_affin_fullMask != NULL);
@@ -4437,7 +4553,36 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
// Create the table of masks, indexed by thread Id.
unsigned numUnique;
- __kmp_create_os_id_masks(&numUnique, affinity);
+ int numAddrs = __kmp_topology->get_num_hw_threads();
+ // If OMP_PLACES=cores:<attribute> specified, then attempt
+ // to make OS Id mask table using those attributes
+ if (affinity.core_attr_gran.valid) {
+ __kmp_create_os_id_masks(&numUnique, affinity, [&](int idx) {
+ KMP_ASSERT(idx >= -1);
+ for (int i = idx + 1; i < numAddrs; ++i)
+ if (__kmp_topology->at(i).attrs.contains(affinity.core_attr_gran))
+ return i;
+ return numAddrs;
+ });
+ if (!affinity.os_id_masks) {
+ const char *core_attribute;
+ if (affinity.core_attr_gran.core_eff != kmp_hw_attr_t::UNKNOWN_CORE_EFF)
+ core_attribute = "core_efficiency";
+ else
+ core_attribute = "core_type";
+ KMP_AFF_WARNING(affinity, AffIgnoringNotAvailable, env_var,
+ core_attribute,
+ __kmp_hw_get_catalog_string(KMP_HW_CORE, /*plural=*/true))
+ }
+ }
+ // If core attributes did not work, or none were specified,
+ // then make OS Id mask table using typical incremental way.
+ if (!affinity.os_id_masks) {
+ __kmp_create_os_id_masks(&numUnique, affinity, [](int idx) {
+ KMP_ASSERT(idx >= -1);
+ return idx + 1;
+ });
+ }
if (affinity.gran_levels == 0) {
KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
}
@@ -4578,6 +4723,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
int i;
unsigned j;
int num_hw_threads = __kmp_topology->get_num_hw_threads();
+ kmp_full_mask_modifier_t full_mask;
for (i = 0, j = 0; i < num_hw_threads; i++) {
if ((!affinity.flags.dups) && (!__kmp_topology->at(i).leader)) {
continue;
@@ -4588,11 +4734,16 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
kmp_affin_mask_t *dest = KMP_CPU_INDEX(affinity.masks, j);
KMP_ASSERT(KMP_CPU_ISSET(osId, src));
KMP_CPU_COPY(dest, src);
+ full_mask.include(src);
if (++j >= affinity.num_masks) {
break;
}
}
KMP_DEBUG_ASSERT(j == affinity.num_masks);
+ // See if the places list further restricts or changes the full mask
+ if (full_mask.restrict_to_mask() && affinity.flags.verbose) {
+ __kmp_topology->print(env_var);
+ }
}
// Sort the topology back using ids
__kmp_topology->sort_ids();
@@ -4601,7 +4752,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
default:
KMP_ASSERT2(0, "Unexpected affinity setting");
}
- __kmp_affinity_get_topology_info(affinity);
+ __kmp_aux_affinity_initialize_other_data(affinity);
affinity.flags.initialized = TRUE;
}
@@ -4722,7 +4873,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
- env_var = affinity->env_var;
+ env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
if ((affinity->type == affinity_none) ||
diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index f27dd9a5339e86..fbc0d400de772e 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -34,6 +34,7 @@ class KMPHwlocAffinity : public KMPAffinity {
bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
void clear(int i) override { hwloc_bitmap_clr(mask, i); }
void zero() override { hwloc_bitmap_zero(mask); }
+ bool empty() const override { return hwloc_bitmap_iszero(mask); }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
hwloc_bitmap_copy(mask, convert->mask);
@@ -47,6 +48,10 @@ class KMPHwlocAffinity : public KMPAffinity {
hwloc_bitmap_or(mask, mask, convert->mask);
}
void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
+ bool is_equal(const KMPAffinity::Mask *rhs) const override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ return hwloc_bitmap_isequal(mask, convert->mask);
+ }
int begin() const override { return hwloc_bitmap_first(mask); }
int end() const override { return -1; }
int next(int previous) const override {
@@ -319,6 +324,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = (mask_t)0;
}
+ bool empty() const override {
+ mask_size_type e = get_num_mask_types();
+ for (mask_size_type i = 0; i < e; ++i)
+ if (mask[i] != (mask_t)0)
+ return false;
+ return true;
+ }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
mask_size_type e = get_num_mask_types();
@@ -342,6 +354,14 @@ class KMPNativeAffinity : public KMPAffinity {
for (mask_size_type i = 0; i < e; ++i)
mask[i] = ~(mask[i]);
}
+ bool is_equal(const KMPAffinity::Mask *rhs) const override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ mask_size_type e = get_num_mask_types();
+ for (mask_size_type i = 0; i < e; ++i)
+ if (mask[i] != convert->mask[i])
+ return false;
+ return true;
+ }
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -459,6 +479,12 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = 0;
}
+ bool empty() const override {
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ if (mask[i])
+ return false;
+ return true;
+ }
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
for (int i = 0; i < __kmp_num_proc_groups; ++i)
@@ -478,6 +504,13 @@ class KMPNativeAffinity : public KMPAffinity {
for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = ~(mask[i]);
}
+ bool is_equal(const KMPAffinity::Mask *rhs) const override {
+ const Mask *convert = static_cast<const Mask *>(rhs);
+ for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ if (mask[i] != convert->mask[i])
+ return false;
+ return true;
+ }
int begin() const override {
int retval = 0;
while (retval < end() && !is_set(retval))
@@ -679,6 +712,21 @@ struct kmp_hw_attr_t {
}
return false;
}
+#if KMP_AFFINITY_SUPPORTED
+ bool contains(const kmp_affinity_attrs_t &attr) const {
+ if (!valid && !attr.valid)
+ return true;
+ if (valid && attr.valid) {
+ if (attr.core_type != KMP_HW_CORE_TYPE_UNKNOWN)
+ return (is_core_type_valid() &&
+ (get_core_type() == (kmp_hw_core_type_t)attr.core_type));
+ if (attr.core_eff != UNKNOWN_CORE_EFF)
+ return (is_core_eff_valid() && (get_core_eff() == attr.core_eff));
+ return true;
+ }
+ return false;
+ }
+#endif // KMP_AFFINITY_SUPPORTED
bool operator==(const kmp_hw_attr_t &rhs) const {
return (rhs.valid == valid && rhs.core_eff == core_eff &&
rhs.core_type == core_type);
@@ -834,13 +882,18 @@ class kmp_topology_t {
#if KMP_AFFINITY_SUPPORTED
// Set the granularity for affinity settings
void set_granularity(kmp_affinity_t &stgs) const;
-#endif
+ bool is_close(int hwt1, int hwt2, const kmp_affinity_t &stgs) const;
+ bool restrict_to_mask(const kmp_affin_mask_t *mask);
bool filter_hw_subset();
- bool is_close(int hwt1, int hwt2, int level) const;
+#endif
bool is_uniform() const { return flags.uniform; }
// Tell whether a type is a valid type in the topology
// returns KMP_HW_UNKNOWN when there is no equivalent type
- kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
+ kmp_hw_t get_equivalent_type(kmp_hw_t type) const {
+ if (type == KMP_HW_UNKNOWN)
+ return KMP_HW_UNKNOWN;
+ return equivalent[type];
+ }
// Set type1 = type2
void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b81376d1632bad..647cf22d3d0aee 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -2005,6 +2005,21 @@ static void __kmp_stg_print_foreign_threads_threadprivate(kmp_str_buf_t *buffer,
// -----------------------------------------------------------------------------
// KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD
+static inline const char *
+__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
+ switch (type) {
+ case KMP_HW_CORE_TYPE_UNKNOWN:
+ return "unknown";
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ case KMP_HW_CORE_TYPE_ATOM:
+ return "intel_atom";
+ case KMP_HW_CORE_TYPE_CORE:
+ return "intel_core";
+#endif
+ }
+ return "unknown";
+}
+
#if KMP_AFFINITY_SUPPORTED
// Parse the proc id list. Return TRUE if successful, FALSE otherwise.
static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env,
@@ -2359,14 +2374,32 @@ static void __kmp_parse_affinity_env(char const *name, char const *value,
buf = next;
- // Try any hardware topology type for granularity
- KMP_FOREACH_HW_TYPE(type) {
- const char *name = __kmp_hw_get_keyword(type);
- if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
- set_gran(type, -1);
- buf = next;
- set = true;
- break;
+ // Have to try core_type and core_efficiency matches first since "core"
+ // will register as core granularity with "extra chars"
+ if (__kmp_match_str("core_type", buf, CCAST(const char **, &next))) {
+ set_gran(KMP_HW_CORE, -1);
+ out_affinity->flags.core_types_gran = 1;
+ buf = next;
+ set = true;
+ } else if (__kmp_match_str("core_efficiency", buf,
+ CCAST(const char **, &next)) ||
+ __kmp_match_str("core_eff", buf,
+ CCAST(const char **, &next))) {
+ set_gran(KMP_HW_CORE, -1);
+ out_affinity->flags.core_effs_gran = 1;
+ buf = next;
+ set = true;
+ }
+ if (!set) {
+ // Try any hardware topology type for granularity
+ KMP_FOREACH_HW_TYPE(type) {
+ const char *name = __kmp_hw_get_keyword(type);
+ if (__kmp_match_str(name, buf, CCAST(const char **, &next))) {
+ set_gran(type, -1);
+ buf = next;
+ set = true;
+ break;
+ }
}
}
if (!set) {
@@ -2626,8 +2659,15 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
__kmp_str_buf_print(buffer, "%s,", "noreset");
}
}
- __kmp_str_buf_print(buffer, "granularity=%s,",
- __kmp_hw_get_keyword(affinity.gran, false));
+ __kmp_str_buf_print(buffer, "granularity=");
+ if (affinity.flags.core_types_gran)
+ __kmp_str_buf_print(buffer, "core_type,");
+ else if (affinity.flags.core_effs_gran) {
+ __kmp_str_buf_print(buffer, "core_eff,");
+ } else {
+ __kmp_str_buf_print(
+ buffer, "%s,", __kmp_hw_get_keyword(affinity.gran, /*plural=*/false));
+ }
}
if (!KMP_AFFINITY_CAPABLE()) {
__kmp_str_buf_print(buffer, "%s", "disabled");
@@ -2745,11 +2785,7 @@ signed := + signed
signed := - signed
-----------------------------------------------------------------------------*/
-// Warning to issue for syntax error during parsing of OMP_PLACES
-static inline void __kmp_omp_places_syntax_warn(const char *var) {
- KMP_WARNING(SyntaxErrorUsing, var, "\"cores\"");
-}
-
+// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_subplace_list(const char *var, const char **scan) {
const char *next;
@@ -2761,7 +2797,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
//
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2780,7 +2815,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2788,7 +2822,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
// Read count parameter
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2807,7 +2840,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
continue;
}
if (**scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip ':'
@@ -2829,7 +2861,6 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
}
SKIP_WS(*scan);
if ((**scan < '0') || (**scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = *scan;
@@ -2848,13 +2879,12 @@ static int __kmp_parse_subplace_list(const char *var, const char **scan) {
(*scan)++; // skip ','
continue;
}
-
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
+// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place(const char *var, const char **scan) {
const char *next;
@@ -2866,7 +2896,6 @@ static int __kmp_parse_place(const char *var, const char **scan) {
return FALSE;
}
if (**scan != '}') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
(*scan)++; // skip '}'
@@ -2880,12 +2909,12 @@ static int __kmp_parse_place(const char *var, const char **scan) {
KMP_ASSERT(proc >= 0);
*scan = next;
} else {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
return TRUE;
}
+// Return TRUE if successful parse, FALSE otherwise
static int __kmp_parse_place_list(const char *var, const char *env,
char **place_list) {
const char *scan = env;
@@ -2908,7 +2937,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2916,7 +2944,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
// Read count parameter
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2935,7 +2962,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
if (*scan != ':') {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
scan++; // skip ':'
@@ -2957,7 +2983,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
}
SKIP_WS(scan);
if ((*scan < '0') || (*scan > '9')) {
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
next = scan;
@@ -2977,7 +3002,6 @@ static int __kmp_parse_place_list(const char *var, const char *env,
continue;
}
- __kmp_omp_places_syntax_warn(var);
return FALSE;
}
@@ -2991,6 +3015,22 @@ static int __kmp_parse_place_list(const char *var, const char *env,
return TRUE;
}
+static inline void __kmp_places_set(enum affinity_type type, kmp_hw_t kind) {
+ __kmp_affinity.type = type;
+ __kmp_affinity.gran = kind;
+ __kmp_affinity.flags.dups = FALSE;
+ __kmp_affinity.flags.omp_places = TRUE;
+}
+
+static void __kmp_places_syntax_error_fallback(char const *name,
+ kmp_hw_t kind) {
+ const char *str = __kmp_hw_get_catalog_string(kind, /*plural=*/true);
+ KMP_WARNING(SyntaxErrorUsing, name, str);
+ __kmp_places_set(affinity_compact, kind);
+ if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)
+ __kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
+}
+
static void __kmp_stg_parse_places(char const *name, char const *value,
void *data) {
struct kmp_place_t {
@@ -3001,7 +3041,6 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
bool set = false;
const char *scan = value;
const char *next = scan;
- const char *kind = "\"threads\"";
kmp_place_t std_places[] = {{"threads", KMP_HW_THREAD},
{"cores", KMP_HW_CORE},
{"numa_domains", KMP_HW_NUMA},
@@ -3020,10 +3059,54 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
const kmp_place_t &place = std_places[i];
if (__kmp_match_str(place.name, scan, &next)) {
scan = next;
- __kmp_affinity.type = affinity_compact;
- __kmp_affinity.gran = place.type;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_set(affinity_compact, place.type);
set = true;
+ // Parse core attribute if it exists
+ if (KMP_HW_MAX_NUM_CORE_TYPES > 1) {
+ SKIP_WS(scan);
+ if (*scan == ':') {
+ if (place.type != KMP_HW_CORE) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ scan++; // skip ':'
+ SKIP_WS(scan);
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+ if (__kmp_match_str("intel_core", scan, &next)) {
+ __kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_CORE;
+ __kmp_affinity.core_attr_gran.valid = 1;
+ scan = next;
+ } else if (__kmp_match_str("intel_atom", scan, &next)) {
+ __kmp_affinity.core_attr_gran.core_type = KMP_HW_CORE_TYPE_ATOM;
+ __kmp_affinity.core_attr_gran.valid = 1;
+ scan = next;
+ } else
+#endif
+ if (__kmp_match_str("eff", scan, &next)) {
+ int eff;
+ if (!isdigit(*next)) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ scan = next;
+ SKIP_DIGITS(next);
+ eff = __kmp_str_to_int(scan, *next);
+ if (eff < 0) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ if (eff >= KMP_HW_MAX_NUM_CORE_EFFS)
+ eff = KMP_HW_MAX_NUM_CORE_EFFS - 1;
+ __kmp_affinity.core_attr_gran.core_eff = eff;
+ __kmp_affinity.core_attr_gran.valid = 1;
+ scan = next;
+ }
+ if (!__kmp_affinity.core_attr_gran.valid) {
+ __kmp_places_syntax_error_fallback(name, place.type);
+ return;
+ }
+ }
+ }
break;
}
}
@@ -3035,36 +3118,56 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
continue;
if (__kmp_match_str(name, scan, &next)) {
scan = next;
- __kmp_affinity.type = affinity_compact;
- __kmp_affinity.gran = type;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_set(affinity_compact, type);
set = true;
break;
}
}
}
+ // Implementation choices for OMP_PLACES based on core attributes
+ if (!set) {
+ if (__kmp_match_str("core_types", scan, &next)) {
+ scan = next;
+ if (*scan != '\0') {
+ KMP_WARNING(ParseExtraCharsWarn, name, scan);
+ }
+ __kmp_places_set(affinity_compact, KMP_HW_CORE);
+ __kmp_affinity.flags.core_types_gran = 1;
+ set = true;
+ } else if (__kmp_match_str("core_effs", scan, &next) ||
+ __kmp_match_str("core_efficiencies", scan, &next)) {
+ scan = next;
+ if (*scan != '\0') {
+ KMP_WARNING(ParseExtraCharsWarn, name, scan);
+ }
+ __kmp_places_set(affinity_compact, KMP_HW_CORE);
+ __kmp_affinity.flags.core_effs_gran = 1;
+ set = true;
+ }
+ }
+ // Explicit place list
if (!set) {
if (__kmp_affinity.proclist != NULL) {
KMP_INTERNAL_FREE((void *)__kmp_affinity.proclist);
__kmp_affinity.proclist = NULL;
}
if (__kmp_parse_place_list(name, value, &__kmp_affinity.proclist)) {
- __kmp_affinity.type = affinity_explicit;
- __kmp_affinity.gran = KMP_HW_THREAD;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_set(affinity_explicit, KMP_HW_THREAD);
} else {
// Syntax error fallback
- __kmp_affinity.type = affinity_compact;
- __kmp_affinity.gran = KMP_HW_CORE;
- __kmp_affinity.flags.dups = FALSE;
+ __kmp_places_syntax_error_fallback(name, KMP_HW_CORE);
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_true;
}
return;
}
+
+ kmp_hw_t gran = __kmp_affinity.gran;
if (__kmp_affinity.gran != KMP_HW_UNKNOWN) {
- kind = __kmp_hw_get_keyword(__kmp_affinity.gran);
+ gran = __kmp_affinity.gran;
+ } else {
+ gran = KMP_HW_CORE;
}
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) {
@@ -3078,7 +3181,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
// Parse option count parameter in parentheses
if (*scan != '(') {
- KMP_WARNING(SyntaxErrorUsing, name, kind);
+ __kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip '('
@@ -3092,7 +3195,7 @@ static void __kmp_stg_parse_places(char const *name, char const *value,
SKIP_WS(scan);
if (*scan != ')') {
- KMP_WARNING(SyntaxErrorUsing, name, kind);
+ __kmp_places_syntax_error_fallback(name, gran);
return;
}
scan++; // skip ')'
@@ -3135,12 +3238,37 @@ static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name,
num = 0;
}
if (gran != KMP_HW_UNKNOWN) {
+ // If core_types or core_effs, just print and return
+ if (__kmp_affinity.flags.core_types_gran) {
+ __kmp_str_buf_print(buffer, "='%s'\n", "core_types");
+ return;
+ }
+ if (__kmp_affinity.flags.core_effs_gran) {
+ __kmp_str_buf_print(buffer, "='%s'\n", "core_effs");
+ return;
+ }
+
+ // threads, cores, sockets, cores:<attribute>, etc.
const char *name = __kmp_hw_get_keyword(gran, true);
- if (num > 0) {
- __kmp_str_buf_print(buffer, "='%s(%d)'\n", name, num);
- } else {
- __kmp_str_buf_print(buffer, "='%s'\n", name);
+ __kmp_str_buf_print(buffer, "='%s", name);
+
+    // Add the core attribute (":intel_core", ":intel_atom", or ":eff#") if one was set
+ if (__kmp_affinity.core_attr_gran.valid) {
+ kmp_hw_core_type_t ct =
+ (kmp_hw_core_type_t)__kmp_affinity.core_attr_gran.core_type;
+ int eff = __kmp_affinity.core_attr_gran.core_eff;
+ if (ct != KMP_HW_CORE_TYPE_UNKNOWN) {
+ const char *ct_name = __kmp_hw_get_core_type_keyword(ct);
+        __kmp_str_buf_print(buffer, ":%s", ct_name);
+      } else if (eff >= 0 && eff < KMP_HW_MAX_NUM_CORE_EFFS) {
+        __kmp_str_buf_print(buffer, ":eff%d", eff);
+ }
}
+
+ // Add the '(#)' part if it exists
+ if (num > 0)
+ __kmp_str_buf_print(buffer, "(%d)", num);
+ __kmp_str_buf_print(buffer, "'\n");
} else {
__kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined));
}
@@ -5139,21 +5267,6 @@ static void __kmp_stg_parse_hw_subset(char const *name, char const *value,
return;
}
-static inline const char *
-__kmp_hw_get_core_type_keyword(kmp_hw_core_type_t type) {
- switch (type) {
- case KMP_HW_CORE_TYPE_UNKNOWN:
- return "unknown";
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
- case KMP_HW_CORE_TYPE_ATOM:
- return "intel_atom";
- case KMP_HW_CORE_TYPE_CORE:
- return "intel_core";
-#endif
- }
- return "unknown";
-}
-
static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name,
void *data) {
kmp_str_buf_t buf;
diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 260b982af200e5..56022e19695d40 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1242,6 +1242,7 @@ static void __kmp_atfork_child(void) {
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
__kmp_affin_fullMask = nullptr;
__kmp_affin_origMask = nullptr;
+ __kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED
#if KMP_USE_MONITOR
diff --git a/openmp/runtime/test/affinity/omp-places-invalid-syntax.c b/openmp/runtime/test/affinity/omp-places-invalid-syntax.c
index 1157bb61d9f320..c2edcef38f7f8d 100644
--- a/openmp/runtime/test/affinity/omp-places-invalid-syntax.c
+++ b/openmp/runtime/test/affinity/omp-places-invalid-syntax.c
@@ -1,7 +1,20 @@
-// RUN: %libomp-compile && env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck %s
-// CHECK-DAG: Effective settings
-// CHECK: OMP_PLACES=
-// CHECK-SAME: cores
+// RUN: %libomp-compile
+// RUN: env KMP_SETTINGS=1 OMP_PLACES=invalid %libomp-run 2>&1 | FileCheck --check-prefix=INVALID %s
+// RUN: env KMP_SETTINGS=1 OMP_PLACES='sockets(' %libomp-run 2>&1 | FileCheck --check-prefix=SOCKETS %s
+// RUN: env KMP_SETTINGS=1 OMP_PLACES='threads()' %libomp-run 2>&1 | FileCheck --check-prefix=THREADS %s
+//
+// INVALID-DAG: Effective settings
+// INVALID: OMP_PLACES=
+// INVALID-SAME: cores
+//
+// SOCKETS-DAG: Effective settings
+// SOCKETS: OMP_PLACES=
+// SOCKETS-SAME: sockets
+//
+// THREADS-DAG: Effective settings
+// THREADS: OMP_PLACES=
+// THREADS-SAME: threads
+//
// REQUIRES: affinity
#include "omp_testsuite.h"
More information about the Openmp-commits
mailing list