[Openmp-commits] [openmp] 7a9643f - [OpenMP][libomp] Add hidden helper affinity

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Fri Oct 28 13:21:42 PDT 2022


Author: Jonathan Peyton
Date: 2022-10-28T15:21:07-05:00
New Revision: 7a9643fd2a07976576fd82b987d1eca924a747dd

URL: https://github.com/llvm/llvm-project/commit/7a9643fd2a07976576fd82b987d1eca924a747dd
DIFF: https://github.com/llvm/llvm-project/commit/7a9643fd2a07976576fd82b987d1eca924a747dd.diff

LOG: [OpenMP][libomp] Add hidden helper affinity

Add new hidden helper affinity via the environment variable,
KMP_HIDDEN_HELPER_AFFINITY, which allows users to assign thread
affinity to hidden helper threads using the same syntax as
KMP_AFFINITY. OMP_PLACES/OMP_PROC_BIND have no interaction with
KMP_HIDDEN_HELPER_AFFINITY.

Differential Revision: https://reviews.llvm.org/D135113

Added: 
    openmp/runtime/test/tasking/hidden_helper_task/affinity.cpp

Modified: 
    openmp/docs/design/Runtimes.rst
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_affinity.cpp
    openmp/runtime/src/kmp_affinity.h
    openmp/runtime/src/kmp_global.cpp
    openmp/runtime/src/kmp_runtime.cpp
    openmp/runtime/src/kmp_settings.cpp
    openmp/runtime/src/z_Linux_util.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst
index 2bb6e8b9ae8e8..960e07f5cd4fb 100644
--- a/openmp/docs/design/Runtimes.rst
+++ b/openmp/docs/design/Runtimes.rst
@@ -374,6 +374,24 @@ The ``offset`` specifier indicates the starting position for thread assignment.
     across one socket, and ``granularity=socket`` the runtime will shift the
     granularity down to group since that is the largest granularity allowed by the OS.
 
+KMP_HIDDEN_HELPER_AFFINITY (Windows, Linux)
+"""""""""""""""""""""""""""""""""""""""""""
+
+Enables the run-time library to bind hidden helper threads to physical processing units.
+This environment variable has the same syntax and semantics as ``KMP_AFFINITY`` but only
+applies to the hidden helper team.
+
+You must set this environment variable before the first parallel region, or
+certain API calls including ``omp_get_max_threads()``, ``omp_get_num_procs()``
+and any affinity API calls.
+
+**Syntax:** Same as ``KMP_AFFINITY``
+
+The following ``modifiers`` are ignored in ``KMP_HIDDEN_HELPER_AFFINITY`` and are only valid for ``KMP_AFFINITY``:
+
+* ``respect`` and ``norespect``
+* ``reset`` and ``noreset``
+
 KMP_ALL_THREADS
 """""""""""""""
 

diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index ec4f4f4b8f31f..6f7da88f66ac1 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -860,6 +860,8 @@ typedef struct kmp_affinity_t {
 
 extern enum affinity_top_method __kmp_affinity_top_method;
 extern kmp_affinity_t __kmp_affinity;
+extern kmp_affinity_t __kmp_hh_affinity;
+extern kmp_affinity_t *__kmp_affinities[2];
 
 extern void __kmp_affinity_bind_thread(int which);
 
@@ -4257,6 +4259,9 @@ extern void __kmp_hidden_helper_main_thread_release();
 #define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid)                                  \
   ((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
 
+#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)                                    \
+  ((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
+
 #define KMP_HIDDEN_HELPER_TEAM(team)                                           \
   (team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
 

diff  --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 6c204ec628bdf..e9d0b99f6417e 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -174,9 +174,10 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
   const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a;
   const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b;
   int depth = __kmp_topology->get_depth();
-  KMP_DEBUG_ASSERT(__kmp_affinity.compact >= 0);
-  KMP_DEBUG_ASSERT(__kmp_affinity.compact <= depth);
-  for (i = 0; i < __kmp_affinity.compact; i++) {
+  int compact = __kmp_topology->compact;
+  KMP_DEBUG_ASSERT(compact >= 0);
+  KMP_DEBUG_ASSERT(compact <= depth);
+  for (i = 0; i < compact; i++) {
     int j = depth - i - 1;
     if (aa->sub_ids[j] < bb->sub_ids[j])
       return -1;
@@ -184,7 +185,7 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
       return 1;
   }
   for (; i < depth; i++) {
-    int j = i - __kmp_affinity.compact;
+    int j = i - compact;
     if (aa->sub_ids[j] < bb->sub_ids[j])
       return -1;
     if (aa->sub_ids[j] > bb->sub_ids[j])
@@ -583,6 +584,7 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
   retval->count = arr + 2 * (size_t)KMP_HW_LAST;
   retval->num_core_efficiencies = 0;
   retval->num_core_types = 0;
+  retval->compact = 0;
   for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
     retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
   KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
@@ -4287,6 +4289,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
 
 static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
   bool is_regular_affinity = (&affinity == &__kmp_affinity);
+  bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
   const char *env_var = affinity.env_var;
 
   if (affinity.flags.initialized) {
@@ -4335,7 +4338,8 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
 
   case affinity_explicit:
     KMP_DEBUG_ASSERT(affinity.proclist != NULL);
-    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
+    if (is_hidden_helper_affinity ||
+        __kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
       __kmp_affinity_process_proclist(affinity);
     } else {
       __kmp_affinity_process_placelist(affinity);
@@ -4391,7 +4395,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
     goto sortTopology;
 
   case affinity_balanced:
-    if (depth <= 1) {
+    if (depth <= 1 || is_hidden_helper_affinity) {
       KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
       affinity.type = affinity_none;
       __kmp_create_affinity_none_places(affinity);
@@ -4451,7 +4455,8 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
 
     if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
         (__kmp_affinity_num_places > 0) &&
-        ((unsigned)__kmp_affinity_num_places < affinity.num_masks)) {
+        ((unsigned)__kmp_affinity_num_places < affinity.num_masks) &&
+        !is_hidden_helper_affinity) {
       affinity.num_masks = __kmp_affinity_num_places;
     }
 
@@ -4459,7 +4464,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
 
     // Sort the topology table according to the current setting of
     // affinity.compact, then fill out affinity.masks.
-    __kmp_topology->sort_compact();
+    __kmp_topology->sort_compact(affinity);
     {
       int i;
       unsigned j;
@@ -4510,8 +4515,7 @@ void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
 }
 
 void __kmp_affinity_uninitialize(void) {
-  {
-    kmp_affinity_t *affinity = &__kmp_affinity;
+  for (kmp_affinity_t *affinity : __kmp_affinities) {
     if (affinity->masks != NULL)
       KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
     if (affinity->os_id_masks != NULL)
@@ -4546,6 +4550,21 @@ void __kmp_affinity_uninitialize(void) {
   KMPAffinity::destroy_api();
 }
 
+static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
+                                      int *place, kmp_affin_mask_t **mask) {
+  int mask_idx;
+  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
+  if (is_hidden_helper)
+    // The first gtid is the regular primary thread, the second gtid is the main
+    // thread of hidden team which does not participate in task execution.
+    mask_idx = gtid - 2;
+  else
+    mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
+  KMP_DEBUG_ASSERT(affinity->num_masks > 0);
+  *place = (mask_idx + affinity->offset) % affinity->num_masks;
+  *mask = KMP_CPU_INDEX(affinity->masks, *place);
+}
+
 void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   if (!KMP_AFFINITY_CAPABLE()) {
     return;
@@ -4565,13 +4584,20 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
   // same as the mask of the initialization thread.
   kmp_affin_mask_t *mask;
   int i;
-  const kmp_affinity_t *affinity = &__kmp_affinity;
-  const char *env_var = affinity->env_var;
+  const kmp_affinity_t *affinity;
+  const char *env_var;
+  bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
 
-  if (KMP_AFFINITY_NON_PROC_BIND) {
+  if (is_hidden_helper)
+    affinity = &__kmp_hh_affinity;
+  else
+    affinity = &__kmp_affinity;
+  env_var = affinity->env_var;
+
+  if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
     if ((affinity->type == affinity_none) ||
         (affinity->type == affinity_balanced) ||
-        KMP_HIDDEN_HELPER_THREAD(gtid)) {
+        KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
 #if KMP_GROUP_AFFINITY
       if (__kmp_num_proc_groups > 1) {
         return;
@@ -4581,14 +4607,10 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
       i = 0;
       mask = __kmp_affin_fullMask;
     } else {
-      int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
-      KMP_DEBUG_ASSERT(affinity->num_masks > 0);
-      i = (mask_idx + affinity->offset) % affinity->num_masks;
-      mask = KMP_CPU_INDEX(affinity->masks, i);
+      __kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
     }
   } else {
-    if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
-        (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
+    if (!isa_root || __kmp_nested_proc_bind.bind_types[0] == proc_bind_false) {
 #if KMP_GROUP_AFFINITY
       if (__kmp_num_proc_groups > 1) {
         return;
@@ -4598,17 +4620,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
       i = KMP_PLACE_ALL;
       mask = __kmp_affin_fullMask;
     } else {
-      // int i = some hash function or just a counter that doesn't
-      // always start at 0.  Use adjusted gtid for now.
-      int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
-      KMP_DEBUG_ASSERT(affinity->num_masks > 0);
-      i = (mask_idx + affinity->offset) % affinity->num_masks;
-      mask = KMP_CPU_INDEX(affinity->masks, i);
+      __kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
     }
   }
 
   th->th.th_current_place = i;
-  if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
+  if (isa_root && !is_hidden_helper) {
     th->th.th_new_place = i;
     th->th.th_first_place = 0;
     th->th.th_last_place = affinity->num_masks - 1;
@@ -4629,10 +4646,11 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
 
   KMP_CPU_COPY(th->th.th_affin_mask, mask);
 
-  if (affinity->flags.verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
-      /* to avoid duplicate printing (will be correctly printed on barrier) */
-      && (affinity->type == affinity_none ||
-          (i != KMP_PLACE_ALL && affinity->type != affinity_balanced))) {
+  /* to avoid duplicate printing (will be correctly printed on barrier) */
+  if (affinity->flags.verbose &&
+      (affinity->type == affinity_none ||
+       (i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) &&
+      !KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
     char buf[KMP_AFFIN_MASK_PRINT_LEN];
     __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                               th->th.th_affin_mask);
@@ -4640,17 +4658,6 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
                gtid, buf);
   }
 
-#if KMP_DEBUG
-  // Hidden helper thread affinity only printed for debug builds
-  if (affinity->flags.verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
-    char buf[KMP_AFFIN_MASK_PRINT_LEN];
-    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
-                              th->th.th_affin_mask);
-    KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY (hidden helper thread)",
-               (kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
-  }
-#endif
-
 #if KMP_OS_WINDOWS
   // On Windows* OS, the process affinity mask might have changed. If the user
   // didn't request affinity and this call fails, just continue silently.
@@ -4663,7 +4670,8 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
 }
 
 void __kmp_affinity_set_place(int gtid) {
-  if (!KMP_AFFINITY_CAPABLE()) {
+  // Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
+  if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
     return;
   }
 

diff  --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h
index e1305f406b0df..42de135362ec9 100644
--- a/openmp/runtime/src/kmp_affinity.h
+++ b/openmp/runtime/src/kmp_affinity.h
@@ -724,6 +724,9 @@ class kmp_topology_t {
   // Flags describing the topology
   flags_t flags;
 
+  // Compact value used during sort_compact()
+  int compact;
+
   // Insert a new topology layer after allocation
   void _insert_layer(kmp_hw_t type, const int *ids);
 
@@ -866,7 +869,9 @@ class kmp_topology_t {
   }
 
 #if KMP_AFFINITY_SUPPORTED
-  void sort_compact() {
+  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
+  void sort_compact(kmp_affinity_t &affinity) {
+    compact = affinity.compact;
     qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
           kmp_hw_thread_t::compare_compact);
   }

diff  --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 8b41bfde339b3..7b94164f6dbb0 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -274,6 +274,10 @@ enum affinity_top_method __kmp_affinity_top_method =
 
 // Regular thread affinity settings from KMP_AFFINITY
 kmp_affinity_t __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
+// Hidden helper thread affinity settings from KMP_HIDDEN_HELPER_AFFINITY
+kmp_affinity_t __kmp_hh_affinity =
+    KMP_AFFINITY_INIT("KMP_HIDDEN_HELPER_AFFINITY");
+kmp_affinity_t *__kmp_affinities[] = {&__kmp_affinity, &__kmp_hh_affinity};
 
 char *__kmp_cpuinfo_file = NULL;
 

diff  --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a1051c752e312..0e9c9a697ec0a 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -7467,6 +7467,14 @@ void __kmp_hidden_helper_initialize() {
     return;
   }
 
+#if KMP_AFFINITY_SUPPORTED
+  // Initialize hidden helper affinity settings.
+  // The above __kmp_parallel_initialize() will initialize
+  // regular affinity (and topology) if not already done.
+  if (!__kmp_hh_affinity.flags.initialized)
+    __kmp_affinity_initialize(__kmp_hh_affinity);
+#endif
+
   // Set the count of hidden helper tasks to be executed to zero
   KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
 

diff  --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 7de6871c6eb10..080f4015b6e06 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1247,7 +1247,7 @@ static void __kmp_stg_parse_num_hidden_helper_threads(char const *name,
   if (__kmp_hidden_helper_threads_num == 0) {
     __kmp_enable_hidden_helper = FALSE;
   } else {
-    // Since the main thread of hidden helper team dooes not participate
+    // Since the main thread of hidden helper team does not participate
     // in tasks execution let's increment the number of threads by one
     // so that requested number of threads do actual job.
     __kmp_hidden_helper_threads_num++;
@@ -2542,9 +2542,21 @@ static void __kmp_stg_parse_affinity(char const *name, char const *value,
   __kmp_parse_affinity_env(name, value, &__kmp_affinity);
 
 } // __kmp_stg_parse_affinity
+static void __kmp_stg_parse_hh_affinity(char const *name, char const *value,
+                                        void *data) {
+  __kmp_parse_affinity_env(name, value, &__kmp_hh_affinity);
+  // Warn about unused parts of hidden helper affinity settings if specified.
+  if (__kmp_hh_affinity.flags.reset) {
+    KMP_WARNING(AffInvalidParam, name, "reset");
+  }
+  if (__kmp_hh_affinity.flags.respect != affinity_respect_mask_default) {
+    KMP_WARNING(AffInvalidParam, name, "respect");
+  }
+}
 
 static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
                                      const kmp_affinity_t &affinity) {
+  bool is_hh_affinity = (&affinity == &__kmp_hh_affinity);
   if (__kmp_env_format) {
     KMP_STR_BUF_PRINT_NAME_EX(name);
   } else {
@@ -2561,15 +2573,19 @@ static void __kmp_print_affinity_env(kmp_str_buf_t *buffer, char const *name,
     __kmp_str_buf_print(buffer, "%s,", "nowarnings");
   }
   if (KMP_AFFINITY_CAPABLE()) {
-    if (affinity.flags.respect) {
-      __kmp_str_buf_print(buffer, "%s,", "respect");
-    } else {
-      __kmp_str_buf_print(buffer, "%s,", "norespect");
-    }
-    if (affinity.flags.reset) {
-      __kmp_str_buf_print(buffer, "%s,", "reset");
-    } else {
-      __kmp_str_buf_print(buffer, "%s,", "noreset");
+    // Hidden helper affinity does not affect global reset
+    // or respect flags. That is still solely controlled by KMP_AFFINITY.
+    if (!is_hh_affinity) {
+      if (affinity.flags.respect) {
+        __kmp_str_buf_print(buffer, "%s,", "respect");
+      } else {
+        __kmp_str_buf_print(buffer, "%s,", "norespect");
+      }
+      if (affinity.flags.reset) {
+        __kmp_str_buf_print(buffer, "%s,", "reset");
+      } else {
+        __kmp_str_buf_print(buffer, "%s,", "noreset");
+      }
     }
     __kmp_str_buf_print(buffer, "granularity=%s,",
                         __kmp_hw_get_keyword(affinity.gran, false));
@@ -2620,6 +2636,10 @@ static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name,
                                      void *data) {
   __kmp_print_affinity_env(buffer, name, __kmp_affinity);
 }
+static void __kmp_stg_print_hh_affinity(kmp_str_buf_t *buffer, char const *name,
+                                        void *data) {
+  __kmp_print_affinity_env(buffer, name, __kmp_hh_affinity);
+}
 
 #ifdef KMP_GOMP_COMPAT
 
@@ -5472,6 +5492,8 @@ static kmp_setting_t __kmp_stg_table[] = {
 #if KMP_AFFINITY_SUPPORTED
     {"KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL,
      0, 0},
+    {"KMP_HIDDEN_HELPER_AFFINITY", __kmp_stg_parse_hh_affinity,
+     __kmp_stg_print_hh_affinity, NULL, 0, 0},
 #ifdef KMP_GOMP_COMPAT
     {"GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL,
      /* no print */ NULL, 0, 0},
@@ -6199,10 +6221,14 @@ void __kmp_env_initialize(char const *string) {
           __kmp_affinity.type = affinity_compact;
           __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel;
         }
+        if (__kmp_hh_affinity.type == affinity_default)
+          __kmp_hh_affinity.type = affinity_compact;
         if (__kmp_affinity_top_method == affinity_top_method_default)
           __kmp_affinity_top_method = affinity_top_method_all;
         if (__kmp_affinity.gran == KMP_HW_UNKNOWN)
           __kmp_affinity.gran = KMP_HW_PROC_GROUP;
+        if (__kmp_hh_affinity.gran == KMP_HW_UNKNOWN)
+          __kmp_hh_affinity.gran = KMP_HW_PROC_GROUP;
       } else
 
 #endif /* KMP_GROUP_AFFINITY */
@@ -6242,6 +6268,8 @@ void __kmp_env_initialize(char const *string) {
             __kmp_affinity.type = affinity_none;
           }
         }
+        if (__kmp_hh_affinity.type == affinity_default)
+          __kmp_hh_affinity.type = affinity_none;
         if ((__kmp_affinity.gran == KMP_HW_UNKNOWN) &&
             (__kmp_affinity.gran_levels < 0)) {
 #if KMP_MIC_SUPPORTED
@@ -6253,6 +6281,17 @@ void __kmp_env_initialize(char const *string) {
             __kmp_affinity.gran = KMP_HW_CORE;
           }
         }
+        if ((__kmp_hh_affinity.gran == KMP_HW_UNKNOWN) &&
+            (__kmp_hh_affinity.gran_levels < 0)) {
+#if KMP_MIC_SUPPORTED
+          if (__kmp_mic_type != non_mic) {
+            __kmp_hh_affinity.gran = KMP_HW_THREAD;
+          } else
+#endif
+          {
+            __kmp_hh_affinity.gran = KMP_HW_CORE;
+          }
+        }
         if (__kmp_affinity_top_method == affinity_top_method_default) {
           __kmp_affinity_top_method = affinity_top_method_all;
         }
@@ -6260,7 +6299,8 @@ void __kmp_env_initialize(char const *string) {
     }
 
 #ifdef KMP_DEBUG
-    __kmp_print_affinity_settings(&__kmp_affinity);
+    for (const kmp_affinity_t *affinity : __kmp_affinities)
+      __kmp_print_affinity_settings(affinity);
     KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default);
     K_DIAG(1, ("__kmp_nested_proc_bind.bind_types[0] == %d\n",
                __kmp_nested_proc_bind.bind_types[0]));

diff  --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp
index 7b027a238d635..21771e6ef4bcf 100644
--- a/openmp/runtime/src/z_Linux_util.cpp
+++ b/openmp/runtime/src/z_Linux_util.cpp
@@ -1231,7 +1231,8 @@ static void __kmp_atfork_child(void) {
   if (__kmp_nested_proc_bind.bind_types != NULL) {
     __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
   }
-  __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
+  for (kmp_affinity_t *affinity : __kmp_affinities)
+    *affinity = KMP_AFFINITY_INIT(affinity->env_var);
   __kmp_affin_fullMask = nullptr;
   __kmp_affin_origMask = nullptr;
 #endif // KMP_AFFINITY_SUPPORTED

diff  --git a/openmp/runtime/test/tasking/hidden_helper_task/affinity.cpp b/openmp/runtime/test/tasking/hidden_helper_task/affinity.cpp
new file mode 100644
index 0000000000000..4816a01922208
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/affinity.cpp
@@ -0,0 +1,120 @@
+// RUN: %libomp-cxx-compile
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=socket,compact %libomp-run 2>&1 | FileCheck --check-prefix=SOCKET %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=core,scatter %libomp-run 2>&1 | FileCheck --check-prefix=CORE %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY='verbose,granularity=fine,explicit,proclist=[0,1]' %libomp-run 2>&1 | FileCheck --check-prefix=FINE %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=socket,compact KMP_AFFINITY=compact,granularity=fine %libomp-run 2>&1 | FileCheck --check-prefix=SOCKET %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=core,scatter KMP_AFFINITY=compact,granularity=socket %libomp-run 2>&1 | FileCheck --check-prefix=CORE %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY='verbose,granularity=fine,explicit,proclist=[0,1]' KMP_AFFINITY=compact,granularity=core %libomp-run 2>&1 | FileCheck --check-prefix=FINE %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=socket,compact OMP_PROC_BIND=close OMP_PLACES=threads %libomp-run 2>&1 | FileCheck --check-prefix=SOCKET %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY=verbose,granularity=core,scatter OMP_PROC_BIND=close OMP_PLACES=sockets %libomp-run 2>&1 | FileCheck --check-prefix=CORE %s
+// RUN: env LIBOMP_USE_HIDDEN_HELPER_TASK=1 LIBOMP_NUM_HIDDEN_HELPER_THREADS=8 \
+// RUN:     KMP_HIDDEN_HELPER_AFFINITY='verbose,granularity=fine,explicit,proclist=[0,1]' OMP_PROC_BIND=cores OMP_PLACES=cores %libomp-run 2>&1 | FileCheck --check-prefix=FINE %s
+
+/*
+ * This test aims to check hidden helper affinity
+ *
+ * #pragma omp parallel for
+ * for (int i = 0; i < N; ++i) {
+ *   int data1 = 0, data2 = 0;
+ * #pragma omp taskgroup
+ *   {
+ * #pragma omp hidden helper task shared(data1)
+ *    {
+ *      data1 = 1;
+ *    }
+ * #pragma omp hidden helper task shared(data2)
+ *    {
+ *      data2 = 2;
+ *    }
+ *   }
+ *   assert(data1 == 1);
+ *   assert(data2 == 2);
+ * }
+ */
+
+#include "common.h"
+
+extern "C" {
+struct kmp_task_t_with_privates {
+  kmp_task_t task;
+};
+
+struct anon {
+  int32_t *data;
+};
+}
+
+template <int I>
+kmp_int32 omp_task_entry(kmp_int32 gtid, kmp_task_t_with_privates *task) {
+  auto shareds = reinterpret_cast<anon *>(task->task.shareds);
+  auto p = shareds->data;
+  *p = I;
+  return 0;
+}
+
+int main(int argc, char *argv[]) {
+  constexpr const int N = 16;
+#pragma omp parallel for
+  for (int i = 0; i < N; ++i) {
+    int32_t gtid = __kmpc_global_thread_num(nullptr);
+    int32_t data1 = 0;
+    __kmpc_taskgroup(nullptr, gtid);
+
+    auto task1 = __kmpc_omp_target_task_alloc(
+        nullptr, gtid, 1, sizeof(kmp_task_t_with_privates), sizeof(anon),
+        reinterpret_cast<kmp_routine_entry_t>(omp_task_entry<1>), -1);
+    auto shareds = reinterpret_cast<anon *>(task1->shareds);
+    shareds->data = &data1;
+    __kmpc_omp_task(nullptr, gtid, task1);
+
+    __kmpc_end_taskgroup(nullptr, gtid);
+
+    assert(data1 == 1);
+  }
+
+  std::cout << "PASS\n";
+  return 0;
+}
+
+// SOCKET: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: Threads may migrate across
+// SOCKET-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: pid {{[0-9]+}} tid {{[0-9]+}} thread 1 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM:[0-9]+]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID:[0-9]+]] tid {{[0-9]+}} thread 2 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 3 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 4 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 5 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 6 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 7 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 8 bound to OS proc set
+// SOCKET-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 9 bound to OS proc set
+
+// CORE: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: Threads may migrate across
+// CORE-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: pid {{[0-9]+}} tid {{[0-9]+}} thread 1 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM:[0-9]+]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID:[0-9]+]] tid {{[0-9]+}} thread 2 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 3 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 4 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 5 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 6 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 7 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 8 bound to OS proc set
+// CORE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 9 bound to OS proc set
+
+// FINE-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: Threads may migrate across
+// FINE-NOT: OMP: Info #{{[0-9]+}}: KMP_HIDDEN_HELPER_AFFINITY: pid {{[0-9]+}} tid {{[0-9]+}} thread 1 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM:[0-9]+]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID:[0-9]+]] tid {{[0-9]+}} thread 2 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 3 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 4 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 5 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 6 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 7 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 8 bound to OS proc set
+// FINE-DAG: OMP: Info #[[NUM]]: KMP_HIDDEN_HELPER_AFFINITY: pid [[PID]] tid {{[0-9]+}} thread 9 bound to OS proc set
+
+// End of file


        


More information about the Openmp-commits mailing list