[Openmp-commits] [openmp] 8ec9aa2 - [OpenMP] Add experimental nesting mode feature

Terry Wilmarth via Openmp-commits openmp-commits at lists.llvm.org
Fri Jun 4 14:01:24 PDT 2021


Author: Terry Wilmarth
Date: 2021-06-04T16:01:11-05:00
New Revision: 8ec9aa236e325fd4629cfeefac2919302e14d61a

URL: https://github.com/llvm/llvm-project/commit/8ec9aa236e325fd4629cfeefac2919302e14d61a
DIFF: https://github.com/llvm/llvm-project/commit/8ec9aa236e325fd4629cfeefac2919302e14d61a.diff

LOG: [OpenMP] Add experimental nesting mode feature

Nesting mode is a new experimental feature in the OpenMP
runtime. It allows a user to set up nesting for an application in a
way that corresponds to the hardware topology levels on the machine an
application is being run on.  For example, if a machine has 2 sockets,
each with 12 cores, then use of nesting mode could set up an outer
level of nesting that uses 2 threads per parallel region, and an inner
level of nesting that uses 12 threads per parallel region.

Nesting mode is controlled with the KMP_NESTING_MODE environment
variable as follows:

1) KMP_NESTING_MODE = 0: Nesting mode is off (default); max-active-levels-var
is set to 1 (the default -- nesting is off, nested parallel regions
are serialized).

2) KMP_NESTING_MODE = 1: Nesting mode is on, and a number of threads
will be assigned for each level discovered in the machine topology;
max-active-levels-var is set to the number of levels discovered.

3) KMP_NESTING_MODE = n, n>1: [Note: this option is experimental and may change
or be removed in the future.] Nesting mode is on, and a number of
threads will be assigned for each topology level discovered on the
machine, up to k<=n levels (since there may be fewer than n levels
discovered in the topology), and beyond the kth level, nested parallel
regions will be serialized; NOTE: max-active-levels-var is 1 (the default --
nesting is off, and nested parallel regions are serialized) until the
user changes max-active-levels-var.

If the user sets OMP_NUM_THREADS or OMP_MAX_ACTIVE_LEVELS, those values
override what KMP_NESTING_MODE would otherwise choose for the corresponding
settings. The detected topology may be limited by an affinity mask
setting on the initial thread, or if the user sets KMP_HW_SUBSET. See
also: KMP_HOT_TEAMS_MAX_LEVEL for controlling use of hot teams for
nested parallel regions. Note that this feature only sets numbers of
threads used at nesting levels.  The user should make use of
OMP_PLACES and OMP_PROC_BIND or KMP_AFFINITY for affinitizing those
threads, if desired.

Differential Revision: https://reviews.llvm.org/D102188

Added: 
    

Modified: 
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_ftn_entry.h
    openmp/runtime/src/kmp_global.cpp
    openmp/runtime/src/kmp_runtime.cpp
    openmp/runtime/src/kmp_settings.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 327ebbeae1e9a..9c23f159b9e73 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -4140,6 +4140,12 @@ int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,
 #endif /* USE_ITT_BUILD */
                                kmp_int32 is_constrained);
 
+extern int __kmp_nesting_mode; // parsed value of KMP_NESTING_MODE
+extern int __kmp_nesting_mode_nlevels; // number of nesting levels in use
+extern int *__kmp_nesting_nth_level; // thread count for each nesting level
+extern void __kmp_init_nesting_mode();
+extern void __kmp_set_nesting_mode_threads();
+
 /// This class safely opens and closes a C-style FILE* object using RAII
 /// semantics. There are also methods which allow using stdout or stderr as
 /// the underlying FILE* object. With the implicit conversion operator to

diff  --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h
index cdd2344b51f61..5d8398d3e7f85 100644
--- a/openmp/runtime/src/kmp_ftn_entry.h
+++ b/openmp/runtime/src/kmp_ftn_entry.h
@@ -695,6 +695,9 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_MAX_ACTIVE_LEVELS)(void) {
   return 0;
 #else
   /* TO DO: We want per-task implementation of this internal control */
+  if (!TCR_4(__kmp_init_middle)) {
+    __kmp_middle_initialize();
+  }
   return __kmp_get_max_active_levels(__kmp_entry_gtid());
 #endif
 }

diff  --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index d7678dbb500af..24de14fe8c33c 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -548,4 +548,9 @@ kmp_target_offload_kind_t __kmp_target_offload = tgt_default;
 // OMP Pause Resources
 kmp_pause_status_t __kmp_pause_status = kmp_not_paused;
 
+// Nesting mode
+int __kmp_nesting_mode = 0; // parsed value of KMP_NESTING_MODE; starts at 0
+int __kmp_nesting_mode_nlevels = 1; // number of nesting levels in use
+int *__kmp_nesting_nth_level; // per-level thread counts; allocated at init
+
 // end of file //

diff  --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 08a9611632870..c59f9960d09b6 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -6848,6 +6848,8 @@ static void __kmp_do_serial_initialize(void) {
   __kmp_global.g.g_dynamic = FALSE;
   __kmp_global.g.g_dynamic_mode = dynamic_default;
 
+  __kmp_init_nesting_mode();
+
   __kmp_env_initialize(NULL);
 
 #if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
@@ -7040,6 +7042,9 @@ static void __kmp_do_middle_initialize(void) {
     __kmp_dflt_team_nth = __kmp_sys_max_nth;
   }
 
+  if (__kmp_nesting_mode > 0)
+    __kmp_set_nesting_mode_threads();
+
   // There's no harm in continuing if the following check fails,
   // but it indicates an error in the previous logic.
   KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
@@ -8700,3 +8705,89 @@ void __kmp_hidden_helper_threads_initz_routine() {
 
   __kmp_hidden_helper_threads_deinitz_release();
 }
+
+/* Nesting Mode:
+   Set via KMP_NESTING_MODE, which takes an integer.
+   Note: we skip duplicate topology levels, and skip levels with only
+      one entity.
+   KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
+   KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
+      in the topology, and initializes the number of threads at each of those
+      levels to the number of entities at each level, respectively, below the
+      entity at the parent level.
+   KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
+      but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
+      the user to turn nesting on explicitly. This is an even more experimental
+      option to this experimental feature, and may change or go away in the
+      future.
+*/
+
+// Reserve storage for the per-level thread counts used by nesting mode
+void __kmp_init_nesting_mode() {
+  const int nslots = KMP_HW_LAST;
+  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(nslots * sizeof(int));
+  for (int lvl = nslots - 1; lvl >= 0; --lvl)
+    __kmp_nesting_nth_level[lvl] = 0;
+  __kmp_nesting_mode_nlevels = nslots;
+  if (__kmp_nested_nth.size >= nslots)
+    return; // the nested thread-count list is already large enough
+  __kmp_nested_nth.nth =
+      (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, nslots * sizeof(int));
+  __kmp_nested_nth.size = nslots;
+}
+
+// Set # threads for each nesting level; must be called after topology is set
+void __kmp_set_nesting_mode_threads() {
+  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];
+  // Upper bound on levels to fill: the limit for mode 1, N for mode N > 1.
+  if (__kmp_nesting_mode == 1)
+    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
+  else if (__kmp_nesting_mode > 1)
+    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
+  // loc counts nesting levels kept so far; hw_level walks the topology.
+  if (__kmp_topology) { // use topology info
+    int loc, hw_level;
+    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
+                                loc < __kmp_nesting_mode_nlevels;
+         loc++, hw_level++) {
+      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
+      if (__kmp_nesting_nth_level[loc] == 1)
+        loc--; // single-entity level is skipped: slot is reused next pass
+    }
+    // Make sure all cores are used: widen the last kept level if needed
+    if (__kmp_nesting_mode > 1 && loc > 1) {
+      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
+      int num_cores = __kmp_topology->get_count(core_level);
+      int upper_levels = 1; // product of thread counts above the last level
+      for (int level = 0; level < loc - 1; ++level)
+        upper_levels *= __kmp_nesting_nth_level[level];
+      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
+        __kmp_nesting_nth_level[loc - 1] =
+            num_cores / upper_levels; // share cores among ALL upper levels
+    }
+    __kmp_nesting_mode_nlevels = loc;
+    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
+  } else { // no topology info available; provide a reasonable guesstimation
+    if (__kmp_avail_proc >= 4) {
+      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
+      __kmp_nesting_nth_level[1] = 2;
+      __kmp_nesting_mode_nlevels = 2;
+    } else {
+      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
+      __kmp_nesting_mode_nlevels = 1;
+    }
+    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
+  }
+  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
+    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
+  }
+  set__nproc(thread, __kmp_nesting_nth_level[0]);
+  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
+    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
+  if (get__max_active_levels(thread) > 1) {
+    // if max levels was set, set nesting mode levels to same
+    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
+  }
+  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
+    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
+}

diff  --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 7fd88dc3b9465..89799a7769854 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -1015,6 +1015,28 @@ static void __kmp_stg_print_warnings(kmp_str_buf_t *buffer, char const *name,
   __kmp_stg_print_bool(buffer, name, __kmp_generate_warnings);
 } // __kmp_stg_print_warnings
 
+// -----------------------------------------------------------------------------
+// KMP_NESTING_MODE
+
+static void __kmp_stg_parse_nesting_mode(char const *name, char const *value,
+                                         void *data) {
+  __kmp_stg_parse_int(name, value, 0, INT_MAX, &__kmp_nesting_mode);
+#if KMP_AFFINITY_SUPPORTED && KMP_USE_HWLOC
+  if (__kmp_nesting_mode > 0) // mode enabled: select the hwloc method
+    __kmp_affinity_top_method = affinity_top_method_hwloc;
+#endif
+} // __kmp_stg_parse_nesting_mode
+
+static void __kmp_stg_print_nesting_mode(kmp_str_buf_t *buffer,
+                                         char const *name, void *data) {
+  if (!__kmp_env_format) { // plain listing: indented bare name
+    __kmp_str_buf_print(buffer, "   %s", name);
+  } else { // env format: the shared name-printing macro emits the name
+    KMP_STR_BUF_PRINT_NAME;
+  }
+  __kmp_str_buf_print(buffer, "=%d\n", __kmp_nesting_mode); // then "=<mode>"
+} // __kmp_stg_print_nesting_mode
+
 // -----------------------------------------------------------------------------
 // OMP_NESTED, OMP_NUM_THREADS
 
@@ -5106,6 +5128,8 @@ static kmp_setting_t __kmp_stg_table[] = {
     {"KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL,
      0, 0},
 
+    {"KMP_NESTING_MODE", __kmp_stg_parse_nesting_mode,
+     __kmp_stg_print_nesting_mode, NULL, 0, 0},
     {"OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0},
     {"OMP_NUM_THREADS", __kmp_stg_parse_num_threads,
      __kmp_stg_print_num_threads, NULL, 0, 0},


        


More information about the Openmp-commits mailing list