[Openmp-commits] [openmp] r309319 - Fix implementation of OMP_THREAD_LIMIT

Jonathan Peyton via Openmp-commits openmp-commits at lists.llvm.org
Thu Jul 27 13:58:41 PDT 2017


Author: jlpeyton
Date: Thu Jul 27 13:58:41 2017
New Revision: 309319

URL: http://llvm.org/viewvc/llvm-project?rev=309319&view=rev
Log:
Fix implementation of OMP_THREAD_LIMIT

This change fixes the implementation of OMP_THREAD_LIMIT. Previously the limit
was not restricted to a contention group (but it should be, according to the
spec); this is fixed here. A field, r_cg_nthreads, is added to the root
structure to store a counter of the threads in the contention group. An extra
check is added when reserving threads for a parallel region; it compares this
counter against thread-limit-var, which is implemented as a new global variable,
__kmp_cg_max_nth. Associated settings changes were also made, along with a
cleanup of comments that referred to OMP_THREAD_LIMIT but should refer to the
new KMP_DEVICE_THREAD_LIMIT (added in an earlier patch).

Patch by Terry Wilmarth

Differential Revision: https://reviews.llvm.org/D35912

Added:
    openmp/trunk/runtime/test/env/omp_thread_limit.c
Modified:
    openmp/trunk/runtime/src/i18n/en_US.txt
    openmp/trunk/runtime/src/kmp.h
    openmp/trunk/runtime/src/kmp_ftn_entry.h
    openmp/trunk/runtime/src/kmp_global.cpp
    openmp/trunk/runtime/src/kmp_runtime.cpp
    openmp/trunk/runtime/src/kmp_settings.cpp

Modified: openmp/trunk/runtime/src/i18n/en_US.txt
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/i18n/en_US.txt?rev=309319&r1=309318&r2=309319&view=diff
==============================================================================
--- openmp/trunk/runtime/src/i18n/en_US.txt (original)
+++ openmp/trunk/runtime/src/i18n/en_US.txt Thu Jul 27 13:58:41 2017
@@ -433,7 +433,7 @@ SubmitBugReport              "Please sub
 OBSOLETE                     "Check NLSPATH environment variable, its value is \"%1$s\"."
 ChangeStackLimit             "Please try changing the shell stack limit or adjusting the "
                              "OMP_STACKSIZE environment variable."
-Unset_ALL_THREADS            "Consider unsetting KMP_ALL_THREADS and OMP_THREAD_LIMIT (if either is set)."
+Unset_ALL_THREADS            "Consider unsetting KMP_DEVICE_THREAD_LIMIT (KMP_ALL_THREADS) and OMP_THREAD_LIMIT (if either is set)."
 Set_ALL_THREADPRIVATE        "Consider setting KMP_ALL_THREADPRIVATE to a value larger than %1$d."
 PossibleSystemLimitOnThreads "This could also be due to a system-related limit on the number of threads."
 DuplicateLibrary             "This means that multiple copies of the OpenMP runtime have been "

Modified: openmp/trunk/runtime/src/kmp.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp.h?rev=309319&r1=309318&r2=309319&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp.h (original)
+++ openmp/trunk/runtime/src/kmp.h Thu Jul 27 13:58:41 2017
@@ -2689,6 +2689,7 @@ typedef struct kmp_base_root {
   kmp_lock_t r_begin_lock;
   volatile int r_begin;
   int r_blocktime; /* blocktime for this root and descendants */
+  int r_cg_nthreads; // count of active threads in a contention group
 } kmp_base_root_t;
 
 typedef union KMP_ALIGN_CACHE kmp_root {
@@ -2863,8 +2864,10 @@ extern int __kmp_xproc; /* number of pro
 extern int __kmp_avail_proc; /* number of processors available to the process */
 extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */
 extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */
-extern int
-    __kmp_max_nth; /* maximum total number of concurrently-existing threads */
+// maximum total number of concurrently-existing threads on device
+extern int __kmp_max_nth;
+// maximum total number of concurrently-existing threads in a contention group
+extern int __kmp_cg_max_nth;
 extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and
                                       __kmp_root */
 extern int __kmp_dflt_team_nth; /* default number of threads in a parallel

Modified: openmp/trunk/runtime/src/kmp_ftn_entry.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_ftn_entry.h?rev=309319&r1=309318&r2=309319&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_ftn_entry.h (original)
+++ openmp/trunk/runtime/src/kmp_ftn_entry.h Thu Jul 27 13:58:41 2017
@@ -550,7 +550,7 @@ int FTN_STDCALL xexpand(FTN_GET_THREAD_L
     __kmp_serial_initialize();
   };
   /* global ICV */
-  return __kmp_max_nth;
+  return __kmp_cg_max_nth;
 #endif
 }
 

Modified: openmp/trunk/runtime/src/kmp_global.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_global.cpp?rev=309319&r1=309318&r2=309319&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_global.cpp (original)
+++ openmp/trunk/runtime/src/kmp_global.cpp Thu Jul 27 13:58:41 2017
@@ -135,6 +135,7 @@ int __kmp_avail_proc = 0;
 size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
 int __kmp_sys_max_nth = KMP_MAX_NTH;
 int __kmp_max_nth = 0;
+int __kmp_cg_max_nth = 0;
 int __kmp_threads_capacity = 0;
 int __kmp_dflt_team_nth = 0;
 int __kmp_dflt_team_nth_ub = 0;

Modified: openmp/trunk/runtime/src/kmp_runtime.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_runtime.cpp?rev=309319&r1=309318&r2=309319&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_runtime.cpp (original)
+++ openmp/trunk/runtime/src/kmp_runtime.cpp Thu Jul 27 13:58:41 2017
@@ -881,7 +881,7 @@ static int __kmp_reserve_threads(kmp_roo
     KMP_ASSERT(0);
   }
 
-  // Respect KMP_ALL_THREADS, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT.
+  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
   if (__kmp_nth + new_nthreads -
           (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
       __kmp_max_nth) {
@@ -899,12 +899,41 @@ static int __kmp_reserve_threads(kmp_roo
                 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
     }
     if (tl_nthreads == 1) {
-      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
-                    "reservation to 1 thread\n",
+      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
+                    "reduced reservation to 1 thread\n",
                     master_tid));
       return 1;
     }
-    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced "
+    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
+                  "reservation to %d threads\n",
+                  master_tid, tl_nthreads));
+    new_nthreads = tl_nthreads;
+  }
+
+  // Respect OMP_THREAD_LIMIT
+  if (root->r.r_cg_nthreads + new_nthreads -
+          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
+      __kmp_cg_max_nth) {
+    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
+                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
+    if (tl_nthreads <= 0) {
+      tl_nthreads = 1;
+    }
+
+    // If dyn-var is false, emit a 1-time warning.
+    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
+      __kmp_reserve_warn = 1;
+      __kmp_msg(kmp_ms_warning,
+                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
+                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
+    }
+    if (tl_nthreads == 1) {
+      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
+                    "reduced reservation to 1 thread\n",
+                    master_tid));
+      return 1;
+    }
+    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                   "reservation to %d threads\n",
                   master_tid, tl_nthreads));
     new_nthreads = tl_nthreads;
@@ -3116,6 +3145,7 @@ static void __kmp_initialize_root(kmp_ro
   root->r.r_in_parallel = 0;
   root->r.r_blocktime = __kmp_dflt_blocktime;
   root->r.r_nested = __kmp_dflt_nested;
+  root->r.r_cg_nthreads = 1;
 
   /* setup the root team for this task */
   /* allocate the root team structure */
@@ -3508,7 +3538,7 @@ static int __kmp_expand_threads(int nWis
 
     // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If
     // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the
-    // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become
+    // user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become
     // > __kmp_max_nth in one of two ways:
     //
     // 1) The initialization thread (gtid = 0) exits.  __kmp_threads[0]
@@ -3889,6 +3919,8 @@ static int __kmp_reset_root(int gtid, km
 
   TCW_4(__kmp_nth,
         __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.
+  root->r.r_cg_nthreads--;
+
   __kmp_reap_thread(root->r.r_uber_thread, 1);
 
   // We canot put root thread to __kmp_thread_pool, so we have to reap it istead
@@ -4169,6 +4201,7 @@ kmp_info_t *__kmp_allocate_thread(kmp_ro
     KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
 
     TCW_4(__kmp_nth, __kmp_nth + 1);
+    root->r.r_cg_nthreads++;
 
     new_thr->th.th_task_state = 0;
     new_thr->th.th_task_state_top = 0;
@@ -4316,6 +4349,8 @@ kmp_info_t *__kmp_allocate_thread(kmp_ro
   __kmp_all_nth++;
   __kmp_nth++;
 
+  root->r.r_cg_nthreads++;
+
   // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low
   // numbers of procs, and method #2 (keyed API call) for higher numbers.
   if (__kmp_adjust_gtid_mode) {
@@ -5378,6 +5413,7 @@ kmp_team_t *__kmp_reap_team(kmp_team_t *
 void __kmp_free_thread(kmp_info_t *this_th) {
   int gtid;
   kmp_info_t **scan;
+  kmp_root_t *root = this_th->th.th_root;
 
   KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                 __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));
@@ -5436,6 +5472,7 @@ void __kmp_free_thread(kmp_info_t *this_
   __kmp_thread_pool_nth++;
 
   TCW_4(__kmp_nth, __kmp_nth - 1);
+  root->r.r_cg_nthreads--;
 
 #ifdef KMP_ADJUST_BLOCKTIME
   /* Adjust blocktime back to user setting or default if necessary */
@@ -6375,6 +6412,7 @@ static void __kmp_do_serial_initialize(v
     __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
   }
   __kmp_max_nth = __kmp_sys_max_nth;
+  __kmp_cg_max_nth = __kmp_sys_max_nth;
 
   // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"
   // part
@@ -6977,7 +7015,7 @@ void __kmp_push_num_teams(ident_t *id, i
     if (num_teams * num_threads > __kmp_max_nth) {
       int new_threads = __kmp_max_nth / num_teams;
       if (!__kmp_reserve_warn) { // user asked for too many threads
-        __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT
+        __kmp_reserve_warn = 1; // that conflicts with KMP_DEVICE_THREAD_LIMIT
         __kmp_msg(kmp_ms_warning,
                   KMP_MSG(CantFormThrTeam, num_threads, new_threads),
                   KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);

Modified: openmp/trunk/runtime/src/kmp_settings.cpp
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/src/kmp_settings.cpp?rev=309319&r1=309318&r2=309319&view=diff
==============================================================================
--- openmp/trunk/runtime/src/kmp_settings.cpp (original)
+++ openmp/trunk/runtime/src/kmp_settings.cpp Thu Jul 27 13:58:41 2017
@@ -569,7 +569,7 @@ static void __kmp_stg_print_size(kmp_str
 // Parse and print functions.
 
 // -----------------------------------------------------------------------------
-// KMP_ALL_THREADS, KMP_DEVICE_THREAD_LIMIT, OMP_THREAD_LIMIT
+// KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS
 
 static void __kmp_stg_parse_device_thread_limit(char const *name,
                                                 char const *value, void *data) {
@@ -599,6 +599,20 @@ static void __kmp_stg_print_device_threa
 } // __kmp_stg_print_device_thread_limit
 
 // -----------------------------------------------------------------------------
+// OMP_THREAD_LIMIT
+static void __kmp_stg_parse_thread_limit(char const *name, char const *value,
+                                         void *data) {
+  __kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_cg_max_nth);
+  K_DIAG(1, ("__kmp_cg_max_nth == %d\n", __kmp_cg_max_nth));
+
+} // __kmp_stg_parse_thread_limit
+
+static void __kmp_stg_print_thread_limit(kmp_str_buf_t *buffer,
+                                         char const *name, void *data) {
+  __kmp_stg_print_int(buffer, name, __kmp_cg_max_nth);
+} // __kmp_stg_print_thread_limit
+
+// -----------------------------------------------------------------------------
 // KMP_BLOCKTIME
 
 static void __kmp_stg_parse_blocktime(char const *name, char const *value,
@@ -4386,8 +4400,8 @@ static kmp_setting_t __kmp_stg_table[] =
     {"KMP_TASKLOOP_MIN_TASKS", __kmp_stg_parse_taskloop_min_tasks,
      __kmp_stg_print_taskloop_min_tasks, NULL, 0, 0},
 #endif
-    {"OMP_THREAD_LIMIT", __kmp_stg_parse_device_thread_limit,
-     __kmp_stg_print_device_thread_limit, NULL, 0, 0},
+    {"OMP_THREAD_LIMIT", __kmp_stg_parse_thread_limit,
+     __kmp_stg_print_thread_limit, NULL, 0, 0},
     {"OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy,
      __kmp_stg_print_wait_policy, NULL, 0, 0},
     {"KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers,
@@ -4687,27 +4701,22 @@ static void __kmp_stg_init(void) {
       }; // if
     }
 
-    { // Initialize KMP_DEVICE_THREAD_LIMIT, KMP_ALL_THREADS, and
-      // OMP_THREAD_LIMIT data.
+    { // Initialize KMP_DEVICE_THREAD_LIMIT and KMP_ALL_THREADS
       kmp_setting_t *kmp_device_thread_limit =
           __kmp_stg_find("KMP_DEVICE_THREAD_LIMIT"); // 1st priority.
       kmp_setting_t *kmp_all_threads =
           __kmp_stg_find("KMP_ALL_THREADS"); // 2nd priority.
-      kmp_setting_t *omp_thread_limit =
-          __kmp_stg_find("OMP_THREAD_LIMIT"); // 3rd priority.
 
       // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround.
-      static kmp_setting_t *volatile rivals[4];
+      static kmp_setting_t *volatile rivals[3];
       int i = 0;
 
       rivals[i++] = kmp_device_thread_limit;
       rivals[i++] = kmp_all_threads;
-      rivals[i++] = omp_thread_limit;
       rivals[i++] = NULL;
 
       kmp_device_thread_limit->data = CCAST(kmp_setting_t **, rivals);
       kmp_all_threads->data = CCAST(kmp_setting_t **, rivals);
-      omp_thread_limit->data = CCAST(kmp_setting_t **, rivals);
     }
 
 #if KMP_AFFINITY_SUPPORTED

Added: openmp/trunk/runtime/test/env/omp_thread_limit.c
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/runtime/test/env/omp_thread_limit.c?rev=309319&view=auto
==============================================================================
--- openmp/trunk/runtime/test/env/omp_thread_limit.c (added)
+++ openmp/trunk/runtime/test/env/omp_thread_limit.c Thu Jul 27 13:58:41 2017
@@ -0,0 +1,82 @@
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=4 %libomp-run 4
+// RUN: %libomp-compile && env OMP_THREAD_LIMIT=7 %libomp-run 7
+//
+// OMP_THREAD_LIMIT=N should imply that no more than N threads are active in
+// a contention group
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include "omp_testsuite.h"
+
+int failed = 0;
+
+void usage() {
+    fprintf(stderr, "usage: omp_thread_limit <n>\n");
+}
+
+void verify(const char* file_name, int line_number, int team_size) {
+  int num_threads = omp_get_num_threads();
+  if (team_size != num_threads) {
+#pragma omp critical(A)
+    {
+      char label[256];
+      snprintf(label, sizeof(label), "%s:%d", file_name, line_number);
+      failed = 1;
+      printf("failed: %s: team_size(%d) != omp_get_num_threads(%d)\n",
+             label, team_size, num_threads);
+    }
+  }
+}
+
+int main(int argc, char** argv)
+{
+  int cl_thread_limit;
+
+  if (argc != 2) {
+    usage();
+    return 1;
+  }
+  cl_thread_limit = atoi(argv[1]);
+
+  omp_set_dynamic(0);
+  if (omp_get_thread_limit() != cl_thread_limit) {
+    fprintf(stderr, "omp_get_thread_limit failed with %d, should be%d\n",
+            omp_get_thread_limit(), cl_thread_limit);
+    return 1;
+  }
+  else if (omp_get_max_threads() > cl_thread_limit) {
+#if _OPENMP
+    int team_size = cl_thread_limit;
+#else
+    int team_size = 1;
+#endif
+    omp_set_num_threads(19);
+    verify(__FILE__, __LINE__, 1);
+#pragma omp parallel
+    {
+      verify(__FILE__, __LINE__, team_size);
+      verify(__FILE__, __LINE__, team_size);
+    }
+    verify(__FILE__, __LINE__, 1);
+
+    omp_set_nested(1);
+#pragma omp parallel num_threads(3)
+    {
+      verify(__FILE__, __LINE__, 3);
+#pragma omp master
+#pragma omp parallel num_threads(21)
+      {
+        verify(__FILE__, __LINE__, team_size-2);
+        verify(__FILE__, __LINE__, team_size-2);
+      }
+    }
+    verify(__FILE__, __LINE__, 1);
+
+    return failed;
+  } else {
+    fprintf(stderr, "This test is not applicable for max num_threads='%d'\n",
+            omp_get_max_threads());
+    return 0;
+  }
+
+}




More information about the Openmp-commits mailing list