[Openmp-commits] [openmp] [OpenMP] Add num_threads clause list format and strict modifier support (PR #85466)

Terry Wilmarth via Openmp-commits openmp-commits at lists.llvm.org
Fri Mar 29 19:21:13 PDT 2024


https://github.com/TerryLWilmarth updated https://github.com/llvm/llvm-project/pull/85466

>From dad0577dc49e8077b924b33a84ed0312c40ba431 Mon Sep 17 00:00:00 2001
From: Terry Wilmarth <terry.l.wilmarth at intel.com>
Date: Fri, 15 Mar 2024 15:51:49 -0500
Subject: [PATCH 1/4] Add num_threads clause list format and strict modifier
 support

Add runtime support for the OpenMP 6.0 spec features that allow the
num_threads clause to take a list of values and to use the strict
modifier. Provides new compiler interface functions for these
features.
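
For illustration only (this lowering is a sketch, not the output of any
current compiler; the clause spelling follows the 6.0 draft as used in
the test comments below), a construct like

  #pragma omp parallel num_threads(strict: 4, 2) severity(warning) \
      message("could not create the requested team")

could be lowered onto the new entry points roughly as:

  kmp_int32 nth_list[2] = {4, 2};  // one entry per nesting level
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  // severity 1 == warning, matching the values used by the tests below
  __kmpc_push_num_threads_list_strict(&loc, gtid, /*list_length=*/2, nth_list,
                                      /*severity=*/1,
                                      "could not create the requested team");
  // ...followed by the usual __kmpc_fork_call() of the outlined body.

Here loc, gtid, and the message text stand in for the usual
compiler-generated values.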
---
 openmp/runtime/src/kmp.h            |  46 +++++++--
 openmp/runtime/src/kmp_csupport.cpp |  44 +++++++++
 openmp/runtime/src/kmp_runtime.cpp  | 142 +++++++++++++++++++++++++---
 3 files changed, 212 insertions(+), 20 deletions(-)

diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 121e7e959129ea..7397244a16f606 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -532,6 +532,19 @@ enum clock_function_type {
 enum mic_type { non_mic, mic1, mic2, mic3, dummy };
 #endif
 
+// OpenMP 3.1 - Nested num threads array
+typedef struct kmp_nested_nthreads_t {
+  int *nth;
+  int size;
+  int used;
+  bool strict; // num_threads clause has strict modifier
+  ident_t *loc; // loc for strict modifier
+  int sev; // error severity for strict modifier
+  const char *msg; // error message for strict modifier
+} kmp_nested_nthreads_t;
+
+extern kmp_nested_nthreads_t __kmp_nested_nth;
+
 /* -- fast reduction stuff ------------------------------------------------ */
 
 #undef KMP_FAST_REDUCTION_BARRIER
@@ -2958,6 +2971,12 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
   /* The data set by the primary thread at reinit, then R/W by the worker */
   KMP_ALIGN_CACHE int
       th_set_nproc; /* if > 0, then only use this request for the next fork */
+  int *th_set_nested_nth;
+  bool th_nt_strict; // num_threads clause has strict modifier
+  ident_t *th_nt_loc; // loc for strict modifier
+  int th_nt_sev; // error severity for strict modifier
+  const char *th_nt_msg; // error message for strict modifier
+  int th_set_nested_nth_sz;
 #if KMP_NESTED_HOT_TEAMS
   kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */
 #endif
@@ -3202,6 +3221,7 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
   void *t_stack_id; // team specific stack stitching id (for ittnotify)
 #endif /* USE_ITT_BUILD */
   distributedBarrier *b; // Distributed barrier data associated with team
+  kmp_nested_nthreads_t *t_nested_nth;
 } kmp_base_team_t;
 
 union KMP_ALIGN_CACHE kmp_team {
@@ -3532,15 +3552,6 @@ extern enum mic_type __kmp_mic_type;
 extern double __kmp_load_balance_interval; // load balance algorithm interval
 #endif /* USE_LOAD_BALANCE */
 
-// OpenMP 3.1 - Nested num threads array
-typedef struct kmp_nested_nthreads_t {
-  int *nth;
-  int size;
-  int used;
-} kmp_nested_nthreads_t;
-
-extern kmp_nested_nthreads_t __kmp_nested_nth;
-
 #if KMP_USE_ADAPTIVE_LOCKS
 
 // Parameters for the speculative lock backoff system.
@@ -3775,6 +3786,11 @@ extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);
   ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)
 
 extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);
+extern void __kmp_push_num_threads_list(ident_t *loc, int gtid,
+                                        kmp_uint32 list_length,
+                                        int *num_threads_list);
+extern void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
+                                         const char *msg);
 
 extern void __kmp_push_proc_bind(ident_t *loc, int gtid,
                                  kmp_proc_bind_t proc_bind);
@@ -4403,6 +4419,18 @@ KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);
 KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);
 KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                                         kmp_int32 num_threads);
+KMP_EXPORT void __kmpc_push_num_threads_strict(ident_t *loc,
+                                               kmp_int32 global_tid,
+                                               kmp_int32 num_threads,
+                                               int severity,
+                                               const char *message);
+
+KMP_EXPORT void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
+                                             kmp_uint32 list_length,
+                                             kmp_int32 *num_threads_list);
+KMP_EXPORT void __kmpc_push_num_threads_list_strict(
+    ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length,
+    kmp_int32 *num_threads_list, int severity, const char *message);
 
 KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                                       int proc_bind);
diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 878e78b5c7ad2d..0895ff6288f55c 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -236,6 +236,50 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
   __kmp_push_num_threads(loc, global_tid, num_threads);
 }
 
+void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
+                                    kmp_int32 num_threads, int severity,
+                                    const char *message) {
+  __kmp_push_num_threads(loc, global_tid, num_threads);
+  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
+}
+
+/*!
+ at ingroup PARALLEL
+ at param loc source location information
+ at param global_tid global thread number
+ at param list_length number of entries in the num_threads_list array
+ at param num_threads_list array of numbers of threads requested for this parallel
+construct and subsequent nested parallel constructs
+
+Set the number of threads to be used by the next fork spawned by this thread
+and by nested forks at deeper levels, one list entry per nesting level.
+This call is only required if the parallel construct has a `num_threads` clause
+that has a list of integers as the argument.
+*/
+void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
+                                  kmp_uint32 list_length,
+                                  kmp_int32 *num_threads_list) {
+  KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
+                global_tid));
+  KA_TRACE(20, ("%d", num_threads_list[0]));
+#ifdef KMP_DEBUG
+  for (kmp_uint32 i = 1; i < list_length; ++i)
+    KA_TRACE(20, (", %d", num_threads_list[i]));
+#endif
+  KA_TRACE(20, ("\n"));
+
+  __kmp_assert_valid_gtid(global_tid);
+  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
+}
+
+void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
+                                         kmp_uint32 list_length,
+                                         kmp_int32 *num_threads_list,
+                                         int severity, const char *message) {
+  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
+  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
+}
+
 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
   KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
   /* the num_threads are automatically popped */
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 7edb0b440acc7f..11dcb7e8c1c015 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -113,6 +113,25 @@ void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                                int new_nthreads);
 void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);
 
+static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
+                                                        int level) {
+  kmp_nested_nthreads_t *new_nested_nth =
+      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
+          sizeof(kmp_nested_nthreads_t));
+  int new_size = level + thr->th.th_set_nested_nth_sz;
+  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
+  for (int i = 0; i < level + 1; ++i)
+    new_nested_nth->nth[i] = 0;
+  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
+    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
+  new_nested_nth->size = new_nested_nth->used = new_size;
+  new_nested_nth->strict = thr->th.th_nt_strict;
+  new_nested_nth->loc = thr->th.th_nt_loc;
+  new_nested_nth->sev = thr->th.th_nt_sev;
+  new_nested_nth->msg = thr->th.th_nt_msg;
+  return new_nested_nth;
+}
+
 /* Calculate the identifier of the current thread */
 /* fast (and somewhat portable) way to get unique identifier of executing
    thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
@@ -930,6 +949,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                   __kmp_get_gtid(), new_nthreads, set_nthreads));
   }
 #endif // KMP_DEBUG
+
+  if ((this_thr->th.th_nt_strict || parent_team->t.t_nested_nth->strict) &&
+      new_nthreads < set_nthreads) {
+    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
+                 this_thr->th.th_nt_msg);
+  }
   return new_nthreads;
 }
 
@@ -1242,6 +1267,10 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     serial_team->t.t_serialized = 1;
     serial_team->t.t_nproc = 1;
     serial_team->t.t_parent = this_thr->th.th_team;
+    if (this_thr->th.th_team->t.t_nested_nth)
+      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
+    else
+      serial_team->t.t_nested_nth = &__kmp_nested_nth;
     serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
     this_thr->th.th_team = serial_team;
     serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
@@ -1261,9 +1290,11 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
 
     // Thread value exists in the nested nthreads array for the next nested
     // level
-    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
-      this_thr->th.th_current_task->td_icvs.nproc =
-          __kmp_nested_nth.nth[level + 1];
+    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
+    if (this_thr->th.th_team->t.t_nested_nth)
+      nested_nth = this_thr->th.th_team->t.t_nested_nth;
+    if (nested_nth->used && (level + 1 < nested_nth->used)) {
+      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
     }
 
     if (__kmp_nested_proc_bind.used &&
@@ -1312,10 +1343,14 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     int level = this_thr->th.th_team->t.t_level;
     // Thread value exists in the nested nthreads array for the next nested
     // level
-    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
-      this_thr->th.th_current_task->td_icvs.nproc =
-          __kmp_nested_nth.nth[level + 1];
+
+    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
+    if (serial_team->t.t_nested_nth)
+      nested_nth = serial_team->t.t_nested_nth;
+    if (nested_nth->used && (level + 1 < nested_nth->used)) {
+      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
     }
+
     serial_team->t.t_level++;
     KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                   "of serial team %p to %d\n",
@@ -2074,9 +2109,18 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
     // See if we need to make a copy of the ICVs.
     int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
-    if ((level + 1 < __kmp_nested_nth.used) &&
-        (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
-      nthreads_icv = __kmp_nested_nth.nth[level + 1];
+    kmp_nested_nthreads_t *nested_nth = NULL;
+    if (!master_th->th.th_set_nested_nth &&
+        (level + 1 < parent_team->t.t_nested_nth->used) &&
+        (parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {
+      nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];
+    } else if (master_th->th.th_set_nested_nth) {
+      nested_nth = __kmp_override_nested_nth(master_th, level);
+      if ((level + 1 < nested_nth->used) &&
+          (nested_nth->nth[level + 1] != nthreads_icv))
+        nthreads_icv = nested_nth->nth[level + 1];
+      else
+        nthreads_icv = 0; // don't update
     } else {
       nthreads_icv = 0; // don't update
     }
@@ -2185,6 +2229,24 @@ int __kmp_fork_call(ident_t *loc, int gtid,
     KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
     KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);
 
+    // Check if hot team has potentially outdated list, and if so, free it
+    if (team->t.t_nested_nth &&
+        team->t.t_nested_nth != parent_team->t.t_nested_nth) {
+      KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
+      KMP_INTERNAL_FREE(team->t.t_nested_nth);
+      team->t.t_nested_nth = NULL;
+    }
+    team->t.t_nested_nth = parent_team->t.t_nested_nth;
+    if (master_th->th.th_set_nested_nth) {
+      if (!nested_nth)
+        nested_nth = __kmp_override_nested_nth(master_th, level);
+      team->t.t_nested_nth = nested_nth;
+      KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);
+      master_th->th.th_set_nested_nth = NULL;
+      master_th->th.th_set_nested_nth_sz = 0;
+      master_th->th.th_nt_strict = false;
+    }
+
     // Update the floating point rounding in the team if required.
     propagateFPControl(team);
 #if OMPD_SUPPORT
@@ -3390,6 +3452,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
   root_team->t.t_serialized = 1;
   // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
   root_team->t.t_sched.sched = r_sched.sched;
+  root_team->t.t_nested_nth = &__kmp_nested_nth;
   KA_TRACE(
       20,
       ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
@@ -3427,6 +3490,7 @@ static void __kmp_initialize_root(kmp_root_t *root) {
   // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;
   hot_team->t.t_sched.sched = r_sched.sched;
   hot_team->t.t_size_changed = 0;
+  hot_team->t.t_nested_nth = &__kmp_nested_nth;
 }
 
 #ifdef KMP_DEBUG
@@ -4293,6 +4357,7 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
   else // no tasking --> always safe to reap
     this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
   this_thr->th.th_set_proc_bind = proc_bind_default;
+
 #if KMP_AFFINITY_SUPPORTED
   this_thr->th.th_new_place = this_thr->th.th_current_place;
 #endif
@@ -4556,6 +4621,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
   /* allocate space for it. */
   new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
 
+  new_thr->th.th_nt_strict = false;
+  new_thr->th.th_nt_loc = NULL;
+  new_thr->th.th_nt_sev = severity_fatal;
+  new_thr->th.th_nt_msg = NULL;
+
   TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
 
 #if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG
@@ -4666,6 +4736,9 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
   new_thr->th.th_active_in_pool = FALSE;
   TCW_4(new_thr->th.th_active, TRUE);
 
+  new_thr->th.th_set_nested_nth = NULL;
+  new_thr->th.th_set_nested_nth_sz = 0;
+
   /* adjust the global counters */
   __kmp_all_nth++;
   __kmp_nth++;
@@ -5456,7 +5529,6 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       }
     } // Check changes in number of threads
 
-    kmp_info_t *master = team->t.t_threads[0];
     if (master->th.th_teams_microtask) {
       for (f = 1; f < new_nproc; ++f) {
         // propagate teams construct specific info to workers
@@ -5562,6 +5634,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
       __ompt_team_assign_id(team, ompt_parallel_data);
 #endif
 
+      team->t.t_nested_nth = NULL;
+
       KMP_MB();
 
       return team;
@@ -5633,6 +5707,8 @@ __kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,
 
   KMP_MB();
 
+  team->t.t_nested_nth = NULL;
+
   KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",
                 team->t.t_id));
 
@@ -5735,6 +5811,14 @@ void __kmp_free_team(kmp_root_t *root,
       }
     }
 
+    // Before clearing parent pointer, check if nested_nth list should be freed
+    if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&
+        team->t.t_nested_nth != team->t.t_parent->t.t_nested_nth) {
+      KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);
+      KMP_INTERNAL_FREE(team->t.t_nested_nth);
+    }
+    team->t.t_nested_nth = NULL;
+
     // Reset pointer to parent team only for non-hot teams.
     team->t.t_parent = NULL;
     team->t.t_level = 0;
@@ -7837,7 +7921,6 @@ int __kmp_invoke_teams_master(int gtid) {
    encountered by this team. since this should be enclosed in the forkjoin
    critical section it should avoid race conditions with asymmetrical nested
    parallelism */
-
 void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
   kmp_info_t *thr = __kmp_threads[gtid];
 
@@ -7845,6 +7928,39 @@ void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
     thr->th.th_set_nproc = num_threads;
 }
 
+void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,
+                                 int *num_threads_list) {
+  kmp_info_t *thr = __kmp_threads[gtid];
+
+  KMP_DEBUG_ASSERT(list_length > 1);
+
+  if (num_threads_list[0] > 0)
+    thr->th.th_set_nproc = num_threads_list[0];
+  thr->th.th_set_nested_nth =
+      (int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));
+  for (kmp_uint32 i = 0; i < list_length; ++i)
+    thr->th.th_set_nested_nth[i] = num_threads_list[i];
+  thr->th.th_set_nested_nth_sz = list_length;
+}
+
+void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,
+                                  const char *msg) {
+  kmp_info_t *thr = __kmp_threads[gtid];
+  thr->th.th_nt_strict = true;
+  thr->th.th_nt_loc = loc;
+  // if sev is unset make fatal
+  if (sev == severity_warning)
+    thr->th.th_nt_sev = sev;
+  else
+    thr->th.th_nt_sev = severity_fatal;
+  // if msg is unset, use an appropriate message
+  if (msg)
+    thr->th.th_nt_msg = msg;
+  else
+    thr->th.th_nt_msg = "Cannot form team with number of threads specified by "
+                        "strict num_threads clause.";
+}
+
 static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,
                                     int num_threads) {
   KMP_DEBUG_ASSERT(thr);
@@ -8301,6 +8417,10 @@ void __kmp_cleanup(void) {
   __kmp_nested_nth.nth = NULL;
   __kmp_nested_nth.size = 0;
   __kmp_nested_nth.used = 0;
+  __kmp_nested_nth.strict = false;
+  __kmp_nested_nth.loc = NULL;
+  __kmp_nested_nth.sev = 0;
+  __kmp_nested_nth.msg = NULL;
   KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
   __kmp_nested_proc_bind.bind_types = NULL;
   __kmp_nested_proc_bind.size = 0;

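The index bookkeeping in __kmp_override_nested_nth is easiest to follow
with a small hypothetical trace (values are illustrative, not from a
real run): assume the encountering thread pushed the list {3, 3} via
__kmp_push_num_threads_list and is now forking a team at level 0.

  // thr->th.th_set_nested_nth    = {3, 3}  (copied by __kmp_push_num_threads_list)
  // thr->th.th_set_nested_nth_sz = 2
  // level                        = 0       (outermost parallel region)
  //
  // new_size = level + th_set_nested_nth_sz = 2
  // nth[0]   = 0                           // entries up to 'level' stay 0; list entry 0
  //                                        // was already applied through th_set_nproc
  // nth[1]   = th_set_nested_nth[1] = 3    // takes effect at the next nested level
  // size = used = 2; strict/loc/sev/msg are copied from the thread
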
>From 47155e3a2b5ff3fab61309f0377b6ddef5e97819 Mon Sep 17 00:00:00 2001
From: Terry Wilmarth <terry.l.wilmarth at intel.com>
Date: Tue, 19 Mar 2024 10:15:02 -0500
Subject: [PATCH 2/4] Add dllexports.

---
 openmp/runtime/src/dllexports | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/openmp/runtime/src/dllexports b/openmp/runtime/src/dllexports
index 0d49643709e0a0..747b8280931568 100644
--- a/openmp/runtime/src/dllexports
+++ b/openmp/runtime/src/dllexports
@@ -1268,6 +1268,11 @@ kmp_set_disp_num_buffers                    890
     __kmpc_atomic_val_8_cas_cpt            2158
     %endif
 
+    # Ordinal numbers are no longer required for new entry points
+    __kmpc_push_num_threads_list
+    __kmpc_push_num_threads_strict
+    __kmpc_push_num_threads_list_strict
+
 %endif
 
 __kmpc_set_thread_limit

>From e34c994d1b16d369dde5b210f44bd16416137929 Mon Sep 17 00:00:00 2001
From: Terry Wilmarth <terry.l.wilmarth at intel.com>
Date: Fri, 29 Mar 2024 21:04:35 -0500
Subject: [PATCH 3/4] Add tests.

---
 .../parallel/omp_parallel_num_threads_list.c  | 209 ++++++++++++++++++
 .../omp_parallel_num_threads_strict.c         |  99 +++++++++
 2 files changed, 308 insertions(+)
 create mode 100644 openmp/runtime/test/parallel/omp_parallel_num_threads_list.c
 create mode 100644 openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c

diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c
new file mode 100644
index 00000000000000..1c1771c255b317
--- /dev/null
+++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_list.c
@@ -0,0 +1,209 @@
+// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// When compiler supports num_threads clause list format, remove the following
+// and use num_threads clause directly
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+int __kmpc_global_thread_num(void *loc);
+void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, int *list);
+
+#if defined(__cplusplus)
+}
+#endif
+
+int test_omp_parallel_num_threads_list()
+{
+  int num_failed = 0;
+
+  // Initially, 5 levels specified via OMP_NUM_THREADS with 2 threads per level
+  // Check top 3 levels
+#pragma omp parallel reduction(+:num_failed) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 2);
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  // Make sure that basic single element num_threads clause works
+#pragma omp parallel reduction(+:num_failed) num_threads(4) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 4);
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  // Check that basic single element num_threads clause works on second level
+#pragma omp parallel reduction(+:num_failed) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+#pragma omp parallel reduction(+:num_failed) num_threads(4) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 4);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  // Try a short list. It should completely overwrite the old settings.
+  // We need to use the compiler interface for now.
+  int threads[2] = {3,3};
+  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads);
+#pragma omp parallel reduction(+:num_failed) //num_threads(3,3) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+        // NOTE: should just keep using last element in list, to nesting depth
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 3);
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  // Similar, but at a lower level.
+#pragma omp parallel reduction(+:num_failed) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+    int threads[2] = {3,3};
+    __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads);
+#pragma omp parallel reduction(+:num_failed) // num_threads(3,3) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+        // NOTE: just keep using last element in list, to nesting depth
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 3);
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+    // Make sure a second inner parallel is NOT affected by the clause
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        // NOTE: just keep using last element in list, to nesting depth
+        num_failed = num_failed + !(omp_get_num_threads() == 2); // Unaffected
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  // Test lists at multiple levels
+  int threads2[2] = {4,3};
+  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads2);
+#pragma omp parallel reduction(+:num_failed) // num_threads(4,3) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 4);
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 3);
+        int threads3[2] = {2,5};
+        __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2, threads3);
+#pragma omp parallel reduction(+:num_failed) //num_clause(2,5) // 4th level
+        {
+#pragma omp single
+          num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+:num_failed) // 5th level
+          {
+#pragma omp single
+            num_failed = num_failed + !(omp_get_num_threads() == 5);
+#pragma omp parallel reduction(+:num_failed) // 6th level
+            {
+#pragma omp single
+              num_failed = num_failed + !(omp_get_num_threads() == 5);
+            } // end 6th level parallel
+          } // end 5th level parallel
+        } // end 4th level parallel
+#pragma omp parallel reduction(+:num_failed) // 4th level
+        {
+#pragma omp single
+          num_failed = num_failed + !(omp_get_num_threads() == 3);
+        } // end 4th level parallel
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 3);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 3);
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  // Now we should be back to the way we started.
+#pragma omp parallel reduction(+:num_failed) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+:num_failed) // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel reduction(+:num_failed) // 3rd level
+      {
+#pragma omp single
+        num_failed = num_failed + !(omp_get_num_threads() == 2);
+      } // end 3rd level parallel
+    } // end 2nd level parallel
+  } // end 1st level parallel
+
+  return (!num_failed);
+}
+
+int main()
+{
+  int i;
+  int num_failed=0;
+
+  for(i = 0; i < REPETITIONS; i++) {
+    if(!test_omp_parallel_num_threads_list()) {
+      num_failed++;
+    }
+  }
+  return num_failed;
+}
diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
new file mode 100644
index 00000000000000..358ac915c1713f
--- /dev/null
+++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
@@ -0,0 +1,99 @@
+// RUN: %libomp-compile && env OMP_NUM_THREADS=2,2,2,2,2 OMP_THREAD_LIMIT=16 %libomp-run
+#include <stdio.h>
+#include "omp_testsuite.h"
+
+// When compiler supports num_threads clause list format and strict modifier,
+// remove the following and use num_threads clause directly
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+int __kmpc_global_thread_num(void *loc);
+void __kmpc_push_num_threads_list(void *loc, int gtid, unsigned length, int *list);
+void __kmpc_push_num_threads_strict(void *loc, int gtid, int nth, int sev, const char *msg);
+void __kmpc_push_num_threads_list_strict(void *loc, int gtid, unsigned length, int *list, int sev, const char *msg);
+
+#if defined(__cplusplus)
+}
+#endif
+
+int test_omp_parallel_num_threads_strict()
+{
+  int num_failed = 0;
+
+  // Test regular runtime warning about exceeding thread limit.
+  // Tolerate whatever value was given.
+#pragma omp parallel num_threads(22)
+#pragma omp single
+  num_failed = num_failed + !(omp_get_num_threads() <= 22);
+
+  // Test with 4 threads and strict -- no problem, no warning.
+  __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4,
+                                 1, "This warning shouldn't happen.");
+#pragma omp parallel //num_threads(strict:4)
+#pragma omp single
+  num_failed = num_failed + !(omp_get_num_threads() == 4);
+
+  // Exceed limit, specify user warning message. Tolerate whatever was given.
+  __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20,
+                                 1, "User-supplied warning for strict.");
+#pragma omp parallel //num_threads(strict:20) severity(warning)		\
+    message("User-supplied warning for strict.")
+#pragma omp single
+  num_failed = num_failed + !(omp_get_num_threads() <= 20);
+
+  // Exceed limit, no user message, use runtime default message for strict.
+  // Tolerate whatever value was given.
+  __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21,
+                                 1, NULL);
+#pragma omp parallel //num_threads(strict:21)
+#pragma omp single
+  num_failed = num_failed + !(omp_get_num_threads() <= 21);
+
+
+  // Exceed limit in nested level. Should see user warning message.
+  int threads3[2] = {2,24};
+  __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2,
+                                      threads3, 1,
+                                      "User-supplied warning on strict list.");
+#pragma omp parallel //num_threads(strict:2,24)  severity(warning)	\
+    message("User-supplied warning on strict. list") // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() <= 24);
+    }
+  }
+
+  // No strict limit in nested level. Regular runtime limiting applies.
+  __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
+                               threads3);
+#pragma omp parallel //num_threads(2,24) // 1st level
+  {
+#pragma omp single
+    num_failed = num_failed + !(omp_get_num_threads() == 2);
+#pragma omp parallel // 2nd level
+    {
+#pragma omp single
+      num_failed = num_failed + !(omp_get_num_threads() <= 24);
+    }
+  }
+
+  return (!num_failed);
+}
+
+int main()
+{
+  int i;
+  int num_failed=0;
+
+  for(i = 0; i < REPETITIONS; i++) {
+    if(!test_omp_parallel_num_threads_strict()) {
+      num_failed++;
+    }
+  }
+  return num_failed;
+}

>From 177f9eb335bcc5ffe8ac381381d0c3703d9548c2 Mon Sep 17 00:00:00 2001
From: Terry Wilmarth <terry.l.wilmarth at intel.com>
Date: Fri, 29 Mar 2024 21:18:18 -0500
Subject: [PATCH 4/4] Fix test.

---
 .../parallel/omp_parallel_num_threads_strict.c   | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
index 358ac915c1713f..6fccfa7f4e7646 100644
--- a/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
+++ b/openmp/runtime/test/parallel/omp_parallel_num_threads_strict.c
@@ -23,21 +23,21 @@ int test_omp_parallel_num_threads_strict()
 
   // Test regular runtime warning about exceeding thread limit.
   // Tolerate whatever value was given.
-#pragma omp parallel num_threads(22)
+#pragma omp parallel reduction(+:num_failed) num_threads(22)
 #pragma omp single
   num_failed = num_failed + !(omp_get_num_threads() <= 22);
 
   // Test with 4 threads and strict -- no problem, no warning.
   __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 4,
                                  1, "This warning shouldn't happen.");
-#pragma omp parallel //num_threads(strict:4)
+#pragma omp parallel reduction(+:num_failed) //num_threads(strict:4)
 #pragma omp single
   num_failed = num_failed + !(omp_get_num_threads() == 4);
 
   // Exceed limit, specify user warning message. Tolerate whatever was given.
   __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 20,
                                  1, "User-supplied warning for strict.");
-#pragma omp parallel //num_threads(strict:20) severity(warning)		\
+#pragma omp parallel reduction(+:num_failed) //num_threads(strict:20) severity(warning)		\
     message("User-supplied warning for strict.")
 #pragma omp single
   num_failed = num_failed + !(omp_get_num_threads() <= 20);
@@ -46,7 +46,7 @@ int test_omp_parallel_num_threads_strict()
   // Tolerate whatever value was given.
   __kmpc_push_num_threads_strict(NULL, __kmpc_global_thread_num(NULL), 21,
                                  1, NULL);
-#pragma omp parallel //num_threads(strict:21)
+#pragma omp parallel reduction(+:num_failed) //num_threads(strict:21)
 #pragma omp single
   num_failed = num_failed + !(omp_get_num_threads() <= 21);
 
@@ -56,12 +56,12 @@ int test_omp_parallel_num_threads_strict()
   __kmpc_push_num_threads_list_strict(NULL, __kmpc_global_thread_num(NULL), 2,
                                       threads3, 1,
                                       "User-supplied warning on strict list.");
-#pragma omp parallel //num_threads(strict:2,24)  severity(warning)	\
+#pragma omp parallel reduction(+:num_failed) //num_threads(strict:2,24)  severity(warning)	\
     message("User-supplied warning on strict. list") // 1st level
   {
 #pragma omp single
     num_failed = num_failed + !(omp_get_num_threads() == 2);
-#pragma omp parallel // 2nd level
+#pragma omp parallel reduction(+:num_failed) // 2nd level
     {
 #pragma omp single
       num_failed = num_failed + !(omp_get_num_threads() <= 24);
@@ -71,11 +71,11 @@ int test_omp_parallel_num_threads_strict()
   // No strict limit in nested level. Regular runtime limiting applies.
   __kmpc_push_num_threads_list(NULL, __kmpc_global_thread_num(NULL), 2,
                                threads3);
-#pragma omp parallel //num_threads(2,24) // 1st level
+#pragma omp parallel reduction(+:num_failed) //num_threads(2,24) // 1st level
   {
 #pragma omp single
     num_failed = num_failed + !(omp_get_num_threads() == 2);
-#pragma omp parallel // 2nd level
+#pragma omp parallel reduction(+:num_failed) // 2nd level
     {
 #pragma omp single
       num_failed = num_failed + !(omp_get_num_threads() <= 24);


