[Openmp-commits] [openmp] ea34d95 - [OpenMP] Introduce GOMP teams support in runtime

via Openmp-commits openmp-commits at lists.llvm.org
Thu Sep 24 07:50:10 PDT 2020


Author: Peyton, Jonathan L
Date: 2020-09-24T09:45:13-05:00
New Revision: ea34d95e0ad664fa879bb1d8b71f32928b1d6c0f

URL: https://github.com/llvm/llvm-project/commit/ea34d95e0ad664fa879bb1d8b71f32928b1d6c0f
DIFF: https://github.com/llvm/llvm-project/commit/ea34d95e0ad664fa879bb1d8b71f32928b1d6c0f.diff

LOG: [OpenMP] Introduce GOMP teams support in runtime

Implement GOMP_teams_reg() function which enables GOMP support of the
standalone teams construct. The GOMP_parallel* functions were modified
to call __kmp_fork_call() unconditionally so that the teams-specific
code could be reused within __kmp_fork_call() instead of reproduced
inside the GOMP_* functions.

Differential Revision: https://reviews.llvm.org/D87167

Added: 
    openmp/runtime/test/teams/teams.c

Modified: 
    openmp/runtime/src/kmp_ftn_os.h
    openmp/runtime/src/kmp_gsupport.cpp
    openmp/runtime/src/kmp_runtime.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h
index 22fb2bb2f5ca..d8fdd83f29e4 100644
--- a/openmp/runtime/src/kmp_ftn_os.h
+++ b/openmp/runtime/src/kmp_ftn_os.h
@@ -679,5 +679,6 @@
   GOMP_parallel_loop_nonmonotonic_runtime
 #define KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME             \
   GOMP_parallel_loop_maybe_nonmonotonic_runtime
+#define KMP_API_NAME_GOMP_TEAMS_REG GOMP_teams_reg
 
 #endif /* KMP_FTN_OS_H */

diff  --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp
index e57cfc37ec09..f4f199411491 100644
--- a/openmp/runtime/src/kmp_gsupport.cpp
+++ b/openmp/runtime/src/kmp_gsupport.cpp
@@ -361,12 +361,9 @@ static
 #endif
 }
 
-#ifndef KMP_DEBUG
-static
-#endif /* KMP_DEBUG */
-    void
-    __kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *),
-                         microtask_t wrapper, int argc, ...) {
+static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads,
+                                 unsigned flags, void (*unwrapped_task)(void *),
+                                 microtask_t wrapper, int argc, ...) {
   int rc;
   kmp_info_t *thr = __kmp_threads[gtid];
   kmp_team_t *team = thr->th.th_team;
@@ -375,6 +372,10 @@ static
   va_list ap;
   va_start(ap, argc);
 
+  if (num_threads != 0)
+    __kmp_push_num_threads(loc, gtid, num_threads);
+  if (flags != 0)
+    __kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags);
   rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,
                        __kmp_invoke_task_func, kmp_va_addr_of(ap));
 
@@ -403,14 +404,6 @@ static
 #endif
 }
 
-static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid,
-                                           void (*task)(void *)) {
-#if OMPT_SUPPORT
-  OMPT_STORE_RETURN_ADDRESS(gtid);
-#endif
-  __kmp_serialized_parallel(loc, gtid);
-}
-
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
                                                        void *data,
                                                        unsigned num_threads) {
@@ -428,18 +421,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
 
   MKLOC(loc, "GOMP_parallel_start");
   KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));
-
-  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
-    if (num_threads != 0) {
-      __kmp_push_num_threads(&loc, gtid, num_threads);
-    }
-    __kmp_GOMP_fork_call(&loc, gtid, task,
-                         (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
-                         data);
-  } else {
-    __kmp_GOMP_serialized_parallel(&loc, gtid, task);
-  }
-
+  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
+                       (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
+                       data);
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
@@ -460,25 +444,22 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
   if (!thr->th.th_team->t.t_serialized) {
     __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,
                                  thr->th.th_team);
-
+  }
 #if OMPT_SUPPORT
-    if (ompt_enabled.enabled) {
-      // Implicit task is finished here, in the barrier we might schedule
-      // deferred tasks,
-      // these don't see the implicit task on the stack
-      OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
-    }
+  if (ompt_enabled.enabled) {
+    // Implicit task is finished here, in the barrier we might schedule
+    // deferred tasks,
+    // these don't see the implicit task on the stack
+    OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
+  }
 #endif
 
-    __kmp_join_call(&loc, gtid
+  __kmp_join_call(&loc, gtid
 #if OMPT_SUPPORT
-                    ,
-                    fork_context_gnu
+                  ,
+                  fork_context_gnu
 #endif
-                    );
-  } else {
-    __kmpc_end_serialized_parallel(&loc, gtid);
-  }
+                  );
 }
 
 // Loop worksharing constructs
@@ -1073,19 +1054,11 @@ LOOP_DOACROSS_RUNTIME_START_ULL(
                                                                                \
     ompt_pre();                                                                \
                                                                                \
-    if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {                       \
-      if (num_threads != 0) {                                                  \
-        __kmp_push_num_threads(&loc, gtid, num_threads);                       \
-      }                                                                        \
-      __kmp_GOMP_fork_call(&loc, gtid, task,                                   \
-                           (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
-                           9, task, data, num_threads, &loc, (schedule), lb,   \
-                           (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);    \
-      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                        \
-    } else {                                                                   \
-      __kmp_GOMP_serialized_parallel(&loc, gtid, task);                        \
-      IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                        \
-    }                                                                          \
+    __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,                    \
+                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,   \
+                         9, task, data, num_threads, &loc, (schedule), lb,     \
+                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);      \
+    IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid));                          \
                                                                                \
     KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                              \
                       (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz,          \
@@ -1332,17 +1305,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
   MKLOC(loc, "GOMP_parallel_sections_start");
   KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));
 
-  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
-    if (num_threads != 0) {
-      __kmp_push_num_threads(&loc, gtid, num_threads);
-    }
-    __kmp_GOMP_fork_call(&loc, gtid, task,
-                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
-                         task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
-                         (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
-  } else {
-    __kmp_GOMP_serialized_parallel(&loc, gtid, task);
-  }
+  __kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,
+                       (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
+                       task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
+                       (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
 
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
@@ -1403,19 +1369,9 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
     OMPT_STORE_RETURN_ADDRESS(gtid);
   }
 #endif
-  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
-    if (num_threads != 0) {
-      __kmp_push_num_threads(&loc, gtid, num_threads);
-    }
-    if (flags != 0) {
-      __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
-    }
-    __kmp_GOMP_fork_call(&loc, gtid, task,
-                         (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
-                         data);
-  } else {
-    __kmp_GOMP_serialized_parallel(&loc, gtid, task);
-  }
+  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
+                       (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,
+                       data);
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     task_info = __ompt_get_task_info_object(0);
@@ -1450,20 +1406,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
   OMPT_STORE_RETURN_ADDRESS(gtid);
 #endif
 
-  if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {
-    if (num_threads != 0) {
-      __kmp_push_num_threads(&loc, gtid, num_threads);
-    }
-    if (flags != 0) {
-      __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);
-    }
-    __kmp_GOMP_fork_call(&loc, gtid, task,
-                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
-                         task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
-                         (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
-  } else {
-    __kmp_GOMP_serialized_parallel(&loc, gtid, task);
-  }
+  __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,
+                       (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,
+                       task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
+                       (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);
 
 #if OMPT_SUPPORT
   OMPT_STORE_RETURN_ADDRESS(gtid);
@@ -1488,20 +1434,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
          gtid, lb, ub, str, chunk_sz));                                        \
                                                                                \
     ompt_pre();                                                                \
-    if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) {                       \
-      if (num_threads != 0) {                                                  \
-        __kmp_push_num_threads(&loc, gtid, num_threads);                       \
-      }                                                                        \
-      if (flags != 0) {                                                        \
-        __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags);              \
-      }                                                                        \
-      __kmp_GOMP_fork_call(&loc, gtid, task,                                   \
-                           (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \
-                           9, task, data, num_threads, &loc, (schedule), lb,   \
-                           (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);    \
-    } else {                                                                   \
-      __kmp_GOMP_serialized_parallel(&loc, gtid, task);                        \
-    }                                                                          \
+    __kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,                 \
+                         (microtask_t)__kmp_GOMP_parallel_microtask_wrapper,   \
+                         9, task, data, num_threads, &loc, (schedule), lb,     \
+                         (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz);      \
                                                                                \
     IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);)                          \
     KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb,                              \
@@ -1856,6 +1792,25 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(
   va_end(args);
 }
 
+// fn: the function each master thread of new team will call
+// data: argument to fn
+// num_teams, thread_limit: max bounds on respective ICV
+// flags: unused
+void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *),
+                                                  void *data,
+                                                  unsigned num_teams,
+                                                  unsigned thread_limit,
+                                                  unsigned flags) {
+  MKLOC(loc, "GOMP_teams_reg");
+  int gtid = __kmp_entry_gtid();
+  KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n",
+                gtid, num_teams, thread_limit, flags));
+  __kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit);
+  __kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn,
+                    data);
+  KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid));
+}
+
 /* The following sections of code create aliases for the GOMP_* functions, then
    create versioned symbols using the assembler directive .symver. This is only
    pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in
@@ -2027,6 +1982,7 @@ KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,
                    "GOMP_5.0");
 KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,
                    50, "GOMP_5.0");
+KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");
 
 #endif // KMP_USE_VERSION_SYMBOLS
 

diff  --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index d5cf7509306a..b8337fe27bf7 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -1506,6 +1506,13 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         __kmpc_serialized_parallel(loc, gtid);
         KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
 
+        if (call_context == fork_context_gnu) {
+          // AC: need to decrement t_serialized for enquiry functions to work
+          // correctly, will restore at join time
+          parent_team->t.t_serialized--;
+          return TRUE;
+        }
+
 #if OMPT_SUPPORT
         void *dummy;
         void **exit_frame_p;
@@ -1638,6 +1645,9 @@ int __kmp_fork_call(ident_t *loc, int gtid,
                     "master_th=%p, gtid=%d\n",
                     root, parent_team, master_th, gtid));
 
+      if (call_context == fork_context_gnu)
+        return TRUE;
+
       /* Invoke microtask for MASTER thread */
       KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                     parent_team->t.t_id, parent_team->t.t_pkfn));
@@ -2293,7 +2303,11 @@ void __kmp_join_call(ident_t *loc, int gtid
 
 #if OMPT_SUPPORT
   void *team_microtask = (void *)team->t.t_pkfn;
-  if (ompt_enabled.enabled) {
+  // For GOMP interface with serialized parallel, need the
+  // __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task
+  // and end-parallel events.
+  if (ompt_enabled.enabled &&
+      !(team->t.t_serialized && fork_context == fork_context_gnu)) {
     master_th->th.ompt_thread_info.state = ompt_state_overhead;
   }
 #endif

diff  --git a/openmp/runtime/test/teams/teams.c b/openmp/runtime/test/teams/teams.c
new file mode 100644
index 000000000000..bc009346a05e
--- /dev/null
+++ b/openmp/runtime/test/teams/teams.c
@@ -0,0 +1,57 @@
+// RUN: %libomp-compile-and-run
+// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7, gcc-8
+// UNSUPPORTED: icc, clang
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+#define NUM_TEAMS 2
+#define NUM_THREADS_PER_TEAM 3
+
+int main(int argc, char** argv) {
+  #pragma omp teams num_teams(NUM_TEAMS)
+  {
+    int i;
+    int members[NUM_THREADS_PER_TEAM];
+    // Only an upper bound is guaranteed for number of teams
+    int nteams = omp_get_num_teams();
+    if (nteams > NUM_TEAMS) {
+      fprintf(stderr, "error: too many teams: %d\n", nteams);
+      exit(1);
+    }
+    for (i = 0; i < NUM_THREADS_PER_TEAM; ++i)
+      members[i] = -1;
+    #pragma omp parallel num_threads(NUM_THREADS_PER_TEAM) private(i)
+    {
+      int tid = omp_get_thread_num();
+      int team_id = omp_get_team_num();
+      int nthreads = omp_get_num_threads();
+      if (nthreads != NUM_THREADS_PER_TEAM) {
+        fprintf(stderr, "error: detected number of threads (%d) is not %d\n",
+                nthreads, NUM_THREADS_PER_TEAM);
+        exit(1);
+      }
+      if (tid < 0 || tid >= nthreads) {
+        fprintf(stderr, "error: thread id is out of range: %d\n", tid);
+        exit(1);
+      }
+      if (team_id < 0 || team_id > omp_get_num_teams()) {
+        fprintf(stderr, "error: team id is out of range: %d\n", team_id);
+        exit(1);
+      }
+      members[omp_get_thread_num()] = 1;
+      #pragma omp barrier
+      #pragma omp single
+      {
+        for (i = 0; i < NUM_THREADS_PER_TEAM; ++i) {
+          if (members[i] != 1) {
+            fprintf(stderr, "error: worker %d not flagged\n", i);
+            exit(1);
+          }
+        }
+      }
+    }
+  }
+  return 0;
+}


        


More information about the Openmp-commits mailing list