[Openmp-commits] [openmp] f61602b - [OpenMP][OMPD] Implementation of OMPD debugging library - libompd.

Vignesh Balasubramanian via Openmp-commits openmp-commits at lists.llvm.org
Tue Jun 8 04:15:05 PDT 2021


Author: Vignesh Balasubramanian
Date: 2021-06-08T16:44:22+05:30
New Revision: f61602b0d3fd3ff5b277dc44cf22cfb5356dee5c

URL: https://github.com/llvm/llvm-project/commit/f61602b0d3fd3ff5b277dc44cf22cfb5356dee5c
DIFF: https://github.com/llvm/llvm-project/commit/f61602b0d3fd3ff5b277dc44cf22cfb5356dee5c.diff

LOG: [OpenMP][OMPD] Implementation of OMPD debugging library - libompd.

This is the first of seven patches that implement OMPD, a debugging interface to support debugging of OpenMP programs.
It contains support code required in "openmp/runtime" for OMPD implementation.

Reviewed By: @hbae
Differential Revision: https://reviews.llvm.org/D100181

Added: 
    openmp/runtime/src/ompd-specific.cpp
    openmp/runtime/src/ompd-specific.h

Modified: 
    openmp/runtime/CMakeLists.txt
    openmp/runtime/src/CMakeLists.txt
    openmp/runtime/src/include/omp-tools.h.var
    openmp/runtime/src/kmp.h
    openmp/runtime/src/kmp_config.h.cmake
    openmp/runtime/src/kmp_csupport.cpp
    openmp/runtime/src/kmp_gsupport.cpp
    openmp/runtime/src/kmp_runtime.cpp
    openmp/runtime/src/kmp_settings.cpp
    openmp/runtime/src/kmp_settings.h
    openmp/runtime/src/kmp_tasking.cpp
    openmp/runtime/src/kmp_wait_release.h
    openmp/runtime/src/ompt-general.cpp
    openmp/runtime/src/ompt-specific.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/CMakeLists.txt b/openmp/runtime/CMakeLists.txt
index 8828ff8ef455d..90aab6da3f3f3 100644
--- a/openmp/runtime/CMakeLists.txt
+++ b/openmp/runtime/CMakeLists.txt
@@ -329,6 +329,20 @@ if(LIBOMP_TSAN_SUPPORT AND (NOT LIBOMP_HAVE_TSAN_SUPPORT))
   libomp_error_say("TSAN functionality requested but not available")
 endif()
 
+# OMPD-support
+# Enabled by default only when OMPT support is enabled and available on Linux
+set(OMPD_DEFAULT FALSE)
+if (LIBOMP_OMPT_SUPPORT AND LIBOMP_HAVE_OMPT_SUPPORT AND ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux"))
+  set(OMPD_DEFAULT TRUE)
+endif()
+
+set(LIBOMP_OMPD_SUPPORT ${OMPD_DEFAULT} CACHE BOOL
+  "OMPD-support?")
+# The default never trips this check; only an explicit, unsupported request does
+if(LIBOMP_OMPD_SUPPORT AND ((NOT LIBOMP_OMPT_SUPPORT) OR (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")))
+  libomp_error_say("OpenMP Debug Interface(OMPD) requested but not available in this implementation")
+endif()
+
 # Error check hwloc support after config-ix has run
 if(LIBOMP_USE_HWLOC AND (NOT LIBOMP_HAVE_HWLOC))
   libomp_error_say("Hwloc requested but not available")
@@ -389,6 +403,7 @@ if(${OPENMP_STANDALONE_BUILD})
   if(${LIBOMP_OMPT_SUPPORT})
     libomp_say("Use OMPT-optional  -- ${LIBOMP_OMPT_OPTIONAL}")
   endif()
+  libomp_say("Use OMPD-support     -- ${LIBOMP_OMPD_SUPPORT}")
   libomp_say("Use Adaptive locks   -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
   libomp_say("Use quad precision   -- ${LIBOMP_USE_QUAD_PRECISION}")
   libomp_say("Use TSAN-support     -- ${LIBOMP_TSAN_SUPPORT}")
@@ -401,3 +416,5 @@ add_subdirectory(test)
 # make these variables available for tools:
 set(LIBOMP_LIBRARY_DIR ${LIBOMP_LIBRARY_DIR} PARENT_SCOPE)
 set(LIBOMP_INCLUDE_DIR ${LIBOMP_INCLUDE_DIR} PARENT_SCOPE)
+# make this variable available for tools/libompd:
+set(LIBOMP_SRC_DIR ${LIBOMP_SRC_DIR} PARENT_SCOPE)

diff  --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index f075c67778fc8..fd2cd3cb11f9d 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -115,6 +115,7 @@ libomp_append(LIBOMP_CXXFILES kmp_ftn_extra.cpp)
 libomp_append(LIBOMP_CXXFILES kmp_version.cpp)
 libomp_append(LIBOMP_CXXFILES ompt-general.cpp IF_TRUE LIBOMP_OMPT_SUPPORT)
 libomp_append(LIBOMP_CXXFILES tsan_annotations.cpp IF_TRUE LIBOMP_TSAN_SUPPORT)
+libomp_append(LIBOMP_CXXFILES ompd-specific.cpp IF_TRUE LIBOMP_OMPD_SUPPORT)
 
 set(LIBOMP_SOURCE_FILES ${LIBOMP_CXXFILES} ${LIBOMP_ASMFILES})
 # For Windows, there is a resource file (.rc -> .res) that is also compiled
@@ -188,6 +189,7 @@ if(NOT WIN32)
     WORKING_DIRECTORY ${LIBOMP_LIBRARY_DIR}
   )
 endif()
+set(LIBOMP_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
 
 # Create *.inc before compiling any sources
 # objects depend on : .inc files
@@ -206,6 +208,7 @@ if(WIN32)
   libomp_append(LIBOMP_MASM_DEFINITIONS "-D_M_IA32" IF_TRUE IA32)
   libomp_append(LIBOMP_MASM_DEFINITIONS "-D_M_AMD64" IF_TRUE INTEL64)
   libomp_append(LIBOMP_MASM_DEFINITIONS "-DOMPT_SUPPORT" IF_TRUE_1_0 LIBOMP_OMPT_SUPPORT)
+  libomp_append(LIBOMP_MASM_DEFINITIONS "-DOMPD_SUPPORT" IF_TRUE_1_0 LIBOMP_OMPD_SUPPORT)
   libomp_list_to_string("${LIBOMP_MASM_DEFINITIONS}" LIBOMP_MASM_DEFINITIONS)
   set_property(SOURCE z_Windows_NT-586_asm.asm APPEND_STRING PROPERTY COMPILE_FLAGS " ${LIBOMP_MASM_DEFINITIONS}")
   set_source_files_properties(thirdparty/ittnotify/ittnotify_static.cpp PROPERTIES COMPILE_DEFINITIONS "UNICODE")

diff  --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
index 8e822750b53e2..5092174d66eff 100644
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -483,6 +483,8 @@ typedef enum ompd_rc_t {
   ompd_rc_device_read_error = 8,
   ompd_rc_device_write_error = 9,
   ompd_rc_nomem = 10,
+  ompd_rc_incomplete = 11,
+  ompd_rc_callback_error = 12
 } ompd_rc_t;
 
 typedef void (*ompt_interface_fn_t) (void);
@@ -1133,6 +1135,198 @@ typedef struct ompd_device_type_sizes_t {
   uint8_t sizeof_pointer;
 } ompd_device_type_sizes_t;
 
+void ompd_dll_locations_valid(void);
+
+typedef ompd_rc_t (*ompd_callback_memory_alloc_fn_t)(ompd_size_t nbytes,
+                                                     void **ptr);
+
+typedef ompd_rc_t (*ompd_callback_memory_free_fn_t)(void *ptr);
+
+typedef ompd_rc_t (*ompd_callback_get_thread_context_for_thread_id_fn_t)(
+    ompd_address_space_context_t *address_space_context, ompd_thread_id_t kind,
+    ompd_size_t sizeof_thread_id, const void *thread_id,
+    ompd_thread_context_t **thread_context);
+
+typedef ompd_rc_t (*ompd_callback_sizeof_fn_t)(
+    ompd_address_space_context_t *address_space_context,
+    ompd_device_type_sizes_t *sizes);
+
+typedef ompd_rc_t (*ompd_callback_symbol_addr_fn_t)(
+    ompd_address_space_context_t *address_space_context,
+    ompd_thread_context_t *thread_context, const char *symbol_name,
+    ompd_address_t *symbol_addr, const char *file_name);
+
+typedef ompd_rc_t (*ompd_callback_memory_read_fn_t)(
+    ompd_address_space_context_t *address_space_context,
+    ompd_thread_context_t *thread_context, const ompd_address_t *addr,
+    ompd_size_t nbytes, void *buffer);
+
+typedef ompd_rc_t (*ompd_callback_memory_write_fn_t)(
+    ompd_address_space_context_t *address_space_context,
+    ompd_thread_context_t *thread_context, const ompd_address_t *addr,
+    ompd_size_t nbytes, const void *buffer);
+
+typedef ompd_rc_t (*ompd_callback_device_host_fn_t)(
+    ompd_address_space_context_t *address_space_context, const void *input,
+    ompd_size_t unit_size, ompd_size_t count, void *output);
+
+typedef ompd_rc_t (*ompd_callback_print_string_fn_t)(const char *string,
+                                                     int category);
+
+typedef struct ompd_callbacks_t {
+  ompd_callback_memory_alloc_fn_t alloc_memory;
+  ompd_callback_memory_free_fn_t free_memory;
+  ompd_callback_print_string_fn_t print_string;
+  ompd_callback_sizeof_fn_t sizeof_type;
+  ompd_callback_symbol_addr_fn_t symbol_addr_lookup;
+  ompd_callback_memory_read_fn_t read_memory;
+  ompd_callback_memory_write_fn_t write_memory;
+  ompd_callback_memory_read_fn_t read_string;
+  ompd_callback_device_host_fn_t device_to_host;
+  ompd_callback_device_host_fn_t host_to_device;
+  ompd_callback_get_thread_context_for_thread_id_fn_t
+      get_thread_context_for_thread_id;
+} ompd_callbacks_t;
+
+void ompd_bp_parallel_begin(void);
+
+void ompd_bp_parallel_end(void);
+
+void ompd_bp_task_begin(void);
+
+void ompd_bp_task_end(void);
+
+void ompd_bp_thread_begin(void);
+
+void ompd_bp_thread_end(void);
+
+void ompd_bp_device_begin(void);
+
+void ompd_bp_device_end(void);
+
+ompd_rc_t ompd_initialize(ompd_word_t api_version,
+                          const ompd_callbacks_t *callbacks);
+
+ompd_rc_t ompd_get_api_version(ompd_word_t *version);
+
+ompd_rc_t ompd_get_version_string(const char **string);
+
+ompd_rc_t ompd_finalize(void);
+
+ompd_rc_t ompd_process_initialize(ompd_address_space_context_t *context,
+                                  ompd_address_space_handle_t **handle);
+
+ompd_rc_t ompd_device_initialize(ompd_address_space_handle_t *process_handle,
+                                 ompd_address_space_context_t *device_context,
+                                 ompd_device_t kind, ompd_size_t sizeof_id,
+                                 void *id,
+                                 ompd_address_space_handle_t **device_handle);
+
+ompd_rc_t ompd_rel_address_space_handle(ompd_address_space_handle_t *handle);
+
+ompd_rc_t ompd_get_omp_version(ompd_address_space_handle_t *address_space,
+                               ompd_word_t *omp_version);
+
+ompd_rc_t
+ompd_get_omp_version_string(ompd_address_space_handle_t *address_space,
+                            const char **string);
+
+ompd_rc_t ompd_get_thread_in_parallel(ompd_parallel_handle_t *parallel_handle,
+                                      int thread_num,
+                                      ompd_thread_handle_t **thread_handle);
+
+ompd_rc_t ompd_get_thread_handle(ompd_address_space_handle_t *handle,
+                                 ompd_thread_id_t kind,
+                                 ompd_size_t sizeof_thread_id,
+                                 const void *thread_id,
+                                 ompd_thread_handle_t **thread_handle);
+
+ompd_rc_t ompd_rel_thread_handle(ompd_thread_handle_t *thread_handle);
+
+ompd_rc_t ompd_thread_handle_compare(ompd_thread_handle_t *thread_handle_1,
+                                     ompd_thread_handle_t *thread_handle_2,
+                                     int *cmp_value);
+
+ompd_rc_t ompd_get_thread_id(ompd_thread_handle_t *thread_handle,
+                             ompd_thread_id_t kind,
+                             ompd_size_t sizeof_thread_id, void *thread_id);
+
+ompd_rc_t
+ompd_get_curr_parallel_handle(ompd_thread_handle_t *thread_handle,
+                              ompd_parallel_handle_t **parallel_handle);
+
+ompd_rc_t ompd_get_enclosing_parallel_handle(
+    ompd_parallel_handle_t *parallel_handle,
+    ompd_parallel_handle_t **enclosing_parallel_handle);
+
+ompd_rc_t
+ompd_get_task_parallel_handle(ompd_task_handle_t *task_handle,
+                              ompd_parallel_handle_t **task_parallel_handle);
+
+ompd_rc_t ompd_rel_parallel_handle(ompd_parallel_handle_t *parallel_handle);
+
+ompd_rc_t
+ompd_parallel_handle_compare(ompd_parallel_handle_t *parallel_handle_1,
+                             ompd_parallel_handle_t *parallel_handle_2,
+                             int *cmp_value);
+
+ompd_rc_t ompd_get_curr_task_handle(ompd_thread_handle_t *thread_handle,
+                                    ompd_task_handle_t **task_handle);
+
+ompd_rc_t
+ompd_get_generating_task_handle(ompd_task_handle_t *task_handle,
+                                ompd_task_handle_t **generating_task_handle);
+
+ompd_rc_t
+ompd_get_scheduling_task_handle(ompd_task_handle_t *task_handle,
+                                ompd_task_handle_t **scheduling_task_handle);
+
+ompd_rc_t ompd_get_task_in_parallel(ompd_parallel_handle_t *parallel_handle,
+                                    int thread_num,
+                                    ompd_task_handle_t **task_handle);
+
+ompd_rc_t ompd_rel_task_handle(ompd_task_handle_t *task_handle);
+
+ompd_rc_t ompd_task_handle_compare(ompd_task_handle_t *task_handle_1,
+                                   ompd_task_handle_t *task_handle_2,
+                                   int *cmp_value);
+
+ompd_rc_t ompd_get_task_function(ompd_task_handle_t *task_handle,
+                                 ompd_address_t *entry_point);
+
+ompd_rc_t ompd_get_task_frame(ompd_task_handle_t *task_handle,
+                              ompd_frame_info_t *exit_frame,
+                              ompd_frame_info_t *enter_frame);
+
+ompd_rc_t
+ompd_enumerate_states(ompd_address_space_handle_t *address_space_handle,
+                      ompd_word_t current_state, ompd_word_t *next_state,
+                      const char **next_state_name, ompd_word_t *more_enums);
+
+ompd_rc_t ompd_get_state(ompd_thread_handle_t *thread_handle,
+                         ompd_word_t *state, ompd_wait_id_t *wait_id);
+
+ompd_rc_t
+ompd_get_display_control_vars(ompd_address_space_handle_t *address_space_handle,
+                              const char *const **control_vars);
+
+ompd_rc_t ompd_rel_display_control_vars(const char *const **control_vars);
+
+ompd_rc_t ompd_enumerate_icvs(ompd_address_space_handle_t *handle,
+                              ompd_icv_id_t current, ompd_icv_id_t *next_id,
+                              const char **next_icv_name,
+                              ompd_scope_t *next_scope, int *more);
+
+ompd_rc_t ompd_get_icv_from_scope(void *handle, ompd_scope_t scope,
+                                  ompd_icv_id_t icv_id, ompd_word_t *icv_value);
+
+ompd_rc_t ompd_get_icv_string_from_scope(void *handle, ompd_scope_t scope,
+                                         ompd_icv_id_t icv_id,
+                                         const char **icv_string);
+
+ompd_rc_t ompd_get_tool_data(void *handle, ompd_scope_t scope,
+                             ompd_word_t *value, ompd_address_t *ptr);
+
 typedef struct ompt_record_ompt_t {
   ompt_callbacks_t type;
   ompt_device_time_t time;

diff  --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 66c7362ae6ce7..515da9300f7cc 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -138,6 +138,10 @@ typedef unsigned int kmp_hwloc_depth_t;
 #include "ompt-internal.h"
 #endif
 
+#if OMPD_SUPPORT
+#include "ompd-specific.h"
+#endif
+
 #ifndef UNLIKELY
 #define UNLIKELY(x) (x)
 #endif
@@ -847,6 +851,10 @@ extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
 extern int __kmp_display_affinity;
 extern char *__kmp_affinity_format;
 static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
+#if OMPT_SUPPORT
+extern int __kmp_tool;
+extern char *__kmp_tool_libraries;
+#endif // OMPT_SUPPORT
 
 #if KMP_AFFINITY_SUPPORTED
 #define KMP_PLACE_ALL (-1)

diff  --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index f6aee7197ee88..f092efe67ad74 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -44,6 +44,8 @@
 #define OMPT_DEBUG LIBOMP_OMPT_DEBUG
 #cmakedefine01 LIBOMP_OMPT_SUPPORT
 #define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
+#cmakedefine01 LIBOMP_OMPD_SUPPORT
+#define OMPD_SUPPORT LIBOMP_OMPD_SUPPORT
 #cmakedefine01 LIBOMP_PROFILING_SUPPORT
 #define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
 #cmakedefine01 LIBOMP_OMPT_OPTIONAL

diff  --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp
index 802a40442dad6..98d0cdf7af545 100644
--- a/openmp/runtime/src/kmp_csupport.cpp
+++ b/openmp/runtime/src/kmp_csupport.cpp
@@ -604,6 +604,11 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
     }
 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
 
+#if OMPD_SUPPORT
+    if (ompd_state & OMPD_ENABLE_BP)
+      ompd_bp_parallel_end();
+#endif
+
     this_thr->th.th_team = serial_team->t.t_parent;
     this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
 

diff  --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp
index d4e0c5b18b1bf..61a3199f1a039 100644
--- a/openmp/runtime/src/kmp_gsupport.cpp
+++ b/openmp/runtime/src/kmp_gsupport.cpp
@@ -498,6 +498,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
     frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
   }
 #endif
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_parallel_begin();
+#endif
 }
 
 void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
@@ -528,6 +532,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
                   fork_context_gnu
 #endif
   );
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_parallel_end();
+#endif
 }
 
 // Loop worksharing constructs

diff  --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index c59f9960d09b6..34f9869981325 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -31,6 +31,9 @@
 #if OMPT_SUPPORT
 #include "ompt-specific.h"
 #endif
+#if OMPD_SUPPORT
+#include "ompd-specific.h"
+#endif
 
 #if OMP_PROFILING_SUPPORT
 #include "llvm/Support/TimeProfiler.h"
@@ -1470,6 +1473,10 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           return TRUE;
         }
 
+#if OMPD_SUPPORT
+        parent_team->t.t_pkfn = microtask;
+#endif
+
 #if OMPT_SUPPORT
         void *dummy;
         void **exit_frame_p;
@@ -1694,6 +1701,10 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
       __kmpc_serialized_parallel(loc, gtid);
 
+#if OMPD_SUPPORT
+      master_th->th.th_serial_team->t.t_pkfn = microtask;
+#endif
+
       if (call_context == fork_context_intel) {
         /* TODO this sucks, use the compiler itself to pass args! :) */
         master_th->th.th_serial_team->t.t_ident = loc;
@@ -2020,6 +2031,10 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
     // Update the floating point rounding in the team if required.
     propagateFPControl(team);
+#if OMPD_SUPPORT
+    if (ompd_state & OMPD_ENABLE_BP)
+      ompd_bp_parallel_begin();
+#endif
 
     if (__kmp_tasking_mode != tskm_immediate_exec) {
       // Set primary thread's task team to team's task team. Unless this is hot
@@ -2212,7 +2227,6 @@ int __kmp_fork_call(ident_t *loc, int gtid,
   KMP_MB(); /* Flush all pending memory write invalidates.  */
 
   KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
-
 #if OMPT_SUPPORT
   if (ompt_enabled.enabled) {
     master_th->th.ompt_thread_info.state = ompt_state_overhead;
@@ -2488,6 +2502,10 @@ void __kmp_join_call(ident_t *loc, int gtid
 #endif // KMP_AFFINITY_SUPPORTED
   master_th->th.th_def_allocator = team->t.t_def_allocator;
 
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_parallel_end();
+#endif
   updateHWFPControl(team);
 
   if (root->r.r_active != master_active)
@@ -3841,6 +3859,10 @@ int __kmp_register_root(int initial_thread) {
     ompt_set_thread_state(root_thread, ompt_state_work_serial);
   }
 #endif
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_thread_begin();
+#endif
 
   KMP_MB();
   __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -3924,6 +3946,11 @@ static int __kmp_reset_root(int gtid, kmp_root_t *root) {
   __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
 #endif /* KMP_OS_WINDOWS */
 
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_thread_end();
+#endif
+
 #if OMPT_SUPPORT
   ompt_data_t *task_data;
   ompt_data_t *parallel_data;
@@ -5753,6 +5780,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?
   }
 
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_thread_begin();
+#endif
+
 #if OMPT_SUPPORT
   ompt_data_t *thread_data = nullptr;
   if (ompt_enabled.enabled) {
@@ -5830,6 +5862,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   }
   TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
 
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_thread_end();
+#endif
+
 #if OMPT_SUPPORT
   if (ompt_enabled.ompt_callback_thread_end) {
     ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
@@ -6691,6 +6728,10 @@ static void __kmp_do_serial_initialize(void) {
 #if OMPT_SUPPORT
   ompt_pre_init();
 #endif
+#if OMPD_SUPPORT
+  __kmp_env_dump();
+  ompd_init();
+#endif
 
   __kmp_validate_locks();
 
@@ -7839,6 +7880,13 @@ void __kmp_cleanup(void) {
 #else
   __kmp_cleanup_user_locks();
 #endif
+#if OMPD_SUPPORT
+  if (ompd_state) {
+    __kmp_free(ompd_env_block);
+    ompd_env_block = NULL;
+    ompd_env_block_size = 0;
+  }
+#endif
 
 #if KMP_AFFINITY_SUPPORTED
   KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));

diff  --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index 89799a7769854..33429f8ff5afc 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -25,6 +25,9 @@
 #include "kmp_str.h"
 #include "kmp_wrapper_getpid.h"
 #include <ctype.h> // toupper()
+#if OMPD_SUPPORT
+#include "ompd-specific.h"
+#endif
 
 static int __kmp_env_toPrint(char const *name, int flag);
 
@@ -5033,7 +5036,7 @@ static void __kmp_stg_print_omp_cancellation(kmp_str_buf_t *buffer,
 } // __kmp_stg_print_omp_cancellation
 
 #if OMPT_SUPPORT
-static int __kmp_tool = 1;
+int __kmp_tool = 1;
 
 static void __kmp_stg_parse_omp_tool(char const *name, char const *value,
                                      void *data) {
@@ -5050,7 +5053,7 @@ static void __kmp_stg_print_omp_tool(kmp_str_buf_t *buffer, char const *name,
   }
 } // __kmp_stg_print_omp_tool
 
-static char *__kmp_tool_libraries = NULL;
+char *__kmp_tool_libraries = NULL;
 
 static void __kmp_stg_parse_omp_tool_libraries(char const *name,
                                                char const *value, void *data) {
@@ -5071,7 +5074,7 @@ static void __kmp_stg_print_omp_tool_libraries(kmp_str_buf_t *buffer,
   }
 } // __kmp_stg_print_omp_tool_libraries
 
-static char *__kmp_tool_verbose_init = NULL;
+char *__kmp_tool_verbose_init = NULL;
 
 static void __kmp_stg_parse_omp_tool_verbose_init(char const *name,
                                                   char const *value,
@@ -6169,4 +6172,47 @@ void __kmp_display_env_impl(int display_env, int display_env_verbose) {
   __kmp_printf("\n");
 }
 
+#if OMPD_SUPPORT
+// Dump environment variables for OMPD
+void __kmp_env_dump() {
+  // Builds ompd_env_block: text with one entry per setting that can be printed
+  kmp_env_blk_t block;
+  kmp_str_buf_t buffer, env, notdefined;
+
+  __kmp_stg_init();
+  __kmp_str_buf_init(&buffer);
+  __kmp_str_buf_init(&env);
+  __kmp_str_buf_init(&notdefined);
+  // NOTE(review): block is set up, sorted and freed here but never read
+  __kmp_env_blk_init(&block, NULL);
+  __kmp_env_blk_sort(&block);
+  // Pattern used below to recognize the output of a setting without a value
+  __kmp_str_buf_print(&notdefined, ": %s", KMP_I18N_STR(NotDefined));
+
+  for (int i = 0; i < __kmp_stg_count; ++i) {
+    if (__kmp_stg_table[i].print == NULL)
+      continue;
+    __kmp_str_buf_clear(&env);
+    __kmp_stg_table[i].print(&env, __kmp_stg_table[i].name,
+                             __kmp_stg_table[i].data);
+    if (env.used < 4) // valid definition must have indents (3) and a new line
+      continue;
+    if (strstr(env.str, notdefined.str))
+      // normalize the string to "<name>=undefined"
+      __kmp_str_buf_print(&buffer, "%s=undefined\n", __kmp_stg_table[i].name);
+    else // keep the printed text, minus its 3 leading indent characters
+      __kmp_str_buf_cat(&buffer, env.str + 3, env.used - 3);
+  }
+  // Copy buffer.used + 1 bytes, presumably including the terminating NUL
+  ompd_env_block = (char *)__kmp_allocate(buffer.used + 1);
+  KMP_MEMCPY(ompd_env_block, buffer.str, buffer.used + 1);
+  ompd_env_block_size = (ompd_size_t)KMP_STRLEN(ompd_env_block);
+
+  __kmp_env_blk_free(&block);
+  __kmp_str_buf_free(&buffer);
+  __kmp_str_buf_free(&env);
+  __kmp_str_buf_free(&notdefined);
+}
+#endif // OMPD_SUPPORT
+
 // end of file

diff  --git a/openmp/runtime/src/kmp_settings.h b/openmp/runtime/src/kmp_settings.h
index d61c40694cf6a..f63f105940eff 100644
--- a/openmp/runtime/src/kmp_settings.h
+++ b/openmp/runtime/src/kmp_settings.h
@@ -18,6 +18,9 @@ void __kmp_env_initialize(char const *);
 void __kmp_env_print();
 void __kmp_env_print_2();
 void __kmp_display_env_impl(int display_env, int display_env_verbose);
+#if OMPD_SUPPORT
+void __kmp_env_dump();
+#endif
 
 int __kmp_initial_threads_capacity(int req_nproc);
 void __kmp_init_dflt_team_nth();

diff  --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 8964decfb1ce3..62f0bdca4be93 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1603,6 +1603,11 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
       __ompt_task_start(task, current_task, gtid);
 #endif
 
+#if OMPD_SUPPORT
+    if (ompd_state & OMPD_ENABLE_BP)
+      ompd_bp_task_begin();
+#endif
+
 #if USE_ITT_BUILD && USE_ITT_NOTIFY
     kmp_uint64 cur_time;
     kmp_int32 kmp_itt_count_task =
@@ -1639,6 +1644,11 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
 #endif
   }
 
+#if OMPD_SUPPORT
+  if (ompd_state & OMPD_ENABLE_BP)
+    ompd_bp_task_end();
+#endif
+
   // Proxy tasks are not handled by the runtime
   if (taskdata->td_flags.proxy != TASK_PROXY) {
     ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent);

diff  --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h
index 7544d92794979..8234ddaa3a18f 100644
--- a/openmp/runtime/src/kmp_wait_release.h
+++ b/openmp/runtime/src/kmp_wait_release.h
@@ -1026,9 +1026,18 @@ class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
   int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                     int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                     kmp_int32 is_constrained) {
+#if OMPD_SUPPORT
+    int ret = __kmp_execute_tasks_oncore(
+        this_thr, gtid, this, final_spin,
+        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+    if (ompd_state & OMPD_ENABLE_BP)
+      ompd_bp_task_end();
+    return ret;
+#else
     return __kmp_execute_tasks_oncore(
         this_thr, gtid, this, final_spin,
         thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
+#endif
   }
   kmp_uint8 *get_stolen() { return NULL; }
   enum barrier_type get_bt() { return bt; }

diff  --git a/openmp/runtime/src/ompd-specific.cpp b/openmp/runtime/src/ompd-specific.cpp
new file mode 100644
index 0000000000000..a1728afd7e602
--- /dev/null
+++ b/openmp/runtime/src/ompd-specific.cpp
@@ -0,0 +1,155 @@
+/*
+ * ompd-specific.cpp -- OpenMP debug support
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ompd-specific.h"
+
+#if OMPD_SUPPORT
+
+/**
+ * Declaration of symbols to hold struct size and member offset information
+ */
+
+#define ompd_declare_access(t, m) uint64_t ompd_access__##t##__##m;
+OMPD_FOREACH_ACCESS(ompd_declare_access)
+#undef ompd_declare_access
+
+#define ompd_declare_sizeof_member(t, m) uint64_t ompd_sizeof__##t##__##m;
+OMPD_FOREACH_ACCESS(ompd_declare_sizeof_member)
+#undef ompd_declare_sizeof_member
+
+#define ompd_declare_bitfield(t, m) uint64_t ompd_bitfield__##t##__##m;
+OMPD_FOREACH_BITFIELD(ompd_declare_bitfield)
+#undef ompd_declare_bitfield
+
+#define ompd_declare_sizeof(t) uint64_t ompd_sizeof__##t;
+OMPD_FOREACH_SIZEOF(ompd_declare_sizeof)
+#undef ompd_declare_sizeof
+
+volatile const char **ompd_dll_locations = NULL;
+uint64_t ompd_state = 0;
+
+char *ompd_env_block = NULL;
+ompd_size_t ompd_env_block_size = 0;
+
+void ompd_init() {
+  // One-time setup of the symbols an OMPD tool reads from this process: layout
+  // data for the runtime's types and the candidate locations of libompd.
+  static int ompd_initialized = 0;
+
+  if (ompd_initialized)
+    return;
+
+  // Calculate member offsets for structs and unions
+#define ompd_init_access(t, m)                                                 \
+  ompd_access__##t##__##m = (uint64_t) & (((t *)0)->m);
+  OMPD_FOREACH_ACCESS(ompd_init_access)
+#undef ompd_init_access
+
+  // Create bit mask for bitfield access: set the member to 1 in a zeroed word
+#define ompd_init_bitfield(t, m)                                               \
+  ompd_bitfield__##t##__##m = 0;                                               \
+  ((t *)(&ompd_bitfield__##t##__##m))->m = 1;
+  OMPD_FOREACH_BITFIELD(ompd_init_bitfield)
+#undef ompd_init_bitfield
+
+  // Calculate type size information
+#define ompd_init_sizeof_member(t, m)                                          \
+  ompd_sizeof__##t##__##m = sizeof(((t *)0)->m);
+  OMPD_FOREACH_ACCESS(ompd_init_sizeof_member)
+#undef ompd_init_sizeof_member
+
+#define ompd_init_sizeof(t) ompd_sizeof__##t = sizeof(t);
+  OMPD_FOREACH_SIZEOF(ompd_init_sizeof)
+#undef ompd_init_sizeof
+
+  char *libname = NULL;
+
+#if KMP_OS_UNIX
+  // Find the directory of the library that contains ompd_init thru dladdr and
+  // append "/libompd.so" to it to get the full path of libompd
+  Dl_info dl_info;
+  const char *last_slash = NULL;
+  if (dladdr((void *)ompd_init, &dl_info) && dl_info.dli_fname) {
+    last_slash = strrchr(dl_info.dli_fname, '/');
+  } else {
+    // dl_info is not usable on failure, and dlerror() has no message for it
+    fprintf(stderr, "OMPD: could not locate the OpenMP runtime library\n");
+  }
+  if (last_slash) {
+    size_t lib_path_length = last_slash - dl_info.dli_fname;
+    libname =
+        (char *)malloc(lib_path_length + 12 /*for '/libompd.so' and '\0'*/);
+    if (libname) {
+      // Copy the directory part only: the buffer may not hold all of dli_fname
+      memcpy(libname, dl_info.dli_fname, lib_path_length);
+      memcpy(libname + lib_path_length, "/libompd.so\0", 12);
+    }
+  }
+#endif
+
+  // OMP_DEBUG=enabled switches on the ompd_bp_* calls guarded by OMPD_ENABLE_BP
+  const char *ompd_env_var = getenv("OMP_DEBUG");
+  if (ompd_env_var && !strcmp(ompd_env_var, "enabled")) {
+    fprintf(stderr, "OMP_OMPD active\n");
+    ompt_enabled.enabled = 1;
+    ompd_state |= OMPD_ENABLE_BP;
+  }
+
+  ompd_initialized = 1;
+  // Publish the library locations; a NULL libname just ends the list early
+  ompd_dll_locations = (volatile const char **)malloc(3 * sizeof(const char *));
+  if (!ompd_dll_locations)
+    return; // nothing to publish, so do not signal validity
+  ompd_dll_locations[0] = "libompd.so";
+  ompd_dll_locations[1] = libname;
+  ompd_dll_locations[2] = NULL;
+  ompd_dll_locations_valid();
+}
+
+void __attribute__((noinline)) ompd_dll_locations_valid(void) {
+  /* Intentionally empty; ompd_init calls it once ompd_dll_locations is set.
+     The empty asm is there so the function is hard to optimize out. */
+  asm("");
+}
+
+void __attribute__((noinline)) ompd_bp_parallel_begin(void) {
+  /* Intentionally empty marker function. noinline plus the empty asm keep
+     the function and the calls to it from being optimized away. */
+  asm("");
+}
+void __attribute__((noinline)) ompd_bp_parallel_end(void) {
+  /* Intentionally empty marker function. noinline plus the empty asm keep
+     the function and the calls to it from being optimized away. */
+  asm("");
+}
+void __attribute__((noinline)) ompd_bp_task_begin(void) {
+  /* Intentionally empty marker function. noinline plus the empty asm keep
+     the function and the calls to it from being optimized away. */
+  asm("");
+}
+void __attribute__((noinline)) ompd_bp_task_end(void) {
+  /* Intentionally empty marker function. noinline plus the empty asm keep
+     the function and the calls to it from being optimized away. */
+  asm("");
+}
+void __attribute__((noinline)) ompd_bp_thread_begin(void) {
+  /* Intentionally empty marker function. noinline plus the empty asm keep
+     the function and the calls to it from being optimized away. */
+  asm("");
+}
+void __attribute__((noinline)) ompd_bp_thread_end(void) {
+  /* Intentionally empty marker function. noinline plus the empty asm keep
+     the function and the calls to it from being optimized away. */
+  asm("");
+}
+
+#endif /* OMPD_SUPPORT */

diff  --git a/openmp/runtime/src/ompd-specific.h b/openmp/runtime/src/ompd-specific.h
new file mode 100644
index 0000000000000..21809ef52f532
--- /dev/null
+++ b/openmp/runtime/src/ompd-specific.h
@@ -0,0 +1,154 @@
+/*
+ * ompd-specific.h -- OpenMP debug support
+ */
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "kmp.h"
+#include "omp-tools.h"
+#include <stdint.h>
+
+#ifndef __OMPD_SPECIFIC_H__
+#define __OMPD_SPECIFIC_H__
+
+#if OMPD_SUPPORT
+
+void ompd_init();
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+extern char *ompd_env_block;
+extern ompd_size_t ompd_env_block_size;
+extern char *__kmp_tool_verbose_init;
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+extern uint64_t ompd_state;
+#define OMPD_ENABLE_BP 0x1
+/* Expands OMPD_ACCESS(type, member) once per (type, member) pair below. */
+#define OMPD_FOREACH_ACCESS(OMPD_ACCESS)                                       \
+  OMPD_ACCESS(kmp_base_info_t, th_current_task)                                \
+  OMPD_ACCESS(kmp_base_info_t, th_team)                                        \
+  OMPD_ACCESS(kmp_base_info_t, th_info)                                        \
+  OMPD_ACCESS(kmp_base_info_t, ompt_thread_info)                               \
+                                                                               \
+  OMPD_ACCESS(kmp_base_root_t, r_in_parallel)                                  \
+                                                                               \
+  OMPD_ACCESS(kmp_base_team_t, ompt_team_info)                                 \
+  OMPD_ACCESS(kmp_base_team_t, ompt_serialized_team_info)                      \
+  OMPD_ACCESS(kmp_base_team_t, t_active_level)                                 \
+  OMPD_ACCESS(kmp_base_team_t, t_implicit_task_taskdata)                       \
+  OMPD_ACCESS(kmp_base_team_t, t_master_tid)                                   \
+  OMPD_ACCESS(kmp_base_team_t, t_nproc)                                        \
+  OMPD_ACCESS(kmp_base_team_t, t_level)                                        \
+  OMPD_ACCESS(kmp_base_team_t, t_parent)                                       \
+  OMPD_ACCESS(kmp_base_team_t, t_pkfn)                                         \
+  OMPD_ACCESS(kmp_base_team_t, t_threads)                                      \
+                                                                               \
+  OMPD_ACCESS(kmp_desc_t, ds)                                                  \
+                                                                               \
+  OMPD_ACCESS(kmp_desc_base_t, ds_thread)                                      \
+  OMPD_ACCESS(kmp_desc_base_t, ds_tid)                                         \
+                                                                               \
+  OMPD_ACCESS(kmp_info_t, th)                                                  \
+                                                                               \
+  OMPD_ACCESS(kmp_r_sched_t, r_sched_type)                                     \
+  OMPD_ACCESS(kmp_r_sched_t, chunk)                                            \
+                                                                               \
+  OMPD_ACCESS(kmp_root_t, r)                                                   \
+                                                                               \
+  OMPD_ACCESS(kmp_internal_control_t, dynamic)                                 \
+  OMPD_ACCESS(kmp_internal_control_t, max_active_levels)                       \
+  OMPD_ACCESS(kmp_internal_control_t, nproc)                                   \
+  OMPD_ACCESS(kmp_internal_control_t, proc_bind)                               \
+  OMPD_ACCESS(kmp_internal_control_t, sched)                                   \
+  OMPD_ACCESS(kmp_internal_control_t, default_device)                          \
+  OMPD_ACCESS(kmp_internal_control_t, thread_limit)                            \
+                                                                               \
+  OMPD_ACCESS(kmp_taskdata_t, ompt_task_info)                                  \
+  OMPD_ACCESS(kmp_taskdata_t, td_flags)                                        \
+  OMPD_ACCESS(kmp_taskdata_t, td_icvs)                                         \
+  OMPD_ACCESS(kmp_taskdata_t, td_parent)                                       \
+  OMPD_ACCESS(kmp_taskdata_t, td_team)                                         \
+                                                                               \
+  OMPD_ACCESS(kmp_task_t, routine)                                             \
+                                                                               \
+  OMPD_ACCESS(kmp_team_p, t)                                                   \
+                                                                               \
+  OMPD_ACCESS(kmp_nested_nthreads_t, used)                                     \
+  OMPD_ACCESS(kmp_nested_nthreads_t, nth)                                      \
+                                                                               \
+  OMPD_ACCESS(kmp_nested_proc_bind_t, used)                                    \
+  OMPD_ACCESS(kmp_nested_proc_bind_t, bind_types)                              \
+                                                                               \
+  OMPD_ACCESS(ompt_task_info_t, frame)                                         \
+  OMPD_ACCESS(ompt_task_info_t, scheduling_parent)                             \
+  OMPD_ACCESS(ompt_task_info_t, task_data)                                     \
+                                                                               \
+  OMPD_ACCESS(ompt_team_info_t, parallel_data)                                 \
+                                                                               \
+  OMPD_ACCESS(ompt_thread_info_t, state)                                       \
+  OMPD_ACCESS(ompt_thread_info_t, wait_id)                                     \
+  OMPD_ACCESS(ompt_thread_info_t, thread_data)                                 \
+                                                                               \
+  OMPD_ACCESS(ompt_data_t, value)                                              \
+  OMPD_ACCESS(ompt_data_t, ptr)                                                \
+                                                                               \
+  OMPD_ACCESS(ompt_frame_t, exit_frame)                                        \
+  OMPD_ACCESS(ompt_frame_t, enter_frame)                                       \
+                                                                               \
+  OMPD_ACCESS(ompt_lw_taskteam_t, parent)                                      \
+  OMPD_ACCESS(ompt_lw_taskteam_t, ompt_team_info)                              \
+  OMPD_ACCESS(ompt_lw_taskteam_t, ompt_task_info)
+/* Expands OMPD_BITFIELD(type, member) once per listed flags member. */
+#define OMPD_FOREACH_BITFIELD(OMPD_BITFIELD)                                   \
+  OMPD_BITFIELD(kmp_tasking_flags_t, final)                                    \
+  OMPD_BITFIELD(kmp_tasking_flags_t, tiedness)                                 \
+  OMPD_BITFIELD(kmp_tasking_flags_t, tasktype)                                 \
+  OMPD_BITFIELD(kmp_tasking_flags_t, task_serial)                              \
+  OMPD_BITFIELD(kmp_tasking_flags_t, tasking_ser)                              \
+  OMPD_BITFIELD(kmp_tasking_flags_t, team_serial)                              \
+  OMPD_BITFIELD(kmp_tasking_flags_t, started)                                  \
+  OMPD_BITFIELD(kmp_tasking_flags_t, executing)                                \
+  OMPD_BITFIELD(kmp_tasking_flags_t, complete)                                 \
+  OMPD_BITFIELD(kmp_tasking_flags_t, freed)                                    \
+  OMPD_BITFIELD(kmp_tasking_flags_t, native)
+/* Expands OMPD_SIZEOF(name) once per listed type or object name. */
+#define OMPD_FOREACH_SIZEOF(OMPD_SIZEOF)                                       \
+  OMPD_SIZEOF(kmp_info_t)                                                      \
+  OMPD_SIZEOF(kmp_taskdata_t)                                                  \
+  OMPD_SIZEOF(kmp_task_t)                                                      \
+  OMPD_SIZEOF(kmp_tasking_flags_t)                                             \
+  OMPD_SIZEOF(kmp_thread_t)                                                    \
+  OMPD_SIZEOF(ompt_data_t)                                                     \
+  OMPD_SIZEOF(ompt_id_t)                                                       \
+  OMPD_SIZEOF(__kmp_avail_proc)                                                \
+  OMPD_SIZEOF(__kmp_max_nth)                                                   \
+  OMPD_SIZEOF(__kmp_stksize)                                                   \
+  OMPD_SIZEOF(__kmp_omp_cancellation)                                          \
+  OMPD_SIZEOF(__kmp_max_task_priority)                                         \
+  OMPD_SIZEOF(__kmp_display_affinity)                                          \
+  OMPD_SIZEOF(__kmp_affinity_format)                                           \
+  OMPD_SIZEOF(__kmp_tool_libraries)                                            \
+  OMPD_SIZEOF(__kmp_tool_verbose_init)                                         \
+  OMPD_SIZEOF(__kmp_tool)                                                      \
+  OMPD_SIZEOF(ompd_state)                                                      \
+  OMPD_SIZEOF(kmp_nested_nthreads_t)                                           \
+  OMPD_SIZEOF(__kmp_nested_nth)                                                \
+  OMPD_SIZEOF(kmp_nested_proc_bind_t)                                          \
+  OMPD_SIZEOF(__kmp_nested_proc_bind)                                          \
+  OMPD_SIZEOF(int)                                                             \
+  OMPD_SIZEOF(char)                                                            \
+  OMPD_SIZEOF(__kmp_gtid)                                                      \
+  OMPD_SIZEOF(__kmp_nth)
+
+#endif /* OMPD_SUPPORT */
+#endif

diff  --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
index b2f244fc25dd4..3d8ef041f724d 100644
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -501,7 +501,11 @@ void ompt_post_init() {
 }
 
 void ompt_fini() {
-  if (ompt_enabled.enabled) {
+  if (ompt_enabled.enabled
+#if OMPD_SUPPORT
+      && ompt_start_tool_result && ompt_start_tool_result->finalize
+#endif
+  ) {
     ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data));
   }
 

diff  --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index c74426c3012cf..1ad0e17ed408e 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -292,10 +292,20 @@ void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
         thr->th.th_team->t.ompt_serialized_team_info;
     link_lwt->parent = my_parent;
     thr->th.th_team->t.ompt_serialized_team_info = link_lwt;
+#if OMPD_SUPPORT
+    if (ompd_state & OMPD_ENABLE_BP) {
+      ompd_bp_parallel_begin();
+    }
+#endif
   } else {
     // this is the first serialized team, so we just store the values in the
     // team and drop the taskteam-object
     *OMPT_CUR_TEAM_INFO(thr) = lwt->ompt_team_info;
+#if OMPD_SUPPORT
+    if (ompd_state & OMPD_ENABLE_BP) {
+      ompd_bp_parallel_begin();
+    }
+#endif
     *OMPT_CUR_TASK_INFO(thr) = lwt->ompt_task_info;
   }
 }
@@ -303,6 +313,11 @@ void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr,
 void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
   ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info;
   if (lwtask) {
+#if OMPD_SUPPORT
+    if (ompd_state & OMPD_ENABLE_BP) {
+      ompd_bp_parallel_end();
+    }
+#endif
     thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent;
 
     ompt_team_info_t tmp_team = lwtask->ompt_team_info;


        


More information about the Openmp-commits mailing list