[Openmp-commits] [openmp] e9b8ed1 - [OpenMP][Tool] Header-only multiplexing of OMPT tools

Joachim Protze via Openmp-commits openmp-commits at lists.llvm.org
Wed Jun 17 00:17:57 PDT 2020


Author: Joachim Protze
Date: 2020-06-17T09:16:46+02:00
New Revision: e9b8ed1fd7c561956b5b2b2e9c43de84a00b4018

URL: https://github.com/llvm/llvm-project/commit/e9b8ed1fd7c561956b5b2b2e9c43de84a00b4018
DIFF: https://github.com/llvm/llvm-project/commit/e9b8ed1fd7c561956b5b2b2e9c43de84a00b4018.diff

LOG: [OpenMP][Tool] Header-only multiplexing of OMPT tools

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D76012

Added: 
    openmp/tools/multiplex/CMakeLists.txt
    openmp/tools/multiplex/README.md
    openmp/tools/multiplex/ompt-multiplex.h
    openmp/tools/multiplex/tests/CMakeLists.txt
    openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c
    openmp/tools/multiplex/tests/custom_data_storage/first-tool.h
    openmp/tools/multiplex/tests/custom_data_storage/second-tool.h
    openmp/tools/multiplex/tests/lit.cfg
    openmp/tools/multiplex/tests/lit.site.cfg.in
    openmp/tools/multiplex/tests/ompt-signal.h
    openmp/tools/multiplex/tests/print/first-tool.h
    openmp/tools/multiplex/tests/print/print.c
    openmp/tools/multiplex/tests/print/second-tool.h

Modified: 
    openmp/runtime/test/ompt/callback.h

Removed: 
    


################################################################################
diff  --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
index c49d43e44207..3975ed9d1e10 100644
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -1,7 +1,9 @@
 #ifndef _BSD_SOURCE
 #define _BSD_SOURCE
 #endif
+#ifndef _DEFAULT_SOURCE
 #define _DEFAULT_SOURCE
+#endif
 #include <stdio.h>
 #ifndef __STDC_FORMAT_MACROS
 #define __STDC_FORMAT_MACROS

diff  --git a/openmp/tools/multiplex/CMakeLists.txt b/openmp/tools/multiplex/CMakeLists.txt
new file mode 100644
index 000000000000..1201888d2585
--- /dev/null
+++ b/openmp/tools/multiplex/CMakeLists.txt
@@ -0,0 +1,12 @@
+project(OMPT-Multiplex)
+
+# The multiplexing header, its install rule and its tests are only configured
+# when LIBOMP_OMPT_SUPPORT is set.
+if(LIBOMP_OMPT_SUPPORT)
+  # Directory-scoped include path: applies to targets created in this directory
+  # and in subdirectories added below.
+  include_directories(${LIBOMP_INCLUDE_DIR})
+
+  # Header-only INTERFACE target; consumers that link it get this source
+  # directory (which contains ompt-multiplex.h) on their include path.
+  add_library(ompt-multiplex INTERFACE)
+  target_include_directories(ompt-multiplex INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
+
+  # Install the header into the "include" directory below the install prefix.
+  install(FILES ompt-multiplex.h DESTINATION include)
+
+  add_subdirectory(tests)
+endif()

diff  --git a/openmp/tools/multiplex/README.md b/openmp/tools/multiplex/README.md
new file mode 100644
index 000000000000..601a14a41179
--- /dev/null
+++ b/openmp/tools/multiplex/README.md
@@ -0,0 +1,60 @@
+# OMPT-Multiplexing
+The OMPT-Multiplexing header file allows a tool to load a second tool to
+overcome the restriction of the OpenMP runtime to load only one tool at a time.
+The header file can also be used to load more than two tools using a cascade 
+of tools that include the header file. OMPT-Multiplexing takes care of the 
+multiplexing of OMPT callbacks, data pointers and runtime entry functions.
+
+Examples can be found under ./tests
+
+## Prerequisites
+- LLVM/OpenMP runtime with OMPT (https://github.com/OpenMPToolsInterface/LLVM-openmp)
+- LLVM-lit
+
+### Getting LLVM-lit
+Either build LLVM and use the lit and FileCheck found in the LLVM build directory, or install lit using `pip`:
+```
+ $ pip install --upgrade --user pip
+ $ export PATH=$HOME/.local/bin:$PATH
+ $ export PYTHONPATH=$HOME/.local/lib/python3.*/site-packages/
+ $ pip install --user lit
+```
+
+## How to test
+```
+ $ make check-ompt-multiplex
+```
+
+## How to compile and use your OpenMP tools
+The code of the first tool must include the following. By convention, the environment variable that contains the path to the client tool is named after the tool, with the suffix "_TOOL_LIBRARIES":
+```
+#define CLIENT_TOOL_LIBRARIES_VAR "EXAMPLE_TOOL_LIBRARIES"
+#include <ompt-multiplex.h>
+```
+Note that functions and variables with the prefix "ompt_multiplex" are reserved for use by the OMPT-Multiplexing header file.
+
+
+To use both tools execute the following:
+```
+ $ clang -fopenmp -o program.exe program.c
+ $ OMP_TOOL_LIBRARIES=/path/to/first/tool.so EXAMPLE_TOOL_LIBRARIES=/path/to/second/tool.so ./program.exe
+```
+Note that EXAMPLE_TOOL_LIBRARIES may also contain a list of paths to tools; loading them is attempted in the listed order (similar to lists in OMP_TOOL_LIBRARIES).
+
+## Advanced usage
+To reduce the number of memory allocations, the user can define macros that specify custom data access handlers before including the ompt-multiplex.h file:
+
+```
+#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA get_client_thread_data
+#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA get_client_parallel_data
+#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA get_client_task_data
+```
+
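+With only these macros defined, the calling order of the current tool and its client is reversed for the events that end the lifetime of the data (thread end, parallel end, implicit-task end and task completion): the client's callback is then invoked before the current tool's callback. In order to avoid this, one can specify a custom delete handler as well: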
+
+```
+#define OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA delete_thread_data
+#define OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA delete_parallel_data
+#define OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA delete_task_data
+```
+

diff  --git a/openmp/tools/multiplex/ompt-multiplex.h b/openmp/tools/multiplex/ompt-multiplex.h
new file mode 100644
index 000000000000..097b7379b82e
--- /dev/null
+++ b/openmp/tools/multiplex/ompt-multiplex.h
@@ -0,0 +1,1094 @@
+//===--- ompt-multiplex.h - header-only multiplexing of OMPT tools -- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file enables an OMPT tool to load another OMPT tool and
+// automatically forwards OMPT event-callbacks to the nested tool.
+//
+// For details see openmp/tools/multiplex/README.md
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef OMPT_MULTIPLEX_H
+#define OMPT_MULTIPLEX_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <dlfcn.h>
+#include <execinfo.h>
+#include <inttypes.h>
+#include <omp-tools.h>
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static ompt_set_callback_t ompt_multiplex_set_callback;
+static ompt_get_task_info_t ompt_multiplex_get_task_info;
+static ompt_get_thread_data_t ompt_multiplex_get_thread_data;
+static ompt_get_parallel_info_t ompt_multiplex_get_parallel_info;
+
+// contains name of the environment var in which the tool path is specified
+#ifndef CLIENT_TOOL_LIBRARIES_VAR
+#error CLIENT_TOOL_LIBRARIES_VAR should be defined before including of ompt-multiplex.h
+#endif
+
+#if defined(CUSTOM_DELETE_DATA) && !defined(CUSTOM_GET_CLIENT_DATA)
+#error CUSTOM_GET_CLIENT_DATA must be set if CUSTOM_DELETE_DATA is set
+#endif
+
+#define OMPT_API_ROUTINE static
+
+// X-macro listing the OMPT callbacks handled by this header. For each entry
+// the given macro is invoked as macro(event name, callback function type,
+// numeric event id), and each invocation is terminated with a semicolon.
+#define OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(macro)                             \
+  macro(callback_thread_begin, ompt_callback_thread_begin_t, 1);               \
+  macro(callback_thread_end, ompt_callback_thread_end_t, 2);                   \
+  macro(callback_parallel_begin, ompt_callback_parallel_begin_t, 3);           \
+  macro(callback_parallel_end, ompt_callback_parallel_end_t, 4);               \
+  macro(callback_task_create, ompt_callback_task_create_t, 5);                 \
+  macro(callback_task_schedule, ompt_callback_task_schedule_t, 6);             \
+  macro(callback_implicit_task, ompt_callback_implicit_task_t, 7);             \
+  macro(callback_target, ompt_callback_target_t, 8);                           \
+  macro(callback_target_data_op, ompt_callback_target_data_op_t, 9);           \
+  macro(callback_target_submit, ompt_callback_target_submit_t, 10);            \
+  macro(callback_control_tool, ompt_callback_control_tool_t, 11);              \
+  macro(callback_device_initialize, ompt_callback_device_initialize_t, 12);    \
+  macro(callback_device_finalize, ompt_callback_device_finalize_t, 13);        \
+  macro(callback_device_load, ompt_callback_device_load_t, 14);                \
+  macro(callback_device_unload, ompt_callback_device_unload_t, 15);            \
+  macro(callback_sync_region_wait, ompt_callback_sync_region_t, 16);           \
+  macro(callback_mutex_released, ompt_callback_mutex_t, 17);                   \
+  macro(callback_dependences, ompt_callback_dependences_t, 18);                \
+  macro(callback_task_dependence, ompt_callback_task_dependence_t, 19);        \
+  macro(callback_work, ompt_callback_work_t, 20);                              \
+  macro(callback_master, ompt_callback_master_t, 21);                          \
+  macro(callback_target_map, ompt_callback_target_map_t, 22);                  \
+  macro(callback_sync_region, ompt_callback_sync_region_t, 23);                \
+  macro(callback_lock_init, ompt_callback_mutex_acquire_t, 24);                \
+  macro(callback_lock_destroy, ompt_callback_mutex_t, 25);                     \
+  macro(callback_mutex_acquire, ompt_callback_mutex_acquire_t, 26);            \
+  macro(callback_mutex_acquired, ompt_callback_mutex_t, 27);                   \
+  macro(callback_nest_lock, ompt_callback_nest_lock_t, 28);                    \
+  macro(callback_flush, ompt_callback_flush_t, 29);                            \
+  macro(callback_cancel, ompt_callback_cancel_t, 30);                          \
+  macro(callback_reduction, ompt_callback_sync_region_t, 31);                  \
+  macro(callback_dispatch, ompt_callback_dispatch_t, 32);
+
+// Table of callback function pointers: one member named ompt_<event>, of the
+// event's callback type, for every entry of
+// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT.
+typedef struct ompt_multiplex_callbacks_s {
+#define ompt_event_macro(event, callback, eventid) callback ompt_##event
+
+  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_multiplex_callbacks_t;
+
+// One int member named ompt_<event> for every entry of
+// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT.
+// NOTE(review): presumably holds the per-event result of registering the
+// callback with the runtime -- confirm against the code that fills it.
+typedef struct ompt_multiplex_callback_implementation_status_s {
+#define ompt_event_macro(event, callback, eventid) int ompt_##event
+
+  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+} ompt_multiplex_callback_implementation_status_t;
+
+// Global state of the multiplexer.
+// NOTE(review): own_fns/client_fns presumably hold the start-tool results of
+// the including tool and of the loaded client tool -- confirm where they are
+// assigned.
+ompt_start_tool_result_t *ompt_multiplex_own_fns;
+ompt_start_tool_result_t *ompt_multiplex_client_fns;
+ompt_function_lookup_t ompt_multiplex_lookup_function;
+// Callback tables consulted by the ompt_multiplex_callback_* forwarders: the
+// own table is invoked for the including tool, the client table for the
+// client tool.
+ompt_multiplex_callbacks_t ompt_multiplex_own_callbacks,
+    ompt_multiplex_client_callbacks;
+ompt_multiplex_callback_implementation_status_t
+    ompt_multiplex_implementation_status;
+
+// Pair of data slots stored behind a single runtime-provided ompt_data_t:
+// own_data is handed to the including tool's callbacks, client_data to the
+// client tool's callbacks.
+typedef struct ompt_multiplex_data_pair_s {
+  ompt_data_t own_data;
+  ompt_data_t client_data;
+} ompt_multiplex_data_pair_t;
+
+#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) ||                  \
+    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) ||                \
+    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+// Allocates a multiplexing data pair, stores it in data_pointer->ptr and
+// returns it with the ptr member of both the own and the client slot set to
+// NULL. On allocation failure a message is written to stderr and the process
+// exits with status -1.
+static ompt_multiplex_data_pair_t *
+ompt_multiplex_allocate_data_pair(ompt_data_t *data_pointer) {
+  ompt_multiplex_data_pair_t *data_pair =
+      (ompt_multiplex_data_pair_t *)malloc(sizeof(ompt_multiplex_data_pair_t));
+  if (!data_pair) {
+    // Report on stderr so the diagnostic does not get mixed into regular
+    // output written to stdout.
+    fprintf(stderr, "Malloc ERROR\n");
+    exit(-1);
+  }
+  data_pair->own_data.ptr = NULL;
+  data_pair->client_data.ptr = NULL;
+  data_pointer->ptr = data_pair;
+  return data_pair;
+}
+
+// Releases the data pair referenced by data_pointer->ptr. The ptr member
+// itself is left unchanged.
+static void ompt_multiplex_free_data_pair(ompt_data_t *data_pointer) {
+  void *pair_storage = data_pointer->ptr;
+  free(pair_storage);
+}
+
+// Returns the address of the own_data slot of the data pair stored in
+// data->ptr. Returns NULL if data is NULL or if no data pair is attached
+// (data->ptr is NULL), so that no member address is computed from a NULL
+// pair.
+static ompt_data_t *ompt_multiplex_get_own_ompt_data(ompt_data_t *data) {
+  if (!data || !data->ptr)
+    return NULL;
+  ompt_multiplex_data_pair_t *data_pair =
+      (ompt_multiplex_data_pair_t *)data->ptr;
+  return &(data_pair->own_data);
+}
+
+// Returns the address of the client_data slot of the data pair stored in
+// data->ptr. Returns NULL if data is NULL or if no data pair is attached
+// (data->ptr is NULL); without the second check the result would be a bogus
+// non-NULL pointer derived from a NULL pair.
+static ompt_data_t *ompt_multiplex_get_client_ompt_data(ompt_data_t *data) {
+  if (!data || !data->ptr)
+    return NULL;
+  ompt_multiplex_data_pair_t *data_pair =
+      (ompt_multiplex_data_pair_t *)data->ptr;
+  return &(data_pair->client_data);
+}
+#endif // !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) ||
+       // !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) ||
+       // !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+
+// Selects the thread data slot passed to the including tool's callbacks: the
+// runtime-provided slot itself when a custom client accessor is configured,
+// otherwise the result of ompt_multiplex_get_own_ompt_data().
+static ompt_data_t *ompt_multiplex_get_own_thread_data(ompt_data_t *data) {
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
+  return data;
+#else
+  return ompt_multiplex_get_own_ompt_data(data);
+#endif
+}
+
+// Selects the parallel data slot passed to the including tool's callbacks:
+// the runtime-provided slot itself when a custom client accessor is
+// configured, otherwise the result of ompt_multiplex_get_own_ompt_data().
+static ompt_data_t *ompt_multiplex_get_own_parallel_data(ompt_data_t *data) {
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
+  return data;
+#else
+  return ompt_multiplex_get_own_ompt_data(data);
+#endif
+}
+
+// Selects the task data slot passed to the including tool's callbacks: the
+// runtime-provided slot itself when a custom client accessor is configured,
+// otherwise the result of ompt_multiplex_get_own_ompt_data().
+static ompt_data_t *ompt_multiplex_get_own_task_data(ompt_data_t *data) {
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+  return data;
+#else
+  return ompt_multiplex_get_own_ompt_data(data);
+#endif
+}
+
+// Selects the thread data slot passed to the client tool's callbacks: the
+// result of the user-supplied accessor macro when one is configured,
+// otherwise the result of ompt_multiplex_get_client_ompt_data().
+static ompt_data_t *ompt_multiplex_get_client_thread_data(ompt_data_t *data) {
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
+  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(data);
+#else
+  return ompt_multiplex_get_client_ompt_data(data);
+#endif
+}
+
+// Selects the parallel data slot passed to the client tool's callbacks: the
+// result of the user-supplied accessor macro when one is configured,
+// otherwise the result of ompt_multiplex_get_client_ompt_data().
+static ompt_data_t *ompt_multiplex_get_client_parallel_data(ompt_data_t *data) {
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
+  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(data);
+#else
+  return ompt_multiplex_get_client_ompt_data(data);
+#endif
+}
+
+// Selects the task data slot passed to the client tool's callbacks: the
+// result of the user-supplied accessor macro when one is configured,
+// otherwise the result of ompt_multiplex_get_client_ompt_data().
+static ompt_data_t *ompt_multiplex_get_client_task_data(ompt_data_t *data) {
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(data);
+#else
+  return ompt_multiplex_get_client_ompt_data(data);
+#endif
+}
+
+// Forwards a mutex-acquire event with unchanged arguments, first to the own
+// callback and then to the client callback; a NULL callback is skipped.
+static void ompt_multiplex_callback_mutex_acquire(ompt_mutex_t kind,
+                                                  unsigned int hint,
+                                                  unsigned int impl,
+                                                  ompt_wait_id_t wait_id,
+                                                  const void *codeptr_ra) {
+  ompt_callback_mutex_acquire_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_mutex_acquire;
+  if (own_cb)
+    own_cb(kind, hint, impl, wait_id, codeptr_ra);
+
+  ompt_callback_mutex_acquire_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_mutex_acquire;
+  if (client_cb)
+    client_cb(kind, hint, impl, wait_id, codeptr_ra);
+}
+
+// Forwards a mutex-acquired event with unchanged arguments, first to the own
+// callback and then to the client callback; a NULL callback is skipped.
+static void ompt_multiplex_callback_mutex_acquired(ompt_mutex_t kind,
+                                                   ompt_wait_id_t wait_id,
+                                                   const void *codeptr_ra) {
+  ompt_callback_mutex_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_mutex_acquired;
+  if (own_cb)
+    own_cb(kind, wait_id, codeptr_ra);
+
+  ompt_callback_mutex_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_mutex_acquired;
+  if (client_cb)
+    client_cb(kind, wait_id, codeptr_ra);
+}
+
+// Forwards a mutex-released event with unchanged arguments, first to the own
+// callback and then to the client callback; a NULL callback is skipped.
+static void ompt_multiplex_callback_mutex_released(ompt_mutex_t kind,
+                                                   ompt_wait_id_t wait_id,
+                                                   const void *codeptr_ra) {
+  ompt_callback_mutex_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_mutex_released;
+  if (own_cb)
+    own_cb(kind, wait_id, codeptr_ra);
+
+  ompt_callback_mutex_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_mutex_released;
+  if (client_cb)
+    client_cb(kind, wait_id, codeptr_ra);
+}
+
+// Forwards a nest-lock event with unchanged arguments, first to the own
+// callback and then to the client callback; a NULL callback is skipped.
+static void ompt_multiplex_callback_nest_lock(ompt_scope_endpoint_t endpoint,
+                                              ompt_wait_id_t wait_id,
+                                              const void *codeptr_ra) {
+  ompt_callback_nest_lock_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_nest_lock;
+  if (own_cb)
+    own_cb(endpoint, wait_id, codeptr_ra);
+
+  ompt_callback_nest_lock_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_nest_lock;
+  if (client_cb)
+    client_cb(endpoint, wait_id, codeptr_ra);
+}
+
+// Forwards a sync-region event, first to the own callback and then to the
+// client callback; a NULL callback is skipped. Each callback receives its
+// own view of the parallel and task data.
+static void ompt_multiplex_callback_sync_region(ompt_sync_region_t kind,
+                                                ompt_scope_endpoint_t endpoint,
+                                                ompt_data_t *parallel_data,
+                                                ompt_data_t *task_data,
+                                                const void *codeptr_ra) {
+  ompt_callback_sync_region_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_sync_region;
+  if (own_cb)
+    own_cb(kind, endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+           ompt_multiplex_get_own_task_data(task_data), codeptr_ra);
+
+  ompt_callback_sync_region_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_sync_region;
+  if (client_cb)
+    client_cb(kind, endpoint,
+              ompt_multiplex_get_client_parallel_data(parallel_data),
+              ompt_multiplex_get_client_task_data(task_data), codeptr_ra);
+}
+
+// Forwards a sync-region-wait event, first to the own callback and then to
+// the client callback; a NULL callback is skipped. Each callback receives
+// its own view of the parallel and task data.
+static void ompt_multiplex_callback_sync_region_wait(
+    ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint,
+    ompt_data_t *parallel_data, ompt_data_t *task_data,
+    const void *codeptr_ra) {
+  ompt_callback_sync_region_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_sync_region_wait;
+  if (own_cb)
+    own_cb(kind, endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+           ompt_multiplex_get_own_task_data(task_data), codeptr_ra);
+
+  ompt_callback_sync_region_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_sync_region_wait;
+  if (client_cb)
+    client_cb(kind, endpoint,
+              ompt_multiplex_get_client_parallel_data(parallel_data),
+              ompt_multiplex_get_client_task_data(task_data), codeptr_ra);
+}
+
+// Forwards a flush event, first to the own callback and then to the client
+// callback; a NULL callback is skipped. Each callback receives its own view
+// of the thread data.
+static void ompt_multiplex_callback_flush(ompt_data_t *thread_data,
+                                          const void *codeptr_ra) {
+  ompt_callback_flush_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_flush;
+  if (own_cb)
+    own_cb(ompt_multiplex_get_own_thread_data(thread_data), codeptr_ra);
+
+  ompt_callback_flush_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_flush;
+  if (client_cb)
+    client_cb(ompt_multiplex_get_client_thread_data(thread_data), codeptr_ra);
+}
+
+// Forwards a cancel event, first to the own callback and then to the client
+// callback; a NULL callback is skipped. Each callback receives its own view
+// of the task data.
+static void ompt_multiplex_callback_cancel(ompt_data_t *task_data, int flags,
+                                           const void *codeptr_ra) {
+  ompt_callback_cancel_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_cancel;
+  if (own_cb)
+    own_cb(ompt_multiplex_get_own_task_data(task_data), flags, codeptr_ra);
+
+  ompt_callback_cancel_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_cancel;
+  if (client_cb)
+    client_cb(ompt_multiplex_get_client_task_data(task_data), flags,
+              codeptr_ra);
+}
+
+// Multiplexes the implicit-task event to the own and the client callback.
+//
+// On ompt_scope_begin the data pairs are allocated first (unless custom
+// accessors are configured), then the own callback and the client callback
+// are invoked in that order. For any other endpoint the callbacks are invoked
+// in an order that depends on the user-defined macros, and afterwards the
+// task data is released.
+static void ompt_multiplex_callback_implicit_task(
+    ompt_scope_endpoint_t endpoint, ompt_data_t *parallel_data,
+    ompt_data_t *task_data, unsigned int team_size, unsigned int thread_num,
+    int flags) {
+  if (endpoint == ompt_scope_begin) {
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+    ompt_multiplex_allocate_data_pair(task_data);
+#endif
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+    // Only the initial task allocates a pair for the parallel data here.
+    if (flags & ompt_task_initial)
+      ompt_multiplex_allocate_data_pair(parallel_data);
+#endif
+    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) {
+      ompt_multiplex_own_callbacks.ompt_callback_implicit_task(
+          endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+          ompt_multiplex_get_own_task_data(task_data), team_size, thread_num,
+          flags);
+    }
+    if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task) {
+      ompt_multiplex_client_callbacks.ompt_callback_implicit_task(
+          endpoint, ompt_multiplex_get_client_parallel_data(parallel_data),
+          ompt_multiplex_get_client_task_data(task_data), team_size, thread_num,
+          flags);
+    }
+  } else {
+// The preprocessor conditions below select the calling order depending on the
+// macros set by the user: the own callback runs before the client callback
+// when a custom delete handler is defined or no custom accessor is used.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) ||                         \
+    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) {
+      ompt_multiplex_own_callbacks.ompt_callback_implicit_task(
+          endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+          ompt_multiplex_get_own_task_data(task_data), team_size, thread_num,
+          flags);
+    }
+#endif
+
+    if (ompt_multiplex_client_callbacks.ompt_callback_implicit_task) {
+      ompt_multiplex_client_callbacks.ompt_callback_implicit_task(
+          endpoint, ompt_multiplex_get_client_parallel_data(parallel_data),
+          ompt_multiplex_get_client_task_data(task_data), team_size, thread_num,
+          flags);
+    }
+
+// With a custom accessor but no custom delete handler, the own callback runs
+// after the client callback.
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) &&                     \
+    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
+    if (ompt_multiplex_own_callbacks.ompt_callback_implicit_task) {
+      ompt_multiplex_own_callbacks.ompt_callback_implicit_task(
+          endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+          ompt_multiplex_get_own_task_data(task_data), team_size, thread_num,
+          flags);
+    }
+#endif
+
+// Release the task data pair allocated at ompt_scope_begin.
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+    ompt_multiplex_free_data_pair(task_data);
+#endif
+
+// User-supplied delete handlers run last; the parallel data handler is only
+// invoked for the initial task.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
+    if (flags & ompt_task_initial)
+      OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data);
+#endif
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
+    OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(task_data);
+#endif
+  }
+}
+
+// Forwards a lock-init event with unchanged arguments, first to the own
+// callback and then to the client callback; a NULL callback is skipped.
+static void ompt_multiplex_callback_lock_init(ompt_mutex_t kind,
+                                              unsigned int hint,
+                                              unsigned int impl,
+                                              ompt_wait_id_t wait_id,
+                                              const void *codeptr_ra) {
+  ompt_callback_mutex_acquire_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_lock_init;
+  if (own_cb)
+    own_cb(kind, hint, impl, wait_id, codeptr_ra);
+
+  ompt_callback_mutex_acquire_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_lock_init;
+  if (client_cb)
+    client_cb(kind, hint, impl, wait_id, codeptr_ra);
+}
+
+// Forwards a lock-destroy event with unchanged arguments, first to the own
+// callback and then to the client callback; a NULL callback is skipped.
+static void ompt_multiplex_callback_lock_destroy(ompt_mutex_t kind,
+                                                 ompt_wait_id_t wait_id,
+                                                 const void *codeptr_ra) {
+  ompt_callback_mutex_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_lock_destroy;
+  if (own_cb)
+    own_cb(kind, wait_id, codeptr_ra);
+
+  ompt_callback_mutex_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_lock_destroy;
+  if (client_cb)
+    client_cb(kind, wait_id, codeptr_ra);
+}
+
+// Forwards a work event, first to the own callback and then to the client
+// callback; a NULL callback is skipped. Each callback receives its own view
+// of the parallel and task data.
+static void ompt_multiplex_callback_work(ompt_work_t wstype,
+                                         ompt_scope_endpoint_t endpoint,
+                                         ompt_data_t *parallel_data,
+                                         ompt_data_t *task_data, uint64_t count,
+                                         const void *codeptr_ra) {
+  ompt_callback_work_t own_cb = ompt_multiplex_own_callbacks.ompt_callback_work;
+  if (own_cb)
+    own_cb(wstype, endpoint,
+           ompt_multiplex_get_own_parallel_data(parallel_data),
+           ompt_multiplex_get_own_task_data(task_data), count, codeptr_ra);
+
+  ompt_callback_work_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_work;
+  if (client_cb)
+    client_cb(wstype, endpoint,
+              ompt_multiplex_get_client_parallel_data(parallel_data),
+              ompt_multiplex_get_client_task_data(task_data), count,
+              codeptr_ra);
+}
+
+// Forwards a master event, first to the own callback and then to the client
+// callback; a NULL callback is skipped. Each callback receives its own view
+// of the parallel and task data.
+static void ompt_multiplex_callback_master(ompt_scope_endpoint_t endpoint,
+                                           ompt_data_t *parallel_data,
+                                           ompt_data_t *task_data,
+                                           const void *codeptr_ra) {
+  ompt_callback_master_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_master;
+  if (own_cb)
+    own_cb(endpoint, ompt_multiplex_get_own_parallel_data(parallel_data),
+           ompt_multiplex_get_own_task_data(task_data), codeptr_ra);
+
+  ompt_callback_master_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_master;
+  if (client_cb)
+    client_cb(endpoint, ompt_multiplex_get_client_parallel_data(parallel_data),
+              ompt_multiplex_get_client_task_data(task_data), codeptr_ra);
+}
+
+// Handles the parallel-begin event: unless a custom parallel data accessor is
+// configured, a data pair is allocated for the new parallel region; then the
+// event is forwarded to the own callback and to the client callback, in that
+// order, skipping a NULL callback.
+static void ompt_multiplex_callback_parallel_begin(
+    ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame,
+    ompt_data_t *parallel_data, uint32_t requested_team_size, int flag,
+    const void *codeptr_ra) {
+#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
+  ompt_multiplex_allocate_data_pair(parallel_data);
+#endif
+
+  ompt_callback_parallel_begin_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_parallel_begin;
+  if (own_cb)
+    own_cb(ompt_multiplex_get_own_task_data(parent_task_data),
+           parent_task_frame,
+           ompt_multiplex_get_own_parallel_data(parallel_data),
+           requested_team_size, flag, codeptr_ra);
+
+  ompt_callback_parallel_begin_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_parallel_begin;
+  if (client_cb)
+    client_cb(ompt_multiplex_get_client_task_data(parent_task_data),
+              parent_task_frame,
+              ompt_multiplex_get_client_parallel_data(parallel_data),
+              requested_team_size, flag, codeptr_ra);
+}
+
+// Multiplexes the parallel-end event to the own and the client callback and
+// afterwards releases the parallel data. The relative order of the two
+// callbacks depends on the user-defined macros, see below.
+static void ompt_multiplex_callback_parallel_end(ompt_data_t *parallel_data,
+                                                 ompt_data_t *task_data,
+                                                 int flag,
+                                                 const void *codeptr_ra) {
+// The preprocessor conditions below select the calling order depending on the
+// macros set by the user: the own callback runs before the client callback
+// when a custom delete handler is defined or no custom accessor is used.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA) ||                     \
+    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
+  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end) {
+    ompt_multiplex_own_callbacks.ompt_callback_parallel_end(
+        ompt_multiplex_get_own_parallel_data(parallel_data),
+        ompt_multiplex_get_own_task_data(task_data), flag, codeptr_ra);
+  }
+#endif
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_parallel_end) {
+    ompt_multiplex_client_callbacks.ompt_callback_parallel_end(
+        ompt_multiplex_get_client_parallel_data(parallel_data),
+        ompt_multiplex_get_client_task_data(task_data), flag, codeptr_ra);
+  }
+
+// With a custom accessor but no custom delete handler, the own callback runs
+// after the client callback.
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA) &&                 \
+    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
+  if (ompt_multiplex_own_callbacks.ompt_callback_parallel_end) {
+    ompt_multiplex_own_callbacks.ompt_callback_parallel_end(
+        ompt_multiplex_get_own_parallel_data(parallel_data),
+        ompt_multiplex_get_own_task_data(task_data), flag, codeptr_ra);
+  }
+#endif
+
+// Release the data pair when this header manages the parallel data itself.
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+  ompt_multiplex_free_data_pair(parallel_data);
+#endif
+
+// The user-supplied delete handler runs last.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA)
+  OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA(parallel_data);
+#endif
+}
+
+// Handles the task-create event: unless custom accessors are configured, a
+// data pair is allocated for the new task and, for an initial task, for the
+// parallel data of the current parallel region. The event is then forwarded
+// to the own callback and to the client callback, in that order, skipping a
+// NULL callback.
+static void ompt_multiplex_callback_task_create(
+    ompt_data_t *parent_task_data, const ompt_frame_t *parent_frame,
+    ompt_data_t *new_task_data, int type, int has_dependences,
+    const void *codeptr_ra) {
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+  ompt_multiplex_allocate_data_pair(new_task_data);
+#endif
+
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+  if (type & ompt_task_initial) {
+    // Start from NULL so that a query which does not provide parallel data
+    // leaves a well-defined value instead of an uninitialized pointer.
+    ompt_data_t *parallel_data = NULL;
+    ompt_multiplex_get_parallel_info(0, &parallel_data, NULL);
+    if (parallel_data)
+      ompt_multiplex_allocate_data_pair(parallel_data);
+  }
+#endif
+
+  if (ompt_multiplex_own_callbacks.ompt_callback_task_create) {
+    ompt_multiplex_own_callbacks.ompt_callback_task_create(
+        ompt_multiplex_get_own_task_data(parent_task_data), parent_frame,
+        ompt_multiplex_get_own_task_data(new_task_data), type, has_dependences,
+        codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_task_create) {
+    ompt_multiplex_client_callbacks.ompt_callback_task_create(
+        ompt_multiplex_get_client_task_data(parent_task_data), parent_frame,
+        ompt_multiplex_get_client_task_data(new_task_data), type,
+        has_dependences, codeptr_ra);
+  }
+}
+
+// Multiplexes the task-schedule event to the own and the client callback.
+//
+// If the prior task did not complete, the own callback and the client
+// callback are invoked in that order. If it completed
+// (ompt_task_complete), the calling order depends on the user-defined macros
+// and the data of the completed task (first_task_data) is released
+// afterwards.
+static void
+ompt_multiplex_callback_task_schedule(ompt_data_t *first_task_data,
+                                      ompt_task_status_t prior_task_status,
+                                      ompt_data_t *second_task_data) {
+  if (prior_task_status != ompt_task_complete) {
+    if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
+      ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
+          ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
+          ompt_multiplex_get_own_task_data(second_task_data));
+    }
+    if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) {
+      ompt_multiplex_client_callbacks.ompt_callback_task_schedule(
+          ompt_multiplex_get_client_task_data(first_task_data),
+          prior_task_status,
+          ompt_multiplex_get_client_task_data(second_task_data));
+    }
+  } else {
+// The preprocessor conditions below select the calling order depending on the
+// macros set by the user: the own callback runs before the client callback
+// when a custom delete handler is defined or no custom accessor is used.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA) ||                         \
+    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+    if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
+      ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
+          ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
+          ompt_multiplex_get_own_task_data(second_task_data));
+    }
+#endif
+
+    if (ompt_multiplex_client_callbacks.ompt_callback_task_schedule) {
+      ompt_multiplex_client_callbacks.ompt_callback_task_schedule(
+          ompt_multiplex_get_client_task_data(first_task_data),
+          prior_task_status,
+          ompt_multiplex_get_client_task_data(second_task_data));
+    }
+
+// With a custom accessor but no custom delete handler, the own callback runs
+// after the client callback.
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA) &&                     \
+    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
+    if (ompt_multiplex_own_callbacks.ompt_callback_task_schedule) {
+      ompt_multiplex_own_callbacks.ompt_callback_task_schedule(
+          ompt_multiplex_get_own_task_data(first_task_data), prior_task_status,
+          ompt_multiplex_get_own_task_data(second_task_data));
+    }
+#endif
+
+// Release the data pair of the completed task when this header manages the
+// task data itself.
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+    ompt_multiplex_free_data_pair(first_task_data);
+#endif
+
+// The user-supplied delete handler runs last.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA)
+    OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA(first_task_data);
+#endif
+  }
+}
+
+// Forwards a dependences event, first to the own callback and then to the
+// client callback; a NULL callback is skipped. Each callback receives its
+// own view of the task data.
+static void ompt_multiplex_callback_dependences(ompt_data_t *task_data,
+                                                const ompt_dependence_t *deps,
+                                                int ndeps) {
+  ompt_callback_dependences_t own_cb =
+      ompt_multiplex_own_callbacks.ompt_callback_dependences;
+  if (own_cb)
+    own_cb(ompt_multiplex_get_own_task_data(task_data), deps, ndeps);
+
+  ompt_callback_dependences_t client_cb =
+      ompt_multiplex_client_callbacks.ompt_callback_dependences;
+  if (client_cb)
+    client_cb(ompt_multiplex_get_client_task_data(task_data), deps, ndeps);
+}
+
+// Relays a task-dependence event to the own tool and then to the client
+// tool, translating both task handles into the receiving tool's view.
+static void
+ompt_multiplex_callback_task_dependence(ompt_data_t *first_task_data,
+                                        ompt_data_t *second_task_data) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_task_dependence != NULL) {
+    ompt_data_t *own_first = ompt_multiplex_get_own_task_data(first_task_data);
+    ompt_data_t *own_second =
+        ompt_multiplex_get_own_task_data(second_task_data);
+    ompt_multiplex_own_callbacks.ompt_callback_task_dependence(own_first,
+                                                               own_second);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_task_dependence != NULL) {
+    ompt_data_t *client_first =
+        ompt_multiplex_get_client_task_data(first_task_data);
+    ompt_data_t *client_second =
+        ompt_multiplex_get_client_task_data(second_task_data);
+    ompt_multiplex_client_callbacks.ompt_callback_task_dependence(
+        client_first, client_second);
+  }
+}
+
+// Handles thread begin. Unless the user supplies custom client thread-data
+// storage, a data pair is allocated for the thread before either tool is
+// notified. The own tool is notified first, then the client tool.
+static void ompt_multiplex_callback_thread_begin(ompt_thread_t thread_type,
+                                                 ompt_data_t *thread_data) {
+#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
+  ompt_multiplex_allocate_data_pair(thread_data);
+#endif
+  if (ompt_multiplex_own_callbacks.ompt_callback_thread_begin != NULL) {
+    ompt_data_t *own_thread = ompt_multiplex_get_own_thread_data(thread_data);
+    ompt_multiplex_own_callbacks.ompt_callback_thread_begin(thread_type,
+                                                            own_thread);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_thread_begin != NULL) {
+    ompt_data_t *client_thread =
+        ompt_multiplex_get_client_thread_data(thread_data);
+    ompt_multiplex_client_callbacks.ompt_callback_thread_begin(thread_type,
+                                                               client_thread);
+  }
+}
+
+// Handles thread end: notifies both tools and then releases the per-thread
+// storage. The order of the two notifications is selected at compile time:
+//   - neither macro defined:            own, client, then free the data pair
+//   - only ..._CUSTOM_GET_CLIENT_THREAD_DATA defined:
+//                                       client, then own; nothing is freed
+//   - ..._CUSTOM_DELETE_THREAD_DATA defined:
+//                                       own, client, then the data pair is
+//                                       freed (only if the GET_CLIENT macro
+//                                       is not defined) and the user's
+//                                       delete macro is invoked
+// Exactly one of the two "own" blocks below is compiled in, because their
+// preprocessor conditions are logical complements of each other.
+static void ompt_multiplex_callback_thread_end(ompt_data_t *thread_data) {
+// The preprocessor conditions below make sure the callbacks are invoked in
+// the correct order for the macros the user has defined.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA) ||                       \
+    !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA)
+  if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) {
+    ompt_multiplex_own_callbacks.ompt_callback_thread_end(
+        ompt_multiplex_get_own_thread_data(thread_data));
+  }
+#endif
+
+  // The client tool is always notified, regardless of the configuration.
+  if (ompt_multiplex_client_callbacks.ompt_callback_thread_end) {
+    ompt_multiplex_client_callbacks.ompt_callback_thread_end(
+        ompt_multiplex_get_client_thread_data(thread_data));
+  }
+
+#if defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA) &&                   \
+    !defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA)
+  if (ompt_multiplex_own_callbacks.ompt_callback_thread_end) {
+    ompt_multiplex_own_callbacks.ompt_callback_thread_end(
+        ompt_multiplex_get_own_thread_data(thread_data));
+  }
+#endif
+
+// Release the pair allocated in the thread-begin callback (default storage).
+#ifndef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
+  ompt_multiplex_free_data_pair(thread_data);
+#endif
+
+// Give the user-provided hook a chance to clean up its own storage.
+#if defined(OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA)
+  OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA(thread_data);
+#endif
+}
+
+// Forwards an omp_control_tool request to both tools with identical
+// arguments. A tool without a registered callback contributes 0. The value
+// handed back to the runtime is the smaller of the two results.
+static int ompt_multiplex_callback_control_tool(uint64_t command,
+                                                uint64_t modifier, void *arg,
+                                                const void *codeptr_ra) {
+  int own_result = 0;
+  int client_result = 0;
+
+  if (ompt_multiplex_own_callbacks.ompt_callback_control_tool != NULL) {
+    own_result = ompt_multiplex_own_callbacks.ompt_callback_control_tool(
+        command, modifier, arg, codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_control_tool != NULL) {
+    client_result = ompt_multiplex_client_callbacks.ompt_callback_control_tool(
+        command, modifier, arg, codeptr_ra);
+  }
+
+  if (own_result < client_result)
+    return own_result;
+  return client_result;
+}
+
+// Relays a target-region event to the own tool and then the client tool.
+// Only the task data is translated per tool; every other argument is passed
+// through unchanged.
+static void ompt_multiplex_callback_target(
+    ompt_target_t kind, ompt_scope_endpoint_t endpoint, int device_num,
+    ompt_data_t *task_data, ompt_id_t target_id, const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target != NULL) {
+    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
+    ompt_multiplex_own_callbacks.ompt_callback_target(
+        kind, endpoint, device_num, own_task, target_id, codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_target != NULL) {
+    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
+    ompt_multiplex_client_callbacks.ompt_callback_target(
+        kind, endpoint, device_num, client_task, target_id, codeptr_ra);
+  }
+}
+
+// Relays a target data operation to both tools. No tool-specific data is
+// involved, so the own tool and then the client tool receive exactly the
+// arguments supplied by the runtime.
+static void ompt_multiplex_callback_target_data_op(
+    ompt_id_t target_id, ompt_id_t host_op_id, ompt_target_data_op_t optype,
+    void *src_addr, int src_device_num, void *dest_addr, int dest_device_num,
+    size_t bytes, const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target_data_op != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_target_data_op(
+        target_id, host_op_id, optype, src_addr, src_device_num, dest_addr,
+        dest_device_num, bytes, codeptr_ra);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_target_data_op != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_target_data_op(
+        target_id, host_op_id, optype, src_addr, src_device_num, dest_addr,
+        dest_device_num, bytes, codeptr_ra);
+}
+
+// Relays a target-submit event unchanged, first to the own tool and then to
+// the client tool.
+static void
+ompt_multiplex_callback_target_submit(ompt_id_t target_id, ompt_id_t host_op_id,
+                                      unsigned int requested_num_teams) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target_submit != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_target_submit(
+        target_id, host_op_id, requested_num_teams);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_target_submit != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_target_submit(
+        target_id, host_op_id, requested_num_teams);
+}
+
+// Relays a device-initialize event unchanged, first to the own tool and then
+// to the client tool. Both tools receive the same device lookup function.
+static void ompt_multiplex_callback_device_initialize(
+    int device_num, const char *type, ompt_device_t *device,
+    ompt_function_lookup_t lookup, const char *documentation) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_initialize != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_device_initialize(
+        device_num, type, device, lookup, documentation);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_initialize != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_device_initialize(
+        device_num, type, device, lookup, documentation);
+}
+
+// Relays a device-finalize event, first to the own tool and then to the
+// client tool.
+static void ompt_multiplex_callback_device_finalize(int device_num) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_finalize != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_device_finalize(device_num);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_finalize != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_device_finalize(device_num);
+}
+
+// Relays a device-load event unchanged, first to the own tool and then to the
+// client tool.
+static void
+ompt_multiplex_callback_device_load(int device_num, const char *filename,
+                                    int64_t offset_in_file, void *vma_in_file,
+                                    size_t bytes, void *host_addr,
+                                    void *device_addr, uint64_t module_id) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_load != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_device_load(
+        device_num, filename, offset_in_file, vma_in_file, bytes, host_addr,
+        device_addr, module_id);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_load != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_device_load(
+        device_num, filename, offset_in_file, vma_in_file, bytes, host_addr,
+        device_addr, module_id);
+}
+
+// Relays a device-unload event unchanged, first to the own tool and then to
+// the client tool.
+static void ompt_multiplex_callback_device_unload(int device_num,
+                                                  uint64_t module_id) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_device_unload != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_device_unload(device_num,
+                                                             module_id);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_device_unload != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_device_unload(device_num,
+                                                                module_id);
+}
+
+// Relays a target-map event unchanged, first to the own tool and then to the
+// client tool. Both tools are handed the very same arrays.
+static void
+ompt_multiplex_callback_target_map(ompt_id_t target_id, unsigned int nitems,
+                                   void **host_addr, void **device_addr,
+                                   size_t *bytes, unsigned int *mapping_flags,
+                                   const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_target_map != NULL)
+    ompt_multiplex_own_callbacks.ompt_callback_target_map(
+        target_id, nitems, host_addr, device_addr, bytes, mapping_flags,
+        codeptr_ra);
+
+  if (ompt_multiplex_client_callbacks.ompt_callback_target_map != NULL)
+    ompt_multiplex_client_callbacks.ompt_callback_target_map(
+        target_id, nitems, host_addr, device_addr, bytes, mapping_flags,
+        codeptr_ra);
+}
+
+// Relays a reduction event to the own tool and then the client tool. The
+// parallel and task data are translated into the receiving tool's view; the
+// remaining arguments are passed through unchanged.
+static void ompt_multiplex_callback_reduction(ompt_sync_region_t kind,
+                                              ompt_scope_endpoint_t endpoint,
+                                              ompt_data_t *parallel_data,
+                                              ompt_data_t *task_data,
+                                              const void *codeptr_ra) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_reduction != NULL) {
+    ompt_data_t *own_parallel =
+        ompt_multiplex_get_own_parallel_data(parallel_data);
+    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
+    ompt_multiplex_own_callbacks.ompt_callback_reduction(
+        kind, endpoint, own_parallel, own_task, codeptr_ra);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_reduction != NULL) {
+    ompt_data_t *client_parallel =
+        ompt_multiplex_get_client_parallel_data(parallel_data);
+    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
+    ompt_multiplex_client_callbacks.ompt_callback_reduction(
+        kind, endpoint, client_parallel, client_task, codeptr_ra);
+  }
+}
+
+// Relays a dispatch event to the own tool and then the client tool. The
+// parallel and task data are translated into the receiving tool's view; kind
+// and instance are passed through unchanged.
+static void ompt_multiplex_callback_dispatch(ompt_data_t *parallel_data,
+                                             ompt_data_t *task_data,
+                                             ompt_dispatch_t kind,
+                                             ompt_data_t instance) {
+  if (ompt_multiplex_own_callbacks.ompt_callback_dispatch != NULL) {
+    ompt_data_t *own_parallel =
+        ompt_multiplex_get_own_parallel_data(parallel_data);
+    ompt_data_t *own_task = ompt_multiplex_get_own_task_data(task_data);
+    ompt_multiplex_own_callbacks.ompt_callback_dispatch(own_parallel, own_task,
+                                                        kind, instance);
+  }
+  if (ompt_multiplex_client_callbacks.ompt_callback_dispatch != NULL) {
+    ompt_data_t *client_parallel =
+        ompt_multiplex_get_client_parallel_data(parallel_data);
+    ompt_data_t *client_task = ompt_multiplex_get_client_task_data(task_data);
+    ompt_multiplex_client_callbacks.ompt_callback_dispatch(
+        client_parallel, client_task, kind, instance);
+  }
+}
+
+// runtime entry functions
+
+// ompt_get_task_info as seen by the own tool. Queries the runtime, then
+// rewrites the returned task and parallel data pointers to the own tool's
+// slot. Each rewrite is compiled out when the corresponding custom client
+// accessor macro is defined. Returns the runtime's result unchanged.
+int ompt_multiplex_own_get_task_info(int ancestor_level, int *type,
+                                     ompt_data_t **task_data,
+                                     ompt_frame_t **task_frame,
+                                     ompt_data_t **parallel_data,
+                                     int *thread_num) {
+  int status = ompt_multiplex_get_task_info(
+      ancestor_level, type, task_data, task_frame, parallel_data, thread_num);
+
+#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA)
+  if (task_data != NULL) {
+    *task_data = ompt_multiplex_get_own_ompt_data(*task_data);
+  }
+#endif
+#if !defined(OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA)
+  if (parallel_data != NULL) {
+    *parallel_data = ompt_multiplex_get_own_ompt_data(*parallel_data);
+  }
+#endif
+  return status;
+}
+
+// ompt_get_task_info as seen by the client tool. Queries the runtime, then
+// rewrites the returned task and parallel data pointers to the client's
+// slot, using the user's custom accessor macro when one is defined and the
+// default pair accessor otherwise. Returns the runtime's result unchanged.
+int ompt_multiplex_client_get_task_info(int ancestor_level, int *type,
+                                        ompt_data_t **task_data,
+                                        ompt_frame_t **task_frame,
+                                        ompt_data_t **parallel_data,
+                                        int *thread_num) {
+  int status = ompt_multiplex_get_task_info(
+      ancestor_level, type, task_data, task_frame, parallel_data, thread_num);
+
+  if (task_data != NULL) {
+#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA
+    *task_data = OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA(*task_data);
+#else
+    *task_data = ompt_multiplex_get_client_ompt_data(*task_data);
+#endif
+  }
+
+  if (parallel_data != NULL) {
+#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+    *parallel_data =
+        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
+#else
+    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
+#endif
+  }
+  return status;
+}
+
+// ompt_get_thread_data as seen by the own tool. With custom client thread
+// data storage the runtime's pointer is returned as is; otherwise it is
+// translated to the own tool's slot.
+ompt_data_t *ompt_multiplex_own_get_thread_data() {
+#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
+  return ompt_multiplex_get_thread_data();
+#else
+  return ompt_multiplex_get_own_ompt_data(ompt_multiplex_get_thread_data());
+#endif
+}
+
+// ompt_get_thread_data as seen by the client tool. The runtime's pointer is
+// translated through the user's custom accessor macro when one is defined,
+// and through the default pair accessor otherwise.
+ompt_data_t *ompt_multiplex_client_get_thread_data() {
+#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA
+  return OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA(
+      ompt_multiplex_get_thread_data());
+#else
+  return ompt_multiplex_get_client_ompt_data(ompt_multiplex_get_thread_data());
+#endif
+}
+
+// ompt_get_parallel_info as seen by the own tool. Queries the runtime and
+// rewrites the returned parallel data pointer to the own tool's view.
+// Returns the runtime's result unchanged.
+int ompt_multiplex_own_get_parallel_info(int ancestor_level,
+                                         ompt_data_t **parallel_data,
+                                         int *team_size) {
+  int status =
+      ompt_multiplex_get_parallel_info(ancestor_level, parallel_data, team_size);
+  if (parallel_data != NULL) {
+    *parallel_data = ompt_multiplex_get_own_parallel_data(*parallel_data);
+  }
+  return status;
+}
+
+// ompt_get_parallel_info as seen by the client tool. Queries the runtime and
+// rewrites the returned parallel data pointer to the client's slot, using
+// the user's custom accessor macro when one is defined. Returns the
+// runtime's result unchanged.
+int ompt_multiplex_client_get_parallel_info(int ancestor_level,
+                                            ompt_data_t **parallel_data,
+                                            int *team_size) {
+  int status =
+      ompt_multiplex_get_parallel_info(ancestor_level, parallel_data, team_size);
+  if (parallel_data != NULL) {
+#ifdef OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA
+    *parallel_data =
+        OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA(*parallel_data);
+#else
+    *parallel_data = ompt_multiplex_get_client_ompt_data(*parallel_data);
+#endif
+  }
+  return status;
+}
+
+// ompt_set_callback as handed to the own tool.
+//
+// Records `callback` in ompt_multiplex_own_callbacks for the event `which`.
+// The multiplexing wrapper for that event is registered with the runtime only
+// while the cached status in ompt_multiplex_implementation_status is still
+// -1; the runtime's answer is then cached and returned. Later calls for the
+// same event return the cached status without contacting the runtime again.
+// Events not covered by OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT yield
+// ompt_set_error.
+OMPT_API_ROUTINE int ompt_multiplex_own_set_callback(ompt_callbacks_t which,
+                                                     ompt_callback_t callback) {
+  switch (which) {
+
+// One `case` per event, generated through the FOREACH macro below. The macro
+// body deliberately ends without a semicolon.
+// NOTE(review): the terminating `;` is presumably supplied by
+// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT, which is defined outside this view.
+#define ompt_event_macro(event_name, callback_type, event_id)                  \
+  case ompt_##event_name:                                                      \
+    ompt_multiplex_own_callbacks.ompt_##event_name = (callback_type)callback;  \
+    if (ompt_multiplex_implementation_status.ompt_##event_name == -1)          \
+      return ompt_multiplex_implementation_status.ompt_##event_name =          \
+                 ompt_multiplex_set_callback(                                  \
+                     ompt_##event_name,                                        \
+                     (ompt_callback_t)&ompt_multiplex_##event_name);           \
+    else                                                                       \
+      return ompt_multiplex_implementation_status.ompt_##event_name
+
+    OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  default:
+    return ompt_set_error;
+  }
+}
+
+// ompt_set_callback as handed to the client tool.
+//
+// Records `callback` in ompt_multiplex_client_callbacks for the event
+// `which`. The multiplexing wrapper for that event is registered with the
+// runtime only while the cached status in
+// ompt_multiplex_implementation_status is still -1; the runtime's answer is
+// then cached and returned. Later calls for the same event return the cached
+// status without contacting the runtime again. The status cache is the same
+// object the own tool's set_callback uses, so whichever tool asks first
+// triggers the registration. Events not covered by
+// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT yield ompt_set_error.
+OMPT_API_ROUTINE int
+ompt_multiplex_client_set_callback(ompt_callbacks_t which,
+                                   ompt_callback_t callback) {
+  switch (which) {
+
+// One `case` per event, generated through the FOREACH macro below. The macro
+// body deliberately ends without a semicolon.
+// NOTE(review): the terminating `;` is presumably supplied by
+// OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT, which is defined outside this view.
+#define ompt_event_macro(event_name, callback_type, event_id)                  \
+  case ompt_##event_name:                                                      \
+    ompt_multiplex_client_callbacks.ompt_##event_name =                        \
+        (callback_type)callback;                                               \
+    if (ompt_multiplex_implementation_status.ompt_##event_name == -1)          \
+      return ompt_multiplex_implementation_status.ompt_##event_name =          \
+                 ompt_multiplex_set_callback(                                  \
+                     ompt_##event_name,                                        \
+                     (ompt_callback_t)&ompt_multiplex_##event_name);           \
+    else                                                                       \
+      return ompt_multiplex_implementation_status.ompt_##event_name
+
+    OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  default:
+    return ompt_set_error;
+  }
+}
+
+// Lookup function handed to the own tool. The four entry points that need
+// per-tool translation are answered with the own-tool wrappers; every other
+// name is resolved by the runtime's original lookup function.
+ompt_interface_fn_t ompt_multiplex_own_lookup(const char *name) {
+  if (strcmp(name, "ompt_set_callback") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_own_set_callback;
+  if (strcmp(name, "ompt_get_task_info") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_own_get_task_info;
+  if (strcmp(name, "ompt_get_thread_data") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_own_get_thread_data;
+  if (strcmp(name, "ompt_get_parallel_info") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_own_get_parallel_info;
+
+  return ompt_multiplex_lookup_function(name);
+}
+
+// Lookup function handed to the client tool. The four entry points that need
+// per-tool translation are answered with the client-tool wrappers; every
+// other name is resolved by the runtime's original lookup function.
+ompt_interface_fn_t ompt_multiplex_client_lookup(const char *name) {
+  if (strcmp(name, "ompt_set_callback") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_client_set_callback;
+  if (strcmp(name, "ompt_get_task_info") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_client_get_task_info;
+  if (strcmp(name, "ompt_get_thread_data") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_client_get_thread_data;
+  if (strcmp(name, "ompt_get_parallel_info") == 0)
+    return (ompt_interface_fn_t)&ompt_multiplex_client_get_parallel_info;
+
+  return ompt_multiplex_lookup_function(name);
+}
+
+// Initializer handed to the runtime on behalf of both tools.
+//
+// Stores the runtime's lookup function, caches the four runtime entry points
+// the multiplexer wraps, resets the per-event registration status, and then
+// initializes the own tool followed by the client tool (if one was loaded).
+// Each tool gets its own lookup function and its own tool_data. The larger of
+// the two initializer results is returned; a missing client counts as 0.
+int ompt_multiplex_initialize(ompt_function_lookup_t lookup,
+                              int initial_device_num, ompt_data_t *data) {
+  // Keep the runtime's lookup for names the multiplexer does not intercept.
+  ompt_multiplex_lookup_function = lookup;
+
+  // Cache the real entry points that the per-tool wrappers delegate to.
+  ompt_multiplex_set_callback =
+      (ompt_set_callback_t)lookup("ompt_set_callback");
+  ompt_multiplex_get_task_info =
+      (ompt_get_task_info_t)lookup("ompt_get_task_info");
+  ompt_multiplex_get_thread_data =
+      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
+  ompt_multiplex_get_parallel_info =
+      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
+
+  // Mark every event as "not yet registered with the runtime" (-1).
+#define ompt_event_macro(event_name, callback_type, event_id)                  \
+  ompt_multiplex_implementation_status.ompt_##event_name = -1
+
+  OMPT_LOAD_CLIENT_FOREACH_OMPT_EVENT(ompt_event_macro)
+
+#undef ompt_event_macro
+
+  int own_status = ompt_multiplex_own_fns->initialize(
+      ompt_multiplex_own_lookup, initial_device_num,
+      &(ompt_multiplex_own_fns->tool_data));
+
+  int client_status = 0;
+  if (ompt_multiplex_client_fns != NULL) {
+    client_status = ompt_multiplex_client_fns->initialize(
+        ompt_multiplex_client_lookup, initial_device_num,
+        &(ompt_multiplex_client_fns->tool_data));
+  }
+
+  if (own_status > client_status)
+    return own_status;
+  return client_status;
+}
+
+// Finalizer handed to the runtime on behalf of both tools. The client tool
+// (if one was loaded) is finalized before the own tool, each with its own
+// tool_data. The `fns` argument supplied by the runtime is not used.
+void ompt_multiplex_finalize(ompt_data_t *fns) {
+  if (ompt_multiplex_client_fns != NULL) {
+    ompt_multiplex_client_fns->finalize(
+        &(ompt_multiplex_client_fns->tool_data));
+  }
+
+  ompt_multiplex_own_fns->finalize(&(ompt_multiplex_own_fns->tool_data));
+}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+// The including tool's own ompt_start_tool, renamed by the #define at the end
+// of this header.
+ompt_start_tool_result_t *
+ompt_multiplex_own_start_tool(unsigned int omp_version,
+                              const char *runtime_version);
+
+// Entry point called by the OpenMP runtime.
+//
+// Walks the colon-separated library list in the environment variable named by
+// CLIENT_TOOL_LIBRARIES_VAR and uses the first library whose ompt_start_tool
+// returns a non-NULL result as the client tool. Then it starts the own tool
+// and returns the multiplexing initialize/finalize pair.
+//
+// If the own tool declines to start (its start function returns NULL), the
+// client's result (possibly NULL) is returned directly, because the
+// multiplexing initializer and finalizer dereference the own tool's result
+// unconditionally.
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
+                                          const char *runtime_version) {
+  // try loading client tool
+  ompt_multiplex_client_fns = NULL;
+  ompt_start_tool_result_t *(*client_start_tool)(unsigned int, const char *) =
+      NULL;
+
+  const char *tool_libs = getenv(CLIENT_TOOL_LIBRARIES_VAR);
+  if (tool_libs) {
+    // copy environment variable; +1 leaves room for the terminating NUL that
+    // strcpy writes
+    const size_t tool_libs_len = strlen(tool_libs);
+    char *tool_libs_buffer = (char *)malloc(tool_libs_len + 1);
+    if (!tool_libs_buffer) {
+      printf("malloc Error\n");
+      exit(-1);
+    }
+    strcpy(tool_libs_buffer, tool_libs);
+
+    // `progress` is the start of the current path; `tmp_progress` scans to
+    // the next ':' (or the end), which is overwritten with NUL so the path
+    // can be passed to dlopen in place.
+    size_t progress = 0;
+    while (progress < tool_libs_len) {
+      size_t tmp_progress = progress;
+      while (tmp_progress < tool_libs_len &&
+             tool_libs_buffer[tmp_progress] != ':')
+        tmp_progress++;
+      if (tmp_progress < tool_libs_len)
+        tool_libs_buffer[tmp_progress] = 0;
+      void *h = dlopen(tool_libs_buffer + progress, RTLD_LAZY);
+      if (h) {
+        client_start_tool =
+            (ompt_start_tool_result_t * (*)(unsigned int, const char *))
+                dlsym(h, "ompt_start_tool");
+        // The first library that provides a start function returning a
+        // non-NULL result becomes the client tool.
+        if (client_start_tool &&
+            (ompt_multiplex_client_fns =
+                 (*client_start_tool)(omp_version, runtime_version))) {
+          break;
+        }
+      } else {
+        printf("Loading %s from %s failed with: %s\n",
+               tool_libs_buffer + progress, CLIENT_TOOL_LIBRARIES_VAR,
+               dlerror());
+      }
+      progress = tmp_progress + 1;
+    }
+    free(tool_libs_buffer);
+  }
+  // load own tool
+  ompt_multiplex_own_fns =
+      ompt_multiplex_own_start_tool(omp_version, runtime_version);
+
+  // Without an own tool there is nothing to multiplex, and the multiplexing
+  // initializer/finalizer would dereference NULL. Let the runtime talk to the
+  // client directly (or to no tool at all if that is NULL too).
+  if (!ompt_multiplex_own_fns)
+    return ompt_multiplex_client_fns;
+
+  // return multiplexed versions
+  static ompt_start_tool_result_t ompt_start_tool_result = {
+      &ompt_multiplex_initialize, &ompt_multiplex_finalize, {0}};
+  return &ompt_start_tool_result;
+}
+#ifdef __cplusplus
+}
+#endif
+
+// From this point on, every occurrence of `ompt_start_tool` in the including
+// translation unit is rewritten to `ompt_multiplex_own_start_tool`. The tool
+// that includes this header therefore defines the renamed function, which the
+// multiplexing ompt_start_tool defined above calls to start the own tool.
+// The #define must stay below that definition, otherwise it would be renamed
+// as well.
+#define ompt_start_tool ompt_multiplex_own_start_tool
+
+#endif /* OMPT_MULTIPLEX_H */

diff  --git a/openmp/tools/multiplex/tests/CMakeLists.txt b/openmp/tools/multiplex/tests/CMakeLists.txt
new file mode 100644
index 000000000000..48e8a5730755
--- /dev/null
+++ b/openmp/tools/multiplex/tests/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Build configuration for the lit-based tests of the OMPT multiplex header.
+include(CheckFunctionExists)
+include(CheckLibraryExists)
+
+# pythonize_bool(<var>)
+# Normalizes <var> to the literal True or False depending on its truthiness.
+# This is a macro, so the new value is set directly in the caller's scope.
+macro(pythonize_bool var)
+  if (${var})
+    set(${var} True)
+  else()
+    set(${var} False)
+  endif()
+endmacro()
+
+set(OMPT_LOAD_CLIENT_TEST_CFLAGS
+  ""
+  CACHE STRING "Extra compiler flags to send to the test compiler")
+
+# Include directories exported by the ompt-print-callback target.
+get_target_property(
+  OMPT_PRINT_CALLBACKS_DIR
+  ompt-print-callback
+  INTERFACE_INCLUDE_DIRECTORIES)
+
+# Register the check-ompt-multiplex test suite; it runs from the build tree
+# and depends on the omp target.
+add_openmp_testsuite(
+  check-ompt-multiplex
+  "Running OMPT multiplex tests"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  DEPENDS omp)
+
+# Generate lit.site.cfg in the build tree from the lit.site.cfg.in template.
+set(AUTO_GEN_COMMENT "## Autogenerated by OMPT_LOAD_CLIENT configuration.\n# Do not edit!")
+configure_file(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+  @ONLY)

diff  --git a/openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c b/openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c
new file mode 100644
index 000000000000..8ab2a96c5682
--- /dev/null
+++ b/openmp/tools/multiplex/tests/custom_data_storage/custom_data_storage.c
@@ -0,0 +1,313 @@
+// RUN: %libomp-tool -DFIRST_TOOL -o %t.first.tool.so %s && \
+// RUN: %libomp-tool -DSECOND_TOOL -o %t.second.tool.so %s && \
+// RUN: %libomp-compile && \
+// RUN: env OMP_TOOL_LIBRARIES=%t.first.tool.so \
+// RUN: CUSTOM_DATA_STORAGE_TOOL_LIBRARIES=%t.second.tool.so \
+// RUN: %libomp-run | %sort-threads | FileCheck %s
+
+#if defined(FIRST_TOOL)
+#include "first-tool.h"
+#elif defined(SECOND_TOOL)
+#include "second-tool.h"
+#else /* APP */
+
+#include "../ompt-signal.h"
+#include "omp.h"
+#include <stdio.h>
+
+// Test application: a two-thread parallel region in which the master thread
+// creates one explicit task. The task calls omp_control_tool(5, 1, NULL) and
+// then signals through `s`; thread 1 waits on `s` before leaving the region.
+// The construct structure must stay exactly as is, because the FileCheck
+// expectations below are written against the event sequence it produces.
+int main() {
+  // `s` is the shared flag used by OMPT_SIGNAL/OMPT_WAIT; `x` is not used.
+  int x, s = 0;
+#pragma omp parallel num_threads(2) shared(s)
+  {
+#pragma omp master
+    {
+#pragma omp task shared(s)
+      {
+        omp_control_tool(5, 1, NULL);
+        // NOTE(review): OMPT_SIGNAL comes from ../ompt-signal.h; presumably
+        // it increments `s` - confirm against that header.
+        OMPT_SIGNAL(s);
+      }
+    }
+    // NOTE(review): presumably blocks thread 1 until `s` reaches 1, i.e.
+    // until the task above has run - confirm against ../ompt-signal.h.
+    if (omp_get_thread_num() == 1)
+      OMPT_WAIT(s, 1);
+  }
+  return 0;
+}
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback
+
+// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+// CHECK: {{^}}0: NULL_POINTER=[[NULL]]
+// CHECK: {{^}}0: ompt_event_runtime_shutdown
+// CHECK: {{^}}0: ompt_event_runtime_shutdown
+
+// CHECK: {{^}}[[_1ST_MSTR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_initial=1,
+// CHECK-SAME: thread_id=[[_1ST_MSTR_TID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_initial_task_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_INIT_PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1,
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[_FIRST_INITIAL_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit=(nil),
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID:[0-9]+]], requested_team_size=2,
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, invoker=2
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=0
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_create:
+// CHECK-SAME: parent_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}},
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: new_task_id=[[_FIRST_EXPLICIT_TASK_ID:[0-9]+]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4,
+// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_end:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[_FIRST_EXPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_switch=7
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_control_tool:
+// CHECK-SAME: command=5, modifier=1, arg=(nil),
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 0:
+// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 1:
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 2:
+// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]:
+// CHECK-SAME: _first_tool: parallel level 0: parallel_id=[[_FIRST_PARALLEL_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: parallel level 1:
+// CHECK-SAME: parallel_id={{[0-9]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]:
+// CHECK-SAME: _first_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[_FIRST_EXPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_complete=1
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_end:
+// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: team_size=2, thread_num=0
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_end:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]], invoker=2,
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_1ST_MSTR_TID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_initial=1,
+// CHECK-SAME: thread_id=[[_2ND_MSTR_TID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_initial_task_begin:
+// CHECK-SAME: parallel_id=[[SECOND_INIT_PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID:[0-9]+]], actual_parallelism=1,
+// CHECK-SAME: index=1, flags=1
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[SECOND_INITIAL_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit=(nil),
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID:[0-9]+]], requested_team_size=2,
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, invoker=2
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=0
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_master_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_create:
+// CHECK-SAME: parent_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}},
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: new_task_id=[[SECOND_EXPLICIT_TASK_ID:[0-9]+]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4,
+// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_master_end:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[SECOND_EXPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_switch=7
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_control_tool:
+// CHECK-SAME: command=5, modifier=1, arg=(nil),
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 0:
+// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 1:
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 2:
+// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]:
+// CHECK-SAME: second_tool: parallel level 0: parallel_id=[[SECOND_PARALLEL_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: parallel level 1:
+// CHECK-SAME: parallel_id={{[0-9]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]:
+// CHECK-SAME: second_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[SECOND_EXPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_complete=1
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_end:
+// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]], team_size=2,
+// CHECK-SAME: thread_num=0
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_end:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]], invoker=2,
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_2ND_MSTR_TID]]
+
+// CHECK: {{^}}[[_1ST_WRKR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_worker=2,
+// CHECK-SAME: thread_id=[[_1ST_WRKR_TID]]
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=1
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], team_size=0,
+// CHECK-SAME: thread_num=1
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_1ST_WRKR_TID]]
+
+// CHECK: {{^}}[[_2ND_WRKR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_worker=2,
+// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]]
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=1
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0,
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], team_size=0,
+// CHECK-SAME: thread_num=1
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]]
+
+#endif /* APP */

diff  --git a/openmp/tools/multiplex/tests/custom_data_storage/first-tool.h b/openmp/tools/multiplex/tests/custom_data_storage/first-tool.h
new file mode 100644
index 000000000000..15a025af3c3b
--- /dev/null
+++ b/openmp/tools/multiplex/tests/custom_data_storage/first-tool.h
@@ -0,0 +1,293 @@
+#include "omp-tools.h"
+
+#define ompt_start_tool disable_ompt_start_tool
+#define _TOOL_PREFIX " _first_tool:"
+#include "callback.h"
+#undef _TOOL_PREFIX
+#undef ompt_start_tool
+
+#define CLIENT_TOOL_LIBRARIES_VAR "CUSTOM_DATA_STORAGE_TOOL_LIBRARIES"
+static ompt_data_t *custom_get_client_ompt_data(ompt_data_t *);
+static void free_data_pair(ompt_data_t *);
+#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_THREAD_DATA custom_get_client_ompt_data
+#define OMPT_MULTIPLEX_CUSTOM_DELETE_THREAD_DATA free_data_pair
+#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_PARALLEL_DATA                         \
+  custom_get_client_ompt_data
+#define OMPT_MULTIPLEX_CUSTOM_DELETE_PARALLEL_DATA free_data_pair
+#define OMPT_MULTIPLEX_CUSTOM_GET_CLIENT_TASK_DATA custom_get_client_ompt_data
+#define OMPT_MULTIPLEX_CUSTOM_DELETE_TASK_DATA free_data_pair
+#include "ompt-multiplex.h"
+
+// Storage that this tool hangs off a runtime-provided ompt_data_t slot (via
+// its ptr member): one ompt_data_t for the tool itself and one that is handed
+// out through custom_get_client_ompt_data().
+typedef struct custom_data_pair_s {
+  ompt_data_t own_data; // returned by get_own_ompt_data()
+  ompt_data_t client_data; // returned by custom_get_client_ompt_data()
+} custom_data_pair_t;
+
+// Translate a runtime slot into the client half of the custom_data_pair_t
+// that the slot's ptr member refers to. A NULL slot yields NULL; the ptr
+// member itself is not checked.
+static ompt_data_t *custom_get_client_ompt_data(ompt_data_t *data) {
+  if (!data)
+    return NULL;
+  custom_data_pair_t *pair = (custom_data_pair_t *)data->ptr;
+  return &pair->client_data;
+}
+
+// Translate a runtime slot into this tool's own half of the
+// custom_data_pair_t that the slot's ptr member refers to. A NULL slot yields
+// NULL; the ptr member itself is not checked.
+static ompt_data_t *get_own_ompt_data(ompt_data_t *data) {
+  if (!data)
+    return NULL;
+  custom_data_pair_t *pair = (custom_data_pair_t *)data->ptr;
+  return &pair->own_data;
+}
+
+// Allocate a fresh data pair, store it in data_pointer->ptr and clear both
+// halves. Prints "Malloc ERROR" and exits with status -1 when malloc fails.
+// NOTE(review): the pair is allocated as ompt_multiplex_data_pair_t while the
+// accessors in this file cast the pointer to custom_data_pair_t; this relies
+// on both structs having the same layout - confirm against ompt-multiplex.h.
+static ompt_multiplex_data_pair_t *
+allocate_data_pair(ompt_data_t *data_pointer) {
+  ompt_multiplex_data_pair_t *pair =
+      (ompt_multiplex_data_pair_t *)malloc(sizeof(*pair));
+  data_pointer->ptr = pair;
+  if (pair == NULL) {
+    printf("Malloc ERROR\n");
+    exit(-1);
+  }
+  pair->own_data.ptr = NULL;
+  pair->client_data.ptr = NULL;
+  return pair;
+}
+
+// Release the allocation referenced by data_pointer->ptr. The ptr member is
+// left unchanged afterwards.
+static void free_data_pair(ompt_data_t *data_pointer) {
+  free(data_pointer->ptr);
+}
+
+// Forward a sync-region event to on_ompt_callback_sync_region, passing this
+// tool's own halves of the parallel and task data pairs.
+static void on_cds_ompt_callback_sync_region(ompt_sync_region_t kind,
+                                             ompt_scope_endpoint_t endpoint,
+                                             ompt_data_t *parallel_data,
+                                             ompt_data_t *task_data,
+                                             const void *codeptr_ra) {
+  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
+  ompt_data_t *own_task = get_own_ompt_data(task_data);
+  on_ompt_callback_sync_region(kind, endpoint, own_parallel, own_task,
+                               codeptr_ra);
+}
+
+// Forward a sync-region-wait event to on_ompt_callback_sync_region_wait,
+// passing this tool's own halves of the parallel and task data pairs.
+static void on_cds_ompt_callback_sync_region_wait(
+    ompt_sync_region_t kind, ompt_scope_endpoint_t endpoint,
+    ompt_data_t *parallel_data, ompt_data_t *task_data,
+    const void *codeptr_ra) {
+  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
+  ompt_data_t *own_task = get_own_ompt_data(task_data);
+  on_ompt_callback_sync_region_wait(kind, endpoint, own_parallel, own_task,
+                                    codeptr_ra);
+}
+
+// Forward a flush event to on_ompt_callback_flush, passing this tool's own
+// half of the thread data pair. The wrapper must call the on_ompt_* handler
+// like every other on_cds_* wrapper in this file; calling itself would
+// recurse without end.
+static void on_cds_ompt_callback_flush(ompt_data_t *thread_data,
+                                       const void *codeptr_ra) {
+  thread_data = get_own_ompt_data(thread_data);
+  on_ompt_callback_flush(thread_data, codeptr_ra);
+}
+
+// Forward a cancel event to on_ompt_callback_cancel, passing this tool's own
+// half of the task data pair.
+static void on_cds_ompt_callback_cancel(ompt_data_t *task_data, int flags,
+                                        const void *codeptr_ra) {
+  on_ompt_callback_cancel(get_own_ompt_data(task_data), flags, codeptr_ra);
+}
+
+// Forward an implicit-task event to on_ompt_callback_implicit_task. At the
+// begin endpoint a data pair is allocated for the task, and additionally for
+// the parallel slot when the ompt_task_initial bit is set in type. The
+// handler then receives this tool's own halves of both pairs.
+static void on_cds_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,
+                                               ompt_data_t *parallel_data,
+                                               ompt_data_t *task_data,
+                                               unsigned int team_size,
+                                               unsigned int thread_num,
+                                               int type) {
+  if (endpoint == ompt_scope_begin) {
+    if (type & ompt_task_initial)
+      allocate_data_pair(parallel_data);
+    allocate_data_pair(task_data);
+  }
+  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
+  ompt_data_t *own_task = get_own_ompt_data(task_data);
+  on_ompt_callback_implicit_task(endpoint, own_parallel, own_task, team_size,
+                                 thread_num, type);
+}
+
+// Forward a work event to on_ompt_callback_work, passing this tool's own
+// halves of the parallel and task data pairs.
+static void on_cds_ompt_callback_work(ompt_work_t wstype,
+                                      ompt_scope_endpoint_t endpoint,
+                                      ompt_data_t *parallel_data,
+                                      ompt_data_t *task_data, uint64_t count,
+                                      const void *codeptr_ra) {
+  on_ompt_callback_work(wstype, endpoint, get_own_ompt_data(parallel_data),
+                        get_own_ompt_data(task_data), count, codeptr_ra);
+}
+
+// Forward a master event to on_ompt_callback_master, passing this tool's own
+// halves of the parallel and task data pairs.
+static void on_cds_ompt_callback_master(ompt_scope_endpoint_t endpoint,
+                                        ompt_data_t *parallel_data,
+                                        ompt_data_t *task_data,
+                                        const void *codeptr_ra) {
+  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
+  ompt_data_t *own_task = get_own_ompt_data(task_data);
+  on_ompt_callback_master(endpoint, own_parallel, own_task, codeptr_ra);
+}
+
+// Forward a parallel-begin event to on_ompt_callback_parallel_begin. A new
+// data pair is allocated for the parallel slot (a diagnostic line is printed
+// if the slot was not empty on entry), and the handler receives this tool's
+// own halves of the parent task and parallel pairs.
+static void on_cds_ompt_callback_parallel_begin(
+    ompt_data_t *parent_task_data, const ompt_frame_t *parent_task_frame,
+    ompt_data_t *parallel_data, uint32_t requested_team_size, int invoker,
+    const void *codeptr_ra) {
+  ompt_data_t *own_parent = get_own_ompt_data(parent_task_data);
+  if (parallel_data->ptr != NULL)
+    printf("%s\n", "0: parallel_data initially not null");
+  allocate_data_pair(parallel_data);
+  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
+  on_ompt_callback_parallel_begin(own_parent, parent_task_frame, own_parallel,
+                                  requested_team_size, invoker, codeptr_ra);
+}
+
+// Forward a parallel-end event to on_ompt_callback_parallel_end, passing this
+// tool's own halves of the parallel and task data pairs.
+static void on_cds_ompt_callback_parallel_end(ompt_data_t *parallel_data,
+                                              ompt_data_t *task_data,
+                                              int invoker,
+                                              const void *codeptr_ra) {
+  ompt_data_t *own_task = get_own_ompt_data(task_data);
+  ompt_data_t *own_parallel = get_own_ompt_data(parallel_data);
+  on_ompt_callback_parallel_end(own_parallel, own_task, invoker, codeptr_ra);
+}
+
+// Forward a task-create event to on_ompt_callback_task_create. A new data
+// pair is allocated for the new task's slot (a diagnostic line is printed if
+// the slot was not empty on entry), and the handler receives this tool's own
+// halves of the parent and new task pairs.
+static void on_cds_ompt_callback_task_create(ompt_data_t *parent_task_data,
+                                             const ompt_frame_t *parent_frame,
+                                             ompt_data_t *new_task_data,
+                                             int type, int has_dependences,
+                                             const void *codeptr_ra) {
+  ompt_data_t *own_parent = get_own_ompt_data(parent_task_data);
+  if (new_task_data->ptr != NULL)
+    printf("%s\n", "0: new_task_data initially not null");
+  allocate_data_pair(new_task_data);
+  ompt_data_t *own_new_task = get_own_ompt_data(new_task_data);
+  on_ompt_callback_task_create(own_parent, parent_frame, own_new_task, type,
+                               has_dependences, codeptr_ra);
+}
+
+// Forward a task-schedule event to on_ompt_callback_task_schedule, passing
+// this tool's own halves of both task data pairs.
+static void
+on_cds_ompt_callback_task_schedule(ompt_data_t *first_task_data,
+                                   ompt_task_status_t prior_task_status,
+                                   ompt_data_t *second_task_data) {
+  first_task_data = get_own_ompt_data(first_task_data);
+  second_task_data = get_own_ompt_data(second_task_data);
+  on_ompt_callback_task_schedule(first_task_data, prior_task_status,
+                                 second_task_data);
+}
+
+// Forward a dependences event to on_ompt_callback_dependences, passing this
+// tool's own half of the task data pair.
+static void on_cds_ompt_callback_dependences(ompt_data_t *task_data,
+                                             const ompt_dependence_t *deps,
+                                             int ndeps) {
+  on_ompt_callback_dependences(get_own_ompt_data(task_data), deps, ndeps);
+}
+
+// Forward a task-dependence event to on_ompt_callback_task_dependence,
+// passing this tool's own halves of both task data pairs.
+static void
+on_cds_ompt_callback_task_dependence(ompt_data_t *first_task_data,
+                                     ompt_data_t *second_task_data) {
+  ompt_data_t *own_first = get_own_ompt_data(first_task_data);
+  ompt_data_t *own_second = get_own_ompt_data(second_task_data);
+  on_ompt_callback_task_dependence(own_first, own_second);
+}
+
+// Forward a thread-begin event to on_ompt_callback_thread_begin. A new data
+// pair is allocated for the thread slot (a diagnostic line is printed if the
+// slot was not empty on entry), and the handler receives this tool's own half
+// of that pair.
+static void on_cds_ompt_callback_thread_begin(ompt_thread_t thread_type,
+                                              ompt_data_t *thread_data) {
+  if (thread_data->ptr != NULL)
+    printf("%s\n", "0: thread_data initially not null");
+  allocate_data_pair(thread_data);
+  on_ompt_callback_thread_begin(thread_type, get_own_ompt_data(thread_data));
+}
+
+// Forward a thread-end event to on_ompt_callback_thread_end, passing this
+// tool's own half of the thread data pair.
+static void on_cds_ompt_callback_thread_end(ompt_data_t *thread_data) {
+  on_ompt_callback_thread_end(get_own_ompt_data(thread_data));
+}
+
+// Control-tool handler for the first tool. Prints the event itself, then one
+// line per task level and one line per parallel level for as long as
+// ompt_get_task_info / ompt_get_parallel_info return non-zero, using this
+// tool's own half of each data pair as the printed id. Always returns 0.
+static int on_cds_ompt_callback_control_tool(uint64_t command,
+                                             uint64_t modifier, void *arg,
+                                             const void *codeptr_ra) {
+  printf("%" PRIu64 ": _first_tool: ompt_event_control_tool: command=%" PRIu64
+         ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p \n",
+         ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra);
+
+  // Walk the task ancestry starting at level 0.
+  ompt_data_t *task_slot;
+  for (int level = 0;
+       ompt_get_task_info(level, NULL, (ompt_data_t **)&task_slot, NULL, NULL,
+                          NULL);
+       level++) {
+    ompt_data_t *own_task = get_own_ompt_data(task_slot);
+    printf("%" PRIu64 ": _first_tool: task level %d: task_id=%" PRIu64 "\n",
+           ompt_get_thread_data()->value, level, own_task->value);
+  }
+
+  // Walk the enclosing parallel regions starting at level 0.
+  ompt_data_t *parallel_slot;
+  for (int level = 0;
+       ompt_get_parallel_info(level, (ompt_data_t **)&parallel_slot, NULL);
+       level++) {
+    ompt_data_t *own_parallel = get_own_ompt_data(parallel_slot);
+    printf("%" PRIu64 ": _first_tool: parallel level %d: parallel_id=%" PRIu64
+           "\n",
+           ompt_get_thread_data()->value, level, own_parallel->value);
+  }
+  return 0; // success
+}
+
+// Holds the entry point that ompt_get_thread_data referred to before
+// ompt_cds_initialize() replaced it.
+static ompt_get_thread_data_t ompt_cds_get_thread_data;
+
+// Return this tool's own half of the data pair found in the slot delivered by
+// the saved thread-data entry point.
+ompt_data_t *ompt_get_own_thread_data() {
+  ompt_data_t *runtime_slot = ompt_cds_get_thread_data();
+  return get_own_ompt_data(runtime_slot);
+}
+
+// register_callback2_t(name, type): register the wrapper on_cds_<name> for
+// the event <name> through ompt_set_callback. Assigning the wrapper to a
+// local of the given callback type first lets the compiler check its
+// signature. A diagnostic line is printed when ompt_set_callback returns
+// ompt_set_never.
+#define register_callback2_t(name, type)                                       \
+  do {                                                                         \
+    type f_##name = &on_cds_##name;                                            \
+    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never)  \
+      printf("0: Could not register callback '" #name "'\n");                  \
+  } while (0)
+
+// Shorthand for events whose callback type is spelled <name>_t.
+#define register_callback2(name) register_callback2_t(name, name##_t)
+
+// Initializer handed to the runtime through ompt_start_tool(). It first runs
+// ompt_initialize(), then redirects ompt_get_thread_data to
+// ompt_get_own_thread_data (keeping the previous entry point in
+// ompt_cds_get_thread_data), and finally registers the callbacks.
+// Always returns 1.
+int ompt_cds_initialize(ompt_function_lookup_t lookup, int initial_device_num,
+                        ompt_data_t *tool_data) {
+  // NOTE(review): ompt_initialize is not defined in this file; presumably it
+  // comes from callback.h and sets up the ompt_* entry points used below -
+  // confirm.
+  ompt_initialize(lookup, initial_device_num, tool_data);
+  // Save the current entry point before overwriting it; the replacement
+  // calls through the saved pointer.
+  ompt_cds_get_thread_data = ompt_get_thread_data;
+  ompt_get_thread_data = ompt_get_own_thread_data;
+
+  // register_callback / register_callback_t are not defined in this file
+  // (presumably callback.h - confirm); register_callback2 / _t install the
+  // on_cds_* wrappers defined above.
+  register_callback(ompt_callback_mutex_acquire);
+  register_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
+  register_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
+  register_callback(ompt_callback_nest_lock);
+  register_callback2(ompt_callback_sync_region);
+  register_callback2_t(ompt_callback_sync_region_wait,
+                       ompt_callback_sync_region_t);
+  register_callback2(ompt_callback_control_tool);
+  register_callback2(ompt_callback_flush);
+  register_callback2(ompt_callback_cancel);
+  register_callback2(ompt_callback_implicit_task);
+  register_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
+  register_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
+  register_callback2(ompt_callback_work);
+  register_callback2(ompt_callback_master);
+  register_callback2(ompt_callback_parallel_begin);
+  register_callback2(ompt_callback_parallel_end);
+  register_callback2(ompt_callback_task_create);
+  register_callback2(ompt_callback_task_schedule);
+  register_callback2(ompt_callback_dependences);
+  register_callback2(ompt_callback_task_dependence);
+  register_callback2(ompt_callback_thread_begin);
+  register_callback2(ompt_callback_thread_end);
+  return 1; // success
+}
+
+// Finalizer handed to the runtime through ompt_start_tool(); it only prints
+// the shutdown marker line. tool_data is not used.
+void ompt_cds_finalize(ompt_data_t *tool_data) {
+  fputs("0: ompt_event_runtime_shutdown\n", stdout);
+}
+
+// Tool entry point: returns a statically allocated result naming
+// ompt_cds_initialize and ompt_cds_finalize, with a zero-initialized third
+// member. Both arguments are ignored.
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
+                                          const char *runtime_version) {
+  static ompt_start_tool_result_t cds_tool_result = {
+      ompt_cds_initialize, ompt_cds_finalize, 0};
+  return &cds_tool_result;
+}

diff  --git a/openmp/tools/multiplex/tests/custom_data_storage/second-tool.h b/openmp/tools/multiplex/tests/custom_data_storage/second-tool.h
new file mode 100644
index 000000000000..4c0f39e499db
--- /dev/null
+++ b/openmp/tools/multiplex/tests/custom_data_storage/second-tool.h
@@ -0,0 +1,5 @@
+#define CLIENT_TOOL_LIBRARIES_VAR "PRINT_EMBEDDED_TOOL_LIBRARIES"
+#include "ompt-multiplex.h"
+#define _TOOL_PREFIX " second_tool:"
+#include "callback.h"
+#undef _TOOL_PREFIX

diff  --git a/openmp/tools/multiplex/tests/lit.cfg b/openmp/tools/multiplex/tests/lit.cfg
new file mode 100644
index 000000000000..e792fc365d9d
--- /dev/null
+++ b/openmp/tools/multiplex/tests/lit.cfg
@@ -0,0 +1,92 @@
+# -*- Python -*- vim: set ft=python ts=4 sw=4 expandtab tw=79:
+# Configuration file for the 'lit' test runner.
+
+import os
+import re
+import subprocess
+import lit.formats
+
+# Tell pylint that we know config and lit_config exist somewhere.
+if 'PYLINT_IMPORT' in os.environ:
+    config = object()
+    lit_config = object()
+
+def append_dynamic_library_path(path):
+    if config.operating_system == 'Windows':
+        name = 'PATH'
+        sep = ';'
+    elif config.operating_system == 'Darwin':
+        name = 'DYLD_LIBRARY_PATH'
+        sep = ':'
+    else:
+        name = 'LD_LIBRARY_PATH'
+        sep = ':'
+    if name in config.environment:
+        config.environment[name] = path + sep + config.environment[name]
+    else:
+        config.environment[name] = path
+
+# name: The name of this test suite.
+config.name = 'OMPT multiplex'
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = ['.c']
+
+# test_source_root: The root path where tests are located.
+config.test_source_root = os.path.dirname(__file__)
+
+# test_exec_root: The root object directory where output is placed
+config.test_exec_root = config.test_obj_root
+
+# test format
+config.test_format = lit.formats.ShTest()
+
+# compiler flags
+config.test_flags = " -I " + config.test_source_root + "/.."\
+    " -I " + config.omp_header_dir + \
+    " -L " + config.omp_library_dir + \
+    " -I " + config.ompt_print_callback_dir + \
+    " -Wl,-rpath," + config.omp_library_dir + \
+    " " + config.test_openmp_flags
+
+# Allow XFAIL to work
+config.target_triple = [ ]
+for feature in config.test_compiler_features:
+    config.available_features.add(feature)
+
+# Setup environment to find dynamic library at runtime
+append_dynamic_library_path(config.omp_library_dir)
+append_dynamic_library_path(config.test_obj_root+"/..")
+
+# Rpath modifications for Darwin
+if config.operating_system == 'Darwin':
+    config.test_flags += " -Wl,-rpath," + config.omp_library_dir
+
+# Find the SDK on Darwin
+if config.operating_system == 'Darwin':
+  cmd = subprocess.Popen(['xcrun', '--show-sdk-path'],
+                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  out, err = cmd.communicate()
+  out = out.strip()
+  res = cmd.wait()
+  if res == 0 and out:
+    config.test_flags += " -isysroot " + out
+
+if 'Linux' in config.operating_system:
+    config.available_features.add("linux")
+
+# substitutions
+config.substitutions.append(("FileCheck", "tee %%t.out | %s" % config.test_filecheck))
+config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
+
+config.substitutions.append(("%libomp-compile-and-run", \
+    "%libomp-compile && %libomp-run"))
+config.substitutions.append(("%libomp-compile", \
+    "%clang %cflags %s -o %t"))
+config.substitutions.append(("%libomp-tool", \
+    "%clang %cflags -shared -fPIC -g"))
+config.substitutions.append(("%libomp-run", "%t"))
+config.substitutions.append(("%clang", config.test_c_compiler))
+config.substitutions.append(("%openmp_flag", config.test_openmp_flags))
+config.substitutions.append(("%cflags", config.test_flags))
+

diff  --git a/openmp/tools/multiplex/tests/lit.site.cfg.in b/openmp/tools/multiplex/tests/lit.site.cfg.in
new file mode 100644
index 000000000000..dbe7a33291ec
--- /dev/null
+++ b/openmp/tools/multiplex/tests/lit.site.cfg.in
@@ -0,0 +1,16 @@
+@AUTO_GEN_COMMENT@
+
+config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
+config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
+config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@
+config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@"
+config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@"
+config.test_extra_flags = "@OPENMP_TEST_FLAGS@"
+config.test_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
+config.omp_library_dir = "@LIBOMP_LIBRARY_DIR@"
+config.omp_header_dir = "@LIBOMP_INCLUDE_DIR@"
+config.ompt_print_callback_dir = "@OMPT_PRINT_CALLBACKS_DIR@"
+config.operating_system = "@CMAKE_SYSTEM_NAME@"
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@CMAKE_CURRENT_SOURCE_DIR@/lit.cfg")

diff  --git a/openmp/tools/multiplex/tests/ompt-signal.h b/openmp/tools/multiplex/tests/ompt-signal.h
new file mode 100644
index 000000000000..9933dbfc39b1
--- /dev/null
+++ b/openmp/tools/multiplex/tests/ompt-signal.h
@@ -0,0 +1,23 @@
+// These functions are used to provide a signal-wait mechanism to enforce
+// expected scheduling for the test cases. Conditional variable (s) needs to be
+// shared! Initialize to 0
+#include <unistd.h>
+
+#define OMPT_SIGNAL(s) ompt_signal(&s)
+// Atomically add one to the shared counter *s.
+// inline
+void ompt_signal(int *s) {
+#pragma omp atomic
+  *s += 1;
+}
+
+#define OMPT_WAIT(s, v) ompt_wait(&s, v)
+// Block until *s >= v. Each round sleeps 10 microseconds and then reads *s
+// atomically, so at least one sleep happens even if the condition already
+// holds.
+// inline
+void ompt_wait(int *s, int v) {
+  int seen;
+  for (;;) {
+    usleep(10);
+#pragma omp atomic read
+    seen = (*s);
+    if (seen >= v)
+      break;
+  }
+}

diff  --git a/openmp/tools/multiplex/tests/print/first-tool.h b/openmp/tools/multiplex/tests/print/first-tool.h
new file mode 100644
index 000000000000..acd957264122
--- /dev/null
+++ b/openmp/tools/multiplex/tests/print/first-tool.h
@@ -0,0 +1,5 @@
+#define CLIENT_TOOL_LIBRARIES_VAR "PRINT_TOOL_LIBRARIES"
+#include "ompt-multiplex.h"
+#define _TOOL_PREFIX " _first_tool:"
+#include "callback.h"
+#undef _TOOL_PREFIX

diff  --git a/openmp/tools/multiplex/tests/print/print.c b/openmp/tools/multiplex/tests/print/print.c
new file mode 100644
index 000000000000..08acd0d88d56
--- /dev/null
+++ b/openmp/tools/multiplex/tests/print/print.c
@@ -0,0 +1,304 @@
+// RUN: %libomp-tool -DFIRST_TOOL -o %t.first.tool.so %s && \
+// RUN: %libomp-tool -DSECOND_TOOL -o %t.second.tool.so %s && \
+// RUN: %libomp-compile && \
+// RUN: env OMP_TOOL_LIBRARIES=%t.first.tool.so \
+// RUN: PRINT_TOOL_LIBRARIES=%t.second.tool.so \
+// RUN: %libomp-run | %sort-threads | FileCheck %s
+
+#if defined(FIRST_TOOL)
+#include "first-tool.h"
+#elif defined(SECOND_TOOL)
+#include "second-tool.h"
+#else /* APP */
+
+#include "../ompt-signal.h"
+#include "omp.h"
+#include <stdio.h>
+
+// Test application: a two-thread parallel region in which the master thread
+// creates one explicit task that calls omp_control_tool(5, 1, NULL) and then
+// signals through the shared counter s.
+int main() {
+  int x, s = 0;
+#pragma omp parallel num_threads(2) shared(s)
+  {
+#pragma omp master
+    {
+#pragma omp task shared(s)
+      {
+        omp_control_tool(5, 1, NULL);
+        OMPT_SIGNAL(s);
+      }
+    }
+    // Thread 1 stays here until the task has signalled; presumably this keeps
+    // it from picking up the task, so the task events appear on the master
+    // thread as the CHECK lines expect - confirm.
+    if (omp_get_thread_num() == 1)
+      OMPT_WAIT(s, 1);
+  }
+  return 0;
+}
+
+// Check if libomp supports the callbacks for this test.
+// CHECK-NOT: {{^}}0: Could not register callback
+
+// CHECK: {{^}}0: NULL_POINTER=[[NULL:.*$]]
+// CHECK: {{^}}0: NULL_POINTER=[[NULL]]
+// CHECK: {{^}}0: ompt_event_runtime_shutdown
+// CHECK: {{^}}0: ompt_event_runtime_shutdown
+
+// CHECK: {{^}}[[_1ST_MSTR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[_1ST_MSTR_TID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_initial_task_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_INIT_PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID:[0-9]+]],
+// CHECK-SAME: actual_parallelism=1, index=1, flags=1
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[_FIRST_INITIAL_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit=(nil),
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: requested_team_size=2, codeptr_ra={{0x[0-f]+}}, invoker=2
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=0
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_create:
+// CHECK-SAME: parent_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}},
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: new_task_id=[[_FIRST_EXPLICIT_TASK_ID:[0-9]+]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4,
+// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_master_end:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[_FIRST_EXPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_switch=7
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_control_tool:
+// CHECK-SAME: command=5, modifier=1, arg=(nil), codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 0:
+// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 1:
+// CHECK-SAME: task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: task level 2:
+// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: parallel level 0:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: parallel level 1:
+// CHECK-SAME: parallel_id={{[0-9]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[_FIRST_EXPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_complete=1
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_task_end:
+// CHECK-SAME: task_id=[[_FIRST_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: team_size=2, thread_num=0
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_parallel_end:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_INITIAL_TASK_ID]], invoker=2,
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_1ST_MSTR_TID]]: _first_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_1ST_MSTR_TID]]
+// CHECK: {{^}}[[_2ND_MSTR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_initial=1, thread_id=[[_2ND_MSTR_TID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_initial_task_begin:
+// CHECK-SAME: parallel_id=[[SECOND_INIT_PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID:[0-9]+]],
+// CHECK-SAME: actual_parallelism=1, index=1, flags=1
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_begin:
+// CHECK-SAME: parent_task_id=[[SECOND_INITIAL_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit=(nil),
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID:[0-9]+]],
+// CHECK-SAME: requested_team_size=2, codeptr_ra={{0x[0-f]+}}, invoker=2
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=0
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_master_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_create:
+// CHECK-SAME: parent_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: parent_task_frame.exit={{0x[0-f]+}},
+// CHECK-SAME: parent_task_frame.reenter={{0x[0-f]+}},
+// CHECK-SAME: new_task_id=[[SECOND_EXPLICIT_TASK_ID:[0-9]+]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}, task_type=ompt_task_explicit=4,
+// CHECK-SAME: has_dependences=no
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_master_end:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[SECOND_EXPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_switch=7
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_control_tool:
+// CHECK-SAME: command=5, modifier=1, arg=(nil), codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 0:
+// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 1:
+// CHECK-SAME: task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: task level 2:
+// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: parallel level 0:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: parallel level 1:
+// CHECK-SAME: parallel_id={{[0-9]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_schedule:
+// CHECK-SAME: first_task_id=[[SECOND_EXPLICIT_TASK_ID]],
+// CHECK-SAME: second_task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: prior_task_status=ompt_task_complete=1
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_task_end:
+// CHECK-SAME: task_id=[[SECOND_EXPLICIT_TASK_ID]]
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_MASTER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: team_size=2, thread_num=0
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_parallel_end:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_INITIAL_TASK_ID]], invoker=2,
+// CHECK-SAME: codeptr_ra={{0x[0-f]+}}
+
+// CHECK: {{^}}[[_2ND_MSTR_TID]]: second_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_2ND_MSTR_TID]]
+
+// CHECK: {{^}}[[_1ST_WRKR_TID:[0-9]+]]: _first_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_worker=2, thread_id=[[_1ST_WRKR_TID]]
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID:[0-9]+]], team_size=2,
+// CHECK-SAME: thread_num=1
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[_FIRST_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0, task_id=[[_FIRST_WORKER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: team_size=0, thread_num=1
+
+// CHECK: {{^}}[[_1ST_WRKR_TID]]: _first_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_1ST_WRKR_TID]]
+
+// CHECK: {{^}}[[_2ND_WRKR_TID:[0-9]+]]: second_tool: ompt_event_thread_begin:
+// CHECK-SAME: thread_type=ompt_thread_worker=2,
+// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]]
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID:[0-9]+]],
+// CHECK-SAME: team_size=2, thread_num=1
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_begin:
+// CHECK-SAME: parallel_id=[[SECOND_PARALLEL_ID]],
+// CHECK-SAME: task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]], codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_wait_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_barrier_end:
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: codeptr_ra=(nil)
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_implicit_task_end:
+// CHECK-SAME: parallel_id=0, task_id=[[SECOND_WORKER_IMPLICIT_TASK_ID]],
+// CHECK-SAME: team_size=0, thread_num=1
+
+// CHECK: {{^}}[[_2ND_WRKR_TID]]: second_tool: ompt_event_thread_end:
+// CHECK-SAME: thread_id=[[_2ND_WRKR_TID]]
+
+#endif /* APP */

diff  --git a/openmp/tools/multiplex/tests/print/second-tool.h b/openmp/tools/multiplex/tests/print/second-tool.h
new file mode 100644
index 000000000000..4c0f39e499db
--- /dev/null
+++ b/openmp/tools/multiplex/tests/print/second-tool.h
@@ -0,0 +1,5 @@
+#define CLIENT_TOOL_LIBRARIES_VAR "PRINT_EMBEDDED_TOOL_LIBRARIES" /* defined before ompt-multiplex.h is included; presumably names the environment variable listing the client tool libraries -- TODO confirm against ompt-multiplex.h */
+#include "ompt-multiplex.h" /* pulled in only after CLIENT_TOOL_LIBRARIES_VAR has been defined */
+#define _TOOL_PREFIX " second_tool:" /* tag that print.c's CHECK lines expect right after the thread id in this tool's output */
+#include "callback.h" /* included while _TOOL_PREFIX is defined; presumably provides the printing callbacks that use it -- TODO confirm */
+#undef _TOOL_PREFIX /* remove the macro again once callback.h has been included */


        


More information about the Openmp-commits mailing list