[Openmp-commits] [llvm] [openmp] [OMPT][Offload][OpenMP] Fixes for OMPT data used by libomptarget (reapply #156020) (PR #175165)

Kaloyan Ignatov via Openmp-commits openmp-commits at lists.llvm.org
Tue Jan 13 07:36:13 PST 2026


https://github.com/kaloyan-ignatov updated https://github.com/llvm/llvm-project/pull/175165

>From b999566290c9aab03c41be4a41f6369d3b30751a Mon Sep 17 00:00:00 2001
From: Kaloyan Ignatov <kaloyan.ignatov at rwth-aachen.de>
Date: Sat, 16 Aug 2025 01:59:45 +0200
Subject: [PATCH 1/5] [OpenMP][OMPT] Change handling of target-related tool
 data in libomp

- store target_data in ompt_task_info_t to prevent data loss across
  scheduling of target region (both for deferred and undeferred target
  tasks)
- target_task_data is already in ompt_task_info_t, replace previous
  implementation which returned the wrong value
- combine queries for target_data and target_task_data and directly
  return ompt_task_info_t
- return correct task_data, OpenMP standard defines task_data in target
  callbacks as belonging to the generating (encountering) task
---
 openmp/runtime/src/ompt-general.cpp  | 10 ++++++----
 openmp/runtime/src/ompt-internal.h   |  5 ++++-
 openmp/runtime/src/ompt-specific.cpp | 26 +++++++++++++++++++++-----
 openmp/runtime/src/ompt-specific.h   |  4 ++--
 openmp/runtime/test/ompt/callback.h  |  3 +++
 5 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
index 1a778e4ecac3a..a529f9976f9fc 100644
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -886,10 +886,12 @@ static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
   return NULL;
 }
 
-static ompt_data_t *ompt_get_task_data() { return __ompt_get_task_data(); }
+static ompt_data_t *ompt_get_task_data() {
+  return __ompt_get_generating_task();
+}
 
-static ompt_data_t *ompt_get_target_task_data() {
-  return __ompt_get_target_task_data();
+static ompt_task_info_t *ompt_get_task_info_target() {
+  return __ompt_get_task_info_target();
 }
 
 /// Lookup function to query libomp callbacks registered by the tool
@@ -900,7 +902,7 @@ static ompt_interface_fn_t ompt_libomp_target_fn_lookup(const char *s) {
 
   provide_fn(ompt_get_callback);
   provide_fn(ompt_get_task_data);
-  provide_fn(ompt_get_target_task_data);
+  provide_fn(ompt_get_task_info_target);
 #undef provide_fn
 
 #define ompt_interface_fn(fn, type, code)                                      \
diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h
index 36b45f7a91ea2..6a9d6776e27e9 100644
--- a/openmp/runtime/src/ompt-internal.h
+++ b/openmp/runtime/src/ompt-internal.h
@@ -57,8 +57,11 @@ typedef struct ompt_callbacks_active_s {
       (info->td_flags.merged_if0 ? ompt_task_mergeable : 0x0)
 
 typedef struct {
-  ompt_frame_t frame;
+  // liboffload only uses task_data and target_data. They must be the first
+  // elements!
   ompt_data_t task_data;
+  ompt_data_t target_data;
+  ompt_frame_t frame;
   struct kmp_taskdata *scheduling_parent;
   int thread_num;
   ompt_dispatch_chunk_t dispatch_chunk;
diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp
index 94ae2e5293875..fea428cb38344 100644
--- a/openmp/runtime/src/ompt-specific.cpp
+++ b/openmp/runtime/src/ompt-specific.cpp
@@ -346,14 +346,30 @@ void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
 // task support
 //----------------------------------------------------------
 
-ompt_data_t *__ompt_get_task_data() {
+ompt_data_t *__ompt_get_generating_task() {
   kmp_info_t *thr = ompt_get_thread();
-  ompt_data_t *task_data = thr ? OMPT_CUR_TASK_DATA(thr) : NULL;
-  return task_data;
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+    if (taskdata == NULL)
+      return NULL;
+    if (taskdata->td_flags.target)
+      return &(taskdata->td_parent->ompt_task_info.task_data);
+    else
+      return &(taskdata->ompt_task_info.task_data);
+  }
+  return NULL;
 }
 
-ompt_data_t *__ompt_get_target_task_data() {
-  return &__kmp_threads[__kmp_get_gtid()]->th.ompt_thread_info.target_task_data;
+ompt_task_info_t *__ompt_get_task_info_target() {
+  kmp_info_t *thr = ompt_get_thread();
+  if (thr) {
+    kmp_taskdata_t *taskdata = thr->th.th_current_task;
+    if (taskdata == NULL)
+      return NULL;
+    if (taskdata->td_flags.target)
+      return &taskdata->ompt_task_info;
+  }
+  return NULL;
 }
 
 int __ompt_get_task_info_internal(int ancestor_level, int *type,
diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h
index b7eb140458b40..6dad1d76274db 100644
--- a/openmp/runtime/src/ompt-specific.h
+++ b/openmp/runtime/src/ompt-specific.h
@@ -37,9 +37,9 @@ void __ompt_lw_taskteam_unlink(kmp_info_t *thr);
 
 ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size);
 
-ompt_data_t *__ompt_get_task_data();
+ompt_data_t *__ompt_get_generating_task();
 
-ompt_data_t *__ompt_get_target_task_data();
+ompt_task_info_t *__ompt_get_task_info_target();
 
 ompt_task_info_t *__ompt_get_task_info_object(int depth);
 
diff --git a/openmp/runtime/test/ompt/callback.h b/openmp/runtime/test/ompt/callback.h
index cd8acb57ee2f7..1e5283856c5b5 100644
--- a/openmp/runtime/test/ompt/callback.h
+++ b/openmp/runtime/test/ompt/callback.h
@@ -1018,6 +1018,8 @@ static void on_ompt_callback_error(ompt_severity_t severity,
          codeptr_ra);
 }
 
+#ifndef SKIP_CALLBACK_REGISTRATION
+
 int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
                     ompt_data_t *tool_data) {
   ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
@@ -1094,6 +1096,7 @@ ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
 #ifdef __cplusplus
 }
 #endif
+#endif // ifndef SKIP_CALLBACK_REGISTRATION
 #endif // ifndef USE_PRIVATE_TOOL
 #ifdef _OMPT_TESTS
 #undef _OMPT_TESTS

>From d439922f9cbddd406ef08f9c45f6acea221fffb1 Mon Sep 17 00:00:00 2001
From: Kaloyan Ignatov <kaloyan.ignatov at outlook.com>
Date: Fri, 22 Aug 2025 11:16:48 +0200
Subject: [PATCH 2/5] [Offload][OMPT] Change handling of target_data and
 target_task_data

- store target_data and target_task_data in OMP runtime
- redefine ompt_task_info_t from libomp with only first two fields -
  target_task_data and target_data
- replace get_target_task_data with ompt_get_task_info_target
- define macro for transparent handling of task_data and
  target_task_data
- provide a local ompt_task_info_t struct for merged target regions
- provide tests to ensure correct values of target_data and
  target_task_data
---
 offload/include/OpenMP/OMPT/Interface.h       |  28 ++-
 offload/libomptarget/OpenMP/OMPT/Callback.cpp |  21 +--
 offload/test/ompt/register_with_host.h        |  68 +++++++
 offload/test/ompt/target_tool_data.c          | 152 ++++++++++++++++
 offload/test/ompt/target_tool_data_nowait.c   | 171 ++++++++++++++++++
 .../ompt/target_tool_data_nowait_nodepend.c   |  59 ++++++
 offload/test/ompt/veccopy_data.c              |   1 +
 7 files changed, 479 insertions(+), 21 deletions(-)
 create mode 100644 offload/test/ompt/register_with_host.h
 create mode 100644 offload/test/ompt/target_tool_data.c
 create mode 100644 offload/test/ompt/target_tool_data_nowait.c
 create mode 100644 offload/test/ompt/target_tool_data_nowait_nodepend.c

diff --git a/offload/include/OpenMP/OMPT/Interface.h b/offload/include/OpenMP/OMPT/Interface.h
index 43fb193bc75a6..b45c953dd2edb 100644
--- a/offload/include/OpenMP/OMPT/Interface.h
+++ b/offload/include/OpenMP/OMPT/Interface.h
@@ -25,12 +25,22 @@
 
 #define OMPT_IF_BUILT(stmt) stmt
 
+#define TargetTaskData                                                         \
+  ((OmptTaskInfoPtr == &OmptTaskInfo) ? nullptr                                \
+                                      : (&(OmptTaskInfoPtr->task_data)))
+#define TargetData (OmptTaskInfoPtr->target_data)
+
+typedef struct ompt_task_info_t {
+  ompt_data_t task_data;
+  ompt_data_t target_data;
+} ompt_task_info_t;
+
 /// Callbacks for target regions require task_data representing the
 /// encountering task.
 /// Callbacks for target regions and target data ops require
 /// target_task_data representing the target task region.
 typedef ompt_data_t *(*ompt_get_task_data_t)();
-typedef ompt_data_t *(*ompt_get_target_task_data_t)();
+typedef ompt_task_info_t *(*ompt_get_task_info_target_t)();
 
 namespace llvm {
 namespace omp {
@@ -40,7 +50,7 @@ namespace ompt {
 /// Function pointers that will be used to track task_data and
 /// target_task_data.
 static ompt_get_task_data_t ompt_get_task_data_fn;
-static ompt_get_target_task_data_t ompt_get_target_task_data_fn;
+static ompt_get_task_info_target_t ompt_get_task_info_target_fn;
 
 /// Used to maintain execution state for this thread
 class Interface {
@@ -216,16 +226,16 @@ class Interface {
 
 private:
   /// Target operations id
-  ompt_id_t HostOpId = 0;
-
-  /// Target region data
-  ompt_data_t TargetData = ompt_data_none;
+  ompt_id_t HostOpId{0};
 
   /// Task data representing the encountering task
-  ompt_data_t *TaskData = nullptr;
+  ompt_data_t *TaskData{nullptr};
+
+  /// TaskInfo contains target_data and task_data
+  ompt_task_info_t OmptTaskInfo{ompt_data_none, ompt_data_none};
 
-  /// Target task data representing the target task region
-  ompt_data_t *TargetTaskData = nullptr;
+  /// Ptr to TaskInfo in OpenMP runtime in case of deferred target tasks
+  ompt_task_info_t *OmptTaskInfoPtr{&OmptTaskInfo};
 
   /// Used for marking begin of a data operation
   void beginTargetDataOperation();
diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
index 99c8a122c81f4..86eab7806aef0 100644
--- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp
+++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
@@ -51,8 +51,8 @@ bool llvm::omp::target::ompt::Initialized = false;
 
 ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr;
 ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = nullptr;
-ompt_get_target_task_data_t ompt_get_target_task_data_fn = nullptr;
 ompt_get_task_data_t ompt_get_task_data_fn = nullptr;
+ompt_get_task_info_target_t ompt_get_task_info_target_fn = nullptr;
 
 /// Unique correlation id
 static std::atomic<uint64_t> IdCounter(1);
@@ -424,18 +424,17 @@ void Interface::beginTargetRegion() {
   // Set up task state
   assert(ompt_get_task_data_fn && "Calling a null task data function");
   TaskData = ompt_get_task_data_fn();
-  // Set up target task state
-  assert(ompt_get_target_task_data_fn &&
-         "Calling a null target task data function");
-  TargetTaskData = ompt_get_target_task_data_fn();
-  // Target state will be set later
-  TargetData = ompt_data_none;
+  // Set up target task and target state
+  assert(ompt_get_task_info_target_fn &&
+         "Calling a null target task info function");
+  if (ompt_task_info_t *TempTaskInfo = ompt_get_task_info_target_fn())
+    OmptTaskInfoPtr = TempTaskInfo;
 }
 
 void Interface::endTargetRegion() {
   TaskData = 0;
-  TargetTaskData = 0;
-  TargetData = ompt_data_none;
+  OmptTaskInfo = {ompt_data_none, ompt_data_none};
+  OmptTaskInfoPtr = &OmptTaskInfo;
 }
 
 /// Used to maintain the finalization functions that are received
@@ -474,7 +473,7 @@ int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup,
 
   bindOmptFunctionName(ompt_get_callback, lookupCallbackByCode);
   bindOmptFunctionName(ompt_get_task_data, ompt_get_task_data_fn);
-  bindOmptFunctionName(ompt_get_target_task_data, ompt_get_target_task_data_fn);
+  bindOmptFunctionName(ompt_get_task_info_target, ompt_get_task_info_target_fn);
 #undef bindOmptFunctionName
 
   // Store pointer of 'ompt_libomp_target_fn_lookup' for use by libomptarget
@@ -483,8 +482,6 @@ int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup,
   assert(lookupCallbackByCode && "lookupCallbackByCode should be non-null");
   assert(lookupCallbackByName && "lookupCallbackByName should be non-null");
   assert(ompt_get_task_data_fn && "ompt_get_task_data_fn should be non-null");
-  assert(ompt_get_target_task_data_fn &&
-         "ompt_get_target_task_data_fn should be non-null");
   assert(LibraryFinalizer == nullptr &&
          "LibraryFinalizer should not be initialized yet");
 
diff --git a/offload/test/ompt/register_with_host.h b/offload/test/ompt/register_with_host.h
new file mode 100644
index 0000000000000..5e97f2c0b751a
--- /dev/null
+++ b/offload/test/ompt/register_with_host.h
@@ -0,0 +1,68 @@
+#define SKIP_CALLBACK_REGISTRATION 1
+
+#include "../../../openmp/runtime/test/ompt/callback.h"
+#include "callbacks.h"
+#include <omp-tools.h>
+
+// From openmp/runtime/test/ompt/callback.h
+#define register_ompt_callback_t(name, type)                                   \
+  do {                                                                         \
+    type f_##name = &on_##name;                                                \
+    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never)  \
+      printf("0: Could not register callback '" #name "'\n");                  \
+  } while (0)
+
+#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
+
+// Init functions
+int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
+                    ompt_data_t *tool_data) {
+  ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
+
+  if (!ompt_set_callback)
+    return 0; // failed
+
+  // host runtime functions
+  ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
+  ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
+  ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info");
+
+  ompt_get_unique_id();
+
+  // host callbacks
+  register_ompt_callback(ompt_callback_sync_region);
+  register_ompt_callback_t(ompt_callback_sync_region_wait,
+                           ompt_callback_sync_region_t);
+  register_ompt_callback_t(ompt_callback_reduction,
+                           ompt_callback_sync_region_t);
+  register_ompt_callback(ompt_callback_implicit_task);
+  register_ompt_callback(ompt_callback_parallel_begin);
+  register_ompt_callback(ompt_callback_parallel_end);
+  register_ompt_callback(ompt_callback_task_create);
+  register_ompt_callback(ompt_callback_task_schedule);
+
+  // device callbacks
+  register_ompt_callback(ompt_callback_device_initialize);
+  register_ompt_callback(ompt_callback_device_finalize);
+  register_ompt_callback(ompt_callback_device_load);
+  register_ompt_callback(ompt_callback_target_data_op_emi);
+  register_ompt_callback(ompt_callback_target_emi);
+  register_ompt_callback(ompt_callback_target_submit_emi);
+
+  return 1; // success
+}
+
+void ompt_finalize(ompt_data_t *tool_data) {}
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
+                                          const char *runtime_version) {
+  static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,
+                                                            &ompt_finalize, 0};
+  return &ompt_start_tool_result;
+}
+#ifdef __cplusplus
+}
+#endif
diff --git a/offload/test/ompt/target_tool_data.c b/offload/test/ompt/target_tool_data.c
new file mode 100644
index 0000000000000..7407e106700ba
--- /dev/null
+++ b/offload/test/ompt/target_tool_data.c
@@ -0,0 +1,152 @@
+// clang-format off
+// RUN: env LIBOMP_NUM_HIDDEN_HELPER_THREADS=1 %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// clang-format on
+
+#include <inttypes.h>
+#include <omp-tools.h>
+#include <omp.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "register_with_host.h"
+
+#define N 1000000
+#define M 1000
+
+int main() {
+  float *x = malloc(N * sizeof(float));
+  float *y = malloc(N * sizeof(float));
+
+  for (int i = 0; i < N; i++) {
+    x[i] = 1;
+    y[i] = 1;
+  }
+
+#pragma omp target enter data map(to : x[0 : N]) map(alloc : y[0 : N])
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < M; j++) {
+      y[i] += 3 * x[i];
+    }
+  }
+
+#pragma omp target teams distribute parallel for
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < M; j++) {
+      y[i] += 3 * x[i];
+    }
+  }
+
+#pragma omp target exit data map(release : x[0 : N]) map(from : y[0 : N])
+
+  printf("%f, %f\n", x[0], y[0]);
+
+  free(x);
+  free(y);
+  return 0;
+}
+
+// clang-format off
+/// CHECK: ompt_event_initial_task_begin
+/// CHECK-SAME: task_id=[[ENCOUNTERING_TASK:[0-f]+]]
+
+/// CHECK: Callback Target EMI: kind=ompt_target_enter_data endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target_enter_data endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target_exit_data endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target_exit_data endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=(nil) (0x0)
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
diff --git a/offload/test/ompt/target_tool_data_nowait.c b/offload/test/ompt/target_tool_data_nowait.c
new file mode 100644
index 0000000000000..c5e20bec1bd55
--- /dev/null
+++ b/offload/test/ompt/target_tool_data_nowait.c
@@ -0,0 +1,171 @@
+// clang-format off
+// RUN: env LIBOMP_NUM_HIDDEN_HELPER_THREADS=1 %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// clang-format on
+
+#include <inttypes.h>
+#include <omp-tools.h>
+#include <omp.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "register_with_host.h"
+
+#define N 1000000
+#define M 1000
+
+int main() {
+  float *x = malloc(N * sizeof(float));
+  float *y = malloc(N * sizeof(float));
+
+  for (int i = 0; i < N; i++) {
+    x[i] = 1;
+    y[i] = 1;
+  }
+
+#pragma omp target enter data map(to : x[0 : N]) map(alloc : y[0 : N])         \
+    nowait depend(inout : x)
+#pragma omp target teams distribute parallel for nowait depend(inout : x)
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < M; j++) {
+      y[i] += 3 * x[i];
+    }
+  }
+
+#pragma omp target teams distribute parallel for nowait depend(inout : x)
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < M; j++) {
+      y[i] += 3 * x[i];
+    }
+  }
+
+#pragma omp target exit data map(release : x[0 : N]) map(from : y[0 : N])      \
+    nowait depend(inout : x)
+#pragma omp taskwait
+
+  printf("%f, %f\n", x[0], y[0]);
+
+  free(x);
+  free(y);
+  return 0;
+}
+
+// clang-format off
+/// CHECK: ompt_event_initial_task_begin
+/// CHECK-SAME: task_id=[[ENCOUNTERING_TASK:[0-f]+]]
+
+/// CHECK: ompt_event_task_create
+/// CHECK-SAME: new_task_id=[[TARGET_TASK_1:[0-f]+]]
+/// CHECK-SAME: task_type=ompt_task_target
+
+/// CHECK: ompt_event_task_create
+/// CHECK-SAME: new_task_id=[[TARGET_TASK_2:[0-f]+]]
+/// CHECK-SAME: task_type=ompt_task_target
+
+/// CHECK: ompt_event_task_create
+/// CHECK-SAME: new_task_id=[[TARGET_TASK_3:[0-f]+]]
+/// CHECK-SAME: task_type=ompt_task_target
+
+/// CHECK: ompt_event_task_create
+/// CHECK-SAME: new_task_id=[[TARGET_TASK_4:[0-f]+]]
+/// CHECK-SAME: task_type=ompt_task_target
+
+/// CHECK: Callback Target EMI: kind=ompt_target_enter_data endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target_enter_data endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_1]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_2]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_2]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_3]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Submit EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_3]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target_exit_data endpoint=ompt_scope_begin
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA:[0-f]+]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
+
+/// CHECK: Callback Target EMI: kind=ompt_target_exit_data endpoint=ompt_scope_end
+/// CHECK-SAME: task_data=0x{{[0-f]+}} (0x[[ENCOUNTERING_TASK]])
+/// CHECK-SAME: target_task_data=0x{{[0-f]+}} (0x[[TARGET_TASK_4]])
+/// CHECK-SAME: target_data=0x{{[0-f]+}} (0x[[TARGET_DATA]])
diff --git a/offload/test/ompt/target_tool_data_nowait_nodepend.c b/offload/test/ompt/target_tool_data_nowait_nodepend.c
new file mode 100644
index 0000000000000..b28184bc304db
--- /dev/null
+++ b/offload/test/ompt/target_tool_data_nowait_nodepend.c
@@ -0,0 +1,59 @@
+// clang-format off
+// RUN: env LIBOMP_NUM_HIDDEN_HELPER_THREADS=1 %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// clang-format on
+
+#include <inttypes.h>
+#include <omp-tools.h>
+#include <omp.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "register_with_host.h"
+
+#define N 1000000
+#define M 1000
+
+int main() {
+  float *x = malloc(N * sizeof(float));
+  float *y = malloc(N * sizeof(float));
+  float *a = malloc(N * sizeof(float));
+  float *b = malloc(N * sizeof(float));
+
+  for (int i = 0; i < N; i++) {
+    x[i] = 1;
+    y[i] = 1;
+    a[i] = 1;
+    b[i] = 1;
+  }
+
+#pragma omp target teams distribute parallel for nowait map(to : x[0 : N])     \
+    map(from : y[0 : N])
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < M; j++) {
+      y[i] += 3 * x[i];
+    }
+  }
+
+#pragma omp target teams distribute parallel for nowait map(to : a[0 : N])     \
+    map(from : b[0 : N])
+  for (int i = 0; i < N; i++) {
+    for (int j = 0; j < M; j++) {
+      b[i] += 3 * a[i];
+    }
+  }
+
+#pragma omp taskwait
+
+  printf("%f, %f, %f, %f\n", x[0], y[0], a[0], b[0]);
+
+  free(x);
+  free(y);
+  free(a);
+  free(b);
+  return 0;
+}
+
+// clang-format off
+/// CHECK-NOT: target_task_data=(nil) (0x0)
+/// CHECK-NOT: target_data=(nil) (0x0)
diff --git a/offload/test/ompt/veccopy_data.c b/offload/test/ompt/veccopy_data.c
index 2e470613c3c69..e4fac931bba2c 100644
--- a/offload/test/ompt/veccopy_data.c
+++ b/offload/test/ompt/veccopy_data.c
@@ -4,6 +4,7 @@
 // REQUIRES: gpu
 // XFAIL: intelgpu
 // clang-format on
+// clang-format on
 
 /*
  * Example OpenMP program that registers EMI callbacks.

>From 1e4084c2ab16848a8aacf68d62044d473f2abc60 Mon Sep 17 00:00:00 2001
From: Kaloyan Ignatov <kaloyan.ignatov at outlook.com>
Date: Fri, 31 Oct 2025 16:31:56 +0100
Subject: [PATCH 3/5] added comment about omp_task_info_t struct

---
 offload/include/OpenMP/OMPT/Interface.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/offload/include/OpenMP/OMPT/Interface.h b/offload/include/OpenMP/OMPT/Interface.h
index b45c953dd2edb..5fc853a2aa662 100644
--- a/offload/include/OpenMP/OMPT/Interface.h
+++ b/offload/include/OpenMP/OMPT/Interface.h
@@ -30,6 +30,7 @@
                                       : (&(OmptTaskInfoPtr->task_data)))
 #define TargetData (OmptTaskInfoPtr->target_data)
 
+/// Prefix of ompt_task_info_t struct from libomp
 typedef struct ompt_task_info_t {
   ompt_data_t task_data;
   ompt_data_t target_data;

>From 65a20c72b8282feaf45fe4e908b6884e6760e5e3 Mon Sep 17 00:00:00 2001
From: Kaloyan Ignatov <kaloyan.ignatov at outlook.com>
Date: Thu, 13 Nov 2025 12:44:51 +0100
Subject: [PATCH 4/5] added comment to taskinfo pointer from libomp

---
 offload/libomptarget/OpenMP/OMPT/Callback.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
index 86eab7806aef0..9d30f94412793 100644
--- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp
+++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
@@ -427,8 +427,9 @@ void Interface::beginTargetRegion() {
   // Set up target task and target state
   assert(ompt_get_task_info_target_fn &&
          "Calling a null target task info function");
-  if (ompt_task_info_t *TempTaskInfo = ompt_get_task_info_target_fn())
-    OmptTaskInfoPtr = TempTaskInfo;
+  // In case of deferred target tasks, use pointer from libomp
+  if (ompt_task_info_t *TempTaskInfoPtr = ompt_get_task_info_target_fn())
+    OmptTaskInfoPtr = TempTaskInfoPtr;
 }
 
 void Interface::endTargetRegion() {

>From 7f1e4d4c8065f4c03f28e7e6d3b0a1bd272b0a3b Mon Sep 17 00:00:00 2001
From: Kaloyan Ignatov <kaloyan.ignatov at outlook.com>
Date: Fri, 19 Dec 2025 13:44:37 +0100
Subject: [PATCH 5/5] fix -Wglobal-constructor

---
 offload/include/OpenMP/OMPT/Interface.h       | 2 +-
 offload/libomptarget/OpenMP/OMPT/Callback.cpp | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/offload/include/OpenMP/OMPT/Interface.h b/offload/include/OpenMP/OMPT/Interface.h
index 5fc853a2aa662..e1d4d47224614 100644
--- a/offload/include/OpenMP/OMPT/Interface.h
+++ b/offload/include/OpenMP/OMPT/Interface.h
@@ -236,7 +236,7 @@ class Interface {
   ompt_task_info_t OmptTaskInfo{ompt_data_none, ompt_data_none};
 
   /// Ptr to TaskInfo in OpenMP runtime in case of deferred target tasks
-  ompt_task_info_t *OmptTaskInfoPtr{&OmptTaskInfo};
+  ompt_task_info_t *OmptTaskInfoPtr{nullptr};
 
   /// Used for marking begin of a data operation
   void beginTargetDataOperation();
diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
index 9d30f94412793..00a3a04199871 100644
--- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp
+++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
@@ -411,6 +411,7 @@ void Interface::endTarget(int64_t DeviceId, void *Code) {
 }
 
 void Interface::beginTargetDataOperation() {
+  OmptTaskInfoPtr = &OmptTaskInfo;
   ODBG(ODT_Tool) << "in ompt_target_region_begin (TargetRegionId = "
                  << TargetData.value << ")";
 }
@@ -418,6 +419,7 @@ void Interface::beginTargetDataOperation() {
 void Interface::endTargetDataOperation() {
   ODBG(ODT_Tool) << "in ompt_target_region_end (TargetRegionId = "
                  << TargetData.value << ")";
+  OmptTaskInfoPtr = nullptr;
 }
 
 void Interface::beginTargetRegion() {
@@ -430,12 +432,14 @@ void Interface::beginTargetRegion() {
   // In case of deferred target tasks, use pointer from libomp
   if (ompt_task_info_t *TempTaskInfoPtr = ompt_get_task_info_target_fn())
     OmptTaskInfoPtr = TempTaskInfoPtr;
+  else
+    OmptTaskInfoPtr = &OmptTaskInfo;
 }
 
 void Interface::endTargetRegion() {
   TaskData = 0;
   OmptTaskInfo = {ompt_data_none, ompt_data_none};
-  OmptTaskInfoPtr = &OmptTaskInfo;
+  OmptTaskInfoPtr = nullptr;
 }
 
 /// Used to maintain the finalization functions that are received



More information about the Openmp-commits mailing list