[Openmp-commits] [llvm] [openmp] [OMPT][OpenMP] Use omp_initial_device for host in callbacks (PR #192924)

Jan André Reuter via Openmp-commits openmp-commits at lists.llvm.org
Tue Apr 28 04:13:00 PDT 2026


https://github.com/Thyre updated https://github.com/llvm/llvm-project/pull/192924

>From 84a0d5fc37fb2d253fda6b6f43965e6d933b6eb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Andr=C3=A9=20Reuter?= <j.reuter at fz-juelich.de>
Date: Mon, 20 Apr 2026 11:06:04 +0200
Subject: [PATCH] [OMPT][OpenMP] Use omp_initial_device for host in callbacks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The OpenMP specification offers different ways for identifying the host
device. While users of the OpenMP API can use `omp_get_initial_device()`
or the constant `omp_initial_device` (available since OpenMP v5.2), a tool
needs to rely on the `initial_device_num` passed by the OpenMP runtime during
the `initialize` callback.

In #134451, it was discovered that the `initial_device_num` passed is always
`0`, regardless of whether any devices are available for offload execution.
For host-only OpenMP code, this matches the result of `omp_get_num_devices()`,
and is a valid result. In the case of devices being available though, this
passed identifier is incorrect. Although `libomp` calls
`omp_get_num_devices()`, `libomptarget` has not fully initialized its
PluginManager at that point, so the call reports no available devices.
Tools relying on `initial_device_num` might
therefore incorrectly assume host-side execution when some code runs on a
device.
Since the `ompt_get_num_devices()` entry point is also not fully implemented,
tools currently need to do on-the-fly handling for the host device.

To make handling easier, consistently use the `omp_initial_device` identifier
as the `initial_device_num` and for all device callbacks.
This does not rely on knowing the number of available devices during OMPT
initialization in the runtime and ensures a consistent value for tools to
handle.

Closes #134451

Signed-off-by: Jan André Reuter <j.reuter at fz-juelich.de>
---
 offload/libomptarget/OpenMP/OMPT/Callback.cpp | 32 +++++++++++--------
 offload/libomptarget/device.cpp               |  4 +--
 offload/test/ompt/target_memcpy.c             |  2 +-
 offload/test/ompt/target_memcpy_emi.c         |  2 +-
 openmp/runtime/src/ompt-general.cpp           |  7 ++--
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
index 99c8a122c81f4..c107fa00ce291 100644
--- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp
+++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
@@ -34,6 +34,9 @@ FOREACH_OMPT_NOEMI_EVENT(defineOmptCallback)
 FOREACH_OMPT_EMI_EVENT(defineOmptCallback)
 #undef defineOmptCallback
 
+// See definition in OpenMP (omp.h.var/omp_lib.(F90|h).var)
+#define omp_initial_device -1
+
 using namespace llvm::omp::target::ompt;
 using namespace llvm::omp::target::debug;
 
@@ -84,7 +87,7 @@ void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
         ompt_target_data_alloc, HstPtrBegin,
-        /*SrcDeviceNum=*/omp_get_initial_device(), *TgtPtrBegin,
+        /*SrcDeviceNum=*/omp_initial_device, *TgtPtrBegin,
         /*TgtDeviceNum=*/DeviceId, Size, Code);
   } else if (ompt_callback_target_data_op_fn) {
     // HostOpId is set by the runtime
@@ -92,7 +95,7 @@ void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
     // Invoke the tool supplied data op callback
     ompt_callback_target_data_op_fn(
         TargetData.value, HostOpId, ompt_target_data_alloc, HstPtrBegin,
-        /*SrcDeviceNum=*/omp_get_initial_device(), *TgtPtrBegin,
+        /*SrcDeviceNum=*/omp_initial_device, *TgtPtrBegin,
         /*TgtDeviceNum=*/DeviceId, Size, Code);
   }
 }
@@ -107,7 +110,7 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
         ompt_target_data_alloc, HstPtrBegin,
-        /*SrcDeviceNum=*/omp_get_initial_device(), *TgtPtrBegin,
+        /*SrcDeviceNum=*/omp_initial_device, *TgtPtrBegin,
         /*TgtDeviceNum=*/DeviceId, Size, Code);
   }
   endTargetDataOperation();
@@ -163,10 +166,10 @@ void Interface::beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
     // HostOpId is set by the runtime
     HostOpId = createOpId();
     // Invoke the tool supplied data op callback
-    ompt_callback_target_data_op_fn(TargetData.value, HostOpId,
-                                    ompt_target_data_delete, TgtPtrBegin,
-                                    DeviceId, /*TgtPtrBegin=*/nullptr,
-                                    /*TgtDeviceNum=*/-1, /*Bytes=*/0, Code);
+    ompt_callback_target_data_op_fn(
+        TargetData.value, HostOpId, ompt_target_data_delete, TgtPtrBegin,
+        DeviceId, /*TgtPtrBegin=*/nullptr,
+        /*TgtDeviceNum=*/omp_initial_device, /*Bytes=*/0, Code);
   }
 }
 
@@ -179,7 +182,8 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
         ompt_target_data_delete, TgtPtrBegin, DeviceId,
-        /*TgtPtrBegin=*/nullptr, /*TgtDeviceNum=*/-1, /*Bytes=*/0, Code);
+        /*TgtPtrBegin=*/nullptr, /*TgtDeviceNum=*/omp_initial_device,
+        /*Bytes=*/0, Code);
   }
   endTargetDataOperation();
 }
@@ -333,13 +337,13 @@ void Interface::beginTargetAssociatePointer(int64_t DeviceId, void *HstPtrBegin,
   if (ompt_callback_target_data_op_emi_fn) {
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_associate, HstPtrBegin, omp_get_initial_device(),
+        ompt_target_data_associate, HstPtrBegin, omp_initial_device,
         TgtPtrBegin, DeviceId, Size, Code);
   } else if (ompt_callback_target_data_op_fn) {
     HostOpId = createOpId();
     ompt_callback_target_data_op_fn(
         TargetData.value, HostOpId, ompt_target_data_associate, HstPtrBegin,
-        omp_get_initial_device(), TgtPtrBegin, DeviceId, Size, Code);
+        omp_initial_device, TgtPtrBegin, DeviceId, Size, Code);
   }
 }
 
@@ -349,7 +353,7 @@ void Interface::endTargetAssociatePointer(int64_t DeviceId, void *HstPtrBegin,
   if (ompt_callback_target_data_op_emi_fn) {
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_associate, HstPtrBegin, omp_get_initial_device(),
+        ompt_target_data_associate, HstPtrBegin, omp_initial_device,
         TgtPtrBegin, DeviceId, Size, Code);
   }
 }
@@ -362,13 +366,13 @@ void Interface::beginTargetDisassociatePointer(int64_t DeviceId,
   if (ompt_callback_target_data_op_emi_fn) {
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_disassociate, HstPtrBegin, omp_get_initial_device(),
+        ompt_target_data_disassociate, HstPtrBegin, omp_initial_device,
         TgtPtrBegin, DeviceId, Size, Code);
   } else if (ompt_callback_target_data_op_fn) {
     HostOpId = createOpId();
     ompt_callback_target_data_op_fn(
         TargetData.value, HostOpId, ompt_target_data_disassociate, HstPtrBegin,
-        omp_get_initial_device(), TgtPtrBegin, DeviceId, Size, Code);
+        omp_initial_device, TgtPtrBegin, DeviceId, Size, Code);
   }
 }
 void Interface::endTargetDisassociatePointer(int64_t DeviceId,
@@ -378,7 +382,7 @@ void Interface::endTargetDisassociatePointer(int64_t DeviceId,
   if (ompt_callback_target_data_op_emi_fn) {
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_disassociate, HstPtrBegin, omp_get_initial_device(),
+        ompt_target_data_disassociate, HstPtrBegin, omp_initial_device,
         TgtPtrBegin, DeviceId, Size, Code);
   }
 }
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index 48aa29c3d4fbb..546f679353544 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -273,7 +273,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
   OMPT_IF_BUILT(
       InterfaceRAII TargetDataSubmitRAII(
           RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
-          omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
+          omp_initial_device, HstPtrBegin, DeviceID, TgtPtrBegin, Size,
           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
 
   return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
@@ -293,7 +293,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
   OMPT_IF_BUILT(
       InterfaceRAII TargetDataRetrieveRAII(
           RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
-          DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
+          DeviceID, TgtPtrBegin, omp_initial_device, HstPtrBegin, Size,
           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
 
   return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
diff --git a/offload/test/ompt/target_memcpy.c b/offload/test/ompt/target_memcpy.c
index f769995579f50..fd0c5ed60193d 100644
--- a/offload/test/ompt/target_memcpy.c
+++ b/offload/test/ompt/target_memcpy.c
@@ -49,7 +49,7 @@ int main() {
 
 // clang-format off
 /// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc
-/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: src_device_num=[[HOST:-1]]
 /// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
 /// CHECK-NOT: code=(nil)
 /// CHECK: code=[[CODE1:0x[0-f]+]]
diff --git a/offload/test/ompt/target_memcpy_emi.c b/offload/test/ompt/target_memcpy_emi.c
index 39f262a366f94..c493b08f6f105 100644
--- a/offload/test/ompt/target_memcpy_emi.c
+++ b/offload/test/ompt/target_memcpy_emi.c
@@ -57,7 +57,7 @@ int main(void) {
 
 /// CHECK: Allocating Memory on Device
 /// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_alloc
-/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: src_device_num=[[HOST:-1]]
 /// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
 /// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_alloc {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
 
diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp
index 1a778e4ecac3a..959457d380d03 100644
--- a/openmp/runtime/src/ompt-general.cpp
+++ b/openmp/runtime/src/ompt-general.cpp
@@ -468,7 +468,7 @@ void ompt_pre_init() {
 #endif
 }
 
-extern "C" int omp_get_initial_device(void);
+#define omp_initial_device -1 /* see omp.h.var */
 
 void ompt_post_init() {
   //--------------------------------------------------
@@ -486,7 +486,7 @@ void ompt_post_init() {
   //--------------------------------------------------
   if (ompt_start_tool_result) {
     ompt_enabled.enabled = !!ompt_start_tool_result->initialize(
-        ompt_fn_lookup, omp_get_initial_device(),
+        ompt_fn_lookup, omp_initial_device,
         &(ompt_start_tool_result->tool_data));
 
     if (!ompt_enabled.enabled) {
@@ -929,7 +929,8 @@ _OMP_EXTERN void ompt_libomp_connect(ompt_start_tool_result_t *result) {
     // functions can be extracted and assigned to the callbacks in
     // libomptarget
     result->initialize(ompt_libomp_target_fn_lookup,
-                       /* initial_device_num */ 0, /* tool_data */ nullptr);
+                       /* initial_device_num */ omp_initial_device,
+                       /* tool_data */ nullptr);
     // Track the object provided by libomptarget so that the finalizer can be
     // called during OMPT finalization
     libomptarget_ompt_result = result;



More information about the Openmp-commits mailing list