[Openmp-commits] [clang] [llvm] [openmp] [OMPT] Add callback for `omp_target_memset` calls (PR #194168)
Jan André Reuter via Openmp-commits
openmp-commits at lists.llvm.org
Wed Apr 29 05:12:12 PDT 2026
https://github.com/Thyre updated https://github.com/llvm/llvm-project/pull/194168
>From 627c19c07bd86cc88bf05b64cfac66904068ac30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20Andr=C3=A9=20Reuter?= <j.reuter at fz-juelich.de>
Date: Wed, 29 Apr 2026 14:11:57 +0200
Subject: [PATCH] [OMPT] Add callback for `omp_target_memset` calls
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
OpenMP v6.0 added new enum values to `ompt_callback_target_data_op_t`
for communicating `memset` events. Add a corresponding callback invocation
to `omp_target_memset` calls.
Expand the tests to ensure correct behavior.
Signed-off-by: Jan André Reuter <j.reuter at fz-juelich.de>
---
clang/docs/OpenMPSupport.rst | 2 +-
offload/include/OpenMP/OMPT/Interface.h | 13 +++
offload/libomptarget/OpenMP/API.cpp | 5 ++
offload/libomptarget/OpenMP/OMPT/Callback.cpp | 27 +++++++
offload/test/ompt/callbacks.h | 8 +-
offload/test/ompt/target_memset.c | 76 ++++++++++++++++++
offload/test/ompt/target_memset_async.c | 79 +++++++++++++++++++
offload/test/ompt/target_memset_emi.c | 78 ++++++++++++++++++
openmp/runtime/src/include/omp-tools.h.var | 4 +-
9 files changed, 289 insertions(+), 3 deletions(-)
create mode 100644 offload/test/ompt/target_memset.c
create mode 100644 offload/test/ompt/target_memset_async.c
create mode 100644 offload/test/ompt/target_memset_emi.c
diff --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 6f37ce044fbe5..35a4d907c84c7 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -619,7 +619,7 @@ implementation.
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| OMPT: ompt_target_data_transfer(_async) | :none:`unclaimed` | :good:`N/A` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
-| OMPT: ompt_target_data_memset(_async) | :none:`unclaimed` | :good:`N/A` | |
+| OMPT: ompt_target_data_memset(_async) | :part:`partial` | :good:`N/A` | Callbacks: https://github.com/llvm/llvm-project/pull/194168 |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
| OMPT: workdistribute work callback enum | :none:`unclaimed` | :good:`N/A` | |
+-------------------------------------------------------------+---------------------------+---------------------------+--------------------------------------------------------------------------+
diff --git a/offload/include/OpenMP/OMPT/Interface.h b/offload/include/OpenMP/OMPT/Interface.h
index 43fb193bc75a6..6961641769b76 100644
--- a/offload/include/OpenMP/OMPT/Interface.h
+++ b/offload/include/OpenMP/OMPT/Interface.h
@@ -126,6 +126,14 @@ class Interface {
void endTargetDisassociatePointer(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size, void *Code);
+ /// Top-level function for invoking callback before target memset API
+ void beginTargetMemset(int64_t DeviceId, void *HostPtrBegin,
+ void *TgtPtrBegin, size_t Size, void *Code);
+
+ /// Top-level function for invoking callback after target memset API
+ void endTargetMemset(int64_t DeviceId, void *HostPtrBegin, void *TgtPtrBegin,
+ size_t Size, void *Code);
+
// Target kernel callbacks
/// Top-level function for invoking callback before target construct
@@ -166,6 +174,11 @@ class Interface {
std::mem_fn(&Interface::beginTargetDisassociatePointer),
std::mem_fn(&Interface::endTargetDisassociatePointer));
+ if constexpr (OpType == ompt_target_data_memset ||
+ OpType == ompt_target_data_memset_async)
+ return std::make_pair(std::mem_fn(&Interface::beginTargetMemset),
+ std::mem_fn(&Interface::endTargetMemset));
+
llvm_unreachable("Unhandled target data operation type!");
}
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 6dcd94e48e987..dc4bccd01dfea 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -477,6 +477,11 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
ODBG(ODT_Interface) << "filling memory on host via memset";
memset(Ptr, ByteVal, NumBytes); // ignore return value, memset() cannot fail
} else {
+ OMPT_IF_BUILT(InterfaceRAII TargetMemsetRAII(
+ RegionInterface.getCallbacks<ompt_target_data_memset>(), DeviceNum,
+ nullptr, const_cast<void *>(Ptr), NumBytes,
+ __builtin_return_address(0)));
+
// TODO: replace the omp_target_memset() slow path with the fast path.
// That will require the ability to execute a kernel from within
// libomptarget.so (which we do not have at the moment).
diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
index c107fa00ce291..9b7705c146869 100644
--- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp
+++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
@@ -387,6 +387,33 @@ void Interface::endTargetDisassociatePointer(int64_t DeviceId,
}
}
+// Report the start of a target memset to the tool. The EMI data-op callback
+// is preferred; the non-EMI data-op callback is used only when no EMI
+// callback is registered.
+void Interface::beginTargetMemset(int64_t DeviceId, void *HostPtrBegin,
+ void *TgtPtrBegin, size_t Size, void *Code) {
+ beginTargetDataOperation();
+ if (ompt_callback_target_data_op_emi_fn) {
+ // Use omp_get_initial_device() here as well, so the EMI and non-EMI paths
+ // report the same source (host) device number.
+ ompt_callback_target_data_op_emi_fn(
+ ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
+ ompt_target_data_memset, HostPtrBegin, omp_get_initial_device(),
+ TgtPtrBegin, DeviceId, Size, Code);
+ } else if (ompt_callback_target_data_op_fn) {
+ // The non-EMI callback takes the operation id by value; create it here.
+ HostOpId = createOpId();
+ ompt_callback_target_data_op_fn(
+ TargetData.value, HostOpId, ompt_target_data_memset, HostPtrBegin,
+ omp_get_initial_device(), TgtPtrBegin, DeviceId, Size, Code);
+ }
+}
+
+// Report the end of a target memset to the tool. Only the EMI callback is
+// invoked with an end endpoint; the non-EMI callback is not called here.
+void Interface::endTargetMemset(int64_t DeviceId, void *HostPtrBegin,
+ void *TgtPtrBegin, size_t Size, void *Code) {
+ if (ompt_callback_target_data_op_emi_fn) {
+ // Must pass the same source device number as the matching begin callback.
+ ompt_callback_target_data_op_emi_fn(
+ ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
+ ompt_target_data_memset, HostPtrBegin, omp_get_initial_device(),
+ TgtPtrBegin, DeviceId, Size, Code);
+ }
+ endTargetDataOperation();
+}
+
void Interface::beginTarget(int64_t DeviceId, void *Code) {
beginTargetRegion();
if (ompt_callback_target_emi_fn) {
diff --git a/offload/test/ompt/callbacks.h b/offload/test/ompt/callbacks.h
index 2e7763f0abbac..b82c15fd5b48f 100644
--- a/offload/test/ompt/callbacks.h
+++ b/offload/test/ompt/callbacks.h
@@ -13,10 +13,16 @@ static const char *ompt_target_data_op_t_values[] = {
"ompt_target_data_delete",
"ompt_target_data_associate",
"ompt_target_data_disassociate",
+ "ompt_target_data_transfer",
+ "ompt_target_data_memset",
+ "ompt_target_data_transfer_rect",
"ompt_target_data_alloc_async",
"ompt_target_data_transfer_to_device_async",
"ompt_target_data_transfer_from_device_async",
- "ompt_target_data_delete_async"};
+ "ompt_target_data_delete_async",
+ "ompt_target_data_transfer_async",
+ "ompt_target_data_memset_async",
+ "ompt_target_data_transfer_rect_async"};
static const char *ompt_scope_endpoint_t_values[] = {
"", "ompt_scope_begin", "ompt_scope_end", "ompt_scope_beginend"};
diff --git a/offload/test/ompt/target_memset.c b/offload/test/ompt/target_memset.c
new file mode 100644
index 0000000000000..285e6f014067f
--- /dev/null
+++ b/offload/test/ompt/target_memset.c
@@ -0,0 +1,76 @@
+// clang-format off
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// REQUIRES: gpu
+// clang-format on
+
+/*
+ * Verify that for the target OpenMP APIs, the return address is non-null and
+ * distinct.
+ */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_non_emi.h"
+
+int main() {
+ int d = omp_get_default_device();
+ int id = omp_get_initial_device();
+ int q[128], i;
+ void *p;
+ void *result;
+
+ if (d < 0 || d >= omp_get_num_devices())
+ d = id; // default device out of range: use the initial device number
+
+ p = omp_target_alloc(130 * sizeof(int), d);
+ if (p == NULL)
+ return 0; // allocation failed: exit before the memset is issued
+
+ for (i = 0; i < 128; i++)
+ q[i] = i; // NOTE(review): q is filled here but not read again in main
+
+ result = omp_target_memset(p, 0, 130 * sizeof(int), d); // call under test
+ if (result != p) {
+ abort();
+ }
+
+ int q2[128];
+ for (i = 0; i < 128; ++i)
+ q2[i] = i; // pre-fill with index values; the copy below overwrites them
+ if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
+ 0, NULL)) // copies from p on device d into q2 on device id
+ abort();
+
+#pragma omp taskwait
+
+ for (i = 0; i < 128; ++i)
+ if (q2[i] != 0)
+ abort(); // every copied element must be the zero written by the memset
+
+ omp_target_free(p, d);
+
+ return 0;
+}
+
+// clang-format off
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_memset
+/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE1]]
+/// CHECK: code=[[CODE2:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_transfer_from_device
+/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE2]]
+/// CHECK: code=[[CODE3:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_delete
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE3]]
diff --git a/offload/test/ompt/target_memset_async.c b/offload/test/ompt/target_memset_async.c
new file mode 100644
index 0000000000000..634d224d84d19
--- /dev/null
+++ b/offload/test/ompt/target_memset_async.c
@@ -0,0 +1,79 @@
+// clang-format off
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// REQUIRES: gpu
+// clang-format on
+
+/*
+ * Verify that for the target OpenMP APIs, the return address is non-null and
+ * distinct.
+ */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_non_emi.h"
+
+int main() {
+ int d = omp_get_default_device();
+ int id = omp_get_initial_device();
+ int q[128], i;
+ void *p;
+ void *result;
+
+ if (d < 0 || d >= omp_get_num_devices())
+ d = id; // default device out of range: use the initial device number
+
+ p = omp_target_alloc(130 * sizeof(int), d);
+ if (p == NULL)
+ return 0; // allocation failed: exit before the memset is issued
+
+ for (i = 0; i < 128; i++)
+ q[i] = i; // NOTE(review): q is filled here but not read again in main
+
+ result = omp_target_memset_async(p, 0, 130 * sizeof(int), d, 0, NULL); // call under test
+
+#pragma omp taskwait
+
+ if (result != p) {
+ abort();
+ }
+
+ int q2[128];
+ for (i = 0; i < 128; ++i)
+ q2[i] = i; // pre-fill with index values; the copy below overwrites them
+ if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
+ 0, NULL)) // copies from p on device d into q2 on device id
+ abort();
+
+#pragma omp taskwait
+
+ for (i = 0; i < 128; ++i)
+ if (q2[i] != 0)
+ abort(); // every copied element must be the zero written by the memset
+
+ omp_target_free(p, d);
+
+ return 0;
+}
+
+// clang-format off
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_memset
+/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE1]]
+/// CHECK: code=[[CODE2:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_transfer_from_device
+/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE2]]
+/// CHECK: code=[[CODE3:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_delete
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE3]]
diff --git a/offload/test/ompt/target_memset_emi.c b/offload/test/ompt/target_memset_emi.c
new file mode 100644
index 0000000000000..333d15cc386ef
--- /dev/null
+++ b/offload/test/ompt/target_memset_emi.c
@@ -0,0 +1,78 @@
+// clang-format off
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// REQUIRES: gpu
+// clang-format on
+
+/*
+ * Verify correct callback sequence for memset API call.
+ */
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_emi.h"
+
+int main() {
+ int d = omp_get_default_device();
+ int id = omp_get_initial_device();
+ int q[128], i;
+ void *p;
+ void *result;
+
+ if (d < 0 || d >= omp_get_num_devices())
+ d = id; // default device out of range: use the initial device number
+
+ p = omp_target_alloc(130 * sizeof(int), d);
+ if (p == NULL)
+ return 0; // allocation failed: exit before the memset is issued
+
+ for (i = 0; i < 128; i++)
+ q[i] = i; // NOTE(review): q is filled here but not read again in main
+
+ result = omp_target_memset(p, 0, 130 * sizeof(int), d); // call under test
+ if (result != p) {
+ abort();
+ }
+
+ int q2[128];
+ for (i = 0; i < 128; ++i)
+ q2[i] = i; // pre-fill with index values; the copy below overwrites them
+ if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
+ 0, NULL)) // copies from p on device d into q2 on device id
+ abort();
+
+#pragma omp taskwait
+
+ for (i = 0; i < 128; ++i)
+ if (q2[i] != 0)
+ abort(); // every copied element must be the zero written by the memset
+
+ omp_target_free(p, d);
+
+ return 0;
+}
+
+// clang-format off
+
+/// CHECK: Callback Init:
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_alloc
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_alloc {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_memset {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_memset {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_transfer_from_device {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_transfer_from_device {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_delete {{.+}} src_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_delete {{.+}} src_device_num=[[DEVICE]]
+
+/// CHECK: Callback Fini:
+
+// clang-format on
diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
index e727fa3ebad28..1d7cb86c0799c 100644
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -325,10 +325,12 @@ typedef enum ompt_target_data_op_t {
ompt_target_data_delete = 4,
ompt_target_data_associate = 5,
ompt_target_data_disassociate = 6,
+ ompt_target_data_memset = 8,
ompt_target_data_alloc_async = 17,
ompt_target_data_transfer_to_device_async = 18,
ompt_target_data_transfer_from_device_async = 19,
- ompt_target_data_delete_async = 20
+ ompt_target_data_delete_async = 20,
+ ompt_target_data_memset_async = 24,
} ompt_target_data_op_t;
typedef enum ompt_work_t {
More information about the Openmp-commits
mailing list