[Openmp-commits] [llvm] [openmp] [OMPT] Add callback for `omp_target_memset` calls (PR #194168)
via Openmp-commits
openmp-commits at lists.llvm.org
Sat Apr 25 11:32:52 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Jan André Reuter (Thyre)
<details>
<summary>Changes</summary>
OpenMP v6.0 added new enum values to `ompt_callback_target_data_op_t` for communicating `memset` events. Add a corresponding callback invocation to `omp_target_memset` calls.
Expand the tests to ensure correct behavior.
---
Full diff: https://github.com/llvm/llvm-project/pull/194168.diff
8 Files Affected:
- (modified) offload/include/OpenMP/OMPT/Interface.h (+13)
- (modified) offload/libomptarget/OpenMP/API.cpp (+5)
- (modified) offload/libomptarget/OpenMP/OMPT/Callback.cpp (+27)
- (modified) offload/test/ompt/callbacks.h (+7-1)
- (added) offload/test/ompt/target_memset.c (+76)
- (added) offload/test/ompt/target_memset_async.c (+79)
- (added) offload/test/ompt/target_memset_emi.c (+78)
- (modified) openmp/runtime/src/include/omp-tools.h.var (+3-1)
``````````diff
diff --git a/offload/include/OpenMP/OMPT/Interface.h b/offload/include/OpenMP/OMPT/Interface.h
index 43fb193bc75a6..6961641769b76 100644
--- a/offload/include/OpenMP/OMPT/Interface.h
+++ b/offload/include/OpenMP/OMPT/Interface.h
@@ -126,6 +126,14 @@ class Interface {
void endTargetDisassociatePointer(int64_t DeviceId, void *HstPtrBegin,
void *TgtPtrBegin, size_t Size, void *Code);
+ /// Top-level function for invoking callback before target memset API
+ void beginTargetMemset(int64_t DeviceId, void *HostPtrBegin,
+ void *TgtPtrBegin, size_t Size, void *Code);
+
+ /// Top-level function for invoking callback after target memset API
+ void endTargetMemset(int64_t DeviceId, void *HostPtrBegin, void *TgtPtrBegin,
+ size_t Size, void *Code);
+
// Target kernel callbacks
/// Top-level function for invoking callback before target construct
@@ -166,6 +174,11 @@ class Interface {
std::mem_fn(&Interface::beginTargetDisassociatePointer),
std::mem_fn(&Interface::endTargetDisassociatePointer));
+ if constexpr (OpType == ompt_target_data_memset ||
+ OpType == ompt_target_data_memset_async)
+ return std::make_pair(std::mem_fn(&Interface::beginTargetMemset),
+ std::mem_fn(&Interface::endTargetMemset));
+
llvm_unreachable("Unhandled target data operation type!");
}
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 6dcd94e48e987..dc4bccd01dfea 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -477,6 +477,11 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
ODBG(ODT_Interface) << "filling memory on host via memset";
memset(Ptr, ByteVal, NumBytes); // ignore return value, memset() cannot fail
} else {
+ OMPT_IF_BUILT(InterfaceRAII TargetMemsetRAII(
+ RegionInterface.getCallbacks<ompt_target_data_memset>(), DeviceNum,
+ nullptr, const_cast<void *>(Ptr), NumBytes,
+ __builtin_return_address(0)));
+
// TODO: replace the omp_target_memset() slow path with the fast path.
// That will require the ability to execute a kernel from within
// libomptarget.so (which we do not have at the moment).
diff --git a/offload/libomptarget/OpenMP/OMPT/Callback.cpp b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
index 99c8a122c81f4..d74b9fb08afbe 100644
--- a/offload/libomptarget/OpenMP/OMPT/Callback.cpp
+++ b/offload/libomptarget/OpenMP/OMPT/Callback.cpp
@@ -383,6 +383,33 @@ void Interface::endTargetDisassociatePointer(int64_t DeviceId,
}
}
+void Interface::beginTargetMemset(int64_t DeviceId, void *HostPtrBegin,
+ void *TgtPtrBegin, size_t Size, void *Code) {
+ beginTargetDataOperation();
+ if (ompt_callback_target_data_op_emi_fn) {
+ ompt_callback_target_data_op_emi_fn(
+ ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
+ ompt_target_data_memset, HostPtrBegin, omp_get_initial_device(),
+ TgtPtrBegin, DeviceId, Size, Code);
+ } else if (ompt_callback_target_data_op_fn) {
+ HostOpId = createOpId();
+ ompt_callback_target_data_op_fn(
+ TargetData.value, HostOpId, ompt_target_data_memset, HostPtrBegin,
+ omp_get_initial_device(), TgtPtrBegin, DeviceId, Size, Code);
+ }
+}
+
+void Interface::endTargetMemset(int64_t DeviceId, void *HostPtrBegin,
+ void *TgtPtrBegin, size_t Size, void *Code) {
+ if (ompt_callback_target_data_op_emi_fn) {
+ ompt_callback_target_data_op_emi_fn(
+ ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
+ ompt_target_data_memset, HostPtrBegin, omp_get_initial_device(),
+ TgtPtrBegin, DeviceId, Size, Code);
+ }
+ endTargetDataOperation();
+}
+
void Interface::beginTarget(int64_t DeviceId, void *Code) {
beginTargetRegion();
if (ompt_callback_target_emi_fn) {
diff --git a/offload/test/ompt/callbacks.h b/offload/test/ompt/callbacks.h
index 2e7763f0abbac..b82c15fd5b48f 100644
--- a/offload/test/ompt/callbacks.h
+++ b/offload/test/ompt/callbacks.h
@@ -13,10 +13,16 @@ static const char *ompt_target_data_op_t_values[] = {
"ompt_target_data_delete",
"ompt_target_data_associate",
"ompt_target_data_disassociate",
+ "ompt_target_data_transfer",
+ "ompt_target_data_memset",
+ "ompt_target_data_transfer_rect",
"ompt_target_data_alloc_async",
"ompt_target_data_transfer_to_device_async",
"ompt_target_data_transfer_from_device_async",
- "ompt_target_data_delete_async"};
+ "ompt_target_data_delete_async",
+ "ompt_target_data_transfer_async",
+ "ompt_target_data_memset_async",
+ "ompt_target_data_transfer_rect_async"};
static const char *ompt_scope_endpoint_t_values[] = {
"", "ompt_scope_begin", "ompt_scope_end", "ompt_scope_beginend"};
diff --git a/offload/test/ompt/target_memset.c b/offload/test/ompt/target_memset.c
new file mode 100644
index 0000000000000..285e6f014067f
--- /dev/null
+++ b/offload/test/ompt/target_memset.c
@@ -0,0 +1,76 @@
+// clang-format off
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// REQUIRES: gpu
+// clang-format on
+
+/*
+ * Verify that for the target OpenMP APIs, the return address is non-null and
+ * distinct.
+ */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_non_emi.h"
+
+int main() {
+ int d = omp_get_default_device();
+ int id = omp_get_initial_device();
+ int q[128], i;
+ void *p;
+ void *result;
+
+ if (d < 0 || d >= omp_get_num_devices())
+ d = id;
+
+ p = omp_target_alloc(130 * sizeof(int), d);
+ if (p == NULL)
+ return 0;
+
+ for (i = 0; i < 128; i++)
+ q[i] = i;
+
+ result = omp_target_memset(p, 0, 130 * sizeof(int), d);
+ if (result != p) {
+ abort();
+ }
+
+ int q2[128];
+ for (i = 0; i < 128; ++i)
+ q2[i] = i;
+ if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
+ 0, NULL))
+ abort();
+
+#pragma omp taskwait
+
+ for (i = 0; i < 128; ++i)
+ if (q2[i] != 0)
+ abort();
+
+ omp_target_free(p, d);
+
+ return 0;
+}
+
+// clang-format off
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_memset
+/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE1]]
+/// CHECK: code=[[CODE2:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_transfer_from_device
+/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE2]]
+/// CHECK: code=[[CODE3:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_delete
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE3]]
diff --git a/offload/test/ompt/target_memset_async.c b/offload/test/ompt/target_memset_async.c
new file mode 100644
index 0000000000000..634d224d84d19
--- /dev/null
+++ b/offload/test/ompt/target_memset_async.c
@@ -0,0 +1,79 @@
+// clang-format off
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// REQUIRES: gpu
+// clang-format on
+
+/*
+ * Verify that for the target OpenMP APIs, the return address is non-null and
+ * distinct.
+ */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_non_emi.h"
+
+int main() {
+ int d = omp_get_default_device();
+ int id = omp_get_initial_device();
+ int q[128], i;
+ void *p;
+ void *result;
+
+ if (d < 0 || d >= omp_get_num_devices())
+ d = id;
+
+ p = omp_target_alloc(130 * sizeof(int), d);
+ if (p == NULL)
+ return 0;
+
+ for (i = 0; i < 128; i++)
+ q[i] = i;
+
+ result = omp_target_memset_async(p, 0, 130 * sizeof(int), d, 0, NULL);
+
+#pragma omp taskwait
+
+ if (result != p) {
+ abort();
+ }
+
+ int q2[128];
+ for (i = 0; i < 128; ++i)
+ q2[i] = i;
+ if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
+ 0, NULL))
+ abort();
+
+#pragma omp taskwait
+
+ for (i = 0; i < 128; ++i)
+ if (q2[i] != 0)
+ abort();
+
+ omp_target_free(p, d);
+
+ return 0;
+}
+
+// clang-format off
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_alloc
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_memset
+/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE1]]
+/// CHECK: code=[[CODE2:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_transfer_from_device
+/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE2]]
+/// CHECK: code=[[CODE3:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=ompt_target_data_delete
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE3]]
diff --git a/offload/test/ompt/target_memset_emi.c b/offload/test/ompt/target_memset_emi.c
new file mode 100644
index 0000000000000..333d15cc386ef
--- /dev/null
+++ b/offload/test/ompt/target_memset_emi.c
@@ -0,0 +1,78 @@
+// clang-format off
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// REQUIRES: gpu
+// clang-format on
+
+/*
+ * Verify correct callback sequence for memset API call.
+ */
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_emi.h"
+
+int main() {
+ int d = omp_get_default_device();
+ int id = omp_get_initial_device();
+ int q[128], i;
+ void *p;
+ void *result;
+
+ if (d < 0 || d >= omp_get_num_devices())
+ d = id;
+
+ p = omp_target_alloc(130 * sizeof(int), d);
+ if (p == NULL)
+ return 0;
+
+ for (i = 0; i < 128; i++)
+ q[i] = i;
+
+ result = omp_target_memset(p, 0, 130 * sizeof(int), d);
+ if (result != p) {
+ abort();
+ }
+
+ int q2[128];
+ for (i = 0; i < 128; ++i)
+ q2[i] = i;
+ if (omp_target_memcpy_async(q2, p, 128 * sizeof(int), 0, sizeof(int), id, d,
+ 0, NULL))
+ abort();
+
+#pragma omp taskwait
+
+ for (i = 0; i < 128; ++i)
+ if (q2[i] != 0)
+ abort();
+
+ omp_target_free(p, d);
+
+ return 0;
+}
+
+// clang-format off
+
+/// CHECK: Callback Init:
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_alloc
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_alloc {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_memset {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_memset {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_transfer_from_device {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_transfer_from_device {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_begin optype=ompt_target_data_delete {{.+}} src_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=ompt_scope_end optype=ompt_target_data_delete {{.+}} src_device_num=[[DEVICE]]
+
+/// CHECK: Callback Fini:
+
+// clang-format on
diff --git a/openmp/runtime/src/include/omp-tools.h.var b/openmp/runtime/src/include/omp-tools.h.var
index e727fa3ebad28..1d7cb86c0799c 100644
--- a/openmp/runtime/src/include/omp-tools.h.var
+++ b/openmp/runtime/src/include/omp-tools.h.var
@@ -325,10 +325,12 @@ typedef enum ompt_target_data_op_t {
ompt_target_data_delete = 4,
ompt_target_data_associate = 5,
ompt_target_data_disassociate = 6,
+ ompt_target_data_memset = 8,
ompt_target_data_alloc_async = 17,
ompt_target_data_transfer_to_device_async = 18,
ompt_target_data_transfer_from_device_async = 19,
- ompt_target_data_delete_async = 20
+ ompt_target_data_delete_async = 20,
+ ompt_target_data_memset_async = 24,
} ompt_target_data_op_t;
typedef enum ompt_work_t {
``````````
</details>
https://github.com/llvm/llvm-project/pull/194168
More information about the Openmp-commits
mailing list