[Openmp-commits] [openmp] [libomptarget] [OMPT] Fixed return address computation for OMPT events. (PR #80498)
via Openmp-commits
openmp-commits at lists.llvm.org
Fri Feb 2 14:03:20 PST 2024
https://github.com/dhruvachak created https://github.com/llvm/llvm-project/pull/80498
Currently, __builtin_return_address is used to compute the return address at the point where the callback invoker is created. However, this may result in a return address that points into an internal runtime function, which is not what a tool would typically want. A tool wants to know the location in user code from which the runtime entry point was invoked.
This change adds a thread-local variable that is assigned the return address at the OpenMP runtime entry points. An RAII object manages the modifications to the thread-local variable. Whenever the return address is required for OMPT events, it is read from the thread-local variable.
>From c68b08d17e91bc57dbf58902fd16910cffdad52b Mon Sep 17 00:00:00 2001
From: Dhruva Chakrabarti <Dhruva.Chakrabarti at amd.com>
Date: Fri, 2 Feb 2024 15:18:57 -0500
Subject: [PATCH] [libomptarget] [OMPT] Fixed return address computation for
OMPT events.
Currently, __builtin_return_address is used to compute the return
address at the point where the callback invoker is created. However,
this may result in a return address that points into an internal
runtime function, which is not what a tool would typically want. A tool
wants to know the location in user code from which the runtime entry
point was invoked.
This change adds a thread-local variable that is assigned the return
address at the OpenMP runtime entry points. An RAII object manages the
modifications to the thread-local variable. Whenever the return address
is required for OMPT events, it is read from the thread-local variable.
---
.../include/OpenMP/OMPT/Interface.h | 38 +++++++++++-
openmp/libomptarget/src/LegacyAPI.cpp | 28 ++++++---
openmp/libomptarget/src/OpenMP/API.cpp | 42 ++++++++++++-
.../libomptarget/src/OpenMP/OMPT/Callback.cpp | 2 +
openmp/libomptarget/src/device.cpp | 8 +--
openmp/libomptarget/src/interface.cpp | 21 ++++---
openmp/libomptarget/test/ompt/callbacks.h | 5 +-
openmp/libomptarget/test/ompt/target_memcpy.c | 61 +++++++++++++++++++
openmp/libomptarget/test/ompt/veccopy.c | 28 ++++++++-
openmp/libomptarget/test/ompt/veccopy_data.c | 33 ++++++++++
openmp/libomptarget/test/ompt/veccopy_emi.c | 39 ++++++++++++
11 files changed, 275 insertions(+), 30 deletions(-)
create mode 100644 openmp/libomptarget/test/ompt/target_memcpy.c
diff --git a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
index ed34cbed785bc..13eca730a9295 100644
--- a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
+++ b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#define OMPT_IF_BUILT(stmt) stmt
-#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
/// Callbacks for target regions require task_data representing the
/// encountering task.
@@ -211,6 +210,11 @@ class Interface {
/// Thread local state for target region and associated metadata
extern thread_local Interface RegionInterface;
+/// Thread-local variable holding the return address.
+/// When __builtin_return_address is used to set the return address, pass 0
+/// as its only argument; other levels may have unpredictable effects.
+extern thread_local void *ReturnAddress;
+
template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args,
std::index_sequence<IndexSeq...>) {
@@ -249,10 +253,42 @@ template <typename CallbackPairTy, typename... ArgsTy>
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
-> InterfaceRAII<CallbackPairTy, ArgsTy...>;
+/// Sets and resets the thread-local return address. An object of this class
+/// is expected to be created at each runtime entry point. If the thread-local
+/// return address is still null, the ctor sets it and sets \p IsSetter. The
+/// dtor resets the return address only if this same object set it. So if an
+/// object is created in a nested runtime function, its ctor/dtor do nothing
+/// since the thread-local return address is already set.
+class ReturnAddressSetterRAII {
+public:
+ ReturnAddressSetterRAII(void *RA) : IsSetter(false) {
+ // Handle nested calls. If already set, do not set again since it
+ // must be in a nested call.
+ if (ReturnAddress == nullptr) {
+ // Store the return address to a thread local variable.
+ ReturnAddress = RA;
+ IsSetter = true;
+ }
+ }
+ ~ReturnAddressSetterRAII() {
+ // Reset the return address if this object set it.
+ if (IsSetter)
+ ReturnAddress = nullptr;
+ }
+
+private:
+ // Did this object set the thread-local return address?
+ bool IsSetter;
+};
+
} // namespace ompt
} // namespace target
} // namespace omp
} // namespace llvm
+
+// The getter returns the address stored in the thread local variable.
+#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress
+
#else
#define OMPT_IF_BUILT(stmt)
#endif
diff --git a/openmp/libomptarget/src/LegacyAPI.cpp b/openmp/libomptarget/src/LegacyAPI.cpp
index d0f21a36513ad..91d5642e81128 100644
--- a/openmp/libomptarget/src/LegacyAPI.cpp
+++ b/openmp/libomptarget/src/LegacyAPI.cpp
@@ -10,15 +10,21 @@
//
//===----------------------------------------------------------------------===//
+#include "OpenMP/OMPT/Interface.h"
#include "omptarget.h"
#include "private.h"
#include "Shared/Profile.h"
+#ifdef OMPT_SUPPORT
+using namespace llvm::omp::target::ompt;
+#endif
+
EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -30,7 +36,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -39,6 +45,7 @@ EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -47,6 +54,7 @@ EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -56,7 +64,7 @@ EXTERN void __tgt_target_data_update_nowait(
int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE();
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -68,7 +76,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -78,6 +86,7 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, 0};
return __tgt_target_kernel(Loc, DeviceId, -1, -1, HostPtr, &KernelArgs);
@@ -87,6 +96,7 @@ EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -96,7 +106,7 @@ EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
int64_t *ArgTypes, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE();
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
@@ -107,7 +117,7 @@ EXTERN int __tgt_target_nowait_mapper(
map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, ArgNames, ArgMappers);
}
@@ -120,7 +130,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
void **ArgMappers, int32_t NumTeams,
int32_t ThreadLimit) {
TIMESCOPE_WITH_IDENT(Loc);
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, 0};
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
@@ -132,6 +142,7 @@ EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
int64_t *ArgTypes, int32_t NumTeams,
int32_t ThreadLimit) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, nullptr, nullptr,
NumTeams, ThreadLimit);
@@ -145,7 +156,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, nullptr, nullptr,
NumTeams, ThreadLimit);
@@ -158,7 +169,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, NumTeams, ThreadLimit);
@@ -182,6 +193,7 @@ EXTERN int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
KernelArgs);
}
diff --git a/openmp/libomptarget/src/OpenMP/API.cpp b/openmp/libomptarget/src/OpenMP/API.cpp
index 1ab1877774f6e..85fb08c00a9a7 100644
--- a/openmp/libomptarget/src/OpenMP/API.cpp
+++ b/openmp/libomptarget/src/OpenMP/API.cpp
@@ -16,6 +16,7 @@
#include "rtl.h"
#include "OpenMP/InternalTypes.h"
+#include "OpenMP/OMPT/Interface.h"
#include "OpenMP/omp.h"
#include "Shared/Profile.h"
@@ -26,6 +27,10 @@
#include <cstring>
#include <mutex>
+#ifdef OMPT_SUPPORT
+using namespace llvm::omp::target::ompt;
+#endif
+
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name);
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
@@ -59,6 +64,7 @@ int32_t __kmpc_omp_task_with_deps(ident_t *loc_ref, int32_t gtid,
EXTERN int omp_get_num_devices(void) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
size_t NumDevices = PM->getNumDevices();
DP("Call to omp_get_num_devices returning %zd\n", NumDevices);
@@ -68,6 +74,7 @@ EXTERN int omp_get_num_devices(void) {
EXTERN int omp_get_device_num(void) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
int HostDevice = omp_get_initial_device();
DP("Call to omp_get_device_num returning %d\n", HostDevice);
@@ -77,6 +84,7 @@ EXTERN int omp_get_device_num(void) {
EXTERN int omp_get_initial_device(void) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
int HostDevice = omp_get_num_devices();
DP("Call to omp_get_initial_device returning %d\n", HostDevice);
return HostDevice;
@@ -85,52 +93,70 @@ EXTERN int omp_get_initial_device(void) {
EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) +
";size=" + std::to_string(Size));
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
}
EXTERN void *llvm_omp_target_alloc_device(size_t Size, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
}
EXTERN void *llvm_omp_target_alloc_host(size_t Size, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_HOST, __func__);
}
EXTERN void *llvm_omp_target_alloc_shared(size_t Size, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetAllocExplicit(Size, DeviceNum, TARGET_ALLOC_SHARED, __func__);
}
EXTERN void omp_target_free(void *Ptr, int DeviceNum) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEFAULT, __func__);
}
EXTERN void llvm_omp_target_free_device(void *Ptr, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_DEVICE, __func__);
}
EXTERN void llvm_omp_target_free_host(void *Ptr, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetFreeExplicit(Ptr, DeviceNum, TARGET_ALLOC_HOST, __func__);
}
EXTERN void llvm_omp_target_free_shared(void *Ptre, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetFreeExplicit(Ptre, DeviceNum, TARGET_ALLOC_SHARED, __func__);
}
-EXTERN void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; }
-EXTERN void *llvm_omp_get_dynamic_shared() { return nullptr; }
+EXTERN void *llvm_omp_target_dynamic_shared_alloc() {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ return nullptr;
+}
+
+EXTERN void *llvm_omp_get_dynamic_shared() {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ return nullptr;
+}
EXTERN [[nodiscard]] void *llvm_omp_target_lock_mem(void *Ptr, size_t Size,
int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return targetLockExplicit(Ptr, Size, DeviceNum, __func__);
}
EXTERN void llvm_omp_target_unlock_mem(void *Ptr, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetUnlockExplicit(Ptr, DeviceNum, __func__);
}
EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
DeviceNum, DPxPTR(Ptr));
@@ -167,6 +193,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) +
";src_dev=" + std::to_string(SrcDevice) +
";size=" + std::to_string(Length));
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_memcpy, dst device %d, src device %d, "
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
"src offset %zu, length %zu\n",
@@ -248,6 +275,7 @@ EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
// The helper function that calls omp_target_memcpy or omp_target_memcpy_rect
static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
if (Task == nullptr)
return OFFLOAD_FAIL;
@@ -279,6 +307,7 @@ static int libomp_target_memcpy_async_task(int32_t Gtid, kmp_task_t *Task) {
}
static int libomp_target_memset_async_task(int32_t Gtid, kmp_task_t *Task) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
if (!Task)
return OFFLOAD_FAIL;
@@ -307,6 +336,7 @@ template <class T>
static inline int
libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *),
int DepObjCount, omp_depend_t *DepObjList) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
// Create global thread ID
int Gtid = __kmpc_global_thread_num(nullptr);
@@ -340,6 +370,7 @@ libomp_helper_task_creation(T *Args, int (*Fn)(int32_t, kmp_task_t *),
EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
int DeviceNum) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
DeviceNum, Ptr, NumBytes);
@@ -382,6 +413,7 @@ EXTERN void *omp_target_memset(void *Ptr, int ByteVal, size_t NumBytes,
EXTERN void *omp_target_memset_async(void *Ptr, int ByteVal, size_t NumBytes,
int DeviceNum, int DepObjCount,
omp_depend_t *DepObjList) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu",
DeviceNum, Ptr, NumBytes);
@@ -408,6 +440,7 @@ EXTERN int omp_target_memcpy_async(void *Dst, const void *Src, size_t Length,
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DstDevice) +
";src_dev=" + std::to_string(SrcDevice) +
";size=" + std::to_string(Length));
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offset %zu, "
"src offset %zu, length %zu\n",
@@ -436,6 +469,7 @@ omp_target_memcpy_rect(void *Dst, const void *Src, size_t ElementSize,
const size_t *DstOffsets, const size_t *SrcOffsets,
const size_t *DstDimensions, const size_t *SrcDimensions,
int DstDevice, int SrcDevice) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
"src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
@@ -498,6 +532,7 @@ EXTERN int omp_target_memcpy_rect_async(
";src_dev=" + std::to_string(SrcDevice) +
";size=" + std::to_string(ElementSize) +
";num_dims=" + std::to_string(NumDims));
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
"dst addr " DPxMOD ", src addr " DPxMOD ", dst offsets " DPxMOD ", "
"src offsets " DPxMOD ", dst dims " DPxMOD ", src dims " DPxMOD ", "
@@ -534,6 +569,7 @@ EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
size_t Size, size_t DeviceOffset,
int DeviceNum) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD ", "
"device_ptr " DPxMOD ", size %zu, device_offset %zu, device_num %d\n",
DPxPTR(HostPtr), DPxPTR(DevicePtr), Size, DeviceOffset, DeviceNum);
@@ -561,6 +597,7 @@ EXTERN int omp_target_associate_ptr(const void *HostPtr, const void *DevicePtr,
EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD ", "
"device_num %d\n",
DPxPTR(HostPtr), DeviceNum);
@@ -588,6 +625,7 @@ EXTERN int omp_target_disassociate_ptr(const void *HostPtr, int DeviceNum) {
EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) {
TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
DP("Call to omp_get_mapped_ptr with ptr " DPxMOD ", device_num %d.\n",
DPxPTR(Ptr), DeviceNum);
diff --git a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
index 82934f10486c5..66435d2a4fe64 100644
--- a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
+++ b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
@@ -51,6 +51,8 @@ LibomptargetRtlFinalizer *LibraryFinalizer = nullptr;
thread_local Interface llvm::omp::target::ompt::RegionInterface;
+thread_local void *llvm::omp::target::ompt::ReturnAddress = nullptr;
+
bool llvm::omp::target::ompt::Initialized = false;
ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr;
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 67edc559e8ede..5fe3f508b739c 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -123,7 +123,7 @@ void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII(
RegionInterface.getCallbacks<ompt_target_data_alloc>(),
DeviceID, HstPtr, &TargetPtr, Size,
- /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));)
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
return TargetPtr;
@@ -134,7 +134,7 @@ int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) {
OMPT_IF_BUILT(InterfaceRAII TargetDataDeleteRAII(
RegionInterface.getCallbacks<ompt_target_data_delete>(),
DeviceID, TgtAllocBegin,
- /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));)
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
return RTL->data_delete(RTLDeviceID, TgtAllocBegin, Kind);
}
@@ -152,7 +152,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
InterfaceRAII TargetDataSubmitRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
DeviceID, TgtPtrBegin, HstPtrBegin, Size,
- /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));)
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
@@ -174,7 +174,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
InterfaceRAII TargetDataRetrieveRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
DeviceID, HstPtrBegin, TgtPtrBegin, Size,
- /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));)
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!RTL->data_retrieve_async || !RTL->synchronize)
return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp
index 49495ac266f1b..8725e5eb55fc9 100644
--- a/openmp/libomptarget/src/interface.cpp
+++ b/openmp/libomptarget/src/interface.cpp
@@ -108,7 +108,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
? RegionInterface.getCallbacks<ompt_target_exit_data>()
: RegionInterface.getCallbacks<ompt_target_update>();
InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId,
- OMPT_GET_RETURN_ADDRESS(0));)
+ OMPT_GET_RETURN_ADDRESS);)
int Rc = OFFLOAD_SUCCESS;
Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
@@ -130,7 +130,7 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId,
int64_t *ArgTypes,
map_var_info_t *ArgNames,
void **ArgMappers) {
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, targetDataBegin,
"Entering OpenMP data region with being_mapper",
@@ -142,7 +142,7 @@ EXTERN void __tgt_target_data_begin_nowait_mapper(
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetData<TaskAsyncInfoWrapperTy>(
Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, targetDataBegin,
@@ -158,7 +158,7 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId,
int64_t *ArgTypes,
map_var_info_t *ArgNames,
void **ArgMappers) {
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetData<AsyncInfoTy>(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, targetDataEnd,
"Exiting OpenMP data region with end_mapper", "end");
@@ -169,7 +169,7 @@ EXTERN void __tgt_target_data_end_nowait_mapper(
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetData<TaskAsyncInfoWrapperTy>(
Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, targetDataEnd,
@@ -182,7 +182,7 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId,
int64_t *ArgTypes,
map_var_info_t *ArgNames,
void **ArgMappers) {
-
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetData<AsyncInfoTy>(
Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, targetDataUpdate,
@@ -195,6 +195,7 @@ EXTERN void __tgt_target_data_update_nowait_mapper(
void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
targetData<TaskAsyncInfoWrapperTy>(
Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, targetDataUpdate,
@@ -293,7 +294,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
/// RAII to establish tool anchors before and after target region
OMPT_IF_BUILT(InterfaceRAII TargetRAII(
RegionInterface.getCallbacks<ompt_target>(), DeviceId,
- /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));)
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
int Rc = OFFLOAD_SUCCESS;
Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
@@ -322,6 +323,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
EXTERN int __tgt_target_kernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
int32_t ThreadLimit, void *HostPtr,
KernelArgsTy *KernelArgs) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
if (KernelArgs->Flags.NoWait)
return targetKernel<TaskAsyncInfoWrapperTy>(
Loc, DeviceId, NumTeams, ThreadLimit, HostPtr, KernelArgs);
@@ -341,6 +343,7 @@ EXTERN int __tgt_activate_record_replay(int64_t DeviceId, uint64_t MemorySize,
void *VAddr, bool IsRecord,
bool SaveOutput,
uint64_t &ReqPtrArgOffset) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
auto DeviceOrErr = PM->getDevice(DeviceId);
if (!DeviceOrErr)
FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str());
@@ -376,6 +379,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
int32_t NumTeams, int32_t ThreadLimit,
uint64_t LoopTripCount) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
if (checkDeviceAndCtors(DeviceId, Loc)) {
DP("Not offloading to device %" PRId64 "\n", DeviceId);
return OMP_TGT_FAIL;
@@ -387,7 +391,7 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId,
/// RAII to establish tool anchors before and after target region
OMPT_IF_BUILT(InterfaceRAII TargetRAII(
RegionInterface.getCallbacks<ompt_target>(), DeviceId,
- /*CodePtr=*/OMPT_GET_RETURN_ADDRESS(0));)
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
AsyncInfoTy AsyncInfo(*DeviceOrErr);
int Rc = target_replay(Loc, *DeviceOrErr, HostPtr, DeviceMemory,
@@ -442,6 +446,7 @@ EXTERN int __tgt_print_device_info(int64_t DeviceId) {
}
EXTERN void __tgt_target_nowait_query(void **AsyncHandle) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
if (!AsyncHandle || !*AsyncHandle) {
FATAL_MESSAGE0(
1, "Receive an invalid async handle from the current OpenMP task. Is "
diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h
index 083006f756def..1f9b7c177b286 100644
--- a/openmp/libomptarget/test/ompt/callbacks.h
+++ b/openmp/libomptarget/test/ompt/callbacks.h
@@ -1,4 +1,5 @@
#include <assert.h>
+#include <stdio.h>
#include <stdlib.h>
// Tool related code below
@@ -37,8 +38,6 @@ static void on_ompt_callback_target_data_op(
void *src_addr, int src_device_num, void *dest_addr, int dest_device_num,
size_t bytes, const void *codeptr_ra) {
assert(codeptr_ra != 0 && "Unexpected null codeptr");
- // Both src and dest must not be null
- assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null");
printf(" Callback DataOp: target_id=%lu host_op_id=%lu optype=%d src=%p "
"src_device_num=%d "
"dest=%p dest_device_num=%d bytes=%lu code=%p\n",
@@ -80,8 +79,6 @@ static void on_ompt_callback_target_data_op_emi(
void *dest_addr, int dest_device_num, size_t bytes,
const void *codeptr_ra) {
assert(codeptr_ra != 0 && "Unexpected null codeptr");
- // Both src and dest must not be null
- assert((src_addr != 0 || dest_addr != 0) && "Both src and dest addr null");
if (endpoint == ompt_scope_begin)
*host_op_id = next_op_id++;
printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
diff --git a/openmp/libomptarget/test/ompt/target_memcpy.c b/openmp/libomptarget/test/ompt/target_memcpy.c
new file mode 100644
index 0000000000000..444f4b7bdbda3
--- /dev/null
+++ b/openmp/libomptarget/test/ompt/target_memcpy.c
@@ -0,0 +1,61 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+/*
+ * Verify that for the target OpenMP APIs, the return address is non-null and
+ * distinct.
+ */
+
+#include <omp.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_non_emi.h"
+
+int main() {
+ int dev = omp_get_default_device();
+ int host = omp_get_initial_device();
+
+ int host_var1 = 42;
+ int host_var2 = 0;
+ void *dev_ptr = NULL;
+
+ // Allocate space on the device
+ dev_ptr = omp_target_alloc(sizeof(int), dev);
+ if (dev_ptr == NULL)
+ abort();
+
+ // H2D transfer
+ if (omp_target_memcpy(dev_ptr, &host_var1, sizeof(int), 0, 0, dev, host))
+ abort();
+
+ // D2H transfer
+ if (omp_target_memcpy(&host_var2, dev_ptr, sizeof(int), 0, 0, host, dev))
+ abort();
+
+ // Free the device location
+ omp_target_free(dev_ptr, dev);
+
+ // Both host variables should have the same value.
+ return host_var1 != host_var2;
+}
+
+// clang-format off
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:.*]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE1]]
+/// CHECK: code=[[CODE2:.*]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE2]]
+/// CHECK: code=[[CODE3:.*]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE3]]
diff --git a/openmp/libomptarget/test/ompt/veccopy.c b/openmp/libomptarget/test/ompt/veccopy.c
index 79cd918a60c57..80e71fd8a48cb 100644
--- a/openmp/libomptarget/test/ompt/veccopy.c
+++ b/openmp/libomptarget/test/ompt/veccopy.c
@@ -54,29 +54,51 @@ int main() {
return rc;
}
+// clang-format off
/// CHECK: Callback Init:
/// CHECK: Callback Load:
-/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1
+/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1 device_num=[[DEVICE_NUM:[0-9]+]]
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:.*]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
-/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2
+/// CHECK: code=[[CODE1]]
+/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 device_num=[[DEVICE_NUM]] code=[[CODE1]]
/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=1
+/// CHECK-SAME: device_num=[[DEVICE_NUM]]
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE2:.*]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
-/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2
+/// CHECK: code=[[CODE2]]
+/// CHECK: Callback Target: target_id=[[TARGET_ID:[0-9]+]] kind=1 endpoint=2 device_num=[[DEVICE_NUM]] code=[[CODE2]]
/// CHECK: Callback Fini:
diff --git a/openmp/libomptarget/test/ompt/veccopy_data.c b/openmp/libomptarget/test/ompt/veccopy_data.c
index 540a7d6423345..cef1de316a7a1 100644
--- a/openmp/libomptarget/test/ompt/veccopy_data.c
+++ b/openmp/libomptarget/test/ompt/veccopy_data.c
@@ -82,46 +82,79 @@ int main() {
/// CHECK: Callback Load:
/// CHECK: Callback Target EMI: kind=2 endpoint=1
/// CHECK-NOT: device_num=-1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:.*]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback Target EMI: kind=2 endpoint=2
/// CHECK-NOT: device_num=-1
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK-NOT: device_num=-1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE2:.*]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Target EMI: kind=1 endpoint=2
/// CHECK-NOT: device_num=-1
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Target EMI: kind=3 endpoint=1
/// CHECK-NOT: device_num=-1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE3:.*]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE3]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE3]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
+/// CHECK: code=[[CODE3]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
+/// CHECK: code=[[CODE3]]
/// CHECK: Callback Target EMI: kind=3 endpoint=2
/// CHECK-NOT: device_num=-1
+/// CHECK: code=[[CODE3]]
/// CHECK: Callback Target EMI: kind=1 endpoint=1
/// CHECK-NOT: device_num=-1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE4:.*]]
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback Target EMI: kind=1 endpoint=2
/// CHECK-NOT: device_num=-1
+/// CHECK: code=[[CODE4]]
/// CHECK: Callback Target EMI: kind=4 endpoint=1
/// CHECK-NOT: device_num=-1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE5:.*]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE5]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE5]]
/// CHECK: Callback Target EMI: kind=4 endpoint=2
/// CHECK-NOT: device_num=-1
+/// CHECK: code=[[CODE5]]
/// CHECK: Callback Fini:
diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c
index 37600a3482ba9..b597d7be6aff6 100644
--- a/openmp/libomptarget/test/ompt/veccopy_emi.c
+++ b/openmp/libomptarget/test/ompt/veccopy_emi.c
@@ -58,47 +58,86 @@ int main() {
/// CHECK: Callback Init:
/// CHECK: Callback Load:
/// CHECK: Callback Target EMI: kind=1 endpoint=1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE1:.*]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
+/// CHECK: code=[[CODE1]]
/// CHECK: Callback Target EMI: kind=1 endpoint=2
+/// CHECK: code=[[CODE1]]
+
/// CHECK: Callback Target EMI: kind=1 endpoint=1
+/// CHECK-NOT: code=(nil)
+/// CHECK: code=[[CODE2:.*]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=1
/// CHECK-NOT: dest=(nil)
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0
/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=0
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=3
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=1 optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback DataOp EMI: endpoint=2 optype=4
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Target EMI: kind=1 endpoint=2
+/// CHECK: code=[[CODE2]]
/// CHECK: Callback Fini:
More information about the Openmp-commits
mailing list