[Openmp-commits] [openmp] e521752 - [OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' (#81991)
via Openmp-commits
openmp-commits at lists.llvm.org
Mon Feb 26 02:16:30 PST 2024
Author: Michael Halkenhäuser
Date: 2024-02-26T11:16:25+01:00
New Revision: e521752c04a479e3751003645a728667f3199d24
URL: https://github.com/llvm/llvm-project/commit/e521752c04a479e3751003645a728667f3199d24
DIFF: https://github.com/llvm/llvm-project/commit/e521752c04a479e3751003645a728667f3199d24.diff
LOG: [OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' (#81991)
Since there is no `ompt_target_data_transfer_tofrom_device` (within the
ompt_target_data_op_t enum) or anything else that conveys the meaning
of inter-device data exchange, we decided to indicate a Device-to-Device
transfer by using: optype == ompt_target_data_transfer_from_device (=3).
Hence, a Device-to-Device transfer may be identified, e.g., by checking for:
(optype == 3) &&
(src_device_num < omp_get_num_devices()) &&
(dest_device_num < omp_get_num_devices())
Fixes: #66478
Added:
openmp/libomptarget/test/ompt/target_memcpy_emi.c
Modified:
openmp/libomptarget/include/OpenMP/OMPT/Interface.h
openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
openmp/libomptarget/src/device.cpp
openmp/libomptarget/test/ompt/callbacks.h
openmp/libomptarget/test/ompt/target_memcpy.c
Removed:
################################################################################
diff --git a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
index 13eca730a9295d..327fadfcd4acd3 100644
--- a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
+++ b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
@@ -54,12 +54,14 @@ class Interface {
void **TgtPtrBegin, size_t Size, void *Code);
/// Top-level function for invoking callback before data submit
- void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code);
/// Top-level function for invoking callback after data submit
- void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin, size_t Size,
+ void *Code);
/// Top-level function for invoking callback before device data deallocation
void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
@@ -68,12 +70,14 @@ class Interface {
void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
/// Top-level function for invoking callback before data retrieve
- void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code);
/// Top-level function for invoking callback after data retrieve
- void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code);
/// Top-level function for invoking callback before kernel dispatch
void beginTargetSubmit(unsigned int NumTeams = 1);
diff --git a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
index 66435d2a4fe64f..f285843e39f387 100644
--- a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
+++ b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
@@ -119,41 +119,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
endTargetDataOperation();
}
-void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
- void *HstPtrBegin, size_t Size,
- void *Code) {
+void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_to_device, HstPtrBegin,
- /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
- Code);
+ ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
- HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin,
- DeviceId, Size, Code);
+ SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
}
}
-void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
- void *HstPtrBegin, size_t Size,
- void *Code) {
+void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_to_device, HstPtrBegin,
- /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
- Code);
+ ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
}
endTargetDataOperation();
}
@@ -193,41 +190,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
endTargetDataOperation();
}
-void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size,
- void *Code) {
+void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
- HstPtrBegin,
- /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+ ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
- TgtPtrBegin, DeviceId, HstPtrBegin,
- /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+ SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
}
}
-void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size,
- void *Code) {
+void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
- HstPtrBegin,
- /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+ ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
}
endTargetDataOperation();
}
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 5fe3f508b739cb..3345277d91d3a9 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -151,7 +151,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
OMPT_IF_BUILT(
InterfaceRAII TargetDataSubmitRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
- DeviceID, TgtPtrBegin, HstPtrBegin, Size,
+ omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
@@ -173,7 +173,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
OMPT_IF_BUILT(
InterfaceRAII TargetDataRetrieveRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
- DeviceID, HstPtrBegin, TgtPtrBegin, Size,
+ DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!RTL->data_retrieve_async || !RTL->synchronize)
@@ -185,6 +185,17 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
// Copy data from current device to destination device directly
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
int64_t Size, AsyncInfoTy &AsyncInfo) {
+ /// RAII to establish tool anchors before and after data exchange
+ /// Note: Despite the fact that this is a data exchange, we use 'from_device'
+ /// operation enum (w.r.t. ompt_target_data_op_t) as there is currently
+ /// no better alternative. It is still possible to distinguish this
+ /// scenario from a real data retrieve by checking if both involved
+ /// device numbers are less than omp_get_num_devices().
+ OMPT_IF_BUILT(
+ InterfaceRAII TargetDataExchangeRAII(
+ RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
+ RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h
index 1f9b7c177b2860..95437d9cdcfb1f 100644
--- a/openmp/libomptarget/test/ompt/callbacks.h
+++ b/openmp/libomptarget/test/ompt/callbacks.h
@@ -81,11 +81,14 @@ static void on_ompt_callback_target_data_op_emi(
assert(codeptr_ra != 0 && "Unexpected null codeptr");
if (endpoint == ompt_scope_begin)
*host_op_id = next_op_id++;
+ // target_task_data may be null, avoid dereferencing it
+ uint64_t target_task_data_value =
+ (target_task_data) ? target_task_data->value : 0;
printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
"(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p "
"src_device_num=%d "
"dest=%p dest_device_num=%d bytes=%lu code=%p\n",
- endpoint, optype, target_task_data, target_task_data->value,
+ endpoint, optype, target_task_data, target_task_data_value,
target_data, target_data->value, host_op_id, *host_op_id, src_addr,
src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
}
diff --git a/openmp/libomptarget/test/ompt/target_memcpy.c b/openmp/libomptarget/test/ompt/target_memcpy.c
index 444f4b7bdbda3d..80a8d6a4b32e59 100644
--- a/openmp/libomptarget/test/ompt/target_memcpy.c
+++ b/openmp/libomptarget/test/ompt/target_memcpy.c
@@ -33,6 +33,10 @@ int main() {
if (omp_target_memcpy(dev_ptr, &host_var1, sizeof(int), 0, 0, dev, host))
abort();
+ // D2D transfer
+ if (omp_target_memcpy(dev_ptr, dev_ptr, sizeof(int), 0, 0, dev, dev))
+ abort();
+
// D2H transfer
if (omp_target_memcpy(&host_var2, dev_ptr, sizeof(int), 0, 0, host, dev))
abort();
@@ -46,16 +50,25 @@ int main() {
// clang-format off
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=1
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
/// CHECK-NOT: code=(nil)
-/// CHECK: code=[[CODE1:.*]]
+/// CHECK: code=[[CODE1:0x[0-f]+]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=2
+/// CHECK-SAME: src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE1]]
-/// CHECK: code=[[CODE2:.*]]
+/// CHECK: code=[[CODE2:0x[0-f]+]]
/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE2]]
-/// CHECK: code=[[CODE3:.*]]
-/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
+/// CHECK: code=[[CODE3:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=3
+/// CHECK-SAME: src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
/// CHECK-NOT: code=(nil)
/// CHECK-NOT: code=[[CODE3]]
+/// CHECK: code=[[CODE4:0x[0-f]+]]
+/// CHECK: Callback DataOp: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] optype=4
+/// CHECK-NOT: code=(nil)
+/// CHECK-NOT: code=[[CODE4]]
diff --git a/openmp/libomptarget/test/ompt/target_memcpy_emi.c b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
new file mode 100644
index 00000000000000..5347f38b87b6ff
--- /dev/null
+++ b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
@@ -0,0 +1,85 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+/*
+ * Verify all three data transfer directions: H2D, D2D and D2H
+ */
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_emi.h"
+
+int main(void) {
+ int NumDevices = omp_get_num_devices();
+ assert(NumDevices > 0 && "No device(s) present.");
+ int Device = omp_get_default_device();
+ int Host = omp_get_initial_device();
+ // Note: Zero value depicts an OFFLOAD_SUCCESS
+ int Status;
+
+ printf("Allocating Memory on Device\n");
+ int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device);
+ assert(DevPtr && "Could not allocate memory on device.");
+ int *HstPtr = (int *)malloc(sizeof(int));
+ *HstPtr = 42;
+
+ printf("Testing: Host to Device\n");
+ Status = omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host);
+ assert(Status == 0 && "H2D memory copy operation failed.\n");
+
+ printf("Testing: Device to Device\n");
+ Status = omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device);
+ assert(Status == 0 && "D2D memory copy operation failed.\n");
+
+ printf("Testing: Device to Host\n");
+ Status = omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device);
+ assert(Status == 0 && "D2H memory copy operation failed.\n");
+
+ printf("Checking Correctness\n");
+ assert(*HstPtr == 42);
+
+ printf("Freeing Memory on Device\n");
+ free(HstPtr);
+ omp_target_free(DevPtr, Device);
+
+ return 0;
+}
+
+// clang-format off
+
+/// CHECK: Callback Init:
+
+/// CHECK: Allocating Memory on Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Host to Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Device to Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Device to Host
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+
+/// CHECK: Checking Correctness
+
+/// CHECK: Freeing Memory on Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]]
+
+/// CHECK: Callback Fini:
+
+// clang-format on
More information about the Openmp-commits
mailing list