[Openmp-commits] [openmp] [OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' (PR #81991)
via Openmp-commits
openmp-commits at lists.llvm.org
Fri Feb 16 09:45:34 PST 2024
https://github.com/lntue updated https://github.com/llvm/llvm-project/pull/81991
>From a4ed787f54fc8c2073a4ded6ef9cbead6bdf550d Mon Sep 17 00:00:00 2001
From: Michael Halkenhaeuser <MichaelGerald.Halkenhauser at amd.com>
Date: Thu, 15 Feb 2024 09:51:17 -0600
Subject: [PATCH] [OpenMP][OMPT] Add OMPT callback for device data exchange
'Device-to-Device'
Since there is no `ompt_target_data_transfer_tofrom_device` value (within the
`ompt_target_data_op_t` enum), nor any other value that conveys the meaning of
an inter-device data exchange, we decided to indicate a Device-to-Device transfer
by using: optype == ompt_target_data_transfer_from_device (=3)
Hence, a Device-to-Device transfer may be identified e.g. by checking for:
(optype == 3) &&
(src_device_num < omp_get_num_devices()) &&
(dest_device_num < omp_get_num_devices())
Fixes: #66478
---
.../include/OpenMP/OMPT/Interface.h | 20 +++--
.../libomptarget/src/OpenMP/OMPT/Callback.cpp | 50 ++++++------
openmp/libomptarget/src/device.cpp | 10 ++-
openmp/libomptarget/test/ompt/callbacks.h | 7 +-
.../test/ompt/target_memcpy_emi.c | 79 +++++++++++++++++++
5 files changed, 127 insertions(+), 39 deletions(-)
create mode 100644 openmp/libomptarget/test/ompt/target_memcpy_emi.c
diff --git a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
index 13eca730a9295d..327fadfcd4acd3 100644
--- a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
+++ b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
@@ -54,12 +54,14 @@ class Interface {
void **TgtPtrBegin, size_t Size, void *Code);
/// Top-level function for invoking callback before data submit
- void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code);
/// Top-level function for invoking callback after data submit
- void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin, size_t Size,
+ void *Code);
/// Top-level function for invoking callback before device data deallocation
void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
@@ -68,12 +70,14 @@ class Interface {
void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
/// Top-level function for invoking callback before data retrieve
- void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code);
/// Top-level function for invoking callback after data retrieve
- void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size, void *Code);
+ void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code);
/// Top-level function for invoking callback before kernel dispatch
void beginTargetSubmit(unsigned int NumTeams = 1);
diff --git a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
index 66435d2a4fe64f..0068212efa53b3 100644
--- a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
+++ b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
@@ -119,41 +119,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
endTargetDataOperation();
}
-void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
- void *HstPtrBegin, size_t Size,
- void *Code) {
+void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_to_device, HstPtrBegin,
- /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
- Code);
+ ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
- HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin,
- DeviceId, Size, Code);
+ SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
}
}
-void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
- void *HstPtrBegin, size_t Size,
- void *Code) {
+void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_to_device, HstPtrBegin,
- /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
- Code);
+ ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
}
endTargetDataOperation();
}
@@ -193,41 +190,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
endTargetDataOperation();
}
-void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size,
- void *Code) {
+void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
beginTargetDataOperation();
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
- HstPtrBegin,
- /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+ ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
} else if (ompt_callback_target_data_op_fn) {
// HostOpId is set by the runtime
HostOpId = createOpId();
// Invoke the tool supplied data op callback
ompt_callback_target_data_op_fn(
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
- TgtPtrBegin, DeviceId, HstPtrBegin,
- /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+ SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
}
}
-void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
- void *TgtPtrBegin, size_t Size,
- void *Code) {
+void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+ int64_t DstDeviceId, void *DstPtrBegin,
+ size_t Size, void *Code) {
// Only EMI callback handles end scope
if (ompt_callback_target_data_op_emi_fn) {
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
// callback
ompt_callback_target_data_op_emi_fn(
ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
- ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
- HstPtrBegin,
- /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+ ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
+ DstPtrBegin, DstDeviceId, Size, Code);
}
endTargetDataOperation();
}
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 5fe3f508b739cb..a4dc2b9ab27a64 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -151,7 +151,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
OMPT_IF_BUILT(
InterfaceRAII TargetDataSubmitRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
- DeviceID, TgtPtrBegin, HstPtrBegin, Size,
+ omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
@@ -173,7 +173,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
OMPT_IF_BUILT(
InterfaceRAII TargetDataRetrieveRAII(
RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
- DeviceID, HstPtrBegin, TgtPtrBegin, Size,
+ DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
/*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!RTL->data_retrieve_async || !RTL->synchronize)
@@ -185,6 +185,12 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
// Copy data from current device to destination device directly
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
int64_t Size, AsyncInfoTy &AsyncInfo) {
+ /// RAII to establish tool anchors before and after data exchange
+ OMPT_IF_BUILT(
+ InterfaceRAII TargetDataExchangeRAII(
+ RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
+ DeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr, Size,
+ /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h
index 1f9b7c177b2860..597b6f01432102 100644
--- a/openmp/libomptarget/test/ompt/callbacks.h
+++ b/openmp/libomptarget/test/ompt/callbacks.h
@@ -81,11 +81,16 @@ static void on_ompt_callback_target_data_op_emi(
assert(codeptr_ra != 0 && "Unexpected null codeptr");
if (endpoint == ompt_scope_begin)
*host_op_id = next_op_id++;
+ // If target_task_data is non-null: src_addr or dest_addr must not be null
+ assert(target_task_data == 0 || src_addr != 0 || dest_addr != 0);
+ // target_task_data may be null, avoid dereferencing it
+ uint64_t target_task_data_value =
+ (target_task_data) ? target_task_data->value : 0;
printf(" Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
"(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p "
"src_device_num=%d "
"dest=%p dest_device_num=%d bytes=%lu code=%p\n",
- endpoint, optype, target_task_data, target_task_data->value,
+ endpoint, optype, target_task_data, target_task_data_value,
target_data, target_data->value, host_op_id, *host_op_id, src_addr,
src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
}
diff --git a/openmp/libomptarget/test/ompt/target_memcpy_emi.c b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
new file mode 100644
index 00000000000000..7b66b0e595a561
--- /dev/null
+++ b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
@@ -0,0 +1,79 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+/*
+ * Verify all three data transfer directions: H2D, D2D and D2H
+ */
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_emi.h"
+
+int main(void) {
+ int NumDevices = omp_get_num_devices();
+ assert(NumDevices > 0);
+ int Device = 0;
+ int Host = omp_get_initial_device();
+
+ printf("Allocating Memory on Device\n");
+ int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device);
+ int *HstPtr = (int *)malloc(sizeof(int));
+ *HstPtr = 42;
+
+ printf("Testing: Host to Device\n");
+ omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host);
+
+ printf("Testing: Device to Device\n");
+ omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device);
+
+ printf("Testing: Device to Host\n");
+ omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device);
+
+ printf("Checking Correctness\n");
+ assert(*HstPtr == 42);
+
+ printf("Freeing Memory on Device\n");
+ free(HstPtr);
+ omp_target_free(DevPtr, Device);
+
+ return 0;
+}
+
+// clang-format off
+
+/// CHECK: Callback Init:
+
+/// CHECK: Allocating Memory on Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Host to Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Device to Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Device to Host
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+
+/// CHECK: Checking Correctness
+
+/// CHECK: Freeing Memory on Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]]
+
+/// CHECK: Callback Fini:
+
+// clang-format on
More information about the Openmp-commits
mailing list