[Openmp-commits] [openmp] [OpenMP][OMPT] Add OMPT callback for device data exchange 'Device-to-Device' (PR #81991)

Michael Halkenhäuser via Openmp-commits openmp-commits at lists.llvm.org
Fri Feb 16 05:38:11 PST 2024


https://github.com/mhalk created https://github.com/llvm/llvm-project/pull/81991

Since the `ompt_target_data_op_t` enum has no `ompt_target_data_transfer_tofrom_device` value, nor any other value that conveys an inter-device data exchange, we decided to indicate a Device-to-Device transfer by using: optype == ompt_target_data_transfer_from_device (=3)

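Hence, a Device-to-Device transfer may be identified e.g. by checking for: (optype == 3) &&
(src_device_num < omp_get_num_devices()) &&
(dest_device_num < omp_get_num_devices())
(The host's device number, omp_get_initial_device(), equals omp_get_num_devices(), so both comparisons hold only when neither endpoint is the host.)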

Fixes: #66478

>From 86d9cca58a84a1f3f3c5d3bf6963c33948e36ec4 Mon Sep 17 00:00:00 2001
From: Michael Halkenhaeuser <MichaelGerald.Halkenhauser at amd.com>
Date: Thu, 15 Feb 2024 09:51:17 -0600
Subject: [PATCH] [OpenMP][OMPT] Add OMPT callback for device data exchange
 'Device-to-Device'

Since the ompt_target_data_op_t enum has no
`ompt_target_data_transfer_tofrom_device` value, nor any other value that
conveys an inter-device data exchange, we decided to indicate a
Device-to-Device transfer by using:
optype == ompt_target_data_transfer_from_device (=3)

Hence, a Device-to-Device transfer may be identified e.g. by checking for:
(optype == 3) &&
(src_device_num < omp_get_num_devices()) &&
(dest_device_num < omp_get_num_devices())

Fixes: #66478
---
 .../include/OpenMP/OMPT/Interface.h           | 20 +++--
 .../libomptarget/src/OpenMP/OMPT/Callback.cpp | 50 ++++++------
 openmp/libomptarget/src/device.cpp            | 10 ++-
 openmp/libomptarget/test/ompt/callbacks.h     |  8 +-
 .../test/ompt/target_memcpy_emi.c             | 79 +++++++++++++++++++
 5 files changed, 128 insertions(+), 39 deletions(-)
 create mode 100644 openmp/libomptarget/test/ompt/target_memcpy_emi.c

diff --git a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
index 13eca730a9295d..327fadfcd4acd3 100644
--- a/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
+++ b/openmp/libomptarget/include/OpenMP/OMPT/Interface.h
@@ -54,12 +54,14 @@ class Interface {
                           void **TgtPtrBegin, size_t Size, void *Code);
 
   /// Top-level function for invoking callback before data submit
-  void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
-                             void *TgtPtrBegin, size_t Size, void *Code);
+  void beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+                             int64_t DstDeviceId, void *DstPtrBegin,
+                             size_t Size, void *Code);
 
   /// Top-level function for invoking callback after data submit
-  void endTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin,
-                           void *TgtPtrBegin, size_t Size, void *Code);
+  void endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+                           int64_t DstDeviceId, void *DstPtrBegin, size_t Size,
+                           void *Code);
 
   /// Top-level function for invoking callback before device data deallocation
   void beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
@@ -68,12 +70,14 @@ class Interface {
   void endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin, void *Code);
 
   /// Top-level function for invoking callback before data retrieve
-  void beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
-                               void *TgtPtrBegin, size_t Size, void *Code);
+  void beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+                               int64_t DstDeviceId, void *DstPtrBegin,
+                               size_t Size, void *Code);
 
   /// Top-level function for invoking callback after data retrieve
-  void endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
-                             void *TgtPtrBegin, size_t Size, void *Code);
+  void endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+                             int64_t DstDeviceId, void *DstPtrBegin,
+                             size_t Size, void *Code);
 
   /// Top-level function for invoking callback before kernel dispatch
   void beginTargetSubmit(unsigned int NumTeams = 1);
diff --git a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
index 66435d2a4fe64f..0068212efa53b3 100644
--- a/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
+++ b/openmp/libomptarget/src/OpenMP/OMPT/Callback.cpp
@@ -119,41 +119,38 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
   endTargetDataOperation();
 }
 
-void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
-                                      void *HstPtrBegin, size_t Size,
-                                      void *Code) {
+void Interface::beginTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+                                      int64_t DstDeviceId, void *DstPtrBegin,
+                                      size_t Size, void *Code) {
   beginTargetDataOperation();
   if (ompt_callback_target_data_op_emi_fn) {
     // HostOpId will be set by the tool. Invoke the tool supplied data op EMI
     // callback
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_transfer_to_device, HstPtrBegin,
-        /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
-        Code);
+        ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
+        DstPtrBegin, DstDeviceId, Size, Code);
   } else if (ompt_callback_target_data_op_fn) {
     // HostOpId is set by the runtime
     HostOpId = createOpId();
     // Invoke the tool supplied data op callback
     ompt_callback_target_data_op_fn(
         TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
-        HstPtrBegin, /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin,
-        DeviceId, Size, Code);
+        SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
   }
 }
 
-void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
-                                    void *HstPtrBegin, size_t Size,
-                                    void *Code) {
+void Interface::endTargetDataSubmit(int64_t SrcDeviceId, void *SrcPtrBegin,
+                                    int64_t DstDeviceId, void *DstPtrBegin,
+                                    size_t Size, void *Code) {
   // Only EMI callback handles end scope
   if (ompt_callback_target_data_op_emi_fn) {
     // HostOpId will be set by the tool. Invoke the tool supplied data op EMI
     // callback
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_transfer_to_device, HstPtrBegin,
-        /*SrcDeviceNum=*/omp_get_initial_device(), TgtPtrBegin, DeviceId, Size,
-        Code);
+        ompt_target_data_transfer_to_device, SrcPtrBegin, SrcDeviceId,
+        DstPtrBegin, DstDeviceId, Size, Code);
   }
   endTargetDataOperation();
 }
@@ -193,41 +190,38 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
   endTargetDataOperation();
 }
 
-void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
-                                        void *TgtPtrBegin, size_t Size,
-                                        void *Code) {
+void Interface::beginTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+                                        int64_t DstDeviceId, void *DstPtrBegin,
+                                        size_t Size, void *Code) {
   beginTargetDataOperation();
   if (ompt_callback_target_data_op_emi_fn) {
     // HostOpId will be set by the tool. Invoke the tool supplied data op EMI
     // callback
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_begin, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
-        HstPtrBegin,
-        /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+        ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
+        DstPtrBegin, DstDeviceId, Size, Code);
   } else if (ompt_callback_target_data_op_fn) {
     // HostOpId is set by the runtime
     HostOpId = createOpId();
     // Invoke the tool supplied data op callback
     ompt_callback_target_data_op_fn(
         TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
-        TgtPtrBegin, DeviceId, HstPtrBegin,
-        /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+        SrcPtrBegin, SrcDeviceId, DstPtrBegin, DstDeviceId, Size, Code);
   }
 }
 
-void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
-                                      void *TgtPtrBegin, size_t Size,
-                                      void *Code) {
+void Interface::endTargetDataRetrieve(int64_t SrcDeviceId, void *SrcPtrBegin,
+                                      int64_t DstDeviceId, void *DstPtrBegin,
+                                      size_t Size, void *Code) {
   // Only EMI callback handles end scope
   if (ompt_callback_target_data_op_emi_fn) {
     // HostOpId will be set by the tool. Invoke the tool supplied data op EMI
     // callback
     ompt_callback_target_data_op_emi_fn(
         ompt_scope_end, TargetTaskData, &TargetData, &HostOpId,
-        ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
-        HstPtrBegin,
-        /*TgtDeviceNum=*/omp_get_initial_device(), Size, Code);
+        ompt_target_data_transfer_from_device, SrcPtrBegin, SrcDeviceId,
+        DstPtrBegin, DstDeviceId, Size, Code);
   }
   endTargetDataOperation();
 }
diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 5fe3f508b739cb..a4dc2b9ab27a64 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -151,7 +151,7 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
   OMPT_IF_BUILT(
       InterfaceRAII TargetDataSubmitRAII(
           RegionInterface.getCallbacks<ompt_target_data_transfer_to_device>(),
-          DeviceID, TgtPtrBegin, HstPtrBegin, Size,
+          omp_get_initial_device(), HstPtrBegin, DeviceID, TgtPtrBegin, Size,
           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
 
   if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
@@ -173,7 +173,7 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
   OMPT_IF_BUILT(
       InterfaceRAII TargetDataRetrieveRAII(
           RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
-          DeviceID, HstPtrBegin, TgtPtrBegin, Size,
+          DeviceID, TgtPtrBegin, omp_get_initial_device(), HstPtrBegin, Size,
           /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
 
   if (!RTL->data_retrieve_async || !RTL->synchronize)
@@ -185,6 +185,12 @@ int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
 // Copy data from current device to destination device directly
 int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
                                int64_t Size, AsyncInfoTy &AsyncInfo) {
+  /// RAII tool anchors around the exchange; D2D is reported as from_device
+  OMPT_IF_BUILT(
+      InterfaceRAII TargetDataExchangeRAII(
+          RegionInterface.getCallbacks<ompt_target_data_transfer_from_device>(),
+          DeviceID, SrcPtr, DstDev.DeviceID, DstPtr, Size,
+          /*CodePtr=*/OMPT_GET_RETURN_ADDRESS);)
   if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
     assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
     return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
diff --git a/openmp/libomptarget/test/ompt/callbacks.h b/openmp/libomptarget/test/ompt/callbacks.h
index 1f9b7c177b2860..2a9fdcde71f9c1 100644
--- a/openmp/libomptarget/test/ompt/callbacks.h
+++ b/openmp/libomptarget/test/ompt/callbacks.h
@@ -81,11 +81,17 @@ static void on_ompt_callback_target_data_op_emi(
   assert(codeptr_ra != 0 && "Unexpected null codeptr");
   if (endpoint == ompt_scope_begin)
     *host_op_id = next_op_id++;
+  // If target_task_data is non-null: src and dest must not both be null
+  assert(target_task_data == 0 || target_task_data && src_addr != 0 ||
+         target_task_data && dest_addr != 0);
+  // target_task_data may be null, avoid dereferencing it
+  uint64_t target_task_data_value =
+      (target_task_data) ? target_task_data->value : 0;
   printf("  Callback DataOp EMI: endpoint=%d optype=%d target_task_data=%p "
          "(0x%lx) target_data=%p (0x%lx) host_op_id=%p (0x%lx) src=%p "
          "src_device_num=%d "
          "dest=%p dest_device_num=%d bytes=%lu code=%p\n",
-         endpoint, optype, target_task_data, target_task_data->value,
+         endpoint, optype, target_task_data, target_task_data_value,
          target_data, target_data->value, host_op_id, *host_op_id, src_addr,
          src_device_num, dest_addr, dest_device_num, bytes, codeptr_ra);
 }
diff --git a/openmp/libomptarget/test/ompt/target_memcpy_emi.c b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
new file mode 100644
index 00000000000000..7b66b0e595a561
--- /dev/null
+++ b/openmp/libomptarget/test/ompt/target_memcpy_emi.c
@@ -0,0 +1,79 @@
+// RUN: %libomptarget-compile-run-and-check-generic
+// REQUIRES: ompt
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+/*
+ * Verify all three data transfer directions: H2D, D2D and D2H
+ */
+
+#include <omp.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "callbacks.h"
+#include "register_emi.h"
+
+int main(void) {
+  int NumDevices = omp_get_num_devices();
+  assert(NumDevices > 0);
+  int Device = 0;
+  int Host = omp_get_initial_device();
+
+  printf("Allocating Memory on Device\n");
+  int *DevPtr = (int *)omp_target_alloc(sizeof(int), Device);
+  int *HstPtr = (int *)malloc(sizeof(int));
+  *HstPtr = 42;
+
+  printf("Testing: Host to Device\n");
+  omp_target_memcpy(DevPtr, HstPtr, sizeof(int), 0, 0, Device, Host);
+
+  printf("Testing: Device to Device\n");
+  omp_target_memcpy(DevPtr, DevPtr, sizeof(int), 0, 0, Device, Device);
+
+  printf("Testing: Device to Host\n");
+  omp_target_memcpy(HstPtr, DevPtr, sizeof(int), 0, 0, Host, Device);
+
+  printf("Checking Correctness\n");
+  assert(*HstPtr == 42);
+
+  printf("Freeing Memory on Device\n");
+  free(HstPtr);
+  omp_target_free(DevPtr, Device);
+
+  return 0;
+}
+
+// clang-format off
+
+/// CHECK: Callback Init:
+
+/// CHECK: Allocating Memory on Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=1
+/// CHECK-SAME: src_device_num=[[HOST:[0-9]+]]
+/// CHECK-SAME: dest_device_num=[[DEVICE:[0-9]+]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Host to Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 {{.+}} src_device_num=[[HOST]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Device to Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[DEVICE]]
+
+/// CHECK: Testing: Device to Host
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 {{.+}} src_device_num=[[DEVICE]] {{.+}} dest_device_num=[[HOST]]
+
+/// CHECK: Checking Correctness
+
+/// CHECK: Freeing Memory on Device
+/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 {{.+}} src_device_num=[[DEVICE]]
+/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 {{.+}} src_device_num=[[DEVICE]]
+
+/// CHECK: Callback Fini:
+
+// clang-format on



More information about the Openmp-commits mailing list