[llvm] [Offload] Introduce dataFence plugin interface. (PR #153793)

Abhinav Gaba via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 15 05:12:18 PDT 2025


https://github.com/abhinavgaba created https://github.com/llvm/llvm-project/pull/153793

The purpose of this fence is to ensure that any `dataSubmit`s inserted
into a queue before a `dataFence` finish before finish before any `dataSubmit`s
inserted after it begin.

This is a no-op for most queues, since they are in-order, and by design
any operations inserted into them occur in order.

But the interface is supposed to be functional for out-of-order queues.

The addition of the interface means that any operations that rely on
such ordering (like ATTACH map-type support in #149036) can invoke it,
without worrying about whether the underlying queue is in-order or
out-of-order.

Once a plugin supports out-of-order queues, the plugins can implement
this function, without requiring any change at the libomptarget level.

>From b85804f95598d77f960a7cead3ede93ab9bb6687 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Fri, 15 Aug 2025 04:47:14 -0700
Subject: [PATCH 1/2] [Offload] Introduce `dataFence` plugin interface.

The purpose of this fence is to ensure that any `dataSubmit`s inserted
into a queue before a `dataFence` finish before finish before any `dataSubmit`s
inserted after it begin.

This is a no-op for most queues, since they are in-order, and by design
any operations inserted into them occur in order.

But the interface is supposed to be functional for out-of-order queues.

The addition of the interface means that any operations that rely on
such ordering (like ATTACH map-type support in #149036) can invoke it,
without worrying about whether the underlying queue is in-order or
out-of-order.

Once a plugin supports out-of-order queues, the plugins can implement
this function, without requiring any change at the libomptarget level.
---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp           |  4 ++++
 .../plugins-nextgen/common/include/PluginInterface.h |  8 ++++++++
 .../plugins-nextgen/common/src/PluginInterface.cpp   | 12 ++++++++++++
 offload/plugins-nextgen/cuda/src/rtl.cpp             |  4 ++++
 offload/plugins-nextgen/host/src/rtl.cpp             |  4 ++++
 5 files changed, 32 insertions(+)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 796182075ff3d..9232f79e9c576 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2537,6 +2537,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
                                           getAgent(), (uint64_t)Size);
   }
 
+  Error dataFence(__tgt_async_info *Async) override {
+    return Plugin::success();
+  }
+
   /// Initialize the async info for interoperability purposes.
   Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) override {
     // TODO: Implement this function.
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index c9ab34b024b77..1b110f6b74553 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -944,6 +944,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
   virtual Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
                                  AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
 
+  /// Instert a data fence between previous data operations and the following
+  /// operations if necessary for the device
+  virtual Error dataFence(__tgt_async_info *AsyncInfo) = 0;
+
   /// Exchange data between devices (device to device transfer). Calling this
   /// function is only valid if GenericPlugin::isDataExchangable() passing the
   /// two devices returns true.
@@ -1448,6 +1452,10 @@ struct GenericPluginTy {
                               int DstDeviceId, void *DstPtr, int64_t Size,
                               __tgt_async_info *AsyncInfo);
 
+  /// Places a fence between previous data movements and following data
+  /// movements if necessary on the device
+  int32_t data_fence(int32_t DeviceId, __tgt_async_info *AsyncInfo);
+
   /// Begin executing a kernel on the given device.
   int32_t launch_kernel(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
                         ptrdiff_t *TgtOffsets, KernelArgsTy *KernelArgs,
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 083d41659a469..c99871452efe9 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2324,3 +2324,15 @@ int32_t GenericPluginTy::async_barrier(omp_interop_val_t *Interop) {
   }
   return OFFLOAD_SUCCESS;
 }
+
+int32_t GenericPluginTy::data_fence(int32_t DeviceId,
+                                    __tgt_async_info *AsyncInfo) {
+  auto Err = getDevice(DeviceId).dataFence(AsyncInfo);
+  if (Err) {
+    REPORT("Failure to place data fence on device %d: %s\n", DeviceId,
+           toString(std::move(Err)).data());
+    return OFFLOAD_FAIL;
+  }
+
+  return OFFLOAD_SUCCESS;
+}
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 82c9f9b706cbd..426db75be790b 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -856,6 +856,10 @@ struct CUDADeviceTy : public GenericDeviceTy {
     return Plugin::success();
   }
 
+  Error dataFence(__tgt_async_info *Async) override {
+    return Plugin::success();
+  }
+
   /// Initialize the device info for interoperability purposes.
   Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) override {
     assert(Context && "Context is null");
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index ed5213531999d..8b61179223b7c 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -295,6 +295,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
                          "dataExchangeImpl not supported");
   }
 
+  Error dataFence(__tgt_async_info *Async) override {
+    return Plugin::success();
+  }
+
   /// All functions are already synchronous. No need to do anything on this
   /// synchronization function.
   Error synchronizeImpl(__tgt_async_info &AsyncInfo,

>From 2a043b294a026817cdb42f10bc52d328d9eeca01 Mon Sep 17 00:00:00 2001
From: Abhinav Gaba <abhinav.gaba at intel.com>
Date: Fri, 15 Aug 2025 05:08:52 -0700
Subject: [PATCH 2/2] Add some comments, change casing of error message.

---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp             | 3 +++
 offload/plugins-nextgen/common/src/PluginInterface.cpp | 2 +-
 offload/plugins-nextgen/cuda/src/rtl.cpp               | 3 +++
 offload/plugins-nextgen/host/src/rtl.cpp               | 3 +++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 9232f79e9c576..eab0fda9ff0d0 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -2537,6 +2537,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
                                           getAgent(), (uint64_t)Size);
   }
 
+  /// Insert a data fence between previous data operations and the following
+  /// operations. This is a no-op for AMDGPU devices as operations inserted into
+  /// a queue are in-order.
   Error dataFence(__tgt_async_info *Async) override {
     return Plugin::success();
   }
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index c99871452efe9..5a7d8e936313f 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -2329,7 +2329,7 @@ int32_t GenericPluginTy::data_fence(int32_t DeviceId,
                                     __tgt_async_info *AsyncInfo) {
   auto Err = getDevice(DeviceId).dataFence(AsyncInfo);
   if (Err) {
-    REPORT("Failure to place data fence on device %d: %s\n", DeviceId,
+    REPORT("failure to place data fence on device %d: %s\n", DeviceId,
            toString(std::move(Err)).data());
     return OFFLOAD_FAIL;
   }
diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp
index 426db75be790b..f3b2229daf372 100644
--- a/offload/plugins-nextgen/cuda/src/rtl.cpp
+++ b/offload/plugins-nextgen/cuda/src/rtl.cpp
@@ -856,6 +856,9 @@ struct CUDADeviceTy : public GenericDeviceTy {
     return Plugin::success();
   }
 
+  /// Insert a data fence between previous data operations and the following
+  /// operations. This is a no-op for CUDA devices as operations inserted into
+  /// a queue are in-order.
   Error dataFence(__tgt_async_info *Async) override {
     return Plugin::success();
   }
diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp
index 8b61179223b7c..b88435aa9e02b 100644
--- a/offload/plugins-nextgen/host/src/rtl.cpp
+++ b/offload/plugins-nextgen/host/src/rtl.cpp
@@ -295,6 +295,9 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
                          "dataExchangeImpl not supported");
   }
 
+  /// Insert a data fence between previous data operations and the following
+  /// operations. This is a no-op for Host devices as operations inserted into
+  /// a queue are in-order.
   Error dataFence(__tgt_async_info *Async) override {
     return Plugin::success();
   }



More information about the llvm-commits mailing list