[llvm] [Offload] Implement the remaining initial Offload API (PR #122106)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 19 08:06:32 PST 2025

@@ -245,3 +315,331 @@ ol_impl_result_t olGetDeviceInfoSize_impl(ol_device_handle_t Device,
                                           size_t *PropSizeRet) {
   return olGetDeviceInfoImplDetail(Device, PropName, 0, nullptr, PropSizeRet);
+ol_impl_result_t olGetHostDevice_impl(ol_device_handle_t *Device) {
+  *Device = HostDevice();
+  return OL_SUCCESS;
+TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
+  switch (Type) {
+    return TARGET_ALLOC_HOST;
+  default:
+  }
+ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
+                                 ol_alloc_type_t Type, size_t Size,
+                                 void **AllocationOut) {
+  auto Alloc =
+      Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
+  if (!Alloc)
+            formatv("Could not create allocation on device {0}", Device).str()};
+  *AllocationOut = *Alloc;
+  return OL_SUCCESS;
+ol_impl_result_t olMemFree_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
+                                void *Address) {
+  auto Res =
+      Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type));
+  if (Res)
+    return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
+  return OL_SUCCESS;
+ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
+                                    ol_queue_handle_t *Queue) {
+  auto CreatedQueue = std::make_unique<ol_queue_impl_t>();
+  auto Err = Device->Device->initAsyncInfo(&(CreatedQueue->AsyncInfo));
+  if (Err)
+    return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
+  CreatedQueue->Device = Device;
+  CreatedQueue->RefCount = 1;
+  *Queue = CreatedQueue.release();
+  return OL_SUCCESS;
+ol_impl_result_t olRetainQueue_impl(ol_queue_handle_t Queue) {
+  Queue->RefCount++;
+  return OL_SUCCESS;
+ol_impl_result_t olReleaseQueue_impl(ol_queue_handle_t Queue) {
+  if (--Queue->RefCount == 0)
+    delete Queue;
+  return OL_SUCCESS;
+ol_impl_result_t olFinishQueue_impl(ol_queue_handle_t Queue) {
+  // Host plugin doesn't have a queue set so it's not safe to call synchronize
+  // on it, but we have nothing to synchronize in that situation anyway.
+  if (Queue->AsyncInfo->Queue) {
+    auto Err = Queue->Device->Device->synchronize(Queue->AsyncInfo);
+    if (Err)
+      return {OL_ERRC_INVALID_QUEUE, "The queue failed to synchronize"};
+  }
+  // Recreate the stream resource so the queue can be reused
+  // TODO: Would be easier for the synchronization to (optionally) not release
+  // it to begin with.
+  auto Res = Queue->Device->Device->initAsyncInfo(&Queue->AsyncInfo);
+  if (Res)
+    return {OL_ERRC_UNKNOWN, "Could not reinitialize the stream resource"};
+  return OL_SUCCESS;
+ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
+  auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo);
+  if (Res)
+    return {OL_ERRC_INVALID_EVENT, "The event failed to synchronize"};
+  return OL_SUCCESS;
+ol_impl_result_t olRetainEvent_impl(ol_event_handle_t Event) {
+  Event->RefCount++;
+  return OL_SUCCESS;
+ol_impl_result_t olReleaseEvent_impl(ol_event_handle_t Event) {
+  if (--Event->RefCount == 0)
+    delete Event;
+  return OL_SUCCESS;
+ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
+  auto EventImpl = std::make_unique<ol_event_impl_t>();
+  EventImpl->Queue = Queue;
+  auto Res = Queue->Device->Device->createEvent(&EventImpl->EventInfo);
+  if (Res)
+    return nullptr;
+  Res = Queue->Device->Device->recordEvent(EventImpl->EventInfo,
+                                           Queue->AsyncInfo);
+  if (Res)
+    return nullptr;
+  return EventImpl.release();
+ol_impl_result_t olEnqueueMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
+                                      ol_device_handle_t DstDevice,
+                                      void *SrcPtr,
+                                      ol_device_handle_t SrcDevice, size_t Size,
+                                      ol_event_handle_t *EventOut) {
+  if (DstDevice == HostDevice() && SrcDevice == HostDevice()) {
+    // TODO: We could actually handle this with a plain memcpy but we currently
+    // have no way of synchronizing this with the queue
jdoerfert wrote:

Historically, we did synchronous execution for the host, so this is simply a memcpy. Did this change, or is it expected to change?
Possibly related: are we expecting the queue to be tied to one of the devices, or is it allowed to be independent? I don't think adding a copy from A to B into device C's queue makes much sense. If the queue is required to belong to one of the two devices, we should check for that.


