[llvm] [Offload] Ensure all `llvm::Error`s are handled (PR #137339)

Ross Brunton via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 25 07:26:15 PDT 2025


https://github.com/RossBrunton created https://github.com/llvm/llvm-project/pull/137339

`llvm::Error`s containing errors must be explicitly handled or an assert
will be raised. With this change, `ol_impl_result_t` can accept and
consume an `llvm::Error` for errors raised by PluginInterface that have
multiple causes and other places now call `llvm::consumeError`.

Note that there is currently no facility for PluginInterface to
communicate exact error codes, but the constructor is designed in
such a way that it can be easily added later. This MR is to
convert a crash into an error code.

A new test was added, however due to the aforementioned issue with
error codes, it does not pass and instead is marked as a skip.


>From 1c8b7c300ec38f685d75a273910f42c6d66eb493 Mon Sep 17 00:00:00 2001
From: Ross Brunton <ross at codeplay.com>
Date: Fri, 25 Apr 2025 15:18:56 +0100
Subject: [PATCH] [Offload] Ensure all `llvm::Error`s are handled

`llvm::Error`s containing errors must be explicitly handled or an assert
will be raised. With this change, `ol_impl_result_t` can accept and
consume an `llvm::Error` for errors raised by PluginInterface that have
multiple causes and other places now call `llvm::consumeError`.

Note that there is currently no facility for PluginInterface to
communicate exact error codes, but the constructor is designed in
such a way that it can be easily added later. This MR is to
convert a crash into an error code.

A new test was added, however due to the aforementioned issue with
error codes, it does not pass and instead is marked as a skip.
---
 offload/liboffload/include/OffloadImpl.hpp    | 12 ++++++
 offload/liboffload/src/OffloadImpl.cpp        | 39 ++++++++++++-------
 .../OffloadAPI/kernel/olGetKernel.cpp         |  8 ++++
 3 files changed, 46 insertions(+), 13 deletions(-)

diff --git a/offload/liboffload/include/OffloadImpl.hpp b/offload/liboffload/include/OffloadImpl.hpp
index ec470a355309a..570fddfa87f02 100644
--- a/offload/liboffload/include/OffloadImpl.hpp
+++ b/offload/liboffload/include/OffloadImpl.hpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Error.h"
 
 struct OffloadConfig {
   bool TracingEnabled = false;
@@ -88,6 +89,17 @@ struct ol_impl_result_t {
     Result = errors().emplace(std::move(Err)).first->get();
   }
 
+  ol_impl_result_t(llvm::Error &&Error, llvm::StringRef Details) {
+    ol_errc_t ErrCode;
+    llvm::handleAllErrors(std::move(Error), [&](llvm::StringError &Err) {
+      // TODO: PluginInterface doesn't yet have a way to communicate offload
+      // error codes
+      ErrCode = OL_ERRC_UNKNOWN;
+    });
+
+    ol_impl_result_t(ErrCode, Details);
+  }
+
   operator ol_result_t() { return Result; }
 
 private:
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index d956d274b5eb1..fc74771abbbdb 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -308,9 +308,11 @@ ol_impl_result_t olMemAlloc_impl(ol_device_handle_t Device,
                                  void **AllocationOut) {
   auto Alloc =
       Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
-  if (!Alloc)
+  if (!Alloc) {
+    llvm::consumeError(Alloc.takeError());
     return {OL_ERRC_OUT_OF_RESOURCES,
             formatv("Could not create allocation on device {0}", Device).str()};
+  }
 
   *AllocationOut = *Alloc;
   allocInfoMap().insert_or_assign(*Alloc, AllocInfo{Device, Type});
@@ -327,8 +329,10 @@ ol_impl_result_t olMemFree_impl(void *Address) {
 
   auto Res =
       Device->Device->dataDelete(Address, convertOlToPluginAllocTy(Type));
-  if (Res)
+  if (Res) {
+    llvm::consumeError(std::move(Res));
     return {OL_ERRC_OUT_OF_RESOURCES, "Could not free allocation"};
+  }
 
   allocInfoMap().erase(Address);
 
@@ -340,7 +344,7 @@ ol_impl_result_t olCreateQueue_impl(ol_device_handle_t Device,
   auto CreatedQueue = std::make_unique<ol_queue_impl_t>(nullptr, Device);
   auto Err = Device->Device->initAsyncInfo(&(CreatedQueue->AsyncInfo));
   if (Err)
-    return {OL_ERRC_UNKNOWN, "Could not initialize stream resource"};
+    return {std::move(Err), "Could not initialize stream resource"};
 
   *Queue = CreatedQueue.release();
   return OL_SUCCESS;
@@ -355,8 +359,10 @@ ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue) {
   // on it, but we have nothing to synchronize in that situation anyway.
   if (Queue->AsyncInfo->Queue) {
     auto Err = Queue->Device->Device->synchronize(Queue->AsyncInfo);
-    if (Err)
+    if (Err) {
+      llvm::consumeError(std::move(Err));
       return {OL_ERRC_INVALID_QUEUE, "The queue failed to synchronize"};
+    }
   }
 
   // Recreate the stream resource so the queue can be reused
@@ -364,15 +370,17 @@ ol_impl_result_t olWaitQueue_impl(ol_queue_handle_t Queue) {
   // it to begin with.
   auto Res = Queue->Device->Device->initAsyncInfo(&Queue->AsyncInfo);
   if (Res)
-    return {OL_ERRC_UNKNOWN, "Could not reinitialize the stream resource"};
+    return {std::move(Res), "Could not reinitialize the stream resource"};
 
   return OL_SUCCESS;
 }
 
 ol_impl_result_t olWaitEvent_impl(ol_event_handle_t Event) {
   auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo);
-  if (Res)
+  if (Res) {
+    llvm::consumeError(std::move(Res));
     return {OL_ERRC_INVALID_EVENT, "The event failed to synchronize"};
+  }
 
   return OL_SUCCESS;
 }
@@ -384,13 +392,17 @@ ol_impl_result_t olDestroyEvent_impl(ol_event_handle_t Event) {
 ol_event_handle_t makeEvent(ol_queue_handle_t Queue) {
   auto EventImpl = std::make_unique<ol_event_impl_t>(nullptr, Queue);
   auto Res = Queue->Device->Device->createEvent(&EventImpl->EventInfo);
-  if (Res)
+  if (Res) {
+    llvm::consumeError(std::move(Res));
     return nullptr;
+  }
 
   Res = Queue->Device->Device->recordEvent(EventImpl->EventInfo,
                                            Queue->AsyncInfo);
-  if (Res)
+  if (Res) {
+    llvm::consumeError(std::move(Res));
     return nullptr;
+  }
 
   return EventImpl.release();
 }
@@ -416,16 +428,16 @@ ol_impl_result_t olMemcpy_impl(ol_queue_handle_t Queue, void *DstPtr,
   if (DstDevice == HostDevice()) {
     auto Res = SrcDevice->Device->dataRetrieve(DstPtr, SrcPtr, Size, QueueImpl);
     if (Res)
-      return {OL_ERRC_UNKNOWN, "The data retrieve operation failed"};
+      return {std::move(Res), "The data retrieve operation failed"};
   } else if (SrcDevice == HostDevice()) {
     auto Res = DstDevice->Device->dataSubmit(DstPtr, SrcPtr, Size, QueueImpl);
     if (Res)
-      return {OL_ERRC_UNKNOWN, "The data submit operation failed"};
+      return {std::move(Res), "The data submit operation failed"};
   } else {
     auto Res = SrcDevice->Device->dataExchange(SrcPtr, *DstDevice->Device,
                                                DstPtr, Size, QueueImpl);
     if (Res)
-      return {OL_ERRC_UNKNOWN, "The data exchange operation failed"};
+      return {std::move(Res), "The data exchange operation failed"};
   }
 
   if (EventOut)
@@ -452,6 +464,7 @@ ol_impl_result_t olCreateProgram_impl(ol_device_handle_t Device,
   auto Res =
       Device->Device->loadBinary(Device->Device->Plugin, &Prog->DeviceImage);
   if (!Res) {
+    llvm::consumeError(Res.takeError());
     delete Prog;
     return OL_ERRC_INVALID_VALUE;
   }
@@ -477,7 +490,7 @@ ol_impl_result_t olGetKernel_impl(ol_program_handle_t Program,
 
   auto Err = KernelImpl->init(Device, *Program->Image);
   if (Err)
-    return {OL_ERRC_UNKNOWN, "Could not initialize the kernel"};
+    return {std::move(Err), "Could not initialize the kernel"};
 
   *Kernel = &*KernelImpl;
 
@@ -520,7 +533,7 @@ olLaunchKernel_impl(ol_queue_handle_t Queue, ol_device_handle_t Device,
 
   AsyncInfoWrapper.finalize(Err);
   if (Err)
-    return {OL_ERRC_UNKNOWN, "Could not finalize the AsyncInfoWrapper"};
+    return {std::move(Err), "Could not finalize the AsyncInfoWrapper"};
 
   if (EventOut)
     *EventOut = makeEvent(Queue);
diff --git a/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp b/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp
index f320d191ad58f..0dbedbdebd2fd 100644
--- a/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olGetKernel.cpp
@@ -28,3 +28,11 @@ TEST_F(olGetKernelTest, InvalidNullKernelPointer) {
   ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER,
                olGetKernel(Program, "foo", nullptr));
 }
+
+TEST_F(olGetKernelTest, InvalidKernelName) {
+  GTEST_SKIP()
+      << "Error code returning from plugin interface not yet supported";
+  ol_kernel_handle_t Kernel = nullptr;
+  ASSERT_ERROR(OL_ERRC_INVALID_KERNEL_NAME,
+               olGetKernel(Program, "invalid_kernel_name", &Kernel));
+}



More information about the llvm-commits mailing list