[llvm] [Offload] Re-allocate overlapping memory (PR #159567)

Ross Brunton via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 22 05:06:37 PDT 2025


https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/159567

>From 3d0de74a69c58b731e5b6a66a234444d4fcc2c1b Mon Sep 17 00:00:00 2001
From: Ross Brunton <bruntonross at protonmail.com>
Date: Thu, 18 Sep 2025 14:01:16 +0100
Subject: [PATCH 1/4] [Offload] Re-allocate overlapping memory

If olMemAlloc happens to allocate memory that was already allocated
elsewhere (possibly by another device on another platform), it is now
thrown away and a new allocation generated.

A new `AllocBases` vector is now available, which is an ordered list
of allocation start addresses.
---
 offload/liboffload/API/Memory.td              |  3 +
 offload/liboffload/src/OffloadImpl.cpp        | 69 ++++++++++++++++---
 .../OffloadAPI/memory/olMemAlloc.cpp          | 20 ++++++
 3 files changed, 82 insertions(+), 10 deletions(-)

diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td
index cc98b672a26a9..debda165d2b23 100644
--- a/offload/liboffload/API/Memory.td
+++ b/offload/liboffload/API/Memory.td
@@ -21,6 +21,9 @@ def ol_alloc_type_t : Enum {
 
 def olMemAlloc : Function {
   let desc = "Creates a memory allocation on the specified device.";
+  let details = [
+      "All allocations through olMemAlloc regardless of source share a single virtual address range. There is no risk of multiple devices returning equal pointers to different memory."
+  ];
   let params = [
     Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>,
     Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>,
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index b5b9b0e83b975..ed969f5f01095 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -182,6 +182,9 @@ namespace offload {
 struct AllocInfo {
   ol_device_handle_t Device;
   ol_alloc_type_t Type;
+  void *Start;
+  // One byte past the end
+  void *End;
 };
 
 // Global shared state for liboffload
@@ -200,6 +203,9 @@ struct OffloadContext {
   bool ValidationEnabled = true;
   DenseMap<void *, AllocInfo> AllocInfoMap{};
   std::mutex AllocInfoMapMutex{};
+  // Partitioned list of memory base addresses. Each element in this list is a
+  // key in AllocInfoMap
+  llvm::SmallVector<void *> AllocBases{};
   SmallVector<ol_platform_impl_t, 4> Platforms{};
   size_t RefCount;
 
@@ -615,18 +621,58 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
 
 Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
                       size_t Size, void **AllocationOut) {
-  auto Alloc =
-      Device->Device->dataAlloc(Size, nullptr, convertOlToPluginAllocTy(Type));
-  if (!Alloc)
-    return Alloc.takeError();
+  void *OldAlloc = nullptr;
+
+  // Repeat the allocation up to a certain amount of times. If it happens to
+  // already be allocated (e.g. by a device from another vendor) throw it away
+  // and try again.
+  for (size_t Count = 0; Count < 10; Count++) {
+    auto NewAlloc = Device->Device->dataAlloc(Size, nullptr,
+                                              convertOlToPluginAllocTy(Type));
+    if (!NewAlloc)
+      return NewAlloc.takeError();
+
+    if (OldAlloc)
+      if (auto Err = Device->Device->dataDelete(OldAlloc,
+                                                convertOlToPluginAllocTy(Type)))
+        return Err;
 
-  *AllocationOut = *Alloc;
-  {
-    std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
-    OffloadContext::get().AllocInfoMap.insert_or_assign(
-        *Alloc, AllocInfo{Device, Type});
+    void *NewEnd = &reinterpret_cast<char *>(*NewAlloc)[Size];
+    auto &AllocBases = OffloadContext::get().AllocBases;
+    auto &AllocInfoMap = OffloadContext::get().AllocInfoMap;
+    {
+      std::lock_guard<std::mutex> Lock(OffloadContext::get().AllocInfoMapMutex);
+
+      // Check that this memory region doesn't overlap another one
+      // That is, the start of this allocation needs to be after another
+      // allocation's end point, and the end of this allocation needs to be
+      // before the next one's start.
+      // `Gap` is the first alloc who ends after the new alloc's start point.
+      auto Gap =
+          std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc,
+                           [&](const void *Iter, const void *Val) {
+                             return AllocInfoMap.at(Iter).End <= Val;
+                           });
+      if (Gap == AllocBases.end() || NewEnd <= AllocInfoMap.at(*Gap).Start) {
+        // Success, no conflict
+        AllocInfoMap.insert_or_assign(
+            *NewAlloc, AllocInfo{Device, Type, *NewAlloc, NewEnd});
+        AllocBases.insert(
+            std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc),
+            *NewAlloc);
+        *AllocationOut = *NewAlloc;
+        return Error::success();
+      }
+
+      // To avoid the next attempt allocating the same memory we just freed, we
+      // hold onto it until we complete the next allocation
+      OldAlloc = *NewAlloc;
+    }
   }
-  return Error::success();
+
+  // We've tried multiple times, and can't allocate a non-overlapping region.
+  return createOffloadError(ErrorCode::BACKEND_FAILURE,
+                            "failed to allocate non-overlapping memory");
 }
 
 Error olMemFree_impl(void *Address) {
@@ -642,6 +688,9 @@ Error olMemFree_impl(void *Address) {
     Device = AllocInfo.Device;
     Type = AllocInfo.Type;
     OffloadContext::get().AllocInfoMap.erase(Address);
+
+    auto &Bases = OffloadContext::get().AllocBases;
+    Bases.erase(std::lower_bound(Bases.begin(), Bases.end(), Address));
   }
 
   if (auto Res =
diff --git a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
index 00e428ec2abc7..445262aa0c583 100644
--- a/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
+++ b/offload/unittests/OffloadAPI/memory/olMemAlloc.cpp
@@ -34,6 +34,26 @@ TEST_P(olMemAllocTest, SuccessAllocDevice) {
   olMemFree(Alloc);
 }
 
+TEST_P(olMemAllocTest, SuccessAllocMany) {
+  std::vector<void *> Allocs;
+  Allocs.reserve(1000);
+
+  constexpr ol_alloc_type_t TYPES[3] = {
+      OL_ALLOC_TYPE_DEVICE, OL_ALLOC_TYPE_MANAGED, OL_ALLOC_TYPE_HOST};
+
+  for (size_t I = 1; I < 1000; I++) {
+    void *Alloc = nullptr;
+    ASSERT_SUCCESS(olMemAlloc(Device, TYPES[I % 3], 1024 * I, &Alloc));
+    ASSERT_NE(Alloc, nullptr);
+
+    Allocs.push_back(Alloc);
+  }
+
+  for (auto *A : Allocs) {
+    olMemFree(A);
+  }
+}
+
 TEST_P(olMemAllocTest, InvalidNullDevice) {
   void *Alloc = nullptr;
   ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,

>From 22ff6295765729baacd745ff3dcf3e8593dc94ba Mon Sep 17 00:00:00 2001
From: Ross Brunton <bruntonross at protonmail.com>
Date: Thu, 18 Sep 2025 14:21:21 +0100
Subject: [PATCH 2/4] Use static_cast

---
 offload/liboffload/src/OffloadImpl.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index ed969f5f01095..ecf687d561c5b 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -637,7 +637,7 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
                                                 convertOlToPluginAllocTy(Type)))
         return Err;
 
-    void *NewEnd = &reinterpret_cast<char *>(*NewAlloc)[Size];
+    void *NewEnd = &static_cast<char *>(*NewAlloc)[Size];
     auto &AllocBases = OffloadContext::get().AllocBases;
     auto &AllocInfoMap = OffloadContext::get().AllocInfoMap;
     {

>From 15dfd03eb62b74990583cdf4bb425a099f1c4321 Mon Sep 17 00:00:00 2001
From: Ross Brunton <bruntonross at protonmail.com>
Date: Thu, 18 Sep 2025 15:21:22 +0100
Subject: [PATCH 3/4] Keep all rejects

---
 offload/liboffload/src/OffloadImpl.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index ecf687d561c5b..4a253c61a657b 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -621,7 +621,7 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
 
 Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
                       size_t Size, void **AllocationOut) {
-  void *OldAlloc = nullptr;
+  SmallVector<void *> Rejects;
 
   // Repeat the allocation up to a certain amount of times. If it happens to
   // already be allocated (e.g. by a device from another vendor) throw it away
@@ -632,11 +632,6 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
     if (!NewAlloc)
       return NewAlloc.takeError();
 
-    if (OldAlloc)
-      if (auto Err = Device->Device->dataDelete(OldAlloc,
-                                                convertOlToPluginAllocTy(Type)))
-        return Err;
-
     void *NewEnd = &static_cast<char *>(*NewAlloc)[Size];
     auto &AllocBases = OffloadContext::get().AllocBases;
     auto &AllocInfoMap = OffloadContext::get().AllocInfoMap;
@@ -661,12 +656,17 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
             std::lower_bound(AllocBases.begin(), AllocBases.end(), *NewAlloc),
             *NewAlloc);
         *AllocationOut = *NewAlloc;
+
+        for (void *R : Rejects)
+          if (auto Err =
+                  Device->Device->dataDelete(R, convertOlToPluginAllocTy(Type)))
+            return Err;
         return Error::success();
       }
 
       // To avoid the next attempt allocating the same memory we just freed, we
-      // hold onto it until we complete the next allocation
-      OldAlloc = *NewAlloc;
+      // hold onto it until we complete the allocation
+      Rejects.push_back(*NewAlloc);
     }
   }
 

>From 99dd81048c4dbc00fddc5fe01b5402ecf88415fc Mon Sep 17 00:00:00 2001
From: Ross Brunton <bruntonross at protonmail.com>
Date: Mon, 22 Sep 2025 13:06:17 +0100
Subject: [PATCH 4/4] Increase repeat count to 50

---
 offload/liboffload/src/OffloadImpl.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 4a253c61a657b..58a1c09d7e96e 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -619,6 +619,7 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) {
   }
 }
 
+constexpr size_t MAX_ALLOC_TRIES = 50;
 Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
                       size_t Size, void **AllocationOut) {
   SmallVector<void *> Rejects;
@@ -626,7 +627,7 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type,
   // Repeat the allocation up to a certain amount of times. If it happens to
   // already be allocated (e.g. by a device from another vendor) throw it away
   // and try again.
-  for (size_t Count = 0; Count < 10; Count++) {
+  for (size_t Count = 0; Count < MAX_ALLOC_TRIES; Count++) {
     auto NewAlloc = Device->Device->dataAlloc(Size, nullptr,
                                               convertOlToPluginAllocTy(Type));
     if (!NewAlloc)



More information about the llvm-commits mailing list