[llvm] [OFFLOAD] Add support for indexed per-thread containers (PR #164263)

Alex Duran via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 4 13:12:13 PST 2025


https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/164263

>From 987f44cc66042dcd6d32430463cbffdba2a55691 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Mon, 20 Oct 2025 16:33:41 +0200
Subject: [PATCH 01/13] [OFFLOAD] Add support for indexed per-thread containers

Split from #158900, this adds a PerThreadContainer that can use STL-like
indexed containers, built on a slightly refactored PerThreadTable.
---
 offload/include/OpenMP/InteropAPI.h |   8 +-
 offload/include/PerThreadTable.h    | 155 +++++++++++++++++++++++++++-
 2 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 53ac4be2e2e98..c1957d8205839 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -160,17 +160,11 @@ struct InteropTableEntry {
     Interops.push_back(obj);
   }
 
-  template <class ClearFuncTy> void clear(ClearFuncTy f) {
-    for (auto &Obj : Interops) {
-      f(Obj);
-    }
-  }
-
   /// vector interface
   int size() const { return Interops.size(); }
   iterator begin() { return Interops.begin(); }
   iterator end() { return Interops.end(); }
-  iterator erase(iterator it) { return Interops.erase(it); }
+  void clear() { Interops.clear(); }
 };
 
 struct InteropTblTy
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 45b196171b4c8..2b2327985a78c 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -16,6 +16,60 @@
 #include <list>
 #include <memory>
 #include <mutex>
+#include <type_traits>
+
+template <typename ObjectType> struct PerThread {
+  struct PerThreadData {
+    std::unique_ptr<ObjectType> ThreadEntry;
+  };
+
+  std::mutex Mutex;
+  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+
+  // define default constructors, disable copy and move constructors
+  PerThread() = default;
+  PerThread(const PerThread &) = delete;
+  PerThread(PerThread &&) = delete;
+  PerThread &operator=(const PerThread &) = delete;
+  PerThread &operator=(PerThread &&) = delete;
+  ~PerThread() {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    ThreadDataList.clear();
+  }
+
+private:
+  PerThreadData &getThreadData() {
+    static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+    if (!ThreadData) {
+      ThreadData = std::make_shared<PerThreadData>();
+      std::lock_guard<std::mutex> Lock(Mutex);
+      ThreadDataList.push_back(ThreadData);
+    }
+    return *ThreadData;
+  }
+
+protected:
+  ObjectType &getThreadEntry() {
+    auto &ThData = getThreadData();
+    if (ThData.ThEntry)
+      return *ThData.ThEntry;
+    ThData.ThEntry = std::make_unique<ObjectType>();
+    return *ThData.ThEntry;
+  }
+
+public:
+  ObjectType &get() { return getThreadEntry(); }
+
+  template <class F> void clear(F f) {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    for (auto ThData : ThreadDataList) {
+      if (!ThData->ThEntry)
+        continue;
+      f(*ThData->ThEntry);
+    }
+    ThreadDataList.clear();
+  }
+};
 
 // Using an STL container (such as std::vector) indexed by thread ID has
 // too many race conditions issues so we store each thread entry into a
@@ -23,10 +77,32 @@
 // T is the container type used to store the objects, e.g., std::vector,
 // std::set, etc. by each thread. O is the type of the stored objects e.g.,
 // omp_interop_val_t *, ...
-
 template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   using iterator = typename ContainerType::iterator;
 
+  template <typename, typename = std::void_t<>>
+  struct has_iterator : std::false_type {};
+  template <typename T>
+  struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_clear : std::false_type {};
+  template <typename T>
+  struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
+      : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_clearAll : std::false_type {};
+  template <typename T>
+  struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
+      : std::true_type {};
+
+  template <typename, typename = std::void_t<>>
+  struct is_associative : std::false_type {};
+  template <typename T>
+  struct is_associative<T, std::void_t<typename T::mapped_type>>
+      : std::true_type {};
+
   struct PerThreadData {
     size_t NElements = 0;
     std::unique_ptr<ContainerType> ThEntry;
@@ -71,6 +147,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     return ThData.NElements;
   }
 
+  void setNElements(size_t Size) {
+    auto &NElements = getThreadNElements();
+    NElements = Size;
+  }
+
 public:
   void add(ObjectType obj) {
     auto &Entry = getThreadEntry();
@@ -104,11 +185,81 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     for (auto ThData : ThreadDataList) {
       if (!ThData->ThEntry || ThData->NElements == 0)
         continue;
-      ThData->ThEntry->clear(f);
+      if constexpr (has_clearAll<ContainerType>::value) {
+        ThData->ThEntry->clearAll(f);
+      } else if constexpr (has_iterator<ContainerType>::value &&
+                           has_clear<ContainerType>::value) {
+        for (auto &Obj : *ThData->ThEntry) {
+          if constexpr (is_associative<ContainerType>::value) {
+            f(Obj.second);
+          } else {
+            f(Obj);
+          }
+        }
+        ThData->ThEntry->clear();
+      } else {
+        static_assert(true, "Container type not supported");
+      }
       ThData->NElements = 0;
     }
     ThreadDataList.clear();
   }
 };
 
+template <typename T, typename = std::void_t<>> struct ContainerValueType {
+  using type = typename T::value_type;
+};
+template <typename T>
+struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
+  using type = typename T::mapped_type;
+};
+
+template <typename ContainerType, size_t reserveSize = 0>
+struct PerThreadContainer
+    : public PerThreadTable<ContainerType,
+                            typename ContainerValueType<ContainerType>::type> {
+
+  // helpers
+  template <typename T, typename = std::void_t<>> struct indexType {
+    using type = typename T::size_type;
+  };
+  template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
+    using type = typename T::key_type;
+  };
+  template <typename T, typename = std::void_t<>>
+  struct has_resize : std::false_type {};
+  template <typename T>
+  struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
+      : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_reserve : std::false_type {};
+  template <typename T>
+  struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
+      : std::true_type {};
+
+  using IndexType = typename indexType<ContainerType>::type;
+  using ObjectType = typename ContainerValueType<ContainerType>::type;
+
+  // Get the object for the given index in the current thread
+  ObjectType &get(IndexType Index) {
+    auto &Entry = this->getThreadEntry();
+
+    // specialized code for vector-like containers
+    if constexpr (has_resize<ContainerType>::value) {
+      if (Index >= Entry.size()) {
+        if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
+          if (Entry.capacity() < reserveSize)
+            Entry.reserve(reserveSize);
+        }
+        // If the index is out of bounds, try resize the container
+        Entry.resize(Index + 1);
+      }
+    }
+    ObjectType &Ret = Entry[Index];
+    this->setNElements(Entry.size());
+    return Ret;
+  }
+};
+
 #endif

>From e0c46a67f381867df4a6e9af27358c0866e702d2 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:04:08 +0200
Subject: [PATCH 02/13] Change container

---
 offload/include/PerThreadTable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 2b2327985a78c..e0d669998f260 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -14,6 +14,7 @@
 #define OFFLOAD_PERTHREADTABLE_H
 
 #include <list>
+#include <llvm/ADT/SmallVector.h>
 #include <memory>
 #include <mutex>
 #include <type_traits>
@@ -24,7 +25,7 @@ template <typename ObjectType> struct PerThread {
   };
 
   std::mutex Mutex;
-  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+  llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
 
   // define default constructors, disable copy and move constructors
   PerThread() = default;

>From 0d3d6dc614d98333cd142fc1cab2940059e07260 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:06:09 +0200
Subject: [PATCH 03/13] Add deinit method

---
 offload/include/PerThreadTable.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index e0d669998f260..dc6adf9bbb21d 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -15,6 +15,7 @@
 
 #include <list>
 #include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/Error.h>
 #include <memory>
 #include <mutex>
 #include <type_traits>
@@ -205,6 +206,24 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     }
     ThreadDataList.clear();
   }
+
+  template <class F> llvm::Error deinit(F f) {
+    std::lock_guard<std::mutex> Lock(Mtx);
+    for (auto ThData : ThreadDataList) {
+      if (!ThData->ThEntry || ThData->NElements == 0)
+        continue;
+      for (auto &Obj : *ThData->ThEntry) {
+        if constexpr (is_associative<ContainerType>::value) {
+          if (auto Err = f(Obj.second))
+            return Err;
+        } else {
+          if (auto Err = f(Obj))
+            return Err;
+        }
+      }
+    }
+    return llvm::Error::success();
+  }
 };
 
 template <typename T, typename = std::void_t<>> struct ContainerValueType {

>From 2de9273091bd7dafdf596d58856dcd7b8f6b8842 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:14:57 +0200
Subject: [PATCH 04/13] renaming variables, remove some locks

---
 offload/include/PerThreadTable.h | 54 +++++++++++++++-----------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index dc6adf9bbb21d..6d6067364f4cf 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -35,7 +35,6 @@ template <typename ObjectType> struct PerThread {
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
   ~PerThread() {
-    std::lock_guard<std::mutex> Lock(Mutex);
     ThreadDataList.clear();
   }
 
@@ -63,7 +62,6 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class F> void clear(F f) {
-    std::lock_guard<std::mutex> Lock(Mutex);
     for (auto ThData : ThreadDataList) {
       if (!ThData->ThEntry)
         continue;
@@ -107,10 +105,10 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
 
   struct PerThreadData {
     size_t NElements = 0;
-    std::unique_ptr<ContainerType> ThEntry;
+    std::unique_ptr<ContainerType> ThreadEntry;
   };
 
-  std::mutex Mtx;
+  std::mutex Mutex;
   std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
 
   // define default constructors, disable copy and move constructors
@@ -120,33 +118,32 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
   ~PerThreadTable() {
-    std::lock_guard<std::mutex> Lock(Mtx);
     ThreadDataList.clear();
   }
 
 private:
   PerThreadData &getThreadData() {
-    static thread_local std::shared_ptr<PerThreadData> ThData = nullptr;
-    if (!ThData) {
-      ThData = std::make_shared<PerThreadData>();
-      std::lock_guard<std::mutex> Lock(Mtx);
-      ThreadDataList.push_back(ThData);
+    static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+    if (!ThreadData) {
+      ThreadData = std::make_shared<PerThreadData>();
+      std::lock_guard<std::mutex> Lock(Mutex);
+      ThreadDataList.push_back(ThreadData);
     }
-    return *ThData;
+    return *ThreadData;
   }
 
 protected:
   ContainerType &getThreadEntry() {
-    auto &ThData = getThreadData();
-    if (ThData.ThEntry)
-      return *ThData.ThEntry;
-    ThData.ThEntry = std::make_unique<ContainerType>();
-    return *ThData.ThEntry;
+    auto &ThreadData = getThreadData();
+    if (ThreadData.ThreadEntry)
+      return *ThreadData.ThreadEntry;
+    ThreadData.ThreadEntry = std::make_unique<ContainerType>();
+    return *ThreadData.ThreadEntry;
   }
 
   size_t &getThreadNElements() {
-    auto &ThData = getThreadData();
-    return ThData.NElements;
+    auto &ThreadData = getThreadData();
+    return ThreadData.NElements;
   }
 
   void setNElements(size_t Size) {
@@ -183,36 +180,35 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> void clear(F f) {
-    std::lock_guard<std::mutex> Lock(Mtx);
-    for (auto ThData : ThreadDataList) {
-      if (!ThData->ThEntry || ThData->NElements == 0)
+    std::lock_guard<std::mutex> Lock(Mutex);
+    for (auto ThreadData : ThreadDataList) {
+      if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       if constexpr (has_clearAll<ContainerType>::value) {
-        ThData->ThEntry->clearAll(f);
+        ThreadData->ThreadEntry->clearAll(f);
       } else if constexpr (has_iterator<ContainerType>::value &&
                            has_clear<ContainerType>::value) {
-        for (auto &Obj : *ThData->ThEntry) {
+        for (auto &Obj : *ThreadData->ThreadEntry) {
           if constexpr (is_associative<ContainerType>::value) {
             f(Obj.second);
           } else {
             f(Obj);
           }
         }
-        ThData->ThEntry->clear();
+        ThreadData->ThreadEntry->clear();
       } else {
         static_assert(true, "Container type not supported");
       }
-      ThData->NElements = 0;
+      ThreadData->NElements = 0;
     }
     ThreadDataList.clear();
   }
 
   template <class F> llvm::Error deinit(F f) {
-    std::lock_guard<std::mutex> Lock(Mtx);
-    for (auto ThData : ThreadDataList) {
-      if (!ThData->ThEntry || ThData->NElements == 0)
+    for (auto ThreadData : ThreadDataList) {
+      if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
-      for (auto &Obj : *ThData->ThEntry) {
+      for (auto &Obj : *ThreadData->ThreadEntry) {
         if constexpr (is_associative<ContainerType>::value) {
           if (auto Err = f(Obj.second))
             return Err;

>From f4e89a986d2a1b31c42b43b036b7bd6316767870 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:16:41 +0200
Subject: [PATCH 05/13] format

---
 offload/include/PerThreadTable.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 6d6067364f4cf..466291503edfc 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -34,9 +34,7 @@ template <typename ObjectType> struct PerThread {
   PerThread(PerThread &&) = delete;
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
-  ~PerThread() {
-    ThreadDataList.clear();
-  }
+  ~PerThread() { ThreadDataList.clear(); }
 
 private:
   PerThreadData &getThreadData() {
@@ -117,9 +115,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable(PerThreadTable &&) = delete;
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
-  ~PerThreadTable() {
-    ThreadDataList.clear();
-  }
+  ~PerThreadTable() { ThreadDataList.clear(); }
 
 private:
   PerThreadData &getThreadData() {

>From 5ebf0a4e591dba5a1960b8c1972d7437134f50d8 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:19:21 +0200
Subject: [PATCH 06/13] more renaming

---
 offload/include/PerThreadTable.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 466291503edfc..b27fbc8e09408 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -49,21 +49,21 @@ template <typename ObjectType> struct PerThread {
 
 protected:
   ObjectType &getThreadEntry() {
-    auto &ThData = getThreadData();
-    if (ThData.ThEntry)
-      return *ThData.ThEntry;
-    ThData.ThEntry = std::make_unique<ObjectType>();
-    return *ThData.ThEntry;
+    auto &ThreadData = getThreadData();
+    if (ThreadData.ThreadEntry)
+      return *ThreadData.ThreadEntry;
+    ThreadData.ThreadEntry = std::make_unique<ObjectType>();
+    return *ThreadData.ThreadEntry;
   }
 
 public:
   ObjectType &get() { return getThreadEntry(); }
 
   template <class F> void clear(F f) {
-    for (auto ThData : ThreadDataList) {
-      if (!ThData->ThEntry)
+    for (auto ThreadData : ThreadDataList) {
+      if (!ThreadData->ThreadEntry)
         continue;
-      f(*ThData->ThEntry);
+      f(*ThreadData->ThreadEntry);
     }
     ThreadDataList.clear();
   }

>From 8bff29d07671eb5e235137e9001151932a861d12 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:41:34 +0200
Subject: [PATCH 07/13] forgot one mutex

---
 offload/include/PerThreadTable.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index b27fbc8e09408..1263268ac8e45 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -176,7 +176,6 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> void clear(F f) {
-    std::lock_guard<std::mutex> Lock(Mutex);
     for (auto ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;

>From d29eac0aeb07c1fbe96aeea40c1a971d36906bb6 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 11:06:48 +0200
Subject: [PATCH 08/13] missed one container

---
 offload/include/PerThreadTable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 1263268ac8e45..3f887282eeecf 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -107,7 +107,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   };
 
   std::mutex Mutex;
-  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+  llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
 
   // define default constructors, disable copy and move constructors
   PerThreadTable() = default;

>From caedd4d6891b5c85ac72f9ee6f137ee6851df32b Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 22:53:14 +0200
Subject: [PATCH 09/13] remove auto

---
 offload/include/PerThreadTable.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 3f887282eeecf..0177e871ed664 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -49,7 +49,7 @@ template <typename ObjectType> struct PerThread {
 
 protected:
   ObjectType &getThreadEntry() {
-    auto &ThreadData = getThreadData();
+    PerThreadData &ThreadData = getThreadData();
     if (ThreadData.ThreadEntry)
       return *ThreadData.ThreadEntry;
     ThreadData.ThreadEntry = std::make_unique<ObjectType>();
@@ -60,7 +60,7 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class F> void clear(F f) {
-    for (auto ThreadData : ThreadDataList) {
+    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
         continue;
       f(*ThreadData->ThreadEntry);
@@ -130,7 +130,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
 
 protected:
   ContainerType &getThreadEntry() {
-    auto &ThreadData = getThreadData();
+    PerThreadData &ThreadData = getThreadData();
     if (ThreadData.ThreadEntry)
       return *ThreadData.ThreadEntry;
     ThreadData.ThreadEntry = std::make_unique<ContainerType>();
@@ -138,26 +138,26 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   size_t &getThreadNElements() {
-    auto &ThreadData = getThreadData();
+    PerThreadData &ThreadData = getThreadData();
     return ThreadData.NElements;
   }
 
   void setNElements(size_t Size) {
-    auto &NElements = getThreadNElements();
+    size_t &NElements = getThreadNElements();
     NElements = Size;
   }
 
 public:
   void add(ObjectType obj) {
-    auto &Entry = getThreadEntry();
-    auto &NElements = getThreadNElements();
+    ContainerType &Entry = getThreadEntry();
+    size_t &NElements = getThreadNElements();
     NElements++;
     Entry.add(obj);
   }
 
   iterator erase(iterator it) {
-    auto &Entry = getThreadEntry();
-    auto &NElements = getThreadNElements();
+    ContainerType &Entry = getThreadEntry();
+    size_t &NElements = getThreadNElements();
     NElements--;
     return Entry.erase(it);
   }
@@ -167,11 +167,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   // Iterators to traverse objects owned by
   // the current thread
   iterator begin() {
-    auto &Entry = getThreadEntry();
+    ContainerType &Entry = getThreadEntry();
     return Entry.begin();
   }
   iterator end() {
-    auto &Entry = getThreadEntry();
+    ContainerType &Entry = getThreadEntry();
     return Entry.end();
   }
 
@@ -200,7 +200,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> llvm::Error deinit(F f) {
-    for (auto ThreadData : ThreadDataList) {
+    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       for (auto &Obj : *ThreadData->ThreadEntry) {
@@ -254,7 +254,7 @@ struct PerThreadContainer
 
   // Get the object for the given index in the current thread
   ObjectType &get(IndexType Index) {
-    auto &Entry = this->getThreadEntry();
+    ContainerType &Entry = this->getThreadEntry();
 
     // specialized code for vector-like containers
     if constexpr (has_resize<ContainerType>::value) {

>From 01f9c447213dd8d66aa768e4b6a826bfca81dd55 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:12:57 +0200
Subject: [PATCH 10/13] missed one

---
 offload/include/PerThreadTable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 0177e871ed664..936eebaaf6155 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -176,7 +176,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> void clear(F f) {
-    for (auto ThreadData : ThreadDataList) {
+    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       if constexpr (has_clearAll<ContainerType>::value) {

>From e80dcf9f17761ad0d3adb9c51fca08dac252cb70 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:16:56 +0200
Subject: [PATCH 11/13] Some renaming

---
 offload/include/PerThreadTable.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 936eebaaf6155..67c7d74eae2c5 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -59,11 +59,11 @@ template <typename ObjectType> struct PerThread {
 public:
   ObjectType &get() { return getThreadEntry(); }
 
-  template <class F> void clear(F f) {
+  template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
         continue;
-      f(*ThreadData->ThreadEntry);
+      ClearFunc(*ThreadData->ThreadEntry);
     }
     ThreadDataList.clear();
   }
@@ -175,19 +175,19 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     return Entry.end();
   }
 
-  template <class F> void clear(F f) {
+  template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       if constexpr (has_clearAll<ContainerType>::value) {
-        ThreadData->ThreadEntry->clearAll(f);
+        ThreadData->ThreadEntry->clearAll(ClearFunc);
       } else if constexpr (has_iterator<ContainerType>::value &&
                            has_clear<ContainerType>::value) {
         for (auto &Obj : *ThreadData->ThreadEntry) {
           if constexpr (is_associative<ContainerType>::value) {
-            f(Obj.second);
+            ClearFunc(Obj.second);
           } else {
-            f(Obj);
+            ClearFunc(Obj);
           }
         }
         ThreadData->ThreadEntry->clear();
@@ -199,16 +199,16 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     ThreadDataList.clear();
   }
 
-  template <class F> llvm::Error deinit(F f) {
+  template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       for (auto &Obj : *ThreadData->ThreadEntry) {
         if constexpr (is_associative<ContainerType>::value) {
-          if (auto Err = f(Obj.second))
+          if (auto Err = DeinitFunc(Obj.second))
             return Err;
         } else {
-          if (auto Err = f(Obj))
+          if (auto Err = DeinitFunc(Obj))
             return Err;
         }
       }

>From 5451f2294da3f2f4fb6eaa925838f54b17c64008 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:25:40 +0200
Subject: [PATCH 12/13] Add asserts

---
 offload/include/PerThreadTable.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 67c7d74eae2c5..d8222d99b6515 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -34,7 +34,11 @@ template <typename ObjectType> struct PerThread {
   PerThread(PerThread &&) = delete;
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
-  ~PerThread() { ThreadDataList.clear(); }
+  ~PerThread() {
+    assert(Mutex.try_lock() &&
+           "Cannot be deleted while other threads are adding entries");
+    ThreadDataList.clear();
+  }
 
 private:
   PerThreadData &getThreadData() {
@@ -60,6 +64,8 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+    assert(Mutex.try_lock() &&
+           "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
         continue;
@@ -115,7 +121,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable(PerThreadTable &&) = delete;
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
-  ~PerThreadTable() { ThreadDataList.clear(); }
+  ~PerThreadTable() {
+    assert(Mutex.try_lock() &&
+           "Cannot be deleted while other threads are adding entries");
+    ThreadDataList.clear();
+  }
 
 private:
   PerThreadData &getThreadData() {
@@ -176,6 +186,8 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+    assert(Mutex.try_lock() &&
+           "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
@@ -200,6 +212,8 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
+    assert(Mutex.try_lock() &&
+           "Deinit cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;

>From a14ee62b1a7ac0dda4bc8eb39fad04cb504bee32 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Tue, 4 Nov 2025 22:11:55 +0100
Subject: [PATCH 13/13] Fix asserts to not be undefined

---
 offload/include/PerThreadTable.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index d8222d99b6515..56e2d75ccd198 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -35,7 +35,7 @@ template <typename ObjectType> struct PerThread {
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
   ~PerThread() {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Cannot be deleted while other threads are adding entries");
     ThreadDataList.clear();
   }
@@ -64,7 +64,7 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
@@ -122,7 +122,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
   ~PerThreadTable() {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Cannot be deleted while other threads are adding entries");
     ThreadDataList.clear();
   }
@@ -186,7 +186,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
@@ -212,7 +212,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Deinit cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)



More information about the llvm-commits mailing list