[llvm] [OFFLOAD] Add support for indexed per-thread containers (PR #164263)

Alex Duran via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 19 06:00:44 PST 2025


https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/164263

>From 987f44cc66042dcd6d32430463cbffdba2a55691 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Mon, 20 Oct 2025 16:33:41 +0200
Subject: [PATCH 01/17] [OFFLOAD] Add support for indexed per-thread containers

Split from #158900, this adds a PerThreadContainer that can use STL-like
indexed containers, based on a slightly refactored PerThreadTable.
---
 offload/include/OpenMP/InteropAPI.h |   8 +-
 offload/include/PerThreadTable.h    | 155 +++++++++++++++++++++++++++-
 2 files changed, 154 insertions(+), 9 deletions(-)

diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 53ac4be2e2e98..c1957d8205839 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -160,17 +160,11 @@ struct InteropTableEntry {
     Interops.push_back(obj);
   }
 
-  template <class ClearFuncTy> void clear(ClearFuncTy f) {
-    for (auto &Obj : Interops) {
-      f(Obj);
-    }
-  }
-
   /// vector interface
   int size() const { return Interops.size(); }
   iterator begin() { return Interops.begin(); }
   iterator end() { return Interops.end(); }
-  iterator erase(iterator it) { return Interops.erase(it); }
+  void clear() { Interops.clear(); }
 };
 
 struct InteropTblTy
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 45b196171b4c8..2b2327985a78c 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -16,6 +16,60 @@
 #include <list>
 #include <memory>
 #include <mutex>
+#include <type_traits>
+
+template <typename ObjectType> struct PerThread {
+  struct PerThreadData {
+    std::unique_ptr<ObjectType> ThreadEntry;
+  };
+
+  std::mutex Mutex;
+  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+
+  // define default constructors, disable copy and move constructors
+  PerThread() = default;
+  PerThread(const PerThread &) = delete;
+  PerThread(PerThread &&) = delete;
+  PerThread &operator=(const PerThread &) = delete;
+  PerThread &operator=(PerThread &&) = delete;
+  ~PerThread() {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    ThreadDataList.clear();
+  }
+
+private:
+  PerThreadData &getThreadData() {
+    static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+    if (!ThreadData) {
+      ThreadData = std::make_shared<PerThreadData>();
+      std::lock_guard<std::mutex> Lock(Mutex);
+      ThreadDataList.push_back(ThreadData);
+    }
+    return *ThreadData;
+  }
+
+protected:
+  ObjectType &getThreadEntry() {
+    auto &ThData = getThreadData();
+    if (ThData.ThEntry)
+      return *ThData.ThEntry;
+    ThData.ThEntry = std::make_unique<ObjectType>();
+    return *ThData.ThEntry;
+  }
+
+public:
+  ObjectType &get() { return getThreadEntry(); }
+
+  template <class F> void clear(F f) {
+    std::lock_guard<std::mutex> Lock(Mutex);
+    for (auto ThData : ThreadDataList) {
+      if (!ThData->ThEntry)
+        continue;
+      f(*ThData->ThEntry);
+    }
+    ThreadDataList.clear();
+  }
+};
 
 // Using an STL container (such as std::vector) indexed by thread ID has
 // too many race conditions issues so we store each thread entry into a
@@ -23,10 +77,32 @@
 // T is the container type used to store the objects, e.g., std::vector,
 // std::set, etc. by each thread. O is the type of the stored objects e.g.,
 // omp_interop_val_t *, ...
-
 template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   using iterator = typename ContainerType::iterator;
 
+  template <typename, typename = std::void_t<>>
+  struct has_iterator : std::false_type {};
+  template <typename T>
+  struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_clear : std::false_type {};
+  template <typename T>
+  struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
+      : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_clearAll : std::false_type {};
+  template <typename T>
+  struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
+      : std::true_type {};
+
+  template <typename, typename = std::void_t<>>
+  struct is_associative : std::false_type {};
+  template <typename T>
+  struct is_associative<T, std::void_t<typename T::mapped_type>>
+      : std::true_type {};
+
   struct PerThreadData {
     size_t NElements = 0;
     std::unique_ptr<ContainerType> ThEntry;
@@ -71,6 +147,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     return ThData.NElements;
   }
 
+  void setNElements(size_t Size) {
+    auto &NElements = getThreadNElements();
+    NElements = Size;
+  }
+
 public:
   void add(ObjectType obj) {
     auto &Entry = getThreadEntry();
@@ -104,11 +185,81 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     for (auto ThData : ThreadDataList) {
       if (!ThData->ThEntry || ThData->NElements == 0)
         continue;
-      ThData->ThEntry->clear(f);
+      if constexpr (has_clearAll<ContainerType>::value) {
+        ThData->ThEntry->clearAll(f);
+      } else if constexpr (has_iterator<ContainerType>::value &&
+                           has_clear<ContainerType>::value) {
+        for (auto &Obj : *ThData->ThEntry) {
+          if constexpr (is_associative<ContainerType>::value) {
+            f(Obj.second);
+          } else {
+            f(Obj);
+          }
+        }
+        ThData->ThEntry->clear();
+      } else {
+        static_assert(true, "Container type not supported");
+      }
       ThData->NElements = 0;
     }
     ThreadDataList.clear();
   }
 };
 
+template <typename T, typename = std::void_t<>> struct ContainerValueType {
+  using type = typename T::value_type;
+};
+template <typename T>
+struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
+  using type = typename T::mapped_type;
+};
+
+template <typename ContainerType, size_t reserveSize = 0>
+struct PerThreadContainer
+    : public PerThreadTable<ContainerType,
+                            typename ContainerValueType<ContainerType>::type> {
+
+  // helpers
+  template <typename T, typename = std::void_t<>> struct indexType {
+    using type = typename T::size_type;
+  };
+  template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
+    using type = typename T::key_type;
+  };
+  template <typename T, typename = std::void_t<>>
+  struct has_resize : std::false_type {};
+  template <typename T>
+  struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
+      : std::true_type {};
+
+  template <typename T, typename = std::void_t<>>
+  struct has_reserve : std::false_type {};
+  template <typename T>
+  struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
+      : std::true_type {};
+
+  using IndexType = typename indexType<ContainerType>::type;
+  using ObjectType = typename ContainerValueType<ContainerType>::type;
+
+  // Get the object for the given index in the current thread
+  ObjectType &get(IndexType Index) {
+    auto &Entry = this->getThreadEntry();
+
+    // specialized code for vector-like containers
+    if constexpr (has_resize<ContainerType>::value) {
+      if (Index >= Entry.size()) {
+        if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
+          if (Entry.capacity() < reserveSize)
+            Entry.reserve(reserveSize);
+        }
+        // If the index is out of bounds, try resize the container
+        Entry.resize(Index + 1);
+      }
+    }
+    ObjectType &Ret = Entry[Index];
+    this->setNElements(Entry.size());
+    return Ret;
+  }
+};
+
 #endif

>From e0c46a67f381867df4a6e9af27358c0866e702d2 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:04:08 +0200
Subject: [PATCH 02/17] Change container

---
 offload/include/PerThreadTable.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 2b2327985a78c..e0d669998f260 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -14,6 +14,7 @@
 #define OFFLOAD_PERTHREADTABLE_H
 
 #include <list>
+#include <llvm/ADT/SmallVector.h>
 #include <memory>
 #include <mutex>
 #include <type_traits>
@@ -24,7 +25,7 @@ template <typename ObjectType> struct PerThread {
   };
 
   std::mutex Mutex;
-  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+  llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
 
   // define default constructors, disable copy and move constructors
   PerThread() = default;

>From 0d3d6dc614d98333cd142fc1cab2940059e07260 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:06:09 +0200
Subject: [PATCH 03/17] Add deinit method

---
 offload/include/PerThreadTable.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index e0d669998f260..dc6adf9bbb21d 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -15,6 +15,7 @@
 
 #include <list>
 #include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/Error.h>
 #include <memory>
 #include <mutex>
 #include <type_traits>
@@ -205,6 +206,24 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     }
     ThreadDataList.clear();
   }
+
+  template <class F> llvm::Error deinit(F f) {
+    std::lock_guard<std::mutex> Lock(Mtx);
+    for (auto ThData : ThreadDataList) {
+      if (!ThData->ThEntry || ThData->NElements == 0)
+        continue;
+      for (auto &Obj : *ThData->ThEntry) {
+        if constexpr (is_associative<ContainerType>::value) {
+          if (auto Err = f(Obj.second))
+            return Err;
+        } else {
+          if (auto Err = f(Obj))
+            return Err;
+        }
+      }
+    }
+    return llvm::Error::success();
+  }
 };
 
 template <typename T, typename = std::void_t<>> struct ContainerValueType {

>From 2de9273091bd7dafdf596d58856dcd7b8f6b8842 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:14:57 +0200
Subject: [PATCH 04/17] renaming variables, remove some locks

---
 offload/include/PerThreadTable.h | 54 +++++++++++++++-----------------
 1 file changed, 25 insertions(+), 29 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index dc6adf9bbb21d..6d6067364f4cf 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -35,7 +35,6 @@ template <typename ObjectType> struct PerThread {
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
   ~PerThread() {
-    std::lock_guard<std::mutex> Lock(Mutex);
     ThreadDataList.clear();
   }
 
@@ -63,7 +62,6 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class F> void clear(F f) {
-    std::lock_guard<std::mutex> Lock(Mutex);
     for (auto ThData : ThreadDataList) {
       if (!ThData->ThEntry)
         continue;
@@ -107,10 +105,10 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
 
   struct PerThreadData {
     size_t NElements = 0;
-    std::unique_ptr<ContainerType> ThEntry;
+    std::unique_ptr<ContainerType> ThreadEntry;
   };
 
-  std::mutex Mtx;
+  std::mutex Mutex;
   std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
 
   // define default constructors, disable copy and move constructors
@@ -120,33 +118,32 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
   ~PerThreadTable() {
-    std::lock_guard<std::mutex> Lock(Mtx);
     ThreadDataList.clear();
   }
 
 private:
   PerThreadData &getThreadData() {
-    static thread_local std::shared_ptr<PerThreadData> ThData = nullptr;
-    if (!ThData) {
-      ThData = std::make_shared<PerThreadData>();
-      std::lock_guard<std::mutex> Lock(Mtx);
-      ThreadDataList.push_back(ThData);
+    static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+    if (!ThreadData) {
+      ThreadData = std::make_shared<PerThreadData>();
+      std::lock_guard<std::mutex> Lock(Mutex);
+      ThreadDataList.push_back(ThreadData);
     }
-    return *ThData;
+    return *ThreadData;
   }
 
 protected:
   ContainerType &getThreadEntry() {
-    auto &ThData = getThreadData();
-    if (ThData.ThEntry)
-      return *ThData.ThEntry;
-    ThData.ThEntry = std::make_unique<ContainerType>();
-    return *ThData.ThEntry;
+    auto &ThreadData = getThreadData();
+    if (ThreadData.ThreadEntry)
+      return *ThreadData.ThreadEntry;
+    ThreadData.ThreadEntry = std::make_unique<ContainerType>();
+    return *ThreadData.ThreadEntry;
   }
 
   size_t &getThreadNElements() {
-    auto &ThData = getThreadData();
-    return ThData.NElements;
+    auto &ThreadData = getThreadData();
+    return ThreadData.NElements;
   }
 
   void setNElements(size_t Size) {
@@ -183,36 +180,35 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> void clear(F f) {
-    std::lock_guard<std::mutex> Lock(Mtx);
-    for (auto ThData : ThreadDataList) {
-      if (!ThData->ThEntry || ThData->NElements == 0)
+    std::lock_guard<std::mutex> Lock(Mutex);
+    for (auto ThreadData : ThreadDataList) {
+      if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       if constexpr (has_clearAll<ContainerType>::value) {
-        ThData->ThEntry->clearAll(f);
+        ThreadData->ThreadEntry->clearAll(f);
       } else if constexpr (has_iterator<ContainerType>::value &&
                            has_clear<ContainerType>::value) {
-        for (auto &Obj : *ThData->ThEntry) {
+        for (auto &Obj : *ThreadData->ThreadEntry) {
           if constexpr (is_associative<ContainerType>::value) {
             f(Obj.second);
           } else {
             f(Obj);
           }
         }
-        ThData->ThEntry->clear();
+        ThreadData->ThreadEntry->clear();
       } else {
         static_assert(true, "Container type not supported");
       }
-      ThData->NElements = 0;
+      ThreadData->NElements = 0;
     }
     ThreadDataList.clear();
   }
 
   template <class F> llvm::Error deinit(F f) {
-    std::lock_guard<std::mutex> Lock(Mtx);
-    for (auto ThData : ThreadDataList) {
-      if (!ThData->ThEntry || ThData->NElements == 0)
+    for (auto ThreadData : ThreadDataList) {
+      if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
-      for (auto &Obj : *ThData->ThEntry) {
+      for (auto &Obj : *ThreadData->ThreadEntry) {
         if constexpr (is_associative<ContainerType>::value) {
           if (auto Err = f(Obj.second))
             return Err;

>From f4e89a986d2a1b31c42b43b036b7bd6316767870 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:16:41 +0200
Subject: [PATCH 05/17] format

---
 offload/include/PerThreadTable.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 6d6067364f4cf..466291503edfc 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -34,9 +34,7 @@ template <typename ObjectType> struct PerThread {
   PerThread(PerThread &&) = delete;
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
-  ~PerThread() {
-    ThreadDataList.clear();
-  }
+  ~PerThread() { ThreadDataList.clear(); }
 
 private:
   PerThreadData &getThreadData() {
@@ -117,9 +115,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable(PerThreadTable &&) = delete;
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
-  ~PerThreadTable() {
-    ThreadDataList.clear();
-  }
+  ~PerThreadTable() { ThreadDataList.clear(); }
 
 private:
   PerThreadData &getThreadData() {

>From 5ebf0a4e591dba5a1960b8c1972d7437134f50d8 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:19:21 +0200
Subject: [PATCH 06/17] more renaming

---
 offload/include/PerThreadTable.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 466291503edfc..b27fbc8e09408 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -49,21 +49,21 @@ template <typename ObjectType> struct PerThread {
 
 protected:
   ObjectType &getThreadEntry() {
-    auto &ThData = getThreadData();
-    if (ThData.ThEntry)
-      return *ThData.ThEntry;
-    ThData.ThEntry = std::make_unique<ObjectType>();
-    return *ThData.ThEntry;
+    auto &ThreadData = getThreadData();
+    if (ThreadData.ThreadEntry)
+      return *ThreadData.ThreadEntry;
+    ThreadData.ThreadEntry = std::make_unique<ObjectType>();
+    return *ThreadData.ThreadEntry;
   }
 
 public:
   ObjectType &get() { return getThreadEntry(); }
 
   template <class F> void clear(F f) {
-    for (auto ThData : ThreadDataList) {
-      if (!ThData->ThEntry)
+    for (auto ThreadData : ThreadDataList) {
+      if (!ThreadData->ThreadEntry)
         continue;
-      f(*ThData->ThEntry);
+      f(*ThreadData->ThreadEntry);
     }
     ThreadDataList.clear();
   }

>From 8bff29d07671eb5e235137e9001151932a861d12 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:41:34 +0200
Subject: [PATCH 07/17] forgot one mutex

---
 offload/include/PerThreadTable.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index b27fbc8e09408..1263268ac8e45 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -176,7 +176,6 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> void clear(F f) {
-    std::lock_guard<std::mutex> Lock(Mutex);
     for (auto ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;

>From d29eac0aeb07c1fbe96aeea40c1a971d36906bb6 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 11:06:48 +0200
Subject: [PATCH 08/17] missed one container

---
 offload/include/PerThreadTable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 1263268ac8e45..3f887282eeecf 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -107,7 +107,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   };
 
   std::mutex Mutex;
-  std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+  llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
 
   // define default constructors, disable copy and move constructors
   PerThreadTable() = default;

>From caedd4d6891b5c85ac72f9ee6f137ee6851df32b Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 22:53:14 +0200
Subject: [PATCH 09/17] remove auto

---
 offload/include/PerThreadTable.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 3f887282eeecf..0177e871ed664 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -49,7 +49,7 @@ template <typename ObjectType> struct PerThread {
 
 protected:
   ObjectType &getThreadEntry() {
-    auto &ThreadData = getThreadData();
+    PerThreadData &ThreadData = getThreadData();
     if (ThreadData.ThreadEntry)
       return *ThreadData.ThreadEntry;
     ThreadData.ThreadEntry = std::make_unique<ObjectType>();
@@ -60,7 +60,7 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class F> void clear(F f) {
-    for (auto ThreadData : ThreadDataList) {
+    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
         continue;
       f(*ThreadData->ThreadEntry);
@@ -130,7 +130,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
 
 protected:
   ContainerType &getThreadEntry() {
-    auto &ThreadData = getThreadData();
+    PerThreadData &ThreadData = getThreadData();
     if (ThreadData.ThreadEntry)
       return *ThreadData.ThreadEntry;
     ThreadData.ThreadEntry = std::make_unique<ContainerType>();
@@ -138,26 +138,26 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   size_t &getThreadNElements() {
-    auto &ThreadData = getThreadData();
+    PerThreadData &ThreadData = getThreadData();
     return ThreadData.NElements;
   }
 
   void setNElements(size_t Size) {
-    auto &NElements = getThreadNElements();
+    size_t &NElements = getThreadNElements();
     NElements = Size;
   }
 
 public:
   void add(ObjectType obj) {
-    auto &Entry = getThreadEntry();
-    auto &NElements = getThreadNElements();
+    ContainerType &Entry = getThreadEntry();
+    size_t &NElements = getThreadNElements();
     NElements++;
     Entry.add(obj);
   }
 
   iterator erase(iterator it) {
-    auto &Entry = getThreadEntry();
-    auto &NElements = getThreadNElements();
+    ContainerType &Entry = getThreadEntry();
+    size_t &NElements = getThreadNElements();
     NElements--;
     return Entry.erase(it);
   }
@@ -167,11 +167,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   // Iterators to traverse objects owned by
   // the current thread
   iterator begin() {
-    auto &Entry = getThreadEntry();
+    ContainerType &Entry = getThreadEntry();
     return Entry.begin();
   }
   iterator end() {
-    auto &Entry = getThreadEntry();
+    ContainerType &Entry = getThreadEntry();
     return Entry.end();
   }
 
@@ -200,7 +200,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> llvm::Error deinit(F f) {
-    for (auto ThreadData : ThreadDataList) {
+    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       for (auto &Obj : *ThreadData->ThreadEntry) {
@@ -254,7 +254,7 @@ struct PerThreadContainer
 
   // Get the object for the given index in the current thread
   ObjectType &get(IndexType Index) {
-    auto &Entry = this->getThreadEntry();
+    ContainerType &Entry = this->getThreadEntry();
 
     // specialized code for vector-like containers
     if constexpr (has_resize<ContainerType>::value) {

>From 01f9c447213dd8d66aa768e4b6a826bfca81dd55 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:12:57 +0200
Subject: [PATCH 10/17] missed one

---
 offload/include/PerThreadTable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 0177e871ed664..936eebaaf6155 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -176,7 +176,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class F> void clear(F f) {
-    for (auto ThreadData : ThreadDataList) {
+    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       if constexpr (has_clearAll<ContainerType>::value) {

>From e80dcf9f17761ad0d3adb9c51fca08dac252cb70 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:16:56 +0200
Subject: [PATCH 11/17] Some renaming

---
 offload/include/PerThreadTable.h | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 936eebaaf6155..67c7d74eae2c5 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -59,11 +59,11 @@ template <typename ObjectType> struct PerThread {
 public:
   ObjectType &get() { return getThreadEntry(); }
 
-  template <class F> void clear(F f) {
+  template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
         continue;
-      f(*ThreadData->ThreadEntry);
+      ClearFunc(*ThreadData->ThreadEntry);
     }
     ThreadDataList.clear();
   }
@@ -175,19 +175,19 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     return Entry.end();
   }
 
-  template <class F> void clear(F f) {
+  template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       if constexpr (has_clearAll<ContainerType>::value) {
-        ThreadData->ThreadEntry->clearAll(f);
+        ThreadData->ThreadEntry->clearAll(ClearFunc);
       } else if constexpr (has_iterator<ContainerType>::value &&
                            has_clear<ContainerType>::value) {
         for (auto &Obj : *ThreadData->ThreadEntry) {
           if constexpr (is_associative<ContainerType>::value) {
-            f(Obj.second);
+            ClearFunc(Obj.second);
           } else {
-            f(Obj);
+            ClearFunc(Obj);
           }
         }
         ThreadData->ThreadEntry->clear();
@@ -199,16 +199,16 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
     ThreadDataList.clear();
   }
 
-  template <class F> llvm::Error deinit(F f) {
+  template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       for (auto &Obj : *ThreadData->ThreadEntry) {
         if constexpr (is_associative<ContainerType>::value) {
-          if (auto Err = f(Obj.second))
+          if (auto Err = DeinitFunc(Obj.second))
             return Err;
         } else {
-          if (auto Err = f(Obj))
+          if (auto Err = DeinitFunc(Obj))
             return Err;
         }
       }

>From 5451f2294da3f2f4fb6eaa925838f54b17c64008 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:25:40 +0200
Subject: [PATCH 12/17] Add asserts

---
 offload/include/PerThreadTable.h | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 67c7d74eae2c5..d8222d99b6515 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -34,7 +34,11 @@ template <typename ObjectType> struct PerThread {
   PerThread(PerThread &&) = delete;
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
-  ~PerThread() { ThreadDataList.clear(); }
+  ~PerThread() {
+    assert(Mutex.try_lock() &&
+           "Cannot be deleted while other threads are adding entries");
+    ThreadDataList.clear();
+  }
 
 private:
   PerThreadData &getThreadData() {
@@ -60,6 +64,8 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+    assert(Mutex.try_lock() &&
+           "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
         continue;
@@ -115,7 +121,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable(PerThreadTable &&) = delete;
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
-  ~PerThreadTable() { ThreadDataList.clear(); }
+  ~PerThreadTable() {
+    assert(Mutex.try_lock() &&
+           "Cannot be deleted while other threads are adding entries");
+    ThreadDataList.clear();
+  }
 
 private:
   PerThreadData &getThreadData() {
@@ -176,6 +186,8 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+    assert(Mutex.try_lock() &&
+           "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
@@ -200,6 +212,8 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
+    assert(Mutex.try_lock() &&
+           "Deinit cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;

>From a14ee62b1a7ac0dda4bc8eb39fad04cb504bee32 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Tue, 4 Nov 2025 22:11:55 +0100
Subject: [PATCH 13/17] Fix asserts to not be undefined

---
 offload/include/PerThreadTable.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index d8222d99b6515..56e2d75ccd198 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -35,7 +35,7 @@ template <typename ObjectType> struct PerThread {
   PerThread &operator=(const PerThread &) = delete;
   PerThread &operator=(PerThread &&) = delete;
   ~PerThread() {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Cannot be deleted while other threads are adding entries");
     ThreadDataList.clear();
   }
@@ -64,7 +64,7 @@ template <typename ObjectType> struct PerThread {
   ObjectType &get() { return getThreadEntry(); }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry)
@@ -122,7 +122,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   PerThreadTable &operator=(const PerThreadTable &) = delete;
   PerThreadTable &operator=(PerThreadTable &&) = delete;
   ~PerThreadTable() {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Cannot be deleted while other threads are adding entries");
     ThreadDataList.clear();
   }
@@ -186,7 +186,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Clear cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
@@ -212,7 +212,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
   template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
-    assert(Mutex.try_lock() &&
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Deinit cannot be called while other threads are adding entries");
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)

>From 579264a4801ac780c0539eb9f856c400bb390f48 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 12:19:17 +0100
Subject: [PATCH 14/17] minor refactoring

---
 offload/include/PerThreadTable.h | 74 +++++++++++++++-----------------
 1 file changed, 35 insertions(+), 39 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 56e2d75ccd198..ad147657b9228 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -20,7 +20,7 @@
 #include <mutex>
 #include <type_traits>
 
-template <typename ObjectType> struct PerThread {
+template <typename ObjectType> class PerThread {
   struct PerThreadData {
     std::unique_ptr<ObjectType> ThreadEntry;
   };
@@ -28,19 +28,6 @@ template <typename ObjectType> struct PerThread {
   std::mutex Mutex;
   llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
 
-  // define default constructors, disable copy and move constructors
-  PerThread() = default;
-  PerThread(const PerThread &) = delete;
-  PerThread(PerThread &&) = delete;
-  PerThread &operator=(const PerThread &) = delete;
-  PerThread &operator=(PerThread &&) = delete;
-  ~PerThread() {
-    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
-           "Cannot be deleted while other threads are adding entries");
-    ThreadDataList.clear();
-  }
-
-private:
   PerThreadData &getThreadData() {
     static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
     if (!ThreadData) {
@@ -51,7 +38,6 @@ template <typename ObjectType> struct PerThread {
     return *ThreadData;
   }
 
-protected:
   ObjectType &getThreadEntry() {
     PerThreadData &ThreadData = getThreadData();
     if (ThreadData.ThreadEntry)
@@ -61,6 +47,18 @@ template <typename ObjectType> struct PerThread {
   }
 
 public:
+  // define default constructors, disable copy and move constructors
+  PerThread() = default;
+  PerThread(const PerThread &) = delete;
+  PerThread(PerThread &&) = delete;
+  PerThread &operator=(const PerThread &) = delete;
+  PerThread &operator=(PerThread &&) = delete;
+  ~PerThread() {
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+           "Cannot be deleted while other threads are adding entries");
+    ThreadDataList.clear();
+  }
+
   ObjectType &get() { return getThreadEntry(); }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
@@ -78,10 +76,10 @@ template <typename ObjectType> struct PerThread {
 // Using an STL container (such as std::vector) indexed by thread ID has
 // too many race conditions issues so we store each thread entry into a
 // thread_local variable.
-// T is the container type used to store the objects, e.g., std::vector,
-// std::set, etc. by each thread. O is the type of the stored objects e.g.,
-// omp_interop_val_t *, ...
-template <typename ContainerType, typename ObjectType> struct PerThreadTable {
+// ContainerType is the container type used to store the objects, e.g.,
+// std::vector, std::set, etc. by each thread. ObjectType is the type of the
+// stored objects e.g., omp_interop_val_t *, ...
+template <typename ContainerType, typename ObjectType> class PerThreadTable {
   using iterator = typename ContainerType::iterator;
 
   template <typename, typename = std::void_t<>>
@@ -115,19 +113,6 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   std::mutex Mutex;
   llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
 
-  // define default constructors, disable copy and move constructors
-  PerThreadTable() = default;
-  PerThreadTable(const PerThreadTable &) = delete;
-  PerThreadTable(PerThreadTable &&) = delete;
-  PerThreadTable &operator=(const PerThreadTable &) = delete;
-  PerThreadTable &operator=(PerThreadTable &&) = delete;
-  ~PerThreadTable() {
-    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
-           "Cannot be deleted while other threads are adding entries");
-    ThreadDataList.clear();
-  }
-
-private:
   PerThreadData &getThreadData() {
     static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
     if (!ThreadData) {
@@ -158,6 +143,18 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
   }
 
 public:
+  // define default constructors, disable copy and move constructors
+  PerThreadTable() = default;
+  PerThreadTable(const PerThreadTable &) = delete;
+  PerThreadTable(PerThreadTable &&) = delete;
+  PerThreadTable &operator=(const PerThreadTable &) = delete;
+  PerThreadTable &operator=(PerThreadTable &&) = delete;
+  ~PerThreadTable() {
+    assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+           "Cannot be deleted while other threads are adding entries");
+    ThreadDataList.clear();
+  }
+
   void add(ObjectType obj) {
     ContainerType &Entry = getThreadEntry();
     size_t &NElements = getThreadNElements();
@@ -239,8 +236,8 @@ struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
   using type = typename T::mapped_type;
 };
 
-template <typename ContainerType, size_t reserveSize = 0>
-struct PerThreadContainer
+template <typename ContainerType, size_t ReserveSize = 0>
+class PerThreadContainer
     : public PerThreadTable<ContainerType,
                             typename ContainerValueType<ContainerType>::type> {
 
@@ -265,7 +262,7 @@ struct PerThreadContainer
 
   using IndexType = typename indexType<ContainerType>::type;
   using ObjectType = typename ContainerValueType<ContainerType>::type;
-
+public:
   // Get the object for the given index in the current thread
   ObjectType &get(IndexType Index) {
     ContainerType &Entry = this->getThreadEntry();
@@ -273,10 +270,9 @@ struct PerThreadContainer
     // specialized code for vector-like containers
     if constexpr (has_resize<ContainerType>::value) {
       if (Index >= Entry.size()) {
-        if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
-          if (Entry.capacity() < reserveSize)
-            Entry.reserve(reserveSize);
-        }
+        if constexpr (has_reserve<ContainerType>::value && ReserveSize > 0)
+          Entry.reserve(ReserveSize);
+
         // If the index is out of bounds, try resize the container
         Entry.resize(Index + 1);
       }

>From 26fbcc0eb4b888ef7675682ebdbaa01392d4ecaa Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 14:35:40 +0100
Subject: [PATCH 15/17] Refactor container concepts checks

---
 offload/include/PerThreadTable.h | 124 +++++++++++++++----------------
 1 file changed, 62 insertions(+), 62 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index ad147657b9228..f118f4d617fa8 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -73,6 +73,54 @@ template <typename ObjectType> class PerThread {
   }
 };
 
+template <typename ContainerTy> struct ContainerConcepts {
+  template <typename, template <typename> class, typename = std::void_t<>>
+  struct has : std::false_type {};
+  template <typename Ty, template <typename> class Op>
+  struct has<Ty, Op, std::void_t<Op<Ty>>> : std::true_type {};
+
+  template <typename Ty> using IteratorTypeCheck = typename Ty::iterator;
+  template <typename Ty> using MappedTypeCheck = typename Ty::mapped_type;
+  template <typename Ty> using ValueTypeCheck = typename Ty::value_type;
+  template <typename Ty> using KeyTypeCheck = typename Ty::key_type;
+  template <typename Ty> using SizeTyCheck = typename Ty::size_type;
+
+  template <typename Ty>
+  using ClearCheck = decltype(std::declval<Ty>().clear());
+  template <typename Ty>
+  using ClearAllCheck = decltype(std::declval<Ty>().clearAll(1));
+  template <typename Ty>
+  using ReserveCheck = decltype(std::declval<Ty>().reserve(1));
+  template <typename Ty>
+  using ResizeCheck = decltype(std::declval<Ty>().resize(1));
+
+  static constexpr bool hasIterator =
+      has<ContainerTy, IteratorTypeCheck>::value;
+  static constexpr bool hasClear = has<ContainerTy, ClearCheck>::value;
+  static constexpr bool hasClearAll = has<ContainerTy, ClearAllCheck>::value;
+  static constexpr bool isAssociative =
+      has<ContainerTy, MappedTypeCheck>::value;
+  static constexpr bool hasReserve = has<ContainerTy, ReserveCheck>::value;
+  static constexpr bool hasResize = has<ContainerTy, ResizeCheck>::value;
+
+  template <typename, template <typename> class, typename = std::void_t<>>
+  struct has_type {
+    using type = void;
+  };
+  template <typename Ty, template <typename> class Op>
+  struct has_type<Ty, Op, std::void_t<Op<Ty>>> {
+    using type = Op<Ty>;
+  };
+
+  using iterator = typename has_type<ContainerTy, IteratorTypeCheck>::type;
+  using value_type = typename std::conditional_t<
+      isAssociative, typename has_type<ContainerTy, MappedTypeCheck>::type,
+      typename has_type<ContainerTy, ValueTypeCheck>::type>;
+  using key_type = typename std::conditional_t<
+      isAssociative, typename has_type<ContainerTy, KeyTypeCheck>::type,
+      typename has_type<ContainerTy, SizeTyCheck>::type>;
+};
+
 // Using an STL container (such as std::vector) indexed by thread ID has
 // too many race conditions issues so we store each thread entry into a
 // thread_local variable.
@@ -80,30 +128,7 @@ template <typename ObjectType> class PerThread {
 // std::vector, std::set, etc. by each thread. ObjectType is the type of the
 // stored objects e.g., omp_interop_val_t *, ...
 template <typename ContainerType, typename ObjectType> class PerThreadTable {
-  using iterator = typename ContainerType::iterator;
-
-  template <typename, typename = std::void_t<>>
-  struct has_iterator : std::false_type {};
-  template <typename T>
-  struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};
-
-  template <typename T, typename = std::void_t<>>
-  struct has_clear : std::false_type {};
-  template <typename T>
-  struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
-      : std::true_type {};
-
-  template <typename T, typename = std::void_t<>>
-  struct has_clearAll : std::false_type {};
-  template <typename T>
-  struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
-      : std::true_type {};
-
-  template <typename, typename = std::void_t<>>
-  struct is_associative : std::false_type {};
-  template <typename T>
-  struct is_associative<T, std::void_t<typename T::mapped_type>>
-      : std::true_type {};
+  using iterator = typename ContainerConcepts<ContainerType>::iterator;
 
   struct PerThreadData {
     size_t NElements = 0;
@@ -188,12 +213,12 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
-      if constexpr (has_clearAll<ContainerType>::value) {
+      if constexpr (ContainerConcepts<ContainerType>::hasClearAll) {
         ThreadData->ThreadEntry->clearAll(ClearFunc);
-      } else if constexpr (has_iterator<ContainerType>::value &&
-                           has_clear<ContainerType>::value) {
+      } else if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
+                           ContainerConcepts<ContainerType>::hasClear) {
         for (auto &Obj : *ThreadData->ThreadEntry) {
-          if constexpr (is_associative<ContainerType>::value) {
+          if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
             ClearFunc(Obj.second);
           } else {
             ClearFunc(Obj);
@@ -215,7 +240,7 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
       for (auto &Obj : *ThreadData->ThreadEntry) {
-        if constexpr (is_associative<ContainerType>::value) {
+        if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
           if (auto Err = DeinitFunc(Obj.second))
             return Err;
         } else {
@@ -228,49 +253,24 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
   }
 };
 
-template <typename T, typename = std::void_t<>> struct ContainerValueType {
-  using type = typename T::value_type;
-};
-template <typename T>
-struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
-  using type = typename T::mapped_type;
-};
-
 template <typename ContainerType, size_t ReserveSize = 0>
 class PerThreadContainer
-    : public PerThreadTable<ContainerType,
-                            typename ContainerValueType<ContainerType>::type> {
+    : public PerThreadTable<ContainerType, typename ContainerConcepts<
+                                               ContainerType>::value_type> {
+
+  using IndexType = typename ContainerConcepts<ContainerType>::key_type;
+  using ObjectType = typename ContainerConcepts<ContainerType>::value_type;
 
-  // helpers
-  template <typename T, typename = std::void_t<>> struct indexType {
-    using type = typename T::size_type;
-  };
-  template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
-    using type = typename T::key_type;
-  };
-  template <typename T, typename = std::void_t<>>
-  struct has_resize : std::false_type {};
-  template <typename T>
-  struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
-      : std::true_type {};
-
-  template <typename T, typename = std::void_t<>>
-  struct has_reserve : std::false_type {};
-  template <typename T>
-  struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
-      : std::true_type {};
-
-  using IndexType = typename indexType<ContainerType>::type;
-  using ObjectType = typename ContainerValueType<ContainerType>::type;
 public:
   // Get the object for the given index in the current thread
   ObjectType &get(IndexType Index) {
     ContainerType &Entry = this->getThreadEntry();
 
     // specialized code for vector-like containers
-    if constexpr (has_resize<ContainerType>::value) {
+    if constexpr (ContainerConcepts<ContainerType>::hasResize) {
       if (Index >= Entry.size()) {
-        if constexpr (has_reserve<ContainerType>::value && ReserveSize > 0)
+        if constexpr (ContainerConcepts<ContainerType>::hasReserve &&
+                      ReserveSize > 0)
           Entry.reserve(ReserveSize);
 
         // If the index is out of bounds, try resize the container

>From 32819dff8d0120c069cd0f51d6358b523dea4800 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 14:39:15 +0100
Subject: [PATCH 16/17] remove clearAll

---
 offload/include/PerThreadTable.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index f118f4d617fa8..f2e54eaf9abf1 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -88,8 +88,6 @@ template <typename ContainerTy> struct ContainerConcepts {
   template <typename Ty>
   using ClearCheck = decltype(std::declval<Ty>().clear());
   template <typename Ty>
-  using ClearAllCheck = decltype(std::declval<Ty>().clearAll(1));
-  template <typename Ty>
   using ReserveCheck = decltype(std::declval<Ty>().reserve(1));
   template <typename Ty>
   using ResizeCheck = decltype(std::declval<Ty>().resize(1));
@@ -97,7 +95,6 @@ template <typename ContainerTy> struct ContainerConcepts {
   static constexpr bool hasIterator =
       has<ContainerTy, IteratorTypeCheck>::value;
   static constexpr bool hasClear = has<ContainerTy, ClearCheck>::value;
-  static constexpr bool hasClearAll = has<ContainerTy, ClearAllCheck>::value;
   static constexpr bool isAssociative =
       has<ContainerTy, MappedTypeCheck>::value;
   static constexpr bool hasReserve = has<ContainerTy, ReserveCheck>::value;
@@ -213,10 +210,8 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
     for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
       if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
         continue;
-      if constexpr (ContainerConcepts<ContainerType>::hasClearAll) {
-        ThreadData->ThreadEntry->clearAll(ClearFunc);
-      } else if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
-                           ContainerConcepts<ContainerType>::hasClear) {
+      if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
+                    ContainerConcepts<ContainerType>::hasClear) {
         for (auto &Obj : *ThreadData->ThreadEntry) {
           if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
             ClearFunc(Obj.second);

>From 2d4f36464d83fed2ac72bacec730eff5f6d6a49d Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 14:57:37 +0100
Subject: [PATCH 17/17] Simplify PerThreadData

---
 offload/include/PerThreadTable.h | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index f2e54eaf9abf1..f26f0b45a8520 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -21,31 +21,19 @@
 #include <type_traits>
 
 template <typename ObjectType> class PerThread {
-  struct PerThreadData {
-    std::unique_ptr<ObjectType> ThreadEntry;
-  };
-
   std::mutex Mutex;
-  llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
+  llvm::SmallVector<std::shared_ptr<ObjectType>> ThreadDataList;
 
-  PerThreadData &getThreadData() {
-    static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+  ObjectType &getThreadData() {
+    static thread_local std::shared_ptr<ObjectType> ThreadData = nullptr;
     if (!ThreadData) {
-      ThreadData = std::make_shared<PerThreadData>();
+      ThreadData = std::make_shared<ObjectType>();
       std::lock_guard<std::mutex> Lock(Mutex);
       ThreadDataList.push_back(ThreadData);
     }
     return *ThreadData;
   }
 
-  ObjectType &getThreadEntry() {
-    PerThreadData &ThreadData = getThreadData();
-    if (ThreadData.ThreadEntry)
-      return *ThreadData.ThreadEntry;
-    ThreadData.ThreadEntry = std::make_unique<ObjectType>();
-    return *ThreadData.ThreadEntry;
-  }
-
 public:
   // define default constructors, disable copy and move constructors
   PerThread() = default;
@@ -59,15 +47,15 @@ template <typename ObjectType> class PerThread {
     ThreadDataList.clear();
   }
 
-  ObjectType &get() { return getThreadEntry(); }
+  ObjectType &get() { return getThreadData(); }
 
   template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
     assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
            "Clear cannot be called while other threads are adding entries");
-    for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
-      if (!ThreadData->ThreadEntry)
+    for (std::shared_ptr<ObjectType> ThreadData : ThreadDataList) {
+      if (!ThreadData)
         continue;
-      ClearFunc(*ThreadData->ThreadEntry);
+      ClearFunc(*ThreadData);
     }
     ThreadDataList.clear();
   }



More information about the llvm-commits mailing list