[llvm] [OFFLOAD] Add support for indexed per-thread containers (PR #164263)
Alex Duran via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 06:00:44 PST 2025
https://github.com/adurang updated https://github.com/llvm/llvm-project/pull/164263
>From 987f44cc66042dcd6d32430463cbffdba2a55691 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Mon, 20 Oct 2025 16:33:41 +0200
Subject: [PATCH 01/17] [OFFLOAD] Add support for indexed per-thread containers
Split from #158900. This adds a PerThreadContainer that can use STL-like
indexed containers, based on a slightly refactored PerThreadTable.
---
offload/include/OpenMP/InteropAPI.h | 8 +-
offload/include/PerThreadTable.h | 155 +++++++++++++++++++++++++++-
2 files changed, 154 insertions(+), 9 deletions(-)
diff --git a/offload/include/OpenMP/InteropAPI.h b/offload/include/OpenMP/InteropAPI.h
index 53ac4be2e2e98..c1957d8205839 100644
--- a/offload/include/OpenMP/InteropAPI.h
+++ b/offload/include/OpenMP/InteropAPI.h
@@ -160,17 +160,11 @@ struct InteropTableEntry {
Interops.push_back(obj);
}
- template <class ClearFuncTy> void clear(ClearFuncTy f) {
- for (auto &Obj : Interops) {
- f(Obj);
- }
- }
-
/// vector interface
int size() const { return Interops.size(); }
iterator begin() { return Interops.begin(); }
iterator end() { return Interops.end(); }
- iterator erase(iterator it) { return Interops.erase(it); }
+ void clear() { Interops.clear(); }
};
struct InteropTblTy
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 45b196171b4c8..2b2327985a78c 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -16,6 +16,60 @@
#include <list>
#include <memory>
#include <mutex>
+#include <type_traits>
+
+template <typename ObjectType> struct PerThread {
+ struct PerThreadData {
+ std::unique_ptr<ObjectType> ThreadEntry;
+ };
+
+ std::mutex Mutex;
+ std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+
+ // define default constructors, disable copy and move constructors
+ PerThread() = default;
+ PerThread(const PerThread &) = delete;
+ PerThread(PerThread &&) = delete;
+ PerThread &operator=(const PerThread &) = delete;
+ PerThread &operator=(PerThread &&) = delete;
+ ~PerThread() {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ ThreadDataList.clear();
+ }
+
+private:
+ PerThreadData &getThreadData() {
+ static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+ if (!ThreadData) {
+ ThreadData = std::make_shared<PerThreadData>();
+ std::lock_guard<std::mutex> Lock(Mutex);
+ ThreadDataList.push_back(ThreadData);
+ }
+ return *ThreadData;
+ }
+
+protected:
+ ObjectType &getThreadEntry() {
+ auto &ThData = getThreadData();
+ if (ThData.ThEntry)
+ return *ThData.ThEntry;
+ ThData.ThEntry = std::make_unique<ObjectType>();
+ return *ThData.ThEntry;
+ }
+
+public:
+ ObjectType &get() { return getThreadEntry(); }
+
+ template <class F> void clear(F f) {
+ std::lock_guard<std::mutex> Lock(Mutex);
+ for (auto ThData : ThreadDataList) {
+ if (!ThData->ThEntry)
+ continue;
+ f(*ThData->ThEntry);
+ }
+ ThreadDataList.clear();
+ }
+};
// Using an STL container (such as std::vector) indexed by thread ID has
// too many race conditions issues so we store each thread entry into a
@@ -23,10 +77,32 @@
// T is the container type used to store the objects, e.g., std::vector,
// std::set, etc. by each thread. O is the type of the stored objects e.g.,
// omp_interop_val_t *, ...
-
template <typename ContainerType, typename ObjectType> struct PerThreadTable {
using iterator = typename ContainerType::iterator;
+ template <typename, typename = std::void_t<>>
+ struct has_iterator : std::false_type {};
+ template <typename T>
+ struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};
+
+ template <typename T, typename = std::void_t<>>
+ struct has_clear : std::false_type {};
+ template <typename T>
+ struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
+ : std::true_type {};
+
+ template <typename T, typename = std::void_t<>>
+ struct has_clearAll : std::false_type {};
+ template <typename T>
+ struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
+ : std::true_type {};
+
+ template <typename, typename = std::void_t<>>
+ struct is_associative : std::false_type {};
+ template <typename T>
+ struct is_associative<T, std::void_t<typename T::mapped_type>>
+ : std::true_type {};
+
struct PerThreadData {
size_t NElements = 0;
std::unique_ptr<ContainerType> ThEntry;
@@ -71,6 +147,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
return ThData.NElements;
}
+ void setNElements(size_t Size) {
+ auto &NElements = getThreadNElements();
+ NElements = Size;
+ }
+
public:
void add(ObjectType obj) {
auto &Entry = getThreadEntry();
@@ -104,11 +185,81 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
for (auto ThData : ThreadDataList) {
if (!ThData->ThEntry || ThData->NElements == 0)
continue;
- ThData->ThEntry->clear(f);
+ if constexpr (has_clearAll<ContainerType>::value) {
+ ThData->ThEntry->clearAll(f);
+ } else if constexpr (has_iterator<ContainerType>::value &&
+ has_clear<ContainerType>::value) {
+ for (auto &Obj : *ThData->ThEntry) {
+ if constexpr (is_associative<ContainerType>::value) {
+ f(Obj.second);
+ } else {
+ f(Obj);
+ }
+ }
+ ThData->ThEntry->clear();
+ } else {
+ static_assert(true, "Container type not supported");
+ }
ThData->NElements = 0;
}
ThreadDataList.clear();
}
};
+template <typename T, typename = std::void_t<>> struct ContainerValueType {
+ using type = typename T::value_type;
+};
+template <typename T>
+struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
+ using type = typename T::mapped_type;
+};
+
+template <typename ContainerType, size_t reserveSize = 0>
+struct PerThreadContainer
+ : public PerThreadTable<ContainerType,
+ typename ContainerValueType<ContainerType>::type> {
+
+ // helpers
+ template <typename T, typename = std::void_t<>> struct indexType {
+ using type = typename T::size_type;
+ };
+ template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
+ using type = typename T::key_type;
+ };
+ template <typename T, typename = std::void_t<>>
+ struct has_resize : std::false_type {};
+ template <typename T>
+ struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
+ : std::true_type {};
+
+ template <typename T, typename = std::void_t<>>
+ struct has_reserve : std::false_type {};
+ template <typename T>
+ struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
+ : std::true_type {};
+
+ using IndexType = typename indexType<ContainerType>::type;
+ using ObjectType = typename ContainerValueType<ContainerType>::type;
+
+ // Get the object for the given index in the current thread
+ ObjectType &get(IndexType Index) {
+ auto &Entry = this->getThreadEntry();
+
+ // specialized code for vector-like containers
+ if constexpr (has_resize<ContainerType>::value) {
+ if (Index >= Entry.size()) {
+ if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
+ if (Entry.capacity() < reserveSize)
+ Entry.reserve(reserveSize);
+ }
+ // If the index is out of bounds, try resize the container
+ Entry.resize(Index + 1);
+ }
+ }
+ ObjectType &Ret = Entry[Index];
+ this->setNElements(Entry.size());
+ return Ret;
+ }
+};
+
#endif
>From e0c46a67f381867df4a6e9af27358c0866e702d2 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:04:08 +0200
Subject: [PATCH 02/17] Change container
---
offload/include/PerThreadTable.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 2b2327985a78c..e0d669998f260 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -14,6 +14,7 @@
#define OFFLOAD_PERTHREADTABLE_H
#include <list>
+#include <llvm/ADT/SmallVector.h>
#include <memory>
#include <mutex>
#include <type_traits>
@@ -24,7 +25,7 @@ template <typename ObjectType> struct PerThread {
};
std::mutex Mutex;
- std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+ llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
// define default constructors, disable copy and move constructors
PerThread() = default;
>From 0d3d6dc614d98333cd142fc1cab2940059e07260 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:06:09 +0200
Subject: [PATCH 03/17] Add deinit method
---
offload/include/PerThreadTable.h | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index e0d669998f260..dc6adf9bbb21d 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -15,6 +15,7 @@
#include <list>
#include <llvm/ADT/SmallVector.h>
+#include <llvm/Support/Error.h>
#include <memory>
#include <mutex>
#include <type_traits>
@@ -205,6 +206,24 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
ThreadDataList.clear();
}
+
+ template <class F> llvm::Error deinit(F f) {
+ std::lock_guard<std::mutex> Lock(Mtx);
+ for (auto ThData : ThreadDataList) {
+ if (!ThData->ThEntry || ThData->NElements == 0)
+ continue;
+ for (auto &Obj : *ThData->ThEntry) {
+ if constexpr (is_associative<ContainerType>::value) {
+ if (auto Err = f(Obj.second))
+ return Err;
+ } else {
+ if (auto Err = f(Obj))
+ return Err;
+ }
+ }
+ }
+ return llvm::Error::success();
+ }
};
template <typename T, typename = std::void_t<>> struct ContainerValueType {
>From 2de9273091bd7dafdf596d58856dcd7b8f6b8842 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:14:57 +0200
Subject: [PATCH 04/17] renaming variables, remove some locks
---
offload/include/PerThreadTable.h | 54 +++++++++++++++-----------------
1 file changed, 25 insertions(+), 29 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index dc6adf9bbb21d..6d6067364f4cf 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -35,7 +35,6 @@ template <typename ObjectType> struct PerThread {
PerThread &operator=(const PerThread &) = delete;
PerThread &operator=(PerThread &&) = delete;
~PerThread() {
- std::lock_guard<std::mutex> Lock(Mutex);
ThreadDataList.clear();
}
@@ -63,7 +62,6 @@ template <typename ObjectType> struct PerThread {
ObjectType &get() { return getThreadEntry(); }
template <class F> void clear(F f) {
- std::lock_guard<std::mutex> Lock(Mutex);
for (auto ThData : ThreadDataList) {
if (!ThData->ThEntry)
continue;
@@ -107,10 +105,10 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
struct PerThreadData {
size_t NElements = 0;
- std::unique_ptr<ContainerType> ThEntry;
+ std::unique_ptr<ContainerType> ThreadEntry;
};
- std::mutex Mtx;
+ std::mutex Mutex;
std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
// define default constructors, disable copy and move constructors
@@ -120,33 +118,32 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
PerThreadTable &operator=(const PerThreadTable &) = delete;
PerThreadTable &operator=(PerThreadTable &&) = delete;
~PerThreadTable() {
- std::lock_guard<std::mutex> Lock(Mtx);
ThreadDataList.clear();
}
private:
PerThreadData &getThreadData() {
- static thread_local std::shared_ptr<PerThreadData> ThData = nullptr;
- if (!ThData) {
- ThData = std::make_shared<PerThreadData>();
- std::lock_guard<std::mutex> Lock(Mtx);
- ThreadDataList.push_back(ThData);
+ static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+ if (!ThreadData) {
+ ThreadData = std::make_shared<PerThreadData>();
+ std::lock_guard<std::mutex> Lock(Mutex);
+ ThreadDataList.push_back(ThreadData);
}
- return *ThData;
+ return *ThreadData;
}
protected:
ContainerType &getThreadEntry() {
- auto &ThData = getThreadData();
- if (ThData.ThEntry)
- return *ThData.ThEntry;
- ThData.ThEntry = std::make_unique<ContainerType>();
- return *ThData.ThEntry;
+ auto &ThreadData = getThreadData();
+ if (ThreadData.ThreadEntry)
+ return *ThreadData.ThreadEntry;
+ ThreadData.ThreadEntry = std::make_unique<ContainerType>();
+ return *ThreadData.ThreadEntry;
}
size_t &getThreadNElements() {
- auto &ThData = getThreadData();
- return ThData.NElements;
+ auto &ThreadData = getThreadData();
+ return ThreadData.NElements;
}
void setNElements(size_t Size) {
@@ -183,36 +180,35 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class F> void clear(F f) {
- std::lock_guard<std::mutex> Lock(Mtx);
- for (auto ThData : ThreadDataList) {
- if (!ThData->ThEntry || ThData->NElements == 0)
+ std::lock_guard<std::mutex> Lock(Mutex);
+ for (auto ThreadData : ThreadDataList) {
+ if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
if constexpr (has_clearAll<ContainerType>::value) {
- ThData->ThEntry->clearAll(f);
+ ThreadData->ThreadEntry->clearAll(f);
} else if constexpr (has_iterator<ContainerType>::value &&
has_clear<ContainerType>::value) {
- for (auto &Obj : *ThData->ThEntry) {
+ for (auto &Obj : *ThreadData->ThreadEntry) {
if constexpr (is_associative<ContainerType>::value) {
f(Obj.second);
} else {
f(Obj);
}
}
- ThData->ThEntry->clear();
+ ThreadData->ThreadEntry->clear();
} else {
static_assert(true, "Container type not supported");
}
- ThData->NElements = 0;
+ ThreadData->NElements = 0;
}
ThreadDataList.clear();
}
template <class F> llvm::Error deinit(F f) {
- std::lock_guard<std::mutex> Lock(Mtx);
- for (auto ThData : ThreadDataList) {
- if (!ThData->ThEntry || ThData->NElements == 0)
+ for (auto ThreadData : ThreadDataList) {
+ if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
- for (auto &Obj : *ThData->ThEntry) {
+ for (auto &Obj : *ThreadData->ThreadEntry) {
if constexpr (is_associative<ContainerType>::value) {
if (auto Err = f(Obj.second))
return Err;
>From f4e89a986d2a1b31c42b43b036b7bd6316767870 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:16:41 +0200
Subject: [PATCH 05/17] format
---
offload/include/PerThreadTable.h | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 6d6067364f4cf..466291503edfc 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -34,9 +34,7 @@ template <typename ObjectType> struct PerThread {
PerThread(PerThread &&) = delete;
PerThread &operator=(const PerThread &) = delete;
PerThread &operator=(PerThread &&) = delete;
- ~PerThread() {
- ThreadDataList.clear();
- }
+ ~PerThread() { ThreadDataList.clear(); }
private:
PerThreadData &getThreadData() {
@@ -117,9 +115,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
PerThreadTable(PerThreadTable &&) = delete;
PerThreadTable &operator=(const PerThreadTable &) = delete;
PerThreadTable &operator=(PerThreadTable &&) = delete;
- ~PerThreadTable() {
- ThreadDataList.clear();
- }
+ ~PerThreadTable() { ThreadDataList.clear(); }
private:
PerThreadData &getThreadData() {
>From 5ebf0a4e591dba5a1960b8c1972d7437134f50d8 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:19:21 +0200
Subject: [PATCH 06/17] more renaming
---
offload/include/PerThreadTable.h | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 466291503edfc..b27fbc8e09408 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -49,21 +49,21 @@ template <typename ObjectType> struct PerThread {
protected:
ObjectType &getThreadEntry() {
- auto &ThData = getThreadData();
- if (ThData.ThEntry)
- return *ThData.ThEntry;
- ThData.ThEntry = std::make_unique<ObjectType>();
- return *ThData.ThEntry;
+ auto &ThreadData = getThreadData();
+ if (ThreadData.ThreadEntry)
+ return *ThreadData.ThreadEntry;
+ ThreadData.ThreadEntry = std::make_unique<ObjectType>();
+ return *ThreadData.ThreadEntry;
}
public:
ObjectType &get() { return getThreadEntry(); }
template <class F> void clear(F f) {
- for (auto ThData : ThreadDataList) {
- if (!ThData->ThEntry)
+ for (auto ThreadData : ThreadDataList) {
+ if (!ThreadData->ThreadEntry)
continue;
- f(*ThData->ThEntry);
+ f(*ThreadData->ThreadEntry);
}
ThreadDataList.clear();
}
>From 8bff29d07671eb5e235137e9001151932a861d12 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 10:41:34 +0200
Subject: [PATCH 07/17] forgot one mutex
---
offload/include/PerThreadTable.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index b27fbc8e09408..1263268ac8e45 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -176,7 +176,6 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class F> void clear(F f) {
- std::lock_guard<std::mutex> Lock(Mutex);
for (auto ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
>From d29eac0aeb07c1fbe96aeea40c1a971d36906bb6 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 11:06:48 +0200
Subject: [PATCH 08/17] missed one container
---
offload/include/PerThreadTable.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 1263268ac8e45..3f887282eeecf 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -107,7 +107,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
};
std::mutex Mutex;
- std::list<std::shared_ptr<PerThreadData>> ThreadDataList;
+ llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
// define default constructors, disable copy and move constructors
PerThreadTable() = default;
>From caedd4d6891b5c85ac72f9ee6f137ee6851df32b Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 22:53:14 +0200
Subject: [PATCH 09/17] remove auto
---
offload/include/PerThreadTable.h | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 3f887282eeecf..0177e871ed664 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -49,7 +49,7 @@ template <typename ObjectType> struct PerThread {
protected:
ObjectType &getThreadEntry() {
- auto &ThreadData = getThreadData();
+ PerThreadData &ThreadData = getThreadData();
if (ThreadData.ThreadEntry)
return *ThreadData.ThreadEntry;
ThreadData.ThreadEntry = std::make_unique<ObjectType>();
@@ -60,7 +60,7 @@ template <typename ObjectType> struct PerThread {
ObjectType &get() { return getThreadEntry(); }
template <class F> void clear(F f) {
- for (auto ThreadData : ThreadDataList) {
+ for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry)
continue;
f(*ThreadData->ThreadEntry);
@@ -130,7 +130,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
protected:
ContainerType &getThreadEntry() {
- auto &ThreadData = getThreadData();
+ PerThreadData &ThreadData = getThreadData();
if (ThreadData.ThreadEntry)
return *ThreadData.ThreadEntry;
ThreadData.ThreadEntry = std::make_unique<ContainerType>();
@@ -138,26 +138,26 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
size_t &getThreadNElements() {
- auto &ThreadData = getThreadData();
+ PerThreadData &ThreadData = getThreadData();
return ThreadData.NElements;
}
void setNElements(size_t Size) {
- auto &NElements = getThreadNElements();
+ size_t &NElements = getThreadNElements();
NElements = Size;
}
public:
void add(ObjectType obj) {
- auto &Entry = getThreadEntry();
- auto &NElements = getThreadNElements();
+ ContainerType &Entry = getThreadEntry();
+ size_t &NElements = getThreadNElements();
NElements++;
Entry.add(obj);
}
iterator erase(iterator it) {
- auto &Entry = getThreadEntry();
- auto &NElements = getThreadNElements();
+ ContainerType &Entry = getThreadEntry();
+ size_t &NElements = getThreadNElements();
NElements--;
return Entry.erase(it);
}
@@ -167,11 +167,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
// Iterators to traverse objects owned by
// the current thread
iterator begin() {
- auto &Entry = getThreadEntry();
+ ContainerType &Entry = getThreadEntry();
return Entry.begin();
}
iterator end() {
- auto &Entry = getThreadEntry();
+ ContainerType &Entry = getThreadEntry();
return Entry.end();
}
@@ -200,7 +200,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class F> llvm::Error deinit(F f) {
- for (auto ThreadData : ThreadDataList) {
+ for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
for (auto &Obj : *ThreadData->ThreadEntry) {
@@ -254,7 +254,7 @@ struct PerThreadContainer
// Get the object for the given index in the current thread
ObjectType &get(IndexType Index) {
- auto &Entry = this->getThreadEntry();
+ ContainerType &Entry = this->getThreadEntry();
// specialized code for vector-like containers
if constexpr (has_resize<ContainerType>::value) {
>From 01f9c447213dd8d66aa768e4b6a826bfca81dd55 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:12:57 +0200
Subject: [PATCH 10/17] missed one
---
offload/include/PerThreadTable.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 0177e871ed664..936eebaaf6155 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -176,7 +176,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class F> void clear(F f) {
- for (auto ThreadData : ThreadDataList) {
+ for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
if constexpr (has_clearAll<ContainerType>::value) {
>From e80dcf9f17761ad0d3adb9c51fca08dac252cb70 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:16:56 +0200
Subject: [PATCH 11/17] Some renaming
---
offload/include/PerThreadTable.h | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 936eebaaf6155..67c7d74eae2c5 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -59,11 +59,11 @@ template <typename ObjectType> struct PerThread {
public:
ObjectType &get() { return getThreadEntry(); }
- template <class F> void clear(F f) {
+ template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry)
continue;
- f(*ThreadData->ThreadEntry);
+ ClearFunc(*ThreadData->ThreadEntry);
}
ThreadDataList.clear();
}
@@ -175,19 +175,19 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
return Entry.end();
}
- template <class F> void clear(F f) {
+ template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
if constexpr (has_clearAll<ContainerType>::value) {
- ThreadData->ThreadEntry->clearAll(f);
+ ThreadData->ThreadEntry->clearAll(ClearFunc);
} else if constexpr (has_iterator<ContainerType>::value &&
has_clear<ContainerType>::value) {
for (auto &Obj : *ThreadData->ThreadEntry) {
if constexpr (is_associative<ContainerType>::value) {
- f(Obj.second);
+ ClearFunc(Obj.second);
} else {
- f(Obj);
+ ClearFunc(Obj);
}
}
ThreadData->ThreadEntry->clear();
@@ -199,16 +199,16 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
ThreadDataList.clear();
}
- template <class F> llvm::Error deinit(F f) {
+ template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
for (auto &Obj : *ThreadData->ThreadEntry) {
if constexpr (is_associative<ContainerType>::value) {
- if (auto Err = f(Obj.second))
+ if (auto Err = DeinitFunc(Obj.second))
return Err;
} else {
- if (auto Err = f(Obj))
+ if (auto Err = DeinitFunc(Obj))
return Err;
}
}
>From 5451f2294da3f2f4fb6eaa925838f54b17c64008 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Thu, 23 Oct 2025 23:25:40 +0200
Subject: [PATCH 12/17] Add asserts
---
offload/include/PerThreadTable.h | 18 ++++++++++++++++--
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 67c7d74eae2c5..d8222d99b6515 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -34,7 +34,11 @@ template <typename ObjectType> struct PerThread {
PerThread(PerThread &&) = delete;
PerThread &operator=(const PerThread &) = delete;
PerThread &operator=(PerThread &&) = delete;
- ~PerThread() { ThreadDataList.clear(); }
+ ~PerThread() {
+ assert(Mutex.try_lock() &&
+ "Cannot be deleted while other threads are adding entries");
+ ThreadDataList.clear();
+ }
private:
PerThreadData &getThreadData() {
@@ -60,6 +64,8 @@ template <typename ObjectType> struct PerThread {
ObjectType &get() { return getThreadEntry(); }
template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+ assert(Mutex.try_lock() &&
+ "Clear cannot be called while other threads are adding entries");
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry)
continue;
@@ -115,7 +121,11 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
PerThreadTable(PerThreadTable &&) = delete;
PerThreadTable &operator=(const PerThreadTable &) = delete;
PerThreadTable &operator=(PerThreadTable &&) = delete;
- ~PerThreadTable() { ThreadDataList.clear(); }
+ ~PerThreadTable() {
+ assert(Mutex.try_lock() &&
+ "Cannot be deleted while other threads are adding entries");
+ ThreadDataList.clear();
+ }
private:
PerThreadData &getThreadData() {
@@ -176,6 +186,8 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
+ assert(Mutex.try_lock() &&
+ "Clear cannot be called while other threads are adding entries");
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
@@ -200,6 +212,8 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
+ assert(Mutex.try_lock() &&
+ "Deinit cannot be called while other threads are adding entries");
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
>From a14ee62b1a7ac0dda4bc8eb39fad04cb504bee32 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Tue, 4 Nov 2025 22:11:55 +0100
Subject: [PATCH 13/17] Fix asserts to not be undefined
---
offload/include/PerThreadTable.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index d8222d99b6515..56e2d75ccd198 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -35,7 +35,7 @@ template <typename ObjectType> struct PerThread {
PerThread &operator=(const PerThread &) = delete;
PerThread &operator=(PerThread &&) = delete;
~PerThread() {
- assert(Mutex.try_lock() &&
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
"Cannot be deleted while other threads are adding entries");
ThreadDataList.clear();
}
@@ -64,7 +64,7 @@ template <typename ObjectType> struct PerThread {
ObjectType &get() { return getThreadEntry(); }
template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
- assert(Mutex.try_lock() &&
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
"Clear cannot be called while other threads are adding entries");
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry)
@@ -122,7 +122,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
PerThreadTable &operator=(const PerThreadTable &) = delete;
PerThreadTable &operator=(PerThreadTable &&) = delete;
~PerThreadTable() {
- assert(Mutex.try_lock() &&
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
"Cannot be deleted while other threads are adding entries");
ThreadDataList.clear();
}
@@ -186,7 +186,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
- assert(Mutex.try_lock() &&
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
"Clear cannot be called while other threads are adding entries");
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
@@ -212,7 +212,7 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
template <class DeinitFuncTy> llvm::Error deinit(DeinitFuncTy DeinitFunc) {
- assert(Mutex.try_lock() &&
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
"Deinit cannot be called while other threads are adding entries");
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
>From 579264a4801ac780c0539eb9f856c400bb390f48 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 12:19:17 +0100
Subject: [PATCH 14/17] minor refactoring
---
offload/include/PerThreadTable.h | 74 +++++++++++++++-----------------
1 file changed, 35 insertions(+), 39 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index 56e2d75ccd198..ad147657b9228 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -20,7 +20,7 @@
#include <mutex>
#include <type_traits>
-template <typename ObjectType> struct PerThread {
+template <typename ObjectType> class PerThread {
struct PerThreadData {
std::unique_ptr<ObjectType> ThreadEntry;
};
@@ -28,19 +28,6 @@ template <typename ObjectType> struct PerThread {
std::mutex Mutex;
llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
- // define default constructors, disable copy and move constructors
- PerThread() = default;
- PerThread(const PerThread &) = delete;
- PerThread(PerThread &&) = delete;
- PerThread &operator=(const PerThread &) = delete;
- PerThread &operator=(PerThread &&) = delete;
- ~PerThread() {
- assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
- "Cannot be deleted while other threads are adding entries");
- ThreadDataList.clear();
- }
-
-private:
PerThreadData &getThreadData() {
static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
if (!ThreadData) {
@@ -51,7 +38,6 @@ template <typename ObjectType> struct PerThread {
return *ThreadData;
}
-protected:
ObjectType &getThreadEntry() {
PerThreadData &ThreadData = getThreadData();
if (ThreadData.ThreadEntry)
@@ -61,6 +47,18 @@ template <typename ObjectType> struct PerThread {
}
public:
+ // define default constructors, disable copy and move constructors
+ PerThread() = default;
+ PerThread(const PerThread &) = delete;
+ PerThread(PerThread &&) = delete;
+ PerThread &operator=(const PerThread &) = delete;
+ PerThread &operator=(PerThread &&) = delete;
+ ~PerThread() {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Cannot be deleted while other threads are adding entries");
+ ThreadDataList.clear();
+ }
+
ObjectType &get() { return getThreadEntry(); }
template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
@@ -78,10 +76,10 @@ template <typename ObjectType> struct PerThread {
// Using an STL container (such as std::vector) indexed by thread ID has
// too many race conditions issues so we store each thread entry into a
// thread_local variable.
-// T is the container type used to store the objects, e.g., std::vector,
-// std::set, etc. by each thread. O is the type of the stored objects e.g.,
-// omp_interop_val_t *, ...
-template <typename ContainerType, typename ObjectType> struct PerThreadTable {
+// ContainerType is the container type used to store the objects, e.g.,
+// std::vector, std::set, etc. by each thread. ObjectType is the type of the
+// stored objects e.g., omp_interop_val_t *, ...
+template <typename ContainerType, typename ObjectType> class PerThreadTable {
using iterator = typename ContainerType::iterator;
template <typename, typename = std::void_t<>>
@@ -115,19 +113,6 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
std::mutex Mutex;
llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
- // define default constructors, disable copy and move constructors
- PerThreadTable() = default;
- PerThreadTable(const PerThreadTable &) = delete;
- PerThreadTable(PerThreadTable &&) = delete;
- PerThreadTable &operator=(const PerThreadTable &) = delete;
- PerThreadTable &operator=(PerThreadTable &&) = delete;
- ~PerThreadTable() {
- assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
- "Cannot be deleted while other threads are adding entries");
- ThreadDataList.clear();
- }
-
-private:
PerThreadData &getThreadData() {
static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
if (!ThreadData) {
@@ -158,6 +143,18 @@ template <typename ContainerType, typename ObjectType> struct PerThreadTable {
}
public:
+ // define default constructors, disable copy and move constructors
+ PerThreadTable() = default;
+ PerThreadTable(const PerThreadTable &) = delete;
+ PerThreadTable(PerThreadTable &&) = delete;
+ PerThreadTable &operator=(const PerThreadTable &) = delete;
+ PerThreadTable &operator=(PerThreadTable &&) = delete;
+ ~PerThreadTable() {
+ assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
+ "Cannot be deleted while other threads are adding entries");
+ ThreadDataList.clear();
+ }
+
void add(ObjectType obj) {
ContainerType &Entry = getThreadEntry();
size_t &NElements = getThreadNElements();
@@ -239,8 +236,8 @@ struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
using type = typename T::mapped_type;
};
-template <typename ContainerType, size_t reserveSize = 0>
-struct PerThreadContainer
+template <typename ContainerType, size_t ReserveSize = 0>
+class PerThreadContainer
: public PerThreadTable<ContainerType,
typename ContainerValueType<ContainerType>::type> {
@@ -265,7 +262,7 @@ struct PerThreadContainer
using IndexType = typename indexType<ContainerType>::type;
using ObjectType = typename ContainerValueType<ContainerType>::type;
-
+public:
// Get the object for the given index in the current thread
ObjectType &get(IndexType Index) {
ContainerType &Entry = this->getThreadEntry();
@@ -273,10 +270,9 @@ struct PerThreadContainer
// specialized code for vector-like containers
if constexpr (has_resize<ContainerType>::value) {
if (Index >= Entry.size()) {
- if constexpr (has_reserve<ContainerType>::value && reserveSize > 0) {
- if (Entry.capacity() < reserveSize)
- Entry.reserve(reserveSize);
- }
+ if constexpr (has_reserve<ContainerType>::value && ReserveSize > 0)
+ Entry.reserve(ReserveSize);
+
// If the index is out of bounds, try resize the container
Entry.resize(Index + 1);
}
>From 26fbcc0eb4b888ef7675682ebdbaa01392d4ecaa Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 14:35:40 +0100
Subject: [PATCH 15/17] Refactor container concepts checks
---
offload/include/PerThreadTable.h | 124 +++++++++++++++----------------
1 file changed, 62 insertions(+), 62 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index ad147657b9228..f118f4d617fa8 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -73,6 +73,54 @@ template <typename ObjectType> class PerThread {
}
};
+template <typename ContainerTy> struct ContainerConcepts {
+ template <typename, template <typename> class, typename = std::void_t<>>
+ struct has : std::false_type {};
+ template <typename Ty, template <typename> class Op>
+ struct has<Ty, Op, std::void_t<Op<Ty>>> : std::true_type {};
+
+ template <typename Ty> using IteratorTypeCheck = typename Ty::iterator;
+ template <typename Ty> using MappedTypeCheck = typename Ty::mapped_type;
+ template <typename Ty> using ValueTypeCheck = typename Ty::value_type;
+ template <typename Ty> using KeyTypeCheck = typename Ty::key_type;
+ template <typename Ty> using SizeTyCheck = typename Ty::size_type;
+
+ template <typename Ty>
+ using ClearCheck = decltype(std::declval<Ty>().clear());
+ template <typename Ty>
+ using ClearAllCheck = decltype(std::declval<Ty>().clearAll(1));
+ template <typename Ty>
+ using ReserveCheck = decltype(std::declval<Ty>().reserve(1));
+ template <typename Ty>
+ using ResizeCheck = decltype(std::declval<Ty>().resize(1));
+
+ static constexpr bool hasIterator =
+ has<ContainerTy, IteratorTypeCheck>::value;
+ static constexpr bool hasClear = has<ContainerTy, ClearCheck>::value;
+ static constexpr bool hasClearAll = has<ContainerTy, ClearAllCheck>::value;
+ static constexpr bool isAssociative =
+ has<ContainerTy, MappedTypeCheck>::value;
+ static constexpr bool hasReserve = has<ContainerTy, ReserveCheck>::value;
+ static constexpr bool hasResize = has<ContainerTy, ResizeCheck>::value;
+
+ template <typename, template <typename> class, typename = std::void_t<>>
+ struct has_type {
+ using type = void;
+ };
+ template <typename Ty, template <typename> class Op>
+ struct has_type<Ty, Op, std::void_t<Op<Ty>>> {
+ using type = Op<Ty>;
+ };
+
+ using iterator = typename has_type<ContainerTy, IteratorTypeCheck>::type;
+ using value_type = typename std::conditional_t<
+ isAssociative, typename has_type<ContainerTy, MappedTypeCheck>::type,
+ typename has_type<ContainerTy, ValueTypeCheck>::type>;
+ using key_type = typename std::conditional_t<
+ isAssociative, typename has_type<ContainerTy, KeyTypeCheck>::type,
+ typename has_type<ContainerTy, SizeTyCheck>::type>;
+};
+
// Using an STL container (such as std::vector) indexed by thread ID has
// too many race conditions issues so we store each thread entry into a
// thread_local variable.
@@ -80,30 +128,7 @@ template <typename ObjectType> class PerThread {
// std::vector, std::set, etc. by each thread. ObjectType is the type of the
// stored objects e.g., omp_interop_val_t *, ...
template <typename ContainerType, typename ObjectType> class PerThreadTable {
- using iterator = typename ContainerType::iterator;
-
- template <typename, typename = std::void_t<>>
- struct has_iterator : std::false_type {};
- template <typename T>
- struct has_iterator<T, std::void_t<typename T::iterator>> : std::true_type {};
-
- template <typename T, typename = std::void_t<>>
- struct has_clear : std::false_type {};
- template <typename T>
- struct has_clear<T, std::void_t<decltype(std::declval<T>().clear())>>
- : std::true_type {};
-
- template <typename T, typename = std::void_t<>>
- struct has_clearAll : std::false_type {};
- template <typename T>
- struct has_clearAll<T, std::void_t<decltype(std::declval<T>().clearAll(1))>>
- : std::true_type {};
-
- template <typename, typename = std::void_t<>>
- struct is_associative : std::false_type {};
- template <typename T>
- struct is_associative<T, std::void_t<typename T::mapped_type>>
- : std::true_type {};
+ using iterator = typename ContainerConcepts<ContainerType>::iterator;
struct PerThreadData {
size_t NElements = 0;
@@ -188,12 +213,12 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
- if constexpr (has_clearAll<ContainerType>::value) {
+ if constexpr (ContainerConcepts<ContainerType>::hasClearAll) {
ThreadData->ThreadEntry->clearAll(ClearFunc);
- } else if constexpr (has_iterator<ContainerType>::value &&
- has_clear<ContainerType>::value) {
+ } else if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
+ ContainerConcepts<ContainerType>::hasClear) {
for (auto &Obj : *ThreadData->ThreadEntry) {
- if constexpr (is_associative<ContainerType>::value) {
+ if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
ClearFunc(Obj.second);
} else {
ClearFunc(Obj);
@@ -215,7 +240,7 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
for (auto &Obj : *ThreadData->ThreadEntry) {
- if constexpr (is_associative<ContainerType>::value) {
+ if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
if (auto Err = DeinitFunc(Obj.second))
return Err;
} else {
@@ -228,49 +253,24 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
}
};
-template <typename T, typename = std::void_t<>> struct ContainerValueType {
- using type = typename T::value_type;
-};
-template <typename T>
-struct ContainerValueType<T, std::void_t<typename T::mapped_type>> {
- using type = typename T::mapped_type;
-};
-
template <typename ContainerType, size_t ReserveSize = 0>
class PerThreadContainer
- : public PerThreadTable<ContainerType,
- typename ContainerValueType<ContainerType>::type> {
+ : public PerThreadTable<ContainerType, typename ContainerConcepts<
+ ContainerType>::value_type> {
+
+ using IndexType = typename ContainerConcepts<ContainerType>::key_type;
+ using ObjectType = typename ContainerConcepts<ContainerType>::value_type;
- // helpers
- template <typename T, typename = std::void_t<>> struct indexType {
- using type = typename T::size_type;
- };
- template <typename T> struct indexType<T, std::void_t<typename T::key_type>> {
- using type = typename T::key_type;
- };
- template <typename T, typename = std::void_t<>>
- struct has_resize : std::false_type {};
- template <typename T>
- struct has_resize<T, std::void_t<decltype(std::declval<T>().resize(1))>>
- : std::true_type {};
-
- template <typename T, typename = std::void_t<>>
- struct has_reserve : std::false_type {};
- template <typename T>
- struct has_reserve<T, std::void_t<decltype(std::declval<T>().reserve(1))>>
- : std::true_type {};
-
- using IndexType = typename indexType<ContainerType>::type;
- using ObjectType = typename ContainerValueType<ContainerType>::type;
public:
// Get the object for the given index in the current thread
ObjectType &get(IndexType Index) {
ContainerType &Entry = this->getThreadEntry();
// specialized code for vector-like containers
- if constexpr (has_resize<ContainerType>::value) {
+ if constexpr (ContainerConcepts<ContainerType>::hasResize) {
if (Index >= Entry.size()) {
- if constexpr (has_reserve<ContainerType>::value && ReserveSize > 0)
+ if constexpr (ContainerConcepts<ContainerType>::hasReserve &&
+ ReserveSize > 0)
Entry.reserve(ReserveSize);
// If the index is out of bounds, try resize the container
>From 32819dff8d0120c069cd0f51d6358b523dea4800 Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 14:39:15 +0100
Subject: [PATCH 16/17] remove clearAll
---
offload/include/PerThreadTable.h | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index f118f4d617fa8..f2e54eaf9abf1 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -88,8 +88,6 @@ template <typename ContainerTy> struct ContainerConcepts {
template <typename Ty>
using ClearCheck = decltype(std::declval<Ty>().clear());
template <typename Ty>
- using ClearAllCheck = decltype(std::declval<Ty>().clearAll(1));
- template <typename Ty>
using ReserveCheck = decltype(std::declval<Ty>().reserve(1));
template <typename Ty>
using ResizeCheck = decltype(std::declval<Ty>().resize(1));
@@ -97,7 +95,6 @@ template <typename ContainerTy> struct ContainerConcepts {
static constexpr bool hasIterator =
has<ContainerTy, IteratorTypeCheck>::value;
static constexpr bool hasClear = has<ContainerTy, ClearCheck>::value;
- static constexpr bool hasClearAll = has<ContainerTy, ClearAllCheck>::value;
static constexpr bool isAssociative =
has<ContainerTy, MappedTypeCheck>::value;
static constexpr bool hasReserve = has<ContainerTy, ReserveCheck>::value;
@@ -213,10 +210,8 @@ template <typename ContainerType, typename ObjectType> class PerThreadTable {
for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
if (!ThreadData->ThreadEntry || ThreadData->NElements == 0)
continue;
- if constexpr (ContainerConcepts<ContainerType>::hasClearAll) {
- ThreadData->ThreadEntry->clearAll(ClearFunc);
- } else if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
- ContainerConcepts<ContainerType>::hasClear) {
+ if constexpr (ContainerConcepts<ContainerType>::hasIterator &&
+ ContainerConcepts<ContainerType>::hasClear) {
for (auto &Obj : *ThreadData->ThreadEntry) {
if constexpr (ContainerConcepts<ContainerType>::isAssociative) {
ClearFunc(Obj.second);
>From 2d4f36464d83fed2ac72bacec730eff5f6d6a49d Mon Sep 17 00:00:00 2001
From: Alex Duran <alejandro.duran at intel.com>
Date: Wed, 19 Nov 2025 14:57:37 +0100
Subject: [PATCH 17/17] Simplify PerThreadData
---
offload/include/PerThreadTable.h | 28 ++++++++--------------------
1 file changed, 8 insertions(+), 20 deletions(-)
diff --git a/offload/include/PerThreadTable.h b/offload/include/PerThreadTable.h
index f2e54eaf9abf1..f26f0b45a8520 100644
--- a/offload/include/PerThreadTable.h
+++ b/offload/include/PerThreadTable.h
@@ -21,31 +21,19 @@
#include <type_traits>
template <typename ObjectType> class PerThread {
- struct PerThreadData {
- std::unique_ptr<ObjectType> ThreadEntry;
- };
-
std::mutex Mutex;
- llvm::SmallVector<std::shared_ptr<PerThreadData>> ThreadDataList;
+ llvm::SmallVector<std::shared_ptr<ObjectType>> ThreadDataList;
- PerThreadData &getThreadData() {
- static thread_local std::shared_ptr<PerThreadData> ThreadData = nullptr;
+ ObjectType &getThreadData() {
+ static thread_local std::shared_ptr<ObjectType> ThreadData = nullptr;
if (!ThreadData) {
- ThreadData = std::make_shared<PerThreadData>();
+ ThreadData = std::make_shared<ObjectType>();
std::lock_guard<std::mutex> Lock(Mutex);
ThreadDataList.push_back(ThreadData);
}
return *ThreadData;
}
- ObjectType &getThreadEntry() {
- PerThreadData &ThreadData = getThreadData();
- if (ThreadData.ThreadEntry)
- return *ThreadData.ThreadEntry;
- ThreadData.ThreadEntry = std::make_unique<ObjectType>();
- return *ThreadData.ThreadEntry;
- }
-
public:
// define default constructors, disable copy and move constructors
PerThread() = default;
@@ -59,15 +47,15 @@ template <typename ObjectType> class PerThread {
ThreadDataList.clear();
}
- ObjectType &get() { return getThreadEntry(); }
+ ObjectType &get() { return getThreadData(); }
template <class ClearFuncTy> void clear(ClearFuncTy ClearFunc) {
assert(Mutex.try_lock() && (Mutex.unlock(), true) &&
"Clear cannot be called while other threads are adding entries");
- for (std::shared_ptr<PerThreadData> ThreadData : ThreadDataList) {
- if (!ThreadData->ThreadEntry)
+ for (std::shared_ptr<ObjectType> ThreadData : ThreadDataList) {
+ if (!ThreadData)
continue;
- ClearFunc(*ThreadData->ThreadEntry);
+ ClearFunc(*ThreadData);
}
ThreadDataList.clear();
}
More information about the llvm-commits
mailing list