[llvm-branch-commits] [openmp] f43958b - [OpenMP] Fixed a crash in hidden helper thread
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Mar 29 16:30:20 PDT 2021
Author: Shilei Tian
Date: 2021-03-29T16:29:53-07:00
New Revision: f43958b7c497c526b238607624ee0069888f4c98
URL: https://github.com/llvm/llvm-project/commit/f43958b7c497c526b238607624ee0069888f4c98
DIFF: https://github.com/llvm/llvm-project/commit/f43958b7c497c526b238607624ee0069888f4c98.diff
LOG: [OpenMP] Fixed a crash in hidden helper thread
It is reported that, after enabling hidden helper threads, a program
can sometimes hit the assertion `new_gtid < __kmp_threads_capacity`. The root
cause is as follows. Let's say the default `__kmp_threads_capacity` is
`N`. If hidden helper threads are enabled, `__kmp_threads_capacity` is offset
to `N+8` by default. If the number of threads we need exceeds `N+8`, e.g. via a
`num_threads` clause, we need to expand `__kmp_threads`. In
`__kmp_expand_threads`, the expansion starts from `__kmp_threads_capacity` and
repeatedly doubles it until the new capacity meets the requirement. Let's
assume the new requirement is `Y`. If `Y` happens to satisfy
`(N+8)*2^X=Y`, where `X` is the number of doubling iterations, the new capacity
is not enough because 8 of those slots are reserved for hidden helper threads.
Here is an example.
```
#include <vector>
int main(int argc, char *argv[]) {
constexpr const size_t N = 1344;
std::vector<int> data(N);
#pragma omp parallel for
for (unsigned i = 0; i < N; ++i) {
data[i] = i;
}
#pragma omp parallel for num_threads(N)
for (unsigned i = 0; i < N; ++i) {
data[i] += i;
}
return 0;
}
```
My CPU is 20C40T (20 cores, 40 hardware threads), so `__kmp_threads_capacity`
is 160. After the offset, `__kmp_threads_capacity` becomes 168. Since
`1344 = (160+8)*2^3`, the assertion is hit.
Reviewed By: protze.joachim
Differential Revision: https://reviews.llvm.org/D98838
(cherry picked from commit 2df65f87c1ea81008768e14522e5d9277234ba70)
Added:
openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
Modified:
openmp/runtime/src/kmp_runtime.cpp
openmp/runtime/src/kmp_settings.cpp
Removed:
################################################################################
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index a6e32bd008e10..b981f8740dbe2 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -920,6 +920,12 @@ static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
if (TCR_PTR(__kmp_threads[0]) == NULL) {
--capacity;
}
+ // If it is not for initializing the hidden helper team, we need to take
+ // __kmp_hidden_helper_threads_num out of the capacity because it is included
+ // in __kmp_threads_capacity.
+ if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+ capacity -= __kmp_hidden_helper_threads_num;
+ }
if (__kmp_nth + new_nthreads -
(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
capacity) {
@@ -3632,6 +3638,13 @@ int __kmp_register_root(int initial_thread) {
--capacity;
}
+ // If it is not for initializing the hidden helper team, we need to take
+ // __kmp_hidden_helper_threads_num out of the capacity because it is included
+ // in __kmp_threads_capacity.
+ if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
+ capacity -= __kmp_hidden_helper_threads_num;
+ }
+
/* see if there are too many threads */
if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
if (__kmp_tp_cached) {
@@ -3664,7 +3677,7 @@ int __kmp_register_root(int initial_thread) {
/* find an available thread slot */
// Don't reassign the zero slot since we need that to only be used by
// initial thread. Slots for hidden helper threads should also be skipped.
- if (initial_thread && __kmp_threads[0] == NULL) {
+ if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
gtid = 0;
} else {
for (gtid = __kmp_hidden_helper_threads_num + 1;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index b477edbbfb42b..50f6a05faaf59 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -504,9 +504,10 @@ int __kmp_initial_threads_capacity(int req_nproc) {
nth = (4 * __kmp_xproc);
// If hidden helper task is enabled, we initialize the thread capacity with
- // extra
- // __kmp_hidden_helper_threads_num.
- nth += __kmp_hidden_helper_threads_num;
+ // extra __kmp_hidden_helper_threads_num.
+ if (__kmp_enable_hidden_helper) {
+ nth += __kmp_hidden_helper_threads_num;
+ }
if (nth > __kmp_max_nth)
nth = __kmp_max_nth;
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
new file mode 100644
index 0000000000000..776aee9d8e2ca
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_mix_threads.cpp
@@ -0,0 +1,45 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <thread>
+#include <vector>
+
+void dummy_root() {
+ // omp_get_max_threads() will do middle initialization
+ int nthreads = omp_get_max_threads();
+ std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+}
+
+int main(int argc, char *argv[]) {
+ const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+ 4 * omp_get_num_procs()),
+ std::numeric_limits<int>::max());
+
+ std::vector<int> data(N);
+
+ // Create a new thread to initialize the OpenMP RTL. The new thread will not
+ // be taken as the "initial thread".
+ std::thread root(dummy_root);
+
+#pragma omp parallel for num_threads(N)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] = i;
+ }
+
+#pragma omp parallel for num_threads(N + 1)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] += i;
+ }
+
+ for (unsigned i = 0; i < N; ++i) {
+ assert(data[i] == 2 * i);
+ }
+
+ root.join();
+
+ return 0;
+}
diff --git a/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
new file mode 100644
index 0000000000000..a9d394f729e9c
--- /dev/null
+++ b/openmp/runtime/test/tasking/hidden_helper_task/capacity_nthreads.cpp
@@ -0,0 +1,31 @@
+// RUN: %libomp-cxx-compile-and-run
+
+#include <omp.h>
+
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+int main(int argc, char *argv[]) {
+ const int N = std::min(std::max(std::max(32, 4 * omp_get_max_threads()),
+ 4 * omp_get_num_procs()),
+ std::numeric_limits<int>::max());
+
+ std::vector<int> data(N);
+
+#pragma omp parallel for num_threads(N)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] = i;
+ }
+
+#pragma omp parallel for num_threads(N + 1)
+ for (unsigned i = 0; i < N; ++i) {
+ data[i] += i;
+ }
+
+ for (unsigned i = 0; i < N; ++i) {
+ assert(data[i] == 2 * i);
+ }
+
+ return 0;
+}
More information about the llvm-branch-commits
mailing list