[libcxx-commits] [libcxx] [libc++] remove yield from atomic::wait (PR #120012)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sun Dec 15 07:53:27 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libcxx
Author: Hui (huixie90)
<details>
<summary>Changes</summary>
- **[libc++] atomic wait more benchmark**
- **fix compiler error**
- **[libc++] remove yield from atomic::wait**
---
Full diff: https://github.com/llvm/llvm-project/pull/120012.diff
2 Files Affected:
- (modified) libcxx/include/__atomic/atomic_sync.h (+2-4)
- (modified) libcxx/test/benchmarks/atomic_wait.bench.cpp (+275-19)
``````````diff
diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index 153001e7b62e30..5ec792e9b9a29c 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -108,15 +108,13 @@ struct __atomic_wait_backoff_impl {
_LIBCPP_AVAILABILITY_SYNC
_LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
- if (__elapsed > chrono::microseconds(64)) {
+ if (__elapsed > chrono::microseconds(4)) {
auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
__cxx_contention_t __monitor_val;
if (__update_monitor_val_and_poll(__contention_address, __monitor_val))
return true;
std::__libcpp_atomic_wait(__contention_address, __monitor_val);
- } else if (__elapsed > chrono::microseconds(4))
- __libcpp_thread_yield();
- else {
+ } else {
} // poll
return false;
}
diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
index d19f5fbed8ad60..b85aec49471729 100644
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp
@@ -12,21 +12,88 @@
#include <cstdint>
#include <numeric>
#include <stop_token>
+#include <pthread.h>
+#include <sched.h>
#include <thread>
+#include <chrono>
+#include <array>
#include "benchmark/benchmark.h"
#include "make_test_thread.h"
using namespace std::chrono_literals;
-void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
- std::atomic<std::uint64_t> a;
- auto thread_func = [&](std::stop_token st) {
+struct HighPrioTask {
+ sched_param param;
+ pthread_attr_t attr_t;
+ pthread_t thread;
+ std::atomic_bool stopped{false};
+
+ HighPrioTask(const HighPrioTask&) = delete;
+
+ HighPrioTask() {
+ pthread_attr_init(&attr_t);
+ pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO);
+ param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+ pthread_attr_setschedparam(&attr_t, ¶m);
+ pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED);
+
+ auto thread_fun = [](void* arg) -> void* {
+ auto* stop = reinterpret_cast<std::atomic_bool*>(arg);
+ while (!stop->load(std::memory_order_relaxed)) {
+ // spin
+ }
+ return nullptr;
+ };
+
+ if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) {
+ throw std::runtime_error("failed to create thread");
+ }
+ }
+
+ ~HighPrioTask() {
+ stopped = true;
+ pthread_attr_destroy(&attr_t);
+ pthread_join(thread, nullptr);
+ }
+};
+
+
+template <std::size_t N>
+struct NumHighPrioTasks {
+ static constexpr auto value = N;
+};
+
+
+struct KeepNotifying {
+ template <class Atomic>
+ static void notify(Atomic& a, std::stop_token st) {
while (!st.stop_requested()) {
a.fetch_add(1, std::memory_order_relaxed);
a.notify_all();
}
- };
+ }
+};
+
+template <std::size_t N>
+struct NotifyEveryNus {
+ template <class Atomic>
+ static void notify(Atomic& a, std::stop_token st) {
+ while (!st.stop_requested()) {
+ auto start = std::chrono::system_clock::now();
+ a.fetch_add(1, std::memory_order_relaxed);
+ a.notify_all();
+ while (std::chrono::system_clock::now() - start < std::chrono::microseconds{N}) {
+ }
+ }
+ }
+};
+
+template <class NotifyPolicy, class NumPrioTasks>
+void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
+ [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+ std::atomic<std::uint64_t> a;
+ auto thread_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
std::uint64_t total_loop_test_param = state.range(0);
@@ -39,19 +106,34 @@ void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
}
}
}
-BENCHMARK(BM_atomic_wait_one_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
-void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 18, 1 << 20);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 18, 1 << 20);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 6);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 3, 1 << 5);
+
+
+template <std::size_t N>
+struct NumWaitingThreads {
+ static constexpr auto value = N;
+};
+
+template <class NotifyPolicy, class NumWaitingThreads, class NumPrioTasks>
+void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
+ [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+
std::atomic<std::uint64_t> a;
- auto notify_func = [&](std::stop_token st) {
- while (!st.stop_requested()) {
- a.fetch_add(1, std::memory_order_relaxed);
- a.notify_all();
- }
- };
+ auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
std::uint64_t total_loop_test_param = state.range(0);
- constexpr auto num_waiting_threads = 15;
+ constexpr auto num_waiting_threads = NumWaitingThreads::value;
std::vector<std::jthread> wait_threads;
wait_threads.reserve(num_waiting_threads);
@@ -93,17 +175,113 @@ void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
t.join();
}
}
-BENCHMARK(BM_atomic_wait_multi_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 14, 1 << 16);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 12);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 4, 1 << 6);
+
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 4, 1 << 6);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 4, 1 << 6);
-void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state) {
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 4, 1 << 6);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 4, 1 << 6);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 2, 1 << 4);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 2, 1 << 4);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 1, 1 << 3);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 2, 1 << 4);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 1, 1 << 3);
+
+
+template <std::size_t N>
+struct NumberOfAtomics {
+ static constexpr auto value = N;
+};
+
+template <class NotifyPolicy, class NumberOfAtomics, class NumPrioTasks>
+void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
+ [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
const std::uint64_t total_loop_test_param = state.range(0);
- constexpr std::uint64_t num_atomics = 7;
+ constexpr std::uint64_t num_atomics = NumberOfAtomics::value;
std::vector<std::atomic<std::uint64_t>> atomics(num_atomics);
auto notify_func = [&](std::stop_token st, size_t idx) {
while (!st.stop_requested()) {
- atomics[idx].fetch_add(1, std::memory_order_relaxed);
- atomics[idx].notify_all();
+ NotifyPolicy::notify(atomics[idx], st);
}
};
@@ -154,6 +332,84 @@ void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state)
t.join();
}
}
-BENCHMARK(BM_atomic_wait_multi_thread_wait_different_atomics)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 12, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 12);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 12);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 10, 1 << 12);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 8, 1 << 10);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 7, 1 << 9);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 7, 1 << 9);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 7, 1 << 9);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 4, 1 << 6);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 7, 1 << 9);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 7, 1 << 9);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 5, 1 << 7);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 3, 1 << 5);
+
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 6, 1 << 8);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 5, 1 << 7);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+ ->RangeMultiplier(2)
+ ->Range(1 << 3, 1 << 5);
BENCHMARK_MAIN();
``````````
</details>
https://github.com/llvm/llvm-project/pull/120012
More information about the libcxx-commits
mailing list