[libcxx-commits] [libcxx] [libc++] remove yield from atomic::wait (PR #120012)

via libcxx-commits libcxx-commits at lists.llvm.org
Fri Dec 20 05:49:10 PST 2024


https://github.com/huixie90 updated https://github.com/llvm/llvm-project/pull/120012

>From ad859c220eee4beb9aed76a8a0aaf056559f3bef Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Thu, 28 Mar 2024 20:52:29 +0000
Subject: [PATCH 1/5] [libc++] add more atomic wait benchmarks

---
 libcxx/test/benchmarks/atomic_wait.bench.cpp | 292 +++++++++++++++++--
 1 file changed, 273 insertions(+), 19 deletions(-)

diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
index d19f5fbed8ad60..fc872d2a1a8693 100644
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp
@@ -12,6 +12,8 @@
 #include <cstdint>
 #include <numeric>
 #include <stop_token>
+#include <pthread.h>
+#include <sched.h>
 #include <thread>
 
 #include "benchmark/benchmark.h"
@@ -19,14 +21,77 @@
 
 using namespace std::chrono_literals;
 
-void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
-  std::atomic<std::uint64_t> a;
-  auto thread_func = [&](std::stop_token st) {
+struct HighPrioTask {
+  sched_param param;
+  pthread_attr_t attr_t;
+  pthread_t thread;
+  std::atomic_bool stopped{false};
+
+  HighPrioTask(const HighPrioTask&) = delete;
+
+  HighPrioTask() {
+    pthread_attr_init(&attr_t);
+    pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO);
+    param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+    pthread_attr_setschedparam(&attr_t, &param);
+    pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED);
+
+    auto thread_fun = [](void* arg) -> void* {
+      auto* stop = reinterpret_cast<std::atomic_bool*>(arg);
+      while (!stop->load(std::memory_order_relaxed)) {
+        // spin
+      }
+      return nullptr;
+    };
+
+    if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) {
+      throw std::runtime_error("failed to create thread");
+    }
+  }
+
+  ~HighPrioTask() {
+    stopped = true;
+    pthread_attr_destroy(&attr_t);
+    pthread_join(thread, nullptr);
+  }
+};
+
+
+template <std::size_t N>
+struct NumHighPrioTasks {
+  static constexpr auto value = N;
+};
+
+
+struct KeepNotifying {
+  template <class Atomic>
+  static void notify(Atomic& a, std::stop_token st) {
     while (!st.stop_requested()) {
       a.fetch_add(1, std::memory_order_relaxed);
       a.notify_all();
     }
-  };
+  }
+};
+
+template <std::size_t N>
+struct NotifyEveryNus {
+  template <class Atomic>
+  static void notify(Atomic& a, std::stop_token st) {
+    while (!st.stop_requested()) {
+      auto start = std::chrono::system_clock::now();
+      a.fetch_add(1, std::memory_order_relaxed);
+      a.notify_all();
+      while (std::chrono::system_clock::now() - start < std::chrono::microseconds{N}) {
+      }
+    }
+  }
+};
+
+template <class NotifyPolicy, class NumPrioTasks>
+void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+  std::atomic<std::uint64_t> a;
+  auto thread_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
 
   std::uint64_t total_loop_test_param = state.range(0);
 
@@ -39,19 +104,34 @@ void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
     }
   }
 }
-BENCHMARK(BM_atomic_wait_one_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
 
-void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+
+
+template <std::size_t N>
+struct NumWaitingThreads {
+  static constexpr auto value = N;
+};
+
+template <class NotifyPolicy, class NumWaitingThreads, class NumPrioTasks>
+void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+
   std::atomic<std::uint64_t> a;
-  auto notify_func = [&](std::stop_token st) {
-    while (!st.stop_requested()) {
-      a.fetch_add(1, std::memory_order_relaxed);
-      a.notify_all();
-    }
-  };
+  auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
 
   std::uint64_t total_loop_test_param = state.range(0);
-  constexpr auto num_waiting_threads  = 15;
+  constexpr auto num_waiting_threads  = NumWaitingThreads::value;
   std::vector<std::jthread> wait_threads;
   wait_threads.reserve(num_waiting_threads);
 
@@ -93,17 +173,113 @@ void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
     t.join();
   }
 }
-BENCHMARK(BM_atomic_wait_multi_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 20);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 16);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 18);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 18);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 18);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 14);
 
-void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state) {
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+
+
+template <std::size_t N>
+struct NumberOfAtomics {
+  static constexpr auto value = N;
+};
+
+template <class NotifyPolicy, class NumberOfAtomics, class NumPrioTasks>
+void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
   const std::uint64_t total_loop_test_param = state.range(0);
-  constexpr std::uint64_t num_atomics       = 7;
+  constexpr std::uint64_t num_atomics       = NumberOfAtomics::value;
   std::vector<std::atomic<std::uint64_t>> atomics(num_atomics);
 
   auto notify_func = [&](std::stop_token st, size_t idx) {
     while (!st.stop_requested()) {
-      atomics[idx].fetch_add(1, std::memory_order_relaxed);
-      atomics[idx].notify_all();
+      NotifyPolicy::notify(atomics[idx], st);
     }
   };
 
@@ -154,6 +330,84 @@ void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state)
     t.join();
   }
 }
-BENCHMARK(BM_atomic_wait_multi_thread_wait_different_atomics)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 6, 1 << 10);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 4, 1 << 8);
 
 BENCHMARK_MAIN();

>From a87c430909886a8b69c26c19a28a1fb4cb1ec9d3 Mon Sep 17 00:00:00 2001
From: Hui Xie <huixie at Huis-MacBook-Pro.local>
Date: Sat, 23 Nov 2024 16:55:39 +0000
Subject: [PATCH 2/5] fix compiler error

---
 libcxx/test/benchmarks/atomic_wait.bench.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
index fc872d2a1a8693..d6de47b0e46707 100644
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp
@@ -15,6 +15,8 @@
 #include <pthread.h>
 #include <sched.h>
 #include <thread>
+#include <chrono>
+#include <array>
 
 #include "benchmark/benchmark.h"
 #include "make_test_thread.h"

>From 552425d49eaa91c31b271ed2cf532b23076b9157 Mon Sep 17 00:00:00 2001
From: Hui Xie <huixie at Mac.broadband>
Date: Sun, 15 Dec 2024 15:51:31 +0000
Subject: [PATCH 3/5] [libc++] remove yield from atomic::wait

---
 libcxx/include/__atomic/atomic_sync.h        |   6 +-
 libcxx/test/benchmarks/atomic_wait.bench.cpp | 120 +++++++++----------
 2 files changed, 62 insertions(+), 64 deletions(-)

diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index 153001e7b62e30..5ec792e9b9a29c 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -108,15 +108,13 @@ struct __atomic_wait_backoff_impl {
 
   _LIBCPP_AVAILABILITY_SYNC
   _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
-    if (__elapsed > chrono::microseconds(64)) {
+    if (__elapsed > chrono::microseconds(4)) {
       auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
       __cxx_contention_t __monitor_val;
       if (__update_monitor_val_and_poll(__contention_address, __monitor_val))
         return true;
       std::__libcpp_atomic_wait(__contention_address, __monitor_val);
-    } else if (__elapsed > chrono::microseconds(4))
-      __libcpp_thread_yield();
-    else {
+    } else {
     } // poll
     return false;
   }
diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
index d6de47b0e46707..b85aec49471729 100644
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp
@@ -107,17 +107,17 @@ void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
   }
 }
 
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 18, 1 << 20);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
 
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 18, 1 << 20);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
 
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 6);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 3, 1 << 5);
 
 
 template <std::size_t N>
@@ -177,94 +177,94 @@ void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
 }
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 20);
+    ->Range(1 << 14, 1 << 16);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 20);
+    ->Range(1 << 12, 1 << 14);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 20);
+    ->Range(1 << 10, 1 << 12);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 16);
+    ->Range(1 << 10, 1 << 12);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 16);
+    ->Range(1 << 8, 1 << 10);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 16);
+    ->Range(1 << 6, 1 << 8);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 14);
+    ->Range(1 << 8, 1 << 10);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 14);
+    ->Range(1 << 6, 1 << 8);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 14);
+    ->Range(1 << 4, 1 << 6);
 
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 18);
+    ->Range(1 << 8, 1 << 10);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 18);
+    ->Range(1 << 6, 1 << 8);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 18);
+    ->Range(1 << 4, 1 << 6);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 14);
+    ->Range(1 << 8, 1 << 10);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 14);
+    ->Range(1 << 6, 1 << 8);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 14);
+    ->Range(1 << 4, 1 << 6);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 14);
+    ->Range(1 << 8, 1 << 10);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 14);
+    ->Range(1 << 6, 1 << 8);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 14);
+    ->Range(1 << 4, 1 << 6);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 4, 1 << 6);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 3, 1 << 5);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 2, 1 << 4);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 3, 1 << 5);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 2, 1 << 4);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 1, 1 << 3);
 
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 3, 1 << 5);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 2, 1 << 4);
 BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
     ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 8);
+    ->Range(1 << 1, 1 << 3);
 
 
 template <std::size_t N>
@@ -335,81 +335,81 @@ void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
 
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 12, 1 << 14);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 10, 1 << 12);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 10, 1 << 12);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 8, 1 << 10);
 
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 10, 1 << 12);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 8, 1 << 10);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 8, 1 << 10);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 6, 1 << 8);
 
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 14);
+     ->Range(1 << 8, 1 << 10);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 14);
+     ->Range(1 << 8, 1 << 10);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 14);
+     ->Range(1 << 7, 1 << 9);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 14);
+     ->Range(1 << 6, 1 << 8);
 
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 7, 1 << 9);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 7, 1 << 9);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 6, 1 << 8);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 20);
+     ->Range(1 << 4, 1 << 6);
 
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 7, 1 << 9);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 7, 1 << 9);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 5, 1 << 7);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 16);
+     ->Range(1 << 3, 1 << 5);
 
 
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 14);
+     ->Range(1 << 6, 1 << 8);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 14);
+     ->Range(1 << 6, 1 << 8);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 6, 1 << 10);
+     ->Range(1 << 5, 1 << 7);
  BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
      ->RangeMultiplier(2)
-     ->Range(1 << 4, 1 << 8);
+     ->Range(1 << 3, 1 << 5);
 
 BENCHMARK_MAIN();

>From 247a258e43e89a51fafce3c52c1bf040a54a362a Mon Sep 17 00:00:00 2001
From: Hui Xie <huixie at Huis-MacBook-Pro.local>
Date: Fri, 20 Dec 2024 11:33:32 +0000
Subject: [PATCH 4/5] address review feedback (remove <__thread/support.h> include)

---
 libcxx/include/__atomic/atomic_sync.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index 5ec792e9b9a29c..e8d51fdc5803c3 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -16,7 +16,6 @@
 #include <__config>
 #include <__memory/addressof.h>
 #include <__thread/poll_with_backoff.h>
-#include <__thread/support.h>
 #include <__type_traits/conjunction.h>
 #include <__type_traits/decay.h>
 #include <__type_traits/invoke.h>

>From 8c6b07255546dfdc57e642d59255a54361f9fb5c Mon Sep 17 00:00:00 2001
From: Hui Xie <huixie at Huis-MacBook-Pro.local>
Date: Fri, 20 Dec 2024 13:48:56 +0000
Subject: [PATCH 5/5] split benchmarks

---
 libcxx/test/benchmarks/atomic_wait.bench.cpp  | 415 ------------------
 .../atomic_wait_1_waiter_1_notifier.bench.cpp |  74 ++++
 .../atomic_wait_N_waiter_N_notifier.bench.cpp | 167 +++++++
 libcxx/test/benchmarks/atomic_wait_helper.h   |  94 ++++
 ...mic_wait_multi_waiter_1_notifier.bench.cpp | 167 +++++++
 5 files changed, 502 insertions(+), 415 deletions(-)
 delete mode 100644 libcxx/test/benchmarks/atomic_wait.bench.cpp
 create mode 100644 libcxx/test/benchmarks/atomic_wait_1_waiter_1_notifier.bench.cpp
 create mode 100644 libcxx/test/benchmarks/atomic_wait_N_waiter_N_notifier.bench.cpp
 create mode 100644 libcxx/test/benchmarks/atomic_wait_helper.h
 create mode 100644 libcxx/test/benchmarks/atomic_wait_multi_waiter_1_notifier.bench.cpp

diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
deleted file mode 100644
index b85aec49471729..00000000000000
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-
-#include <atomic>
-#include <cstdint>
-#include <numeric>
-#include <stop_token>
-#include <pthread.h>
-#include <sched.h>
-#include <thread>
-#include <chrono>
-#include <array>
-
-#include "benchmark/benchmark.h"
-#include "make_test_thread.h"
-
-using namespace std::chrono_literals;
-
-struct HighPrioTask {
-  sched_param param;
-  pthread_attr_t attr_t;
-  pthread_t thread;
-  std::atomic_bool stopped{false};
-
-  HighPrioTask(const HighPrioTask&) = delete;
-
-  HighPrioTask() {
-    pthread_attr_init(&attr_t);
-    pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO);
-    param.sched_priority = sched_get_priority_max(SCHED_FIFO);
-    pthread_attr_setschedparam(&attr_t, &param);
-    pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED);
-
-    auto thread_fun = [](void* arg) -> void* {
-      auto* stop = reinterpret_cast<std::atomic_bool*>(arg);
-      while (!stop->load(std::memory_order_relaxed)) {
-        // spin
-      }
-      return nullptr;
-    };
-
-    if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) {
-      throw std::runtime_error("failed to create thread");
-    }
-  }
-
-  ~HighPrioTask() {
-    stopped = true;
-    pthread_attr_destroy(&attr_t);
-    pthread_join(thread, nullptr);
-  }
-};
-
-
-template <std::size_t N>
-struct NumHighPrioTasks {
-  static constexpr auto value = N;
-};
-
-
-struct KeepNotifying {
-  template <class Atomic>
-  static void notify(Atomic& a, std::stop_token st) {
-    while (!st.stop_requested()) {
-      a.fetch_add(1, std::memory_order_relaxed);
-      a.notify_all();
-    }
-  }
-};
-
-template <std::size_t N>
-struct NotifyEveryNus {
-  template <class Atomic>
-  static void notify(Atomic& a, std::stop_token st) {
-    while (!st.stop_requested()) {
-      auto start = std::chrono::system_clock::now();
-      a.fetch_add(1, std::memory_order_relaxed);
-      a.notify_all();
-      while (std::chrono::system_clock::now() - start < std::chrono::microseconds{N}) {
-      }
-    }
-  }
-};
-
-template <class NotifyPolicy, class NumPrioTasks>
-void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
-  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
-  std::atomic<std::uint64_t> a;
-  auto thread_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
-
-  std::uint64_t total_loop_test_param = state.range(0);
-
-  auto thread = support::make_test_jthread(thread_func);
-
-  for (auto _ : state) {
-    for (std::uint64_t i = 0; i < total_loop_test_param; ++i) {
-      auto old = a.load(std::memory_order_relaxed);
-      a.wait(old);
-    }
-  }
-}
-
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 18, 1 << 20);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
-
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 18, 1 << 20);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 12, 1 << 14);
-
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 6);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 3, 1 << 5);
-BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 3, 1 << 5);
-
-
-template <std::size_t N>
-struct NumWaitingThreads {
-  static constexpr auto value = N;
-};
-
-template <class NotifyPolicy, class NumWaitingThreads, class NumPrioTasks>
-void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
-  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
-
-  std::atomic<std::uint64_t> a;
-  auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
-
-  std::uint64_t total_loop_test_param = state.range(0);
-  constexpr auto num_waiting_threads  = NumWaitingThreads::value;
-  std::vector<std::jthread> wait_threads;
-  wait_threads.reserve(num_waiting_threads);
-
-  auto notify_thread = support::make_test_jthread(notify_func);
-
-  std::atomic<std::uint64_t> start_flag = 0;
-  std::atomic<std::uint64_t> done_count = 0;
-  auto wait_func                        = [&a, &start_flag, &done_count, total_loop_test_param](std::stop_token st) {
-    auto old_start = 0;
-    while (!st.stop_requested()) {
-      start_flag.wait(old_start);
-      old_start = start_flag.load();
-      for (std::uint64_t i = 0; i < total_loop_test_param; ++i) {
-        auto old = a.load(std::memory_order_relaxed);
-        a.wait(old);
-      }
-      done_count.fetch_add(1);
-    }
-  };
-
-  for (size_t i = 0; i < num_waiting_threads; ++i) {
-    wait_threads.emplace_back(support::make_test_jthread(wait_func));
-  }
-
-  for (auto _ : state) {
-    done_count = 0;
-    start_flag.fetch_add(1);
-    start_flag.notify_all();
-    while (done_count < num_waiting_threads) {
-      std::this_thread::yield();
-    }
-  }
-  for (auto& t : wait_threads) {
-    t.request_stop();
-  }
-  start_flag.fetch_add(1);
-  start_flag.notify_all();
-  for (auto& t : wait_threads) {
-    t.join();
-  }
-}
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 14, 1 << 16);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 12, 1 << 14);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 12);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 10, 1 << 12);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 10);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 6, 1 << 8);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 10);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 6, 1 << 8);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 6);
-
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 10);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 6, 1 << 8);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 6);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 10);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 6, 1 << 8);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 6);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 8, 1 << 10);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 6, 1 << 8);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 6);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 4, 1 << 6);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 3, 1 << 5);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 2, 1 << 4);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 3, 1 << 5);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 2, 1 << 4);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 1, 1 << 3);
-
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 3, 1 << 5);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 2, 1 << 4);
-BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
-    ->RangeMultiplier(2)
-    ->Range(1 << 1, 1 << 3);
-
-
-template <std::size_t N>
-struct NumberOfAtomics {
-  static constexpr auto value = N;
-};
-
-template <class NotifyPolicy, class NumberOfAtomics, class NumPrioTasks>
-void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
-  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
-  const std::uint64_t total_loop_test_param = state.range(0);
-  constexpr std::uint64_t num_atomics       = NumberOfAtomics::value;
-  std::vector<std::atomic<std::uint64_t>> atomics(num_atomics);
-
-  auto notify_func = [&](std::stop_token st, size_t idx) {
-    while (!st.stop_requested()) {
-      NotifyPolicy::notify(atomics[idx], st);
-    }
-  };
-
-  std::atomic<std::uint64_t> start_flag = 0;
-  std::atomic<std::uint64_t> done_count = 0;
-
-  auto wait_func = [&, total_loop_test_param](std::stop_token st, size_t idx) {
-    auto old_start = 0;
-    while (!st.stop_requested()) {
-      start_flag.wait(old_start);
-      old_start = start_flag.load();
-      for (std::uint64_t i = 0; i < total_loop_test_param; ++i) {
-        auto old = atomics[idx].load(std::memory_order_relaxed);
-        atomics[idx].wait(old);
-      }
-      done_count.fetch_add(1);
-    }
-  };
-
-  std::vector<std::jthread> notify_threads;
-  notify_threads.reserve(num_atomics);
-
-  std::vector<std::jthread> wait_threads;
-  wait_threads.reserve(num_atomics);
-
-  for (size_t i = 0; i < num_atomics; ++i) {
-    notify_threads.emplace_back(support::make_test_jthread(notify_func, i));
-  }
-
-  for (size_t i = 0; i < num_atomics; ++i) {
-    wait_threads.emplace_back(support::make_test_jthread(wait_func, i));
-  }
-
-  for (auto _ : state) {
-    done_count = 0;
-    start_flag.fetch_add(1);
-    start_flag.notify_all();
-    while (done_count < num_atomics) {
-      std::this_thread::yield();
-    }
-  }
-  for (auto& t : wait_threads) {
-    t.request_stop();
-  }
-  start_flag.fetch_add(1);
-  start_flag.notify_all();
-  for (auto& t : wait_threads) {
-    t.join();
-  }
-}
-
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 12, 1 << 14);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 12);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 12);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 10);
-
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 10, 1 << 12);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 10);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 10);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 6, 1 << 8);
-
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 10);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 8, 1 << 10);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 7, 1 << 9);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 6, 1 << 8);
-
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 7, 1 << 9);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 7, 1 << 9);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 6, 1 << 8);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 4, 1 << 6);
-
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 7, 1 << 9);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 7, 1 << 9);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 5, 1 << 7);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 3, 1 << 5);
-
-
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 6, 1 << 8);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 6, 1 << 8);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 5, 1 << 7);
- BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
-     ->RangeMultiplier(2)
-     ->Range(1 << 3, 1 << 5);
-
-BENCHMARK_MAIN();
diff --git a/libcxx/test/benchmarks/atomic_wait_1_waiter_1_notifier.bench.cpp b/libcxx/test/benchmarks/atomic_wait_1_waiter_1_notifier.bench.cpp
new file mode 100644
index 00000000000000..c3d7e6511925d6
--- /dev/null
+++ b/libcxx/test/benchmarks/atomic_wait_1_waiter_1_notifier.bench.cpp
@@ -0,0 +1,80 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include "atomic_wait_helper.h"
+
+#include <atomic>
+#include <array>
+#include <chrono>
+#include <cstdint>
+#include <numeric>
+#include <stop_token>
+#include <thread>
+
+#include "benchmark/benchmark.h"
+#include "make_test_thread.h"
+
+using namespace std::chrono_literals;
+
+// One waiter (the benchmark thread) and one notifier thread share a single
+// atomic. Each benchmark iteration performs state.range(0) waits; every wait
+// is issued against the value loaded immediately before it.
+// NotifyPolicy::notify drives the notifier thread, and NumPrioTasks::value
+// HighPrioTask objects are kept alive for the duration of the run.
+template <class NotifyPolicy, class NumPrioTasks>
+void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+  std::atomic<std::uint64_t> a;
+  auto run_notifier = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
+
+  const std::uint64_t waits_per_iteration = state.range(0);
+
+  // NOTE(review): make_test_jthread presumably returns a std::jthread, which
+  // requests a stop and joins when it goes out of scope - confirm.
+  auto notifier = support::make_test_jthread(run_notifier);
+
+  for (auto _ : state) {
+    for (std::uint64_t done = 0; done != waits_per_iteration; ++done) {
+      a.wait(a.load(std::memory_order_relaxed));
+    }
+  }
+}
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 16, 1 << 18);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 16, 1 << 18);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/benchmarks/atomic_wait_N_waiter_N_notifier.bench.cpp b/libcxx/test/benchmarks/atomic_wait_N_waiter_N_notifier.bench.cpp
new file mode 100644
index 00000000000000..d9b9aa212f602f
--- /dev/null
+++ b/libcxx/test/benchmarks/atomic_wait_N_waiter_N_notifier.bench.cpp
@@ -0,0 +1,181 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include "atomic_wait_helper.h"
+
+#include <array>
+#include <atomic>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <numeric>
+#include <pthread.h>
+#include <sched.h>
+#include <stop_token>
+#include <thread>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "make_test_thread.h"
+
+using namespace std::chrono_literals;
+
+// NumberOfAtomics::value atomics are exercised side by side, each with its own
+// notifier thread and its own waiter thread. In every benchmark iteration each
+// waiter performs state.range(0) waits on its atomic; the iteration ends once
+// all waiters have reported completion through done_count.
+template <class NotifyPolicy, class NumberOfAtomics, class NumPrioTasks>
+void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+  const std::uint64_t total_loop_test_param = state.range(0);
+  constexpr std::uint64_t num_atomics       = NumberOfAtomics::value;
+  std::vector<std::atomic<std::uint64_t>> atomics(num_atomics);
+
+  auto notify_func = [&](std::stop_token st, std::size_t idx) {
+    while (!st.stop_requested()) {
+      NotifyPolicy::notify(atomics[idx], st);
+    }
+  };
+
+  // start_flag is bumped once per benchmark iteration to release the waiters;
+  // done_count counts the waiters that have finished the current iteration.
+  std::atomic<std::uint64_t> start_flag = 0;
+  std::atomic<std::uint64_t> done_count = 0;
+
+  auto wait_func = [&, total_loop_test_param](std::stop_token st, std::size_t idx) {
+    // Same type as the value held by start_flag, so storing the loaded value
+    // below does not narrow.
+    std::uint64_t old_start = 0;
+    while (!st.stop_requested()) {
+      start_flag.wait(old_start);
+      old_start = start_flag.load();
+      for (std::uint64_t i = 0; i < total_loop_test_param; ++i) {
+        auto old = atomics[idx].load(std::memory_order_relaxed);
+        atomics[idx].wait(old);
+      }
+      done_count.fetch_add(1);
+    }
+  };
+
+  std::vector<std::jthread> notify_threads;
+  notify_threads.reserve(num_atomics);
+
+  std::vector<std::jthread> wait_threads;
+  wait_threads.reserve(num_atomics);
+
+  for (std::size_t i = 0; i < num_atomics; ++i) {
+    notify_threads.emplace_back(support::make_test_jthread(notify_func, i));
+  }
+
+  for (std::size_t i = 0; i < num_atomics; ++i) {
+    wait_threads.emplace_back(support::make_test_jthread(wait_func, i));
+  }
+
+  for (auto _ : state) {
+    done_count = 0;
+    start_flag.fetch_add(1);
+    start_flag.notify_all();
+    while (done_count < num_atomics) {
+      std::this_thread::yield();
+    }
+  }
+
+  // Shut the waiters down. They block on start_flag, so it is bumped once more
+  // after the stop request; each waiter then completes one final round before
+  // it sees the request and returns.
+  for (auto& t : wait_threads) {
+    t.request_stop();
+  }
+  start_flag.fetch_add(1);
+  start_flag.notify_all();
+  for (auto& t : wait_threads) {
+    t.join();
+  }
+}
+
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 7, 1 << 9);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 7, 1 << 9);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 7, 1 << 9);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 7, 1 << 9);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 7, 1 << 9);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 5, 1 << 7);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 5, 1 << 7);
+BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+
+BENCHMARK_MAIN();
diff --git a/libcxx/test/benchmarks/atomic_wait_helper.h b/libcxx/test/benchmarks/atomic_wait_helper.h
new file mode 100644
index 00000000000000..c0f1e63e1ae3c9
--- /dev/null
+++ b/libcxx/test/benchmarks/atomic_wait_helper.h
@@ -0,0 +1,114 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ATOMIC_WAIT_HELPER_H
+#define ATOMIC_WAIT_HELPER_H
+
+#include <atomic>
+#include <chrono>
+#include <cstddef>
+#include <exception>
+#include <stdexcept>
+#include <stop_token>
+#include <pthread.h>
+#include <sched.h>
+#include <thread>
+
+// Shared building blocks for the atomic_wait_*.bench.cpp benchmarks.
+
+// Runs a busy-spinning thread for as long as the object is alive. The thread
+// is created with the SCHED_FIFO policy at that policy's maximum priority.
+// The constructor throws std::runtime_error if the thread cannot be created.
+struct HighPrioTask {
+  sched_param param;
+  pthread_attr_t attr_t;
+  pthread_t thread;
+  std::atomic_bool stopped{false};
+
+  HighPrioTask(const HighPrioTask&) = delete;
+
+  HighPrioTask() {
+    pthread_attr_init(&attr_t);
+    pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO);
+    param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+    pthread_attr_setschedparam(&attr_t, &param);
+    pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED);
+
+    auto thread_fun = [](void* arg) -> void* {
+      auto* stop = static_cast<std::atomic_bool*>(arg);
+      while (!stop->load(std::memory_order_relaxed)) {
+        // spin
+      }
+      return nullptr;
+    };
+
+    if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) {
+      // The destructor does not run when the constructor throws, so the
+      // attribute object has to be released here.
+      pthread_attr_destroy(&attr_t);
+      throw std::runtime_error("failed to create thread");
+    }
+  }
+
+  // Asks the spinning thread to exit and waits for it.
+  ~HighPrioTask() {
+    stopped = true;
+    pthread_attr_destroy(&attr_t);
+    pthread_join(thread, nullptr);
+  }
+};
+
+// Compile-time tags that carry a count as `value`, so that benchmark
+// configurations can be passed as template type arguments.
+template <std::size_t N>
+struct NumHighPrioTasks {
+  static constexpr auto value = N;
+};
+
+template <std::size_t N>
+struct NumWaitingThreads {
+  static constexpr auto value = N;
+};
+
+template <std::size_t N>
+struct NumberOfAtomics {
+  static constexpr auto value = N;
+};
+
+// Notification policy: increments the atomic and calls notify_all() in a
+// tight loop until a stop is requested.
+struct KeepNotifying {
+  template <class Atomic>
+  static void notify(Atomic& a, std::stop_token st) {
+    while (!st.stop_requested()) {
+      a.fetch_add(1, std::memory_order_relaxed);
+      a.notify_all();
+    }
+  }
+};
+
+// Notification policy: increments the atomic and calls notify_all(), then
+// busy-waits until N microseconds have passed since the start of the round,
+// repeating until a stop is requested.
+template <std::size_t N>
+struct NotifyEveryNus {
+  template <class Atomic>
+  static void notify(Atomic& a, std::stop_token st) {
+    while (!st.stop_requested()) {
+      // steady_clock is monotonic, so the pacing interval is unaffected by
+      // adjustments to the system clock.
+      auto start = std::chrono::steady_clock::now();
+      a.fetch_add(1, std::memory_order_relaxed);
+      a.notify_all();
+      while (std::chrono::steady_clock::now() - start < std::chrono::microseconds{N}) {
+      }
+    }
+  }
+};
+
+#endif // ATOMIC_WAIT_HELPER_H
diff --git a/libcxx/test/benchmarks/atomic_wait_multi_waiter_1_notifier.bench.cpp b/libcxx/test/benchmarks/atomic_wait_multi_waiter_1_notifier.bench.cpp
new file mode 100644
index 00000000000000..a14a6a2ad9c980
--- /dev/null
+++ b/libcxx/test/benchmarks/atomic_wait_multi_waiter_1_notifier.bench.cpp
@@ -0,0 +1,167 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+#include "atomic_wait_helper.h"
+
+#include <array>
+#include <atomic>
+#include <chrono>
+#include <cstddef>
+#include <cstdint>
+#include <numeric>
+#include <stop_token>
+#include <thread>
+#include <vector>
+
+#include "benchmark/benchmark.h"
+#include "make_test_thread.h"
+
+using namespace std::chrono_literals;
+
+// Benchmarks wait() on one shared atomic with several waiter threads and a
+// single notifier thread.
+//
+// Template parameters:
+//   NotifyPolicy      - type with a static notify(atomic&, std::stop_token) that
+//                       the notifier thread runs.
+//   NumWaitingThreads - tag whose ::value is the number of waiter threads.
+//   NumPrioTasks      - tag whose ::value is the number of HighPrioTask objects
+//                       kept alive while the benchmark runs.
+//
+// state.range(0) is the number of a.wait() calls every waiter performs per
+// benchmark iteration. One iteration is: release all waiters through
+// start_flag, then spin (yielding) until each of them has reported completion
+// through done_count.
+template <class NotifyPolicy, class NumWaitingThreads, class NumPrioTasks>
+void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+
+  std::atomic<std::uint64_t> a{0};
+  auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
+
+  std::uint64_t total_loop_test_param = state.range(0);
+  constexpr auto num_waiting_threads  = NumWaitingThreads::value;
+  std::vector<std::jthread> wait_threads;
+  wait_threads.reserve(num_waiting_threads);
+
+  // The notifier runs for the whole function, so waiters can always make
+  // progress, including during the final shutdown round below.
+  auto notify_thread = support::make_test_jthread(notify_func);
+
+  std::atomic<std::uint64_t> start_flag = 0;
+  std::atomic<std::uint64_t> done_count = 0;
+  auto wait_func                        = [&a, &start_flag, &done_count, total_loop_test_param](std::stop_token st) {
+    // Same type as start_flag's value: deducing `int` from the literal 0 would
+    // narrow the 64-bit value loaded below.
+    std::uint64_t old_start = 0;
+    while (!st.stop_requested()) {
+      // Block until the main thread opens the next round.
+      start_flag.wait(old_start);
+      old_start = start_flag.load();
+      for (std::uint64_t i = 0; i < total_loop_test_param; ++i) {
+        auto old = a.load(std::memory_order_relaxed);
+        a.wait(old);
+      }
+      done_count.fetch_add(1);
+    }
+  };
+
+  for (std::size_t i = 0; i < num_waiting_threads; ++i) {
+    wait_threads.emplace_back(support::make_test_jthread(wait_func));
+  }
+
+  for (auto _ : state) {
+    done_count = 0;
+    start_flag.fetch_add(1);
+    start_flag.notify_all();
+    while (done_count < num_waiting_threads) {
+      std::this_thread::yield();
+    }
+  }
+
+  // Waiters only look at their stop token at the top of their loop. After
+  // requesting stop, bump start_flag once more so that any waiter blocked in
+  // start_flag.wait() is released, finishes one last round and then observes
+  // the stop request.
+  for (auto& t : wait_threads) {
+    t.request_stop();
+  }
+  start_flag.fetch_add(1);
+  start_flag.notify_all();
+  for (auto& t : wait_threads) {
+    t.join();
+  }
+}
+// KeepNotifying, no HighPrioTask. Range values set how many a.wait() calls each waiter makes per iteration.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 14, 1 << 16);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 12, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+// NotifyEveryNus<50> (one notification about every 50us), no HighPrioTask.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 12);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+// NotifyEveryNus<100> (one notification about every 100us), no HighPrioTask.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+// KeepNotifying, 4 HighPrioTask objects alive during the run.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+// NotifyEveryNus<50>, 4 HighPrioTask objects alive during the run.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+// NotifyEveryNus<100>, 4 HighPrioTask objects alive during the run.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 10);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 6, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+// KeepNotifying, 7 HighPrioTask objects alive during the run.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 6);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 2, 1 << 4);
+// NotifyEveryNus<50>, 7 HighPrioTask objects alive during the run.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 2, 1 << 4);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 1, 1 << 3);
+// NotifyEveryNus<100>, 7 HighPrioTask objects alive during the run.
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 3, 1 << 5);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 2, 1 << 4);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 1, 1 << 3);
+
+BENCHMARK_MAIN();



More information about the libcxx-commits mailing list