[libcxx-commits] [libcxx] [libc++] experiment with atomic_sync (PR #84471)

via libcxx-commits libcxx-commits at lists.llvm.org
Sat Nov 23 08:55:57 PST 2024


https://github.com/huixie90 updated https://github.com/llvm/llvm-project/pull/84471

>From 30851f54d32ed07cab99db33bf7699cccdf892c4 Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Thu, 28 Mar 2024 20:52:29 +0000
Subject: [PATCH 1/2] [libc++] atomic wait more benchmark

---
 libcxx/test/benchmarks/atomic_wait.bench.cpp | 292 +++++++++++++++++--
 1 file changed, 273 insertions(+), 19 deletions(-)

diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
index 49503a318fda16..c85dc709d0d6c1 100644
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp
@@ -11,6 +11,8 @@
 #include <atomic>
 #include <numeric>
 #include <stop_token>
+#include <pthread.h>
+#include <sched.h>
 #include <thread>
 
 #include "benchmark/benchmark.h"
@@ -18,14 +20,77 @@
 
 using namespace std::chrono_literals;
 
-void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
-  std::atomic<std::uint64_t> a;
-  auto thread_func = [&](std::stop_token st) {
+struct HighPrioTask {
+  sched_param param;
+  pthread_attr_t attr_t;
+  pthread_t thread;
+  std::atomic_bool stopped{false};
+
+  HighPrioTask(const HighPrioTask&) = delete;
+
+  HighPrioTask() {
+    pthread_attr_init(&attr_t);
+    pthread_attr_setschedpolicy(&attr_t, SCHED_FIFO);
+    param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+    pthread_attr_setschedparam(&attr_t, &param);
+    pthread_attr_setinheritsched(&attr_t, PTHREAD_EXPLICIT_SCHED);
+
+    auto thread_fun = [](void* arg) -> void* {
+      auto* stop = reinterpret_cast<std::atomic_bool*>(arg);
+      while (!stop->load(std::memory_order_relaxed)) {
+        // spin
+      }
+      return nullptr;
+    };
+
+    if (pthread_create(&thread, &attr_t, thread_fun, &stopped) != 0) {
+      throw std::runtime_error("failed to create thread");
+    }
+  }
+
+  ~HighPrioTask() {
+    stopped = true;
+    pthread_attr_destroy(&attr_t);
+    pthread_join(thread, nullptr);
+  }
+};
+
+
+template <std::size_t N>
+struct NumHighPrioTasks {
+  static constexpr auto value = N;
+};
+
+
+struct KeepNotifying {
+  template <class Atomic>
+  static void notify(Atomic& a, std::stop_token st) {
     while (!st.stop_requested()) {
       a.fetch_add(1, std::memory_order_relaxed);
       a.notify_all();
     }
-  };
+  }
+};
+
+template <std::size_t N>
+struct NotifyEveryNus {
+  template <class Atomic>
+  static void notify(Atomic& a, std::stop_token st) {
+    while (!st.stop_requested()) {
+      auto start = std::chrono::system_clock::now();
+      a.fetch_add(1, std::memory_order_relaxed);
+      a.notify_all();
+      while (std::chrono::system_clock::now() - start < std::chrono::microseconds{N}) {
+      }
+    }
+  }
+};
+
+template <class NotifyPolicy, class NumPrioTasks>
+void BM_1_atomic_1_waiter_1_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+  std::atomic<std::uint64_t> a;
+  auto thread_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
 
   std::uint64_t total_loop_test_param = state.range(0);
 
@@ -38,19 +103,34 @@ void BM_atomic_wait_one_thread_one_atomic_wait(benchmark::State& state) {
     }
   }
 }
-BENCHMARK(BM_atomic_wait_one_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
 
-void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<0>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<4>>)->RangeMultiplier(2)->Range(1 << 10, 1 << 16);
+
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<KeepNotifying, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<50>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_1_waiter_1_notifier<NotifyEveryNus<100>, NumHighPrioTasks<7>>)->RangeMultiplier(2)->Range(1 << 4, 1 << 8);
+
+
+template <std::size_t N>
+struct NumWaitingThreads {
+  static constexpr auto value = N;
+};
+
+template <class NotifyPolicy, class NumWaitingThreads, class NumPrioTasks>
+void BM_1_atomic_multi_waiter_1_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
+
   std::atomic<std::uint64_t> a;
-  auto notify_func = [&](std::stop_token st) {
-    while (!st.stop_requested()) {
-      a.fetch_add(1, std::memory_order_relaxed);
-      a.notify_all();
-    }
-  };
+  auto notify_func = [&](std::stop_token st) { NotifyPolicy::notify(a, st); };
 
   std::uint64_t total_loop_test_param = state.range(0);
-  constexpr auto num_waiting_threads  = 15;
+  constexpr auto num_waiting_threads  = NumWaitingThreads::value;
   std::vector<std::jthread> wait_threads;
   wait_threads.reserve(num_waiting_threads);
 
@@ -92,17 +172,113 @@ void BM_atomic_wait_multi_thread_one_atomic_wait(benchmark::State& state) {
     t.join();
   }
 }
-BENCHMARK(BM_atomic_wait_multi_thread_one_atomic_wait)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 20);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 20);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 16);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 16);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<0>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 18);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 18);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 18);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 10, 1 << 14);
 
-void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state) {
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<4>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 8, 1 << 14);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<KeepNotifying, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<50>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<3>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<7>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+BENCHMARK(BM_1_atomic_multi_waiter_1_notifier<NotifyEveryNus<100>, NumWaitingThreads<15>, NumHighPrioTasks<7>>)
+    ->RangeMultiplier(2)
+    ->Range(1 << 4, 1 << 8);
+
+
+template <std::size_t N>
+struct NumberOfAtomics {
+  static constexpr auto value = N;
+};
+
+template <class NotifyPolicy, class NumberOfAtomics, class NumPrioTasks>
+void BM_N_atomics_N_waiter_N_notifier(benchmark::State& state) {
+  [[maybe_unused]] std::array<HighPrioTask, NumPrioTasks::value> tasks{};
   const std::uint64_t total_loop_test_param = state.range(0);
-  constexpr std::uint64_t num_atomics       = 7;
+  constexpr std::uint64_t num_atomics       = NumberOfAtomics::value;
   std::vector<std::atomic<std::uint64_t>> atomics(num_atomics);
 
   auto notify_func = [&](std::stop_token st, size_t idx) {
     while (!st.stop_requested()) {
-      atomics[idx].fetch_add(1, std::memory_order_relaxed);
-      atomics[idx].notify_all();
+      NotifyPolicy::notify(atomics[idx], st);
     }
   };
 
@@ -153,6 +329,84 @@ void BM_atomic_wait_multi_thread_wait_different_atomics(benchmark::State& state)
     t.join();
   }
 }
-BENCHMARK(BM_atomic_wait_multi_thread_wait_different_atomics)->RangeMultiplier(2)->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<0>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<KeepNotifying, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 20);
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<50>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 10, 1 << 16);
+
+
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<2>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<3>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 8, 1 << 14);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<5>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 6, 1 << 10);
+ BENCHMARK(BM_N_atomics_N_waiter_N_notifier<NotifyEveryNus<100>, NumberOfAtomics<7>, NumHighPrioTasks<4>>)
+     ->RangeMultiplier(2)
+     ->Range(1 << 4, 1 << 8);
 
 BENCHMARK_MAIN();

>From 632eb4cffdcab058471db120bc97bee0f8fed959 Mon Sep 17 00:00:00 2001
From: Hui Xie <huixie at Huis-MacBook-Pro.local>
Date: Sat, 23 Nov 2024 16:55:39 +0000
Subject: [PATCH 2/2] fix compiler error

---
 libcxx/test/benchmarks/atomic_wait.bench.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libcxx/test/benchmarks/atomic_wait.bench.cpp b/libcxx/test/benchmarks/atomic_wait.bench.cpp
index c85dc709d0d6c1..6d4df6e53fc374 100644
--- a/libcxx/test/benchmarks/atomic_wait.bench.cpp
+++ b/libcxx/test/benchmarks/atomic_wait.bench.cpp
@@ -14,6 +14,8 @@
 #include <pthread.h>
 #include <sched.h>
 #include <thread>
+#include <chrono>
+#include <array>
 
 #include "benchmark/benchmark.h"
 #include "make_test_thread.h"



More information about the libcxx-commits mailing list