[libcxx-commits] [libcxx] [libc++] `stop_token` uses `mutex` (PR #69600)

Thu Oct 19 05:42:51 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-libcxx

Author: Hui (huixie90)

<details>
<summary>Changes</summary>

- [libc++][test] add more benchmarks for `stop_token`
- [libc++] use `mutex` in the `stop_token`
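- remove the now-unused lock-bit code (`__callback_list_locked_bit`, `__try_lock_for_request_stop`, and the `atomic_unique_lock.h` include)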


---
Full diff: https://github.com/llvm/llvm-project/pull/69600.diff


2 Files Affected:

- (modified) libcxx/benchmarks/stop_token.bench.cpp (+78-3) 
- (modified) libcxx/include/__stop_token/stop_state.h (+60-46) 


``````````diff

diff --git a/libcxx/benchmarks/stop_token.bench.cpp b/libcxx/benchmarks/stop_token.bench.cpp
index 293d55ed82a08cf..e059a1166af16bd 100644
--- a/libcxx/benchmarks/stop_token.bench.cpp
+++ b/libcxx/benchmarks/stop_token.bench.cpp
@@ -14,6 +14,81 @@
 
 using namespace std::chrono_literals;
 
+// We have a single thread created by std::jthread consuming the stop_token:
+// polling for stop_requested.
+void BM_stop_token_single_thread_polling_stop_requested(benchmark::State& state) {
+  auto thread_func = [&](std::stop_token st, std::atomic<std::uint64_t>* loop_count) {
+    while (!st.stop_requested()) {
+      // doing some work
+      loop_count->fetch_add(1, std::memory_order_relaxed);
+    }
+  };
+
+  std::atomic<std::uint64_t> loop_count(0);
+  std::uint64_t total_loop_test_param = state.range(0);
+
+  auto thread = support::make_test_jthread(thread_func, &loop_count);
+
+  for (auto _ : state) {
+    auto start_total = loop_count.load(std::memory_order_relaxed);
+
+    while (loop_count.load(std::memory_order_relaxed) - start_total < total_loop_test_param) {
+      std::this_thread::yield();
+    }
+  }
+}
+
+BENCHMARK(BM_stop_token_single_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
+
+// We have multiple threads polling for stop_requested of the same stop_token.
+void BM_stop_token_multi_thread_polling_stop_requested(benchmark::State& state) {
+  std::atomic<bool> start{false};
+
+  auto thread_func = [&start](std::atomic<std::uint64_t>* loop_count, std::stop_token st) {
+    start.wait(false);
+    while (!st.stop_requested()) {
+      // doing some work
+      loop_count->fetch_add(1, std::memory_order_relaxed);
+    }
+  };
+
+  constexpr size_t thread_count = 20;
+
+  std::uint64_t total_loop_test_param = state.range(0);
+
+  std::vector<std::atomic<std::uint64_t>> loop_counts(thread_count);
+  std::stop_source ss;
+  std::vector<std::jthread> threads;
+  threads.reserve(thread_count);
+
+  for (size_t i = 0; i < thread_count; ++i) {
+    threads.emplace_back(support::make_test_jthread(thread_func, &loop_counts[i], ss.get_token()));
+  }
+
+  auto get_total_loop = [&loop_counts] {
+    std::uint64_t total = 0;
+    for (const auto& loop_count : loop_counts) {
+      total += loop_count.load(std::memory_order_relaxed);
+    }
+    return total;
+  };
+
+  start = true;
+  start.notify_all();
+
+  for (auto _ : state) {
+    auto start_total = get_total_loop();
+
+    while (get_total_loop() - start_total < total_loop_test_param) {
+      std::this_thread::yield();
+    }
+  }
+
+  ss.request_stop();
+}
+
+BENCHMARK(BM_stop_token_multi_thread_polling_stop_requested)->RangeMultiplier(2)->Range(1 << 10, 1 << 24);
+
 // We have a single thread created by std::jthread consuming the stop_token:
 // registering/deregistering callbacks, one at a time.
 void BM_stop_token_single_thread_reg_unreg_callback(benchmark::State& state) {
@@ -59,11 +134,11 @@ void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) {
   std::atomic<bool> start{false};
 
   std::uint64_t total_reg_test_param = state.range(0);
+  std::vector<std::atomic<std::uint64_t>> reg_counts(thread_count);
 
   std::stop_source ss;
   std::vector<std::jthread> threads;
   threads.reserve(thread_count);
-  std::vector<std::atomic<std::uint64_t>> reg_counts(thread_count);
 
   auto thread_func = [&start](std::atomic<std::uint64_t>* count, std::stop_token st) {
     std::vector<std::optional<std::stop_callback<dummy_stop_callback>>> cbs(concurrent_request_count);
@@ -84,8 +159,8 @@ void BM_stop_token_async_reg_unreg_callback(benchmark::State& state) {
 
   auto get_total_reg = [&] {
     std::uint64_t total = 0;
-    for (const auto& reg_counts : reg_counts) {
-      total += reg_counts.load(std::memory_order_relaxed);
+    for (const auto& reg_count : reg_counts) {
+      total += reg_count.load(std::memory_order_relaxed);
     }
     return total;
   };
diff --git a/libcxx/include/__stop_token/stop_state.h b/libcxx/include/__stop_token/stop_state.h
index 462aa73952b84f9..f3fca6554b378a7 100644
--- a/libcxx/include/__stop_token/stop_state.h
+++ b/libcxx/include/__stop_token/stop_state.h
@@ -12,7 +12,7 @@
 
 #include <__availability>
 #include <__config>
-#include <__stop_token/atomic_unique_lock.h>
+#include <__mutex/mutex.h>
 #include <__stop_token/intrusive_list_view.h>
 #include <__thread/id.h>
 #include <atomic>
@@ -37,10 +37,51 @@ struct __stop_callback_base : __intrusive_node_base<__stop_callback_base> {
   bool* __destroyed_        = nullptr;
 };
 
+// stop_token must take its lock inside noexcept functions, but mutex::lock can throw.
+// Wrap the call in a loop that catches every exception and retries until the lock is acquired.
+class __nothrow_mutex_lock {
+  std::mutex& __mutex_;
+  bool __is_locked_;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI explicit __nothrow_mutex_lock(std::mutex& __mutex) noexcept
+      : __mutex_(__mutex), __is_locked_(true) {
+    __lock();
+  }
+
+  __nothrow_mutex_lock(const __nothrow_mutex_lock&)            = delete;
+  __nothrow_mutex_lock(__nothrow_mutex_lock&&)                 = delete;
+  __nothrow_mutex_lock& operator=(const __nothrow_mutex_lock&) = delete;
+  __nothrow_mutex_lock& operator=(__nothrow_mutex_lock&&)      = delete;
+
+  _LIBCPP_HIDE_FROM_ABI ~__nothrow_mutex_lock() {
+    if (__is_locked_) {
+      __unlock();
+    }
+  }
+
+  _LIBCPP_HIDE_FROM_ABI bool __owns_lock() const noexcept { return __is_locked_; }
+
+  _LIBCPP_HIDE_FROM_ABI void __lock() noexcept {
+    while (true) {
+      try {
+        __mutex_.lock();
+        break;
+      } catch (...) {
+      }
+    }
+    __is_locked_ = true;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void __unlock() noexcept {
+    __mutex_.unlock(); // throws nothing
+    __is_locked_ = false;
+  }
+};
+
 class __stop_state {
   static constexpr uint32_t __stop_requested_bit        = 1;
-  static constexpr uint32_t __callback_list_locked_bit  = 1 << 1;
-  static constexpr uint32_t __stop_source_counter_shift = 2;
+  static constexpr uint32_t __stop_source_counter_shift = 1;
 
   // The "stop_source counter" is not used for lifetime reference counting.
   // When the number of stop_source reaches 0, the remaining stop_tokens's
@@ -49,9 +90,10 @@ class __stop_state {
   // The "callback list locked" bit implements the atomic_unique_lock to
   // guard the operations on the callback list
   //
-  //       31 - 2          |  1                   |    0           |
-  //  stop_source counter  | callback list locked | stop_requested |
+  //       31 - 1          |    0           |
+  //  stop_source counter  | stop_requested |
   atomic<uint32_t> __state_ = 0;
+  std::mutex __mutex_;
 
   // Reference count for stop_token + stop_callback + stop_source
   // When the counter reaches zero, the state is destroyed
@@ -59,7 +101,7 @@ class __stop_state {
   atomic<uint32_t> __ref_count_ = 0;
 
   using __state_t            = uint32_t;
-  using __callback_list_lock = __atomic_unique_lock<__state_t, __callback_list_locked_bit>;
+  using __callback_list_lock = __nothrow_mutex_lock;
   using __callback_list      = __intrusive_list_view<__stop_callback_base>;
 
   __callback_list __callback_list_;
@@ -101,8 +143,9 @@ class __stop_state {
   }
 
   _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __request_stop() noexcept {
-    auto __cb_list_lock = __try_lock_for_request_stop();
-    if (!__cb_list_lock.__owns_lock()) {
+    __callback_list_lock __cb_list_lock(__mutex_);
+    auto __old = __state_.fetch_or(__stop_requested_bit, std::memory_order_release);
+    if ((__old & __stop_requested_bit) == __stop_requested_bit) {
       return false;
     }
     __requesting_thread_ = this_thread::get_id();
@@ -138,20 +181,15 @@ class __stop_state {
   }
 
   _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool __add_callback(__stop_callback_base* __cb) noexcept {
-    // If it is already stop_requested. Do not try to request it again.
-    const auto __give_up_trying_to_lock_condition = [__cb](__state_t __state) {
-      if ((__state & __stop_requested_bit) != 0) {
-        // already stop requested, synchronously run the callback and no need to lock the list again
-        __cb->__invoke();
-        return true;
-      }
-      // no stop source. no need to lock the list to add the callback as it can never be invoked
-      return (__state >> __stop_source_counter_shift) == 0;
-    };
-
-    __callback_list_lock __cb_list_lock(__state_, __give_up_trying_to_lock_condition);
+    __callback_list_lock __cb_list_lock(__mutex_);
+    auto __state = __state_.load(std::memory_order_acquire);
+    if ((__state & __stop_requested_bit) != 0) {
+      // already stop requested, synchronously run the callback (note: the list mutex is held during the call)
+      __cb->__invoke();
+      return false;
+    }
 
-    if (!__cb_list_lock.__owns_lock()) {
+    if ((__state >> __stop_source_counter_shift) == 0) {
       return false;
     }
 
@@ -165,7 +203,7 @@ class __stop_state {
 
   // called by the destructor of stop_callback
   _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __remove_callback(__stop_callback_base* __cb) noexcept {
-    __callback_list_lock __cb_list_lock(__state_);
+    __callback_list_lock __cb_list_lock(__mutex_);
 
     // under below condition, the request_stop call just popped __cb from the list and could execute it now
     bool __potentially_executing_now = __cb->__prev_ == nullptr && !__callback_list_.__is_head(__cb);
@@ -191,30 +229,6 @@ class __stop_state {
     }
   }
 
-private:
-  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI __callback_list_lock __try_lock_for_request_stop() noexcept {
-    // If it is already stop_requested, do not try to request stop or lock the list again.
-    const auto __lock_fail_condition = [](__state_t __state) { return (__state & __stop_requested_bit) != 0; };
-
-    // set locked and requested bit at the same time
-    const auto __after_lock_state = [](__state_t __state) {
-      return __state | __callback_list_locked_bit | __stop_requested_bit;
-    };
-
-    // acq because [thread.stoptoken.intro] Registration of a callback synchronizes with the invocation of that
-    //     callback. We are going to invoke the callback after getting the lock, acquire so that we can see the
-    //     registration of a callback (and other writes that happens-before the add_callback)
-    //     Note: the rel (unlock) in the add_callback syncs with this acq
-    // rel because [thread.stoptoken.intro] A call to request_stop that returns true synchronizes with a call
-    //     to stop_requested on an associated stop_token or stop_source object that returns true.
-    //     We need to make sure that all writes (including user code) before request_stop will be made visible
-    //     to the threads that waiting for `stop_requested == true`
-    //     Note: this rel syncs with the acq in `stop_requested`
-    const auto __locked_ordering = std::memory_order_acq_rel;
-
-    return __callback_list_lock(__state_, __lock_fail_condition, __after_lock_state, __locked_ordering);
-  }
-
   template <class _Tp>
   friend struct __intrusive_shared_ptr_traits;
 };

``````````

</details>


https://github.com/llvm/llvm-project/pull/69600