[libcxx-commits] [libcxx] [libc++] atomic_wait refactor experiment (PR #85086)

Wed Mar 13 07:33:55 PDT 2024

https://github.com/huixie90 created https://github.com/llvm/llvm-project/pull/85086

None

>From bb79cd6fe7ea87af255d4e64020e04d2ec1fb1e2 Mon Sep 17 00:00:00 2001
From: Hui <hui.xie0621 at gmail.com>
Date: Wed, 13 Mar 2024 14:17:45 +0000
Subject: [PATCH] [libc++] atomic_wait refactor experiment

---
 libcxx/include/__atomic/atomic_base.h |  9 ++-
 libcxx/include/__atomic/atomic_flag.h |  5 +-
 libcxx/include/__atomic/atomic_sync.h | 68 ++++++++------------
 libcxx/src/atomic.cpp                 | 89 ++++++++++++++++++++-------
 4 files changed, 103 insertions(+), 68 deletions(-)

diff --git a/libcxx/include/__atomic/atomic_base.h b/libcxx/include/__atomic/atomic_base.h
index 6ca01a7f1bf9b9..53d3bcd146de0d 100644
--- a/libcxx/include/__atomic/atomic_base.h
+++ b/libcxx/include/__atomic/atomic_base.h
@@ -211,12 +211,17 @@ struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > {
     return __this.load(__order);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI const __cxx_atomic_impl<_Tp>*
+  using __contention_type =
+      _If<__is_same(__cxx_atomic_impl<_Tp>, __cxx_atomic_contention_t),
+          __atomic_waitable_contention_self,
+          __atomic_waitable_contention_global>;
+
+  static _LIBCPP_HIDE_FROM_ABI __contention_type
   __atomic_contention_address(const __atomic_base<_Tp, _IsIntegral>& __a) {
     return std::addressof(__a.__a_);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI const volatile __cxx_atomic_impl<_Tp>*
+  static _LIBCPP_HIDE_FROM_ABI __contention_type
   __atomic_contention_address(const volatile __atomic_base<_Tp, _IsIntegral>& __this) {
     return std::addressof(__this.__a_);
   }
diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h
index 084366237c16eb..8d633eda4ab621 100644
--- a/libcxx/include/__atomic/atomic_flag.h
+++ b/libcxx/include/__atomic/atomic_flag.h
@@ -90,12 +90,11 @@ struct __atomic_waitable_traits<atomic_flag> {
     return std::__cxx_atomic_load(&__a.__a_, __order);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI const __cxx_atomic_impl<_LIBCPP_ATOMIC_FLAG_TYPE>*
-  __atomic_contention_address(const atomic_flag& __a) {
+  static _LIBCPP_HIDE_FROM_ABI __atomic_waitable_contention_global __atomic_contention_address(const atomic_flag& __a) {
     return std::addressof(__a.__a_);
   }
 
-  static _LIBCPP_HIDE_FROM_ABI const volatile __cxx_atomic_impl<_LIBCPP_ATOMIC_FLAG_TYPE>*
+  static _LIBCPP_HIDE_FROM_ABI __atomic_waitable_contention_global
   __atomic_contention_address(const volatile atomic_flag& __a) {
     return std::addressof(__a.__a_);
   }
diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index e583dca38c4c73..b7bf5c9102d780 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -71,19 +71,27 @@ struct __atomic_wait_poll_impl {
 
 #ifndef _LIBCPP_HAS_NO_THREADS
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(void const volatile*, __cxx_contention_t);
-
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*);
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t);
+struct __atomic_waitable_contention_self {
+  volatile __cxx_atomic_contention_t* __waiter_count_;
+  const volatile __cxx_atomic_contention_t* __platform_state_;
+
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __atomic_waitable_contention_self(const volatile __cxx_atomic_contention_t*);
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __monitor();
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __wait(__cxx_contention_t __old_value);
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __notify_one();
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __notify_all();
+};
+
+struct __atomic_waitable_contention_global  {
+  volatile __cxx_atomic_contention_t* __waiter_count_;
+  volatile __cxx_atomic_contention_t* __platform_state_;
+
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __atomic_waitable_contention_global(const volatile void*);
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __monitor();
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __wait(__cxx_contention_t __old_value);
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __notify_one();
+  _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __notify_all();
+};
 
 template <class _AtomicWaitable, class _Poll>
 struct __atomic_wait_backoff_impl {
@@ -93,37 +101,15 @@ struct __atomic_wait_backoff_impl {
 
   using __waitable_traits = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
 
-  _LIBCPP_AVAILABILITY_SYNC
-  _LIBCPP_HIDE_FROM_ABI bool
-  __update_monitor_val_and_poll(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor_val) const {
-    // In case the contention type happens to be __cxx_atomic_contention_t, i.e. __cxx_atomic_impl<int64_t>,
-    // the platform wait is directly monitoring the atomic value itself.
-    // `__poll_` takes the current value of the atomic as an in-out argument
-    // to potentially modify it. After it returns, `__monitor` has a value
-    // which can be safely waited on by `std::__libcpp_atomic_wait` without any
-    // ABA style issues.
-    __monitor_val = __waitable_traits::__atomic_load(__a_, __order_);
-    return __poll_(__monitor_val);
-  }
-
-  _LIBCPP_AVAILABILITY_SYNC
-  _LIBCPP_HIDE_FROM_ABI bool
-  __update_monitor_val_and_poll(void const volatile* __contention_address, __cxx_contention_t& __monitor_val) const {
-    // In case the contention type is anything else, platform wait is monitoring a __cxx_atomic_contention_t
-    // from the global pool, the monitor comes from __libcpp_atomic_monitor
-    __monitor_val      = std::__libcpp_atomic_monitor(__contention_address);
-    auto __current_val = __waitable_traits::__atomic_load(__a_, __order_);
-    return __poll_(__current_val);
-  }
-
   _LIBCPP_AVAILABILITY_SYNC
   _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
     if (__elapsed > chrono::microseconds(64)) {
       auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
-      __cxx_contention_t __monitor_val;
-      if (__update_monitor_val_and_poll(__contention_address, __monitor_val))
+      __cxx_contention_t __monitor_val = __contention_address.__monitor();
+      auto __current_val = __waitable_traits::__atomic_load(__a_, __order_);
+      if (__poll_(__current_val))
         return true;
-      std::__libcpp_atomic_wait(__contention_address, __monitor_val);
+      __contention_address.__wait(__monitor_val);
     } else if (__elapsed > chrono::microseconds(4))
       __libcpp_thread_yield();
     else {
@@ -152,13 +138,13 @@ __atomic_wait_unless(const _AtomicWaitable& __a, _Poll&& __poll, memory_order __
 template <class _AtomicWaitable>
 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
-  std::__cxx_atomic_notify_one(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a).__notify_one();
 }
 
 template <class _AtomicWaitable>
 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
-  std::__cxx_atomic_notify_all(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a).__notify_all();
 }
 
 #else // _LIBCPP_HAS_NO_THREADS
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index 2b67685c8a0a10..69b91f44d54000 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -104,9 +104,11 @@ static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const vo
 static constexpr size_t __libcpp_contention_table_size = (1 << 8); /* < there's no magic in this number */
 
 struct alignas(64) /*  aim to avoid false sharing */ __libcpp_contention_table_entry {
-  __cxx_atomic_contention_t __contention_state;
+  __cxx_atomic_contention_t __contention_state; // this is the waiter count
   __cxx_atomic_contention_t __platform_state;
   inline constexpr __libcpp_contention_table_entry() : __contention_state(0), __platform_state(0) {}
+
+  __cxx_atomic_contention_t& __waiter_count() { return __contention_state; }
 };
 
 static __libcpp_contention_table_entry __libcpp_contention_table[__libcpp_contention_table_size];
@@ -120,40 +122,83 @@ static __libcpp_contention_table_entry* __libcpp_contention_state(void const vol
 /* Given an atomic to track contention and an atomic to actually wait on, which may be
    the same atomic, we try to detect contention to avoid spuriously calling the platform. */
 
-static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __contention_state,
+static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __waiter_count,
                                        __cxx_atomic_contention_t const volatile* __platform_state,
                                        bool __notify_one) {
-  if (0 != __cxx_atomic_load(__contention_state, memory_order_seq_cst))
+  if (0 != __cxx_atomic_load(__waiter_count, memory_order_seq_cst))
     // We only call 'wake' if we consumed a contention bit here.
     __libcpp_platform_wake_by_address(__platform_state, __notify_one);
 }
-static __cxx_contention_t
-__libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__contention_state*/,
-                                     __cxx_atomic_contention_t const volatile* __platform_state) {
-  // We will monitor this value.
-  return __cxx_atomic_load(__platform_state, memory_order_acquire);
-}
-static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
+static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __waiter_count,
                                      __cxx_atomic_contention_t const volatile* __platform_state,
                                      __cxx_contention_t __old_value) {
-  __cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_seq_cst);
+  __cxx_atomic_fetch_add(__waiter_count, __cxx_contention_t(1), memory_order_seq_cst);
   // We sleep as long as the monitored value hasn't changed.
   __libcpp_platform_wait_on_address(__platform_state, __old_value);
-  __cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
+  __cxx_atomic_fetch_sub(__waiter_count, __cxx_contention_t(1), memory_order_release);
+}
+
+namespace {
+
+_LIBCPP_EXPORTED_FROM_ABI const auto __get_contention_table_entry = &__libcpp_contention_state;
+
+}
+
+_LIBCPP_EXPORTED_FROM_ABI __atomic_waitable_contention_self::__atomic_waitable_contention_self(
+    const volatile __cxx_atomic_contention_t* __contention_address)
+    : __waiter_count_(&__get_contention_table_entry(__contention_address)->__waiter_count()),
+      __platform_state_(__contention_address) {}
+
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __atomic_waitable_contention_self::__monitor() {
+  return __cxx_atomic_load(__platform_state_, memory_order_acquire);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_waitable_contention_self::__wait(__cxx_contention_t __old_value) {
+  __libcpp_contention_wait(__waiter_count_, __platform_state_, __old_value);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_waitable_contention_self::__notify_one() {
+  __libcpp_contention_notify(__waiter_count_, __platform_state_, true);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_waitable_contention_self::__notify_all() {
+  __libcpp_contention_notify(__waiter_count_, __platform_state_, false);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI __atomic_waitable_contention_global::__atomic_waitable_contention_global(
+    const volatile void* __atomic_waitable_address) {
+  auto const __entry = __get_contention_table_entry(__atomic_waitable_address);
+  __waiter_count_    = &__entry->__waiter_count();
+  __platform_state_  = &__entry->__platform_state;
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_waitable_contention_global::__notify_one() { __notify_all(); }
+_LIBCPP_EXPORTED_FROM_ABI void __atomic_waitable_contention_global::__notify_all() {
+  // The value sequence laundering happens on the next line below.
+  __cxx_atomic_fetch_add(__platform_state_, __cxx_contention_t(1), memory_order_release);
+  __libcpp_contention_notify(
+      __waiter_count_, __platform_state_, false /* when laundering, we can't handle notify_one */);
+}
+
+// All below functions unused now
+static __cxx_contention_t
+__libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__waiter_count*/,
+                                     __cxx_atomic_contention_t const volatile* __platform_state) {
+  // We will monitor this value.
+  return __cxx_atomic_load(__platform_state, memory_order_acquire);
 }
 
 /* When the incoming atomic is the wrong size for the platform wait size, need to
    launder the value sequence through an atomic from our table. */
 
 static void __libcpp_atomic_notify(void const volatile* __location) {
-  auto const __entry = __libcpp_contention_state(__location);
+  auto const __entry = __get_contention_table_entry(__location);
   // The value sequence laundering happens on the next line below.
   __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_release);
   __libcpp_contention_notify(
-      &__entry->__contention_state,
-      &__entry->__platform_state,
-      false /* when laundering, we can't handle notify_one */);
+      &__entry->__waiter_count(), &__entry->__platform_state, false /* when laundering, we can't handle notify_one */);
 }
+
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile* __location) {
   __libcpp_atomic_notify(__location);
 }
@@ -162,30 +207,30 @@ _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile* __lo
 }
 _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location) {
   auto const __entry = __libcpp_contention_state(__location);
-  return __libcpp_contention_monitor_for_wait(&__entry->__contention_state, &__entry->__platform_state);
+  return __libcpp_contention_monitor_for_wait(&__entry->__waiter_count(), &__entry->__platform_state);
 }
 _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value) {
   auto const __entry = __libcpp_contention_state(__location);
-  __libcpp_contention_wait(&__entry->__contention_state, &__entry->__platform_state, __old_value);
+  __libcpp_contention_wait(&__entry->__waiter_count(), &__entry->__platform_state, __old_value);
 }
 
 /* When the incoming atomic happens to be the platform wait size, we still need to use the
    table for the contention detection, but we can use the atomic directly for the wait. */
 
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) {
-  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, true);
+  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__waiter_count(), __location, true);
 }
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) {
-  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, false);
+  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__waiter_count(), __location, false);
 }
 // This function is never used, but still exported for ABI compatibility.
 _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
 __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) {
-  return __libcpp_contention_monitor_for_wait(&__libcpp_contention_state(__location)->__contention_state, __location);
+  return __libcpp_contention_monitor_for_wait(&__libcpp_contention_state(__location)->__waiter_count(), __location);
 }
 _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value) {
-  __libcpp_contention_wait(&__libcpp_contention_state(__location)->__contention_state, __location, __old_value);
+  __libcpp_contention_wait(&__libcpp_contention_state(__location)->__waiter_count(), __location, __old_value);
 }
 
 _LIBCPP_END_NAMESPACE_STD