[libcxx-commits] [libcxx] [libc++] Allows any types of size 4 and 8 to use native platform ulock_wait (PR #161086)

Sun Oct 19 02:29:24 PDT 2025

https://github.com/huixie90 updated https://github.com/llvm/llvm-project/pull/161086

>From 2cacfb29453911f261b52a3a56c2cb30b9f9d13b Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 28 Sep 2025 16:04:35 +0100
Subject: [PATCH 1/7] [libc++] Allows any types of size 4 and 8 to use native
 platform ulock_wait

---
 libcxx/include/__atomic/atomic.h       |   2 +
 libcxx/include/__atomic/atomic_flag.h  |   2 +
 libcxx/include/__atomic/atomic_ref.h   |   2 +
 libcxx/include/__atomic/atomic_sync.h  |  60 +++++++++--
 libcxx/include/__atomic/contention_t.h |  17 +++
 libcxx/src/atomic.cpp                  | 144 ++++++++++++++++---------
 6 files changed, 172 insertions(+), 55 deletions(-)

diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h
index 44835820bb06c..3eb5dc6a7a6ce 100644
--- a/libcxx/include/__atomic/atomic.h
+++ b/libcxx/include/__atomic/atomic.h
@@ -206,6 +206,8 @@ struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> {
 // __atomic_base<int, false>. So specializing __atomic_base<_Tp> does not work
 template <class _Tp, bool _IsIntegral>
 struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > {
+  using __inner_type _LIBCPP_NODEBUG = _Tp;
+
   static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) {
     return __a.load(__order);
   }
diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h
index 28ed2d53b77da..39b3ad442e7bb 100644
--- a/libcxx/include/__atomic/atomic_flag.h
+++ b/libcxx/include/__atomic/atomic_flag.h
@@ -76,6 +76,8 @@ struct atomic_flag {
 
 template <>
 struct __atomic_waitable_traits<atomic_flag> {
+  using __inner_type _LIBCPP_NODEBUG = _LIBCPP_ATOMIC_FLAG_TYPE;
+
   static _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATOMIC_FLAG_TYPE __atomic_load(const atomic_flag& __a, memory_order __order) {
     return std::__cxx_atomic_load(&__a.__a_, __order);
   }
diff --git a/libcxx/include/__atomic/atomic_ref.h b/libcxx/include/__atomic/atomic_ref.h
index 9bdc6b1160d2c..4da7c208a9268 100644
--- a/libcxx/include/__atomic/atomic_ref.h
+++ b/libcxx/include/__atomic/atomic_ref.h
@@ -230,6 +230,8 @@ struct __atomic_ref_base {
 
 template <class _Tp>
 struct __atomic_waitable_traits<__atomic_ref_base<_Tp>> {
+  using __inner_type _LIBCPP_NODEBUG = _Tp;
+
   static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_ref_base<_Tp>& __a, memory_order __order) {
     return __a.load(__order);
   }
diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index 93953dfbc9b8a..8365c0994a06f 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -38,6 +38,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // The below implementations look ugly to support C++03
 template <class _Tp, class = void>
 struct __atomic_waitable_traits {
+  using __inner_type _LIBCPP_NODEBUG = void;
+
   template <class _AtomicWaitable>
   static void __atomic_load(_AtomicWaitable&&, memory_order) = delete;
 
@@ -58,6 +60,7 @@ struct __atomic_waitable< _Tp,
 #if _LIBCPP_STD_VER >= 20
 #  if _LIBCPP_HAS_THREADS
 
+<<<<<<< HEAD
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT;
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT;
 _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile*) _NOEXCEPT;
@@ -69,6 +72,28 @@ _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
 __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
 _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t) _NOEXCEPT;
+=======
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) _NOEXCEPT;
+
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+__libcpp_atomic_monitor_global(void const volatile* __address) _NOEXCEPT;
+
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_global_table(void const volatile* __address, __cxx_contention_t __monitor_value) _NOEXCEPT;
+
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
+
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__cxx_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
+
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__cxx_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
+>>>>>>> 59d6fc2ba487 ([libc++] Allows any types of size 4 and 8 to use native platform ulock_wait)
 
 template <class _AtomicWaitable, class _Poll>
 struct __atomic_wait_backoff_impl {
@@ -77,6 +102,7 @@ struct __atomic_wait_backoff_impl {
   memory_order __order_;
 
   using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
+<<<<<<< HEAD
 
   _LIBCPP_HIDE_FROM_ABI bool
   __update_monitor_val_and_poll(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor_val) const {
@@ -98,14 +124,26 @@ struct __atomic_wait_backoff_impl {
     auto __current_val = __waitable_traits::__atomic_load(__a_, __order_);
     return __poll_(__current_val);
   }
+=======
+  using __inner_type _LIBCPP_NODEBUG      = typename __waitable_traits::__inner_type;
+>>>>>>> 59d6fc2ba487 ([libc++] Allows any types of size 4 and 8 to use native platform ulock_wait)
 
   _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
     if (__elapsed > chrono::microseconds(4)) {
       auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
-      __cxx_contention_t __monitor_val;
-      if (__update_monitor_val_and_poll(__contention_address, __monitor_val))
-        return true;
-      std::__libcpp_atomic_wait(__contention_address, __monitor_val);
+
+      if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+        auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
+        if (__poll_(__atomic_value))
+          return true;
+        std::__libcpp_atomic_wait_native<sizeof(__inner_type)>(__contention_address, &__atomic_value);
+      } else {
+        __cxx_contention_t __monitor_val = std::__libcpp_atomic_monitor_global(__contention_address);
+        auto __atomic_value              = __waitable_traits::__atomic_load(__a_, __order_);
+        if (__poll_(__atomic_value))
+          return true;
+        std::__libcpp_atomic_wait_global_table(__contention_address, __monitor_val);
+      }
     } else {
     } // poll
     return false;
@@ -136,13 +174,23 @@ _LIBCPP_HIDE_FROM_ABI void __atomic_wait_unless(const _AtomicWaitable& __a, memo
 template <class _AtomicWaitable>
 _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
-  std::__cxx_atomic_notify_one(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
+  if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+    std::__cxx_atomic_notify_one_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  } else {
+    std::__cxx_atomic_notify_one_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  }
 }
 
 template <class _AtomicWaitable>
 _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
-  std::__cxx_atomic_notify_all(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
+  if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+    std::__cxx_atomic_notify_all_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  } else {
+    std::__cxx_atomic_notify_all_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  }
 }
 
 #  else // _LIBCPP_HAS_THREADS
diff --git a/libcxx/include/__atomic/contention_t.h b/libcxx/include/__atomic/contention_t.h
index 5b42a0125f875..bf14d076d6281 100644
--- a/libcxx/include/__atomic/contention_t.h
+++ b/libcxx/include/__atomic/contention_t.h
@@ -11,6 +11,10 @@
 
 #include <__atomic/support.h>
 #include <__config>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_integral.h>
+#include <cstddef>
 #include <cstdint>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -19,10 +23,23 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+template <class _Tp, class = void>
+struct __is_atomic_wait_native_type : false_type {};
+
 #if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
 using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
+
+template <class _Tp>
+struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_integral<_Tp>::value && sizeof(_Tp) == 4> > : true_type {};
+
 #else
 using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
+
+template <class _Tp>
+struct __is_atomic_wait_native_type<_Tp,
+                                    __enable_if_t<is_integral<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
+    : true_type {};
+
 #endif // __linux__ || (_AIX && !__64BIT__)
 
 using __cxx_atomic_contention_t _LIBCPP_NODEBUG = __cxx_atomic_impl<__cxx_contention_t>;
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index b214ba1fd11c0..c5f03acdf1da1 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -9,6 +9,7 @@
 #include <__thread/timed_backoff_policy.h>
 #include <atomic>
 #include <climits>
+#include <cstddef>
 #include <functional>
 #include <thread>
 
@@ -53,6 +54,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #ifdef __linux__
 
+
+// TODO : update
 static void
 __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
   static constexpr timespec __timeout = {2, 0};
@@ -70,22 +73,32 @@ extern "C" int __ulock_wait(
 extern "C" int __ulock_wake(uint32_t operation, void* addr, uint64_t wake_value);
 
 // https://github.com/apple/darwin-xnu/blob/2ff845c2e033bd0ff64b5b6aa6063a1f8f65aa32/bsd/sys/ulock.h#L82
+#  define UL_COMPARE_AND_WAIT 1
 #  define UL_COMPARE_AND_WAIT64 5
 #  define ULF_WAKE_ALL 0x00000100
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
-  static_assert(sizeof(__cxx_atomic_contention_t) == 8, "Waiting on 8 bytes value");
-  __ulock_wait(UL_COMPARE_AND_WAIT64, const_cast<__cxx_atomic_contention_t*>(__ptr), __val, 0);
+template <std::size_t _Size>
+static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const volatile* __val) {
+  static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value");
+  if constexpr (_Size == 4)
+    __ulock_wait(UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const volatile*>(__val), 0);
+  else
+    __ulock_wait(
+        UL_COMPARE_AND_WAIT64, const_cast<void*>(__ptr), *reinterpret_cast<uint64_t const volatile*>(__val), 0);
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
-  static_assert(sizeof(__cxx_atomic_contention_t) == 8, "Waking up on 8 bytes value");
-  __ulock_wake(
-      UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<__cxx_atomic_contention_t*>(__ptr), 0);
+template <std::size_t _Size>
+static void __libcpp_platform_wake_by_address(void const volatile* __ptr, bool __notify_one) {
+  static_assert(_Size == 8 || _Size == 4, "Can only wake up on 8 bytes or 4 bytes value");
+
+  if constexpr (_Size == 4)
+    __ulock_wake(UL_COMPARE_AND_WAIT | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
+  else
+    __ulock_wake(UL_COMPARE_AND_WAIT64 | (__notify_one ? 0 : ULF_WAKE_ALL), const_cast<void*>(__ptr), 0);
 }
 
 #elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+// TODO : update
 /*
  * Since __cxx_contention_t is int64_t even on 32bit FreeBSD
  * platforms, we have to use umtx ops that work on the long type, and
@@ -104,6 +117,7 @@ static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const vo
 #else // <- Add other operating systems here
 
 // Baseline is just a timed backoff
+// TODO : update
 
 static void
 __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
@@ -128,83 +142,115 @@ static __libcpp_contention_table_entry __libcpp_contention_table[__libcpp_conten
 
 static hash<void const volatile*> __libcpp_contention_hasher;
 
-static __libcpp_contention_table_entry* __libcpp_contention_state(void const volatile* p) {
+static __libcpp_contention_table_entry* __get_global_contention_state(void const volatile* p) {
   return &__libcpp_contention_table[__libcpp_contention_hasher(p) & (__libcpp_contention_table_size - 1)];
 }
 
 /* Given an atomic to track contention and an atomic to actually wait on, which may be
    the same atomic, we try to detect contention to avoid spuriously calling the platform. */
 
-static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __contention_state,
-                                       __cxx_atomic_contention_t const volatile* __platform_state,
+template <std::size_t _Size>
+static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __global_contention_state,
+                                       void const volatile* __address_to_notify,
                                        bool __notify_one) {
-  if (0 != __cxx_atomic_load(__contention_state, memory_order_seq_cst))
+  if (0 != __cxx_atomic_load(__global_contention_state, memory_order_seq_cst))
     // We only call 'wake' if we consumed a contention bit here.
-    __libcpp_platform_wake_by_address(__platform_state, __notify_one);
-}
-static __cxx_contention_t
-__libcpp_contention_monitor_for_wait(__cxx_atomic_contention_t volatile* /*__contention_state*/,
-                                     __cxx_atomic_contention_t const volatile* __platform_state) {
-  // We will monitor this value.
-  return __cxx_atomic_load(__platform_state, memory_order_acquire);
+    __libcpp_platform_wake_by_address<_Size>(__address_to_notify, __notify_one);
 }
+
+template <std::size_t _Size>
 static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
-                                     __cxx_atomic_contention_t const volatile* __platform_state,
-                                     __cxx_contention_t __old_value) {
+                                     void const volatile* __address_to_wait,
+                                     void const volatile* __old_value) {
   __cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_relaxed);
   // https://llvm.org/PR109290
   // There are no platform guarantees of a memory barrier in the platform wait implementation
   __cxx_atomic_thread_fence(memory_order_seq_cst);
   // We sleep as long as the monitored value hasn't changed.
-  __libcpp_platform_wait_on_address(__platform_state, __old_value);
+  __libcpp_platform_wait_on_address<_Size>(__address_to_wait, __old_value);
   __cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
 }
 
 /* When the incoming atomic is the wrong size for the platform wait size, need to
    launder the value sequence through an atomic from our table. */
 
-static void __libcpp_atomic_notify(void const volatile* __location) {
-  auto const __entry = __libcpp_contention_state(__location);
+static void __atomic_notify_global_table(void const volatile* __location) {
+  auto const __entry = __get_global_contention_state(__location);
   // The value sequence laundering happens on the next line below.
   __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_seq_cst);
-  __libcpp_contention_notify(
+  __libcpp_contention_notify<sizeof(__cxx_atomic_contention_t)>(
       &__entry->__contention_state,
       &__entry->__platform_state,
       false /* when laundering, we can't handle notify_one */);
 }
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile* __location) noexcept {
-  __libcpp_atomic_notify(__location);
+
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor_global(void const volatile* __location) noexcept {
+  auto const __entry = __get_global_contention_state(__location);
+  return __cxx_atomic_load(&__entry->__platform_state, memory_order_acquire);
 }
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile* __location) noexcept {
-  __libcpp_atomic_notify(__location);
+
+_LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_global_table(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
+  auto const __entry = __get_global_contention_state(__location);
+  __libcpp_contention_wait<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__contention_state, &__entry->__platform_state, &__old_value);
 }
-_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location) noexcept {
-  auto const __entry = __libcpp_contention_state(__location);
-  return __libcpp_contention_monitor_for_wait(&__entry->__contention_state, &__entry->__platform_state);
+
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) noexcept {
+  __libcpp_contention_wait<_Size>(
+      &__get_global_contention_state(__address)->__contention_state, __address, __old_value);
 }
-_LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
-  auto const __entry = __libcpp_contention_state(__location);
-  __libcpp_contention_wait(&__entry->__contention_state, &__entry->__platform_state, __old_value);
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile* __location) noexcept {
+  __atomic_notify_global_table(__location);
+}
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile* __location) noexcept {
+  __atomic_notify_global_table(__location);
 }
 
 /* When the incoming atomic happens to be the platform wait size, we still need to use the
    table for the contention detection, but we can use the atomic directly for the wait. */
 
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) noexcept {
-  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, true);
-}
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) noexcept {
-  __libcpp_contention_notify(&__libcpp_contention_state(__location)->__contention_state, __location, false);
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native(void const volatile* __location) noexcept {
+  __libcpp_contention_notify<_Size>(&__get_global_contention_state(__location)->__contention_state, __location, true);
 }
-// This function is never used, but still exported for ABI compatibility.
-_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
-__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) noexcept {
-  return __libcpp_contention_monitor_for_wait(&__libcpp_contention_state(__location)->__contention_state, __location);
-}
-_LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value) noexcept {
-  __libcpp_contention_wait(&__libcpp_contention_state(__location)->__contention_state, __location, __old_value);
+
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native(void const volatile* __location) noexcept {
+  __libcpp_contention_notify<_Size>(&__get_global_contention_state(__location)->__contention_state, __location, false);
 }
 
+#ifdef __linux__
+
+// TODO
+
+#elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK)
+
+template _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native<4>(void const volatile* __address, void const volatile* __old_value) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native<8>(void const volatile* __address, void const volatile* __old_value) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native<4>(void const volatile* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
+
+#elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+
+// TODO
+
+#else // <- Add other operating systems here
+
+// TODO
+
+#endif // __linux__
+
 _LIBCPP_END_NAMESPACE_STD

>From 3d9fa7728416651a100507e9aa0ede9456c6b06c Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 28 Sep 2025 20:29:01 +0100
Subject: [PATCH 2/7] standard layout types

---
 libcxx/include/__atomic/contention_t.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/include/__atomic/contention_t.h b/libcxx/include/__atomic/contention_t.h
index bf14d076d6281..a356dc83a1d8b 100644
--- a/libcxx/include/__atomic/contention_t.h
+++ b/libcxx/include/__atomic/contention_t.h
@@ -13,7 +13,7 @@
 #include <__config>
 #include <__type_traits/enable_if.h>
 #include <__type_traits/integral_constant.h>
-#include <__type_traits/is_integral.h>
+#include <__type_traits/is_standard_layout.h>
 #include <cstddef>
 #include <cstdint>
 
@@ -30,14 +30,14 @@ struct __is_atomic_wait_native_type : false_type {};
 using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
 
 template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_integral<_Tp>::value && sizeof(_Tp) == 4> > : true_type {};
+struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_standard_layout<_Tp>::value && sizeof(_Tp) == 4> > : true_type {};
 
 #else
 using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
 
 template <class _Tp>
 struct __is_atomic_wait_native_type<_Tp,
-                                    __enable_if_t<is_integral<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
+                                    __enable_if_t<is_standard_layout<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
     : true_type {};
 
 #endif // __linux__ || (_AIX && !__64BIT__)

>From 40b2e5c7442341f9f5f617be775d052294834203 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 11 Oct 2025 22:08:29 +0100
Subject: [PATCH 3/7] add abi macro and availability macro

---
 libcxx/docs/ABIGuarantees.rst                 |   8 +
 libcxx/include/__atomic/atomic.h              |   2 +-
 libcxx/include/__atomic/atomic_flag.h         |   2 +-
 libcxx/include/__atomic/atomic_ref.h          |   2 +-
 libcxx/include/__atomic/atomic_sync.h         |  82 ++++----
 libcxx/include/__atomic/contention_t.h        |  24 ++-
 libcxx/include/__configuration/abi.h          |   1 +
 libcxx/include/__configuration/availability.h |  15 ++
 libcxx/src/atomic.cpp                         | 176 ++++++++++++------
 9 files changed, 202 insertions(+), 110 deletions(-)

diff --git a/libcxx/docs/ABIGuarantees.rst b/libcxx/docs/ABIGuarantees.rst
index 4d4674c7756a4..2d8ef5cc5f037 100644
--- a/libcxx/docs/ABIGuarantees.rst
+++ b/libcxx/docs/ABIGuarantees.rst
@@ -205,6 +205,14 @@ This flag fixes the implementation of CityHash used for ``hash<fundamental-type>
 CityHash has the problem that it drops some bits on the floor. Fixing the implementation changes the hash of values,
 resulting in an ABI break.
 
+``_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE``
+-------------------------------------------------
+This flag makes ``atomic::wait()``, ``atomic::notify_one()`` and ``atomic::notify_all()`` use the native platform
+wait/notify operations, on platforms that provide them, based on the size of the atomic's value type rather than on
+the type itself. As a result, some atomic types wait and notify through a different mechanism than they did before,
+resulting in an ABI break.
+
+
 inline namespaces
 =================
 Inline namespaces which contain types that are observable by the user need to be kept the same, since they affect
diff --git a/libcxx/include/__atomic/atomic.h b/libcxx/include/__atomic/atomic.h
index 3eb5dc6a7a6ce..554c111d695f2 100644
--- a/libcxx/include/__atomic/atomic.h
+++ b/libcxx/include/__atomic/atomic.h
@@ -206,7 +206,7 @@ struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> {
 // __atomic_base<int, false>. So specializing __atomic_base<_Tp> does not work
 template <class _Tp, bool _IsIntegral>
 struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > {
-  using __inner_type _LIBCPP_NODEBUG = _Tp;
+  using __value_type _LIBCPP_NODEBUG = _Tp;
 
   static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) {
     return __a.load(__order);
diff --git a/libcxx/include/__atomic/atomic_flag.h b/libcxx/include/__atomic/atomic_flag.h
index 39b3ad442e7bb..321a6283ba7ad 100644
--- a/libcxx/include/__atomic/atomic_flag.h
+++ b/libcxx/include/__atomic/atomic_flag.h
@@ -76,7 +76,7 @@ struct atomic_flag {
 
 template <>
 struct __atomic_waitable_traits<atomic_flag> {
-  using __inner_type _LIBCPP_NODEBUG = _LIBCPP_ATOMIC_FLAG_TYPE;
+  using __value_type _LIBCPP_NODEBUG = _LIBCPP_ATOMIC_FLAG_TYPE;
 
   static _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATOMIC_FLAG_TYPE __atomic_load(const atomic_flag& __a, memory_order __order) {
     return std::__cxx_atomic_load(&__a.__a_, __order);
diff --git a/libcxx/include/__atomic/atomic_ref.h b/libcxx/include/__atomic/atomic_ref.h
index 4da7c208a9268..70ae116a86aaf 100644
--- a/libcxx/include/__atomic/atomic_ref.h
+++ b/libcxx/include/__atomic/atomic_ref.h
@@ -230,7 +230,7 @@ struct __atomic_ref_base {
 
 template <class _Tp>
 struct __atomic_waitable_traits<__atomic_ref_base<_Tp>> {
-  using __inner_type _LIBCPP_NODEBUG = _Tp;
+  using __value_type _LIBCPP_NODEBUG = _Tp;
 
   static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_ref_base<_Tp>& __a, memory_order __order) {
     return __a.load(__order);
diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index 8365c0994a06f..c8c5cf658550b 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -38,7 +38,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // The below implementations look ugly to support C++03
 template <class _Tp, class = void>
 struct __atomic_waitable_traits {
-  using __inner_type _LIBCPP_NODEBUG = void;
+  using __value_type _LIBCPP_NODEBUG = void;
 
   template <class _AtomicWaitable>
   static void __atomic_load(_AtomicWaitable&&, memory_order) = delete;
@@ -60,7 +60,7 @@ struct __atomic_waitable< _Tp,
 #if _LIBCPP_STD_VER >= 20
 #  if _LIBCPP_HAS_THREADS
 
-<<<<<<< HEAD
+// old dylib interface
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT;
 _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT;
 _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile*) _NOEXCEPT;
@@ -72,28 +72,30 @@ _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
 __libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile*) _NOEXCEPT;
 _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait(__cxx_atomic_contention_t const volatile*, __cxx_contention_t) _NOEXCEPT;
-=======
-template <std::size_t _Size>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) _NOEXCEPT;
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+// new dylib interface
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
 __libcpp_atomic_monitor_global(void const volatile* __address) _NOEXCEPT;
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait_global_table(void const volatile* __address, __cxx_contention_t __monitor_value) _NOEXCEPT;
 
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile*) _NOEXCEPT;
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_notify_one_global_table(void const volatile*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
+
+template <std::size_t _Size>
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
 
+__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) _NOEXCEPT;
 template <std::size_t _Size>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
 
 template <std::size_t _Size>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__cxx_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
->>>>>>> 59d6fc2ba487 ([libc++] Allows any types of size 4 and 8 to use native platform ulock_wait)
+_LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
 
 template <class _AtomicWaitable, class _Poll>
 struct __atomic_wait_backoff_impl {
@@ -102,41 +104,17 @@ struct __atomic_wait_backoff_impl {
   memory_order __order_;
 
   using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
-<<<<<<< HEAD
-
-  _LIBCPP_HIDE_FROM_ABI bool
-  __update_monitor_val_and_poll(__cxx_atomic_contention_t const volatile*, __cxx_contention_t& __monitor_val) const {
-    // In case the contention type happens to be __cxx_atomic_contention_t, i.e. __cxx_atomic_impl<int64_t>,
-    // the platform wait is directly monitoring the atomic value itself.
-    // `__poll_` takes the current value of the atomic as an in-out argument
-    // to potentially modify it. After it returns, `__monitor` has a value
-    // which can be safely waited on by `std::__libcpp_atomic_wait` without any
-    // ABA style issues.
-    __monitor_val = __waitable_traits::__atomic_load(__a_, __order_);
-    return __poll_(__monitor_val);
-  }
-
-  _LIBCPP_HIDE_FROM_ABI bool
-  __update_monitor_val_and_poll(void const volatile* __contention_address, __cxx_contention_t& __monitor_val) const {
-    // In case the contention type is anything else, platform wait is monitoring a __cxx_atomic_contention_t
-    // from the global pool, the monitor comes from __libcpp_atomic_monitor
-    __monitor_val      = std::__libcpp_atomic_monitor(__contention_address);
-    auto __current_val = __waitable_traits::__atomic_load(__a_, __order_);
-    return __poll_(__current_val);
-  }
-=======
-  using __inner_type _LIBCPP_NODEBUG      = typename __waitable_traits::__inner_type;
->>>>>>> 59d6fc2ba487 ([libc++] Allows any types of size 4 and 8 to use native platform ulock_wait)
+  using __value_type _LIBCPP_NODEBUG = typename __waitable_traits::__value_type;
 
   _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
     if (__elapsed > chrono::microseconds(4)) {
       auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
 
-      if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
+      if constexpr (__is_atomic_wait_native_type<__value_type>::value) {
         auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
         if (__poll_(__atomic_value))
           return true;
-        std::__libcpp_atomic_wait_native<sizeof(__inner_type)>(__contention_address, &__atomic_value);
+        std::__libcpp_atomic_wait_native<sizeof(__value_type)>(__contention_address, &__atomic_value);
       } else {
         __cxx_contention_t __monitor_val = std::__libcpp_atomic_monitor_global(__contention_address);
         auto __atomic_value              = __waitable_traits::__atomic_load(__a_, __order_);
@@ -174,22 +152,26 @@ _LIBCPP_HIDE_FROM_ABI void __atomic_wait_unless(const _AtomicWaitable& __a, memo
 template <class _AtomicWaitable>
 _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
-  using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
-  if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
-    std::__cxx_atomic_notify_one_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type;
+  if constexpr (__is_atomic_wait_native_type<__value_type>::value) {
+    std::__libcpp_atomic_notify_one_native<sizeof(__value_type)>(
+        __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
   } else {
-    std::__cxx_atomic_notify_one_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+    std::__libcpp_atomic_notify_one_global_table(
+        __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
   }
 }
 
 template <class _AtomicWaitable>
 _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
-  using __inner_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__inner_type;
-  if constexpr (__is_atomic_wait_native_type<__inner_type>::value) {
-    std::__cxx_atomic_notify_all_native<sizeof(__inner_type)>(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+  using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type;
+  if constexpr (__is_atomic_wait_native_type<__value_type>::value) {
+    std::__libcpp_atomic_notify_all_native<sizeof(__value_type)>(
+        __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
   } else {
-    std::__cxx_atomic_notify_all_global_table(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
+    std::__libcpp_atomic_notify_all_global_table(
+        __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
   }
 }
 
diff --git a/libcxx/include/__atomic/contention_t.h b/libcxx/include/__atomic/contention_t.h
index a356dc83a1d8b..7a12d82850f7a 100644
--- a/libcxx/include/__atomic/contention_t.h
+++ b/libcxx/include/__atomic/contention_t.h
@@ -12,8 +12,9 @@
 #include <__atomic/support.h>
 #include <__config>
 #include <__type_traits/enable_if.h>
+#include <__type_traits/has_unique_object_representation.h>
 #include <__type_traits/integral_constant.h>
-#include <__type_traits/is_standard_layout.h>
+#include <__type_traits/is_same.h>
 #include <cstddef>
 #include <cstdint>
 
@@ -29,16 +30,31 @@ struct __is_atomic_wait_native_type : false_type {};
 #if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
 using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
 
+#  if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
 template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_standard_layout<_Tp>::value && sizeof(_Tp) == 4> > : true_type {};
+struct __is_atomic_wait_native_type<_Tp,
+                                    __enable_if_t<has_unique_object_representations<_Tp>::value && sizeof(_Tp) == 4> >
+    : true_type {};
+#  else
+template <class _Tp>
+struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_same<_Tp, int32_t>::value && sizeof(_Tp) == 4> > : true_type {
+};
+#  endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 
 #else
 using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
 
+#  if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
 template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp,
-                                    __enable_if_t<is_standard_layout<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
+struct __is_atomic_wait_native_type<
+    _Tp,
+    __enable_if_t<has_unique_object_representations<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
     : true_type {};
+#  else
+template <class _Tp>
+struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_same<_Tp, int64_t>::value && sizeof(_Tp) == 8> > : true_type {
+};
+#  endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 
 #endif // __linux__ || (_AIX && !__64BIT__)
 
diff --git a/libcxx/include/__configuration/abi.h b/libcxx/include/__configuration/abi.h
index 2d33b9c03090b..f54a4d8377717 100644
--- a/libcxx/include/__configuration/abi.h
+++ b/libcxx/include/__configuration/abi.h
@@ -59,6 +59,7 @@
 
 // These flags are documented in ABIGuarantees.rst
 #  define _LIBCPP_ABI_ALTERNATE_STRING_LAYOUT
+#  define _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 #  define _LIBCPP_ABI_DO_NOT_EXPORT_BASIC_STRING_COMMON
 #  define _LIBCPP_ABI_DO_NOT_EXPORT_VECTOR_BASE_COMMON
 #  define _LIBCPP_ABI_DO_NOT_EXPORT_TO_CHARS_BASE_10
diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h
index d0414ecfac2bb..cc7a7582dbe32 100644
--- a/libcxx/include/__configuration/availability.h
+++ b/libcxx/include/__configuration/availability.h
@@ -84,6 +84,9 @@
 // in all versions of the library are available.
 #if !_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS
 
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22 1
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE /* nothing */
+
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21 1
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21_ATTRIBUTE /* nothing */
 
@@ -112,6 +115,11 @@
 
 // clang-format off
 
+// LLVM 22
+// TODO: Fill this in
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22 0
+#  define _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE __attribute__((unavailable))
+
 // LLVM 21
 // TODO: Fill this in
 #  define _LIBCPP_INTRODUCED_IN_LLVM_21 0
@@ -216,6 +224,13 @@
 
 #endif
 
+// This controls the availability of the new implementation of std::atomic's
+// wait, notify_one and notify_all. The new implementation uses
+// the native atomic wait/notify operations on platforms that support them
+// based on the size of the atomic type, instead of the type itself.
+#define _LIBCPP_AVAILABILITY_HAS_NEW_SYNC _LIBCPP_INTRODUCED_IN_LLVM_22
+#define _LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_INTRODUCED_IN_LLVM_22_ATTRIBUTE
+
 // Enable additional explicit instantiations of iostreams components. This
 // reduces the number of weak definitions generated in programs that use
 // iostreams by providing a single strong definition in the shared library.
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index c5f03acdf1da1..0134ba60ac081 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -6,10 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include <__atomic/contention_t.h>
 #include <__thread/timed_backoff_policy.h>
 #include <atomic>
 #include <climits>
 #include <cstddef>
+#include <cstring>
 #include <functional>
 #include <thread>
 
@@ -54,7 +56,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #ifdef __linux__
 
-
 // TODO : update
 static void
 __libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
@@ -80,11 +81,12 @@ extern "C" int __ulock_wake(uint32_t operation, void* addr, uint64_t wake_value)
 template <std::size_t _Size>
 static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const volatile* __val) {
   static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value");
+  char buffer[_Size];
+  std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
   if constexpr (_Size == 4)
-    __ulock_wait(UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const volatile*>(__val), 0);
+    __ulock_wait(UL_COMPARE_AND_WAIT, const_cast<void*>(__ptr), *reinterpret_cast<uint32_t const*>(&buffer), 0);
   else
-    __ulock_wait(
-        UL_COMPARE_AND_WAIT64, const_cast<void*>(__ptr), *reinterpret_cast<uint64_t const volatile*>(__val), 0);
+    __ulock_wait(UL_COMPARE_AND_WAIT64, const_cast<void*>(__ptr), *reinterpret_cast<uint64_t const*>(&buffer), 0);
 }
 
 template <std::size_t _Size>
@@ -130,45 +132,70 @@ static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const vo
 
 #endif // __linux__
 
-static constexpr size_t __libcpp_contention_table_size = (1 << 8); /* < there's no magic in this number */
-
-struct alignas(64) /*  aim to avoid false sharing */ __libcpp_contention_table_entry {
-  __cxx_atomic_contention_t __contention_state;
-  __cxx_atomic_contention_t __platform_state;
-  inline constexpr __libcpp_contention_table_entry() : __contention_state(0), __platform_state(0) {}
-};
-
-static __libcpp_contention_table_entry __libcpp_contention_table[__libcpp_contention_table_size];
-
-static hash<void const volatile*> __libcpp_contention_hasher;
-
-static __libcpp_contention_table_entry* __get_global_contention_state(void const volatile* p) {
-  return &__libcpp_contention_table[__libcpp_contention_hasher(p) & (__libcpp_contention_table_size - 1)];
-}
+// =============================
+// Local hidden helper functions
+// =============================
 
 /* Given an atomic to track contention and an atomic to actually wait on, which may be
    the same atomic, we try to detect contention to avoid spuriously calling the platform. */
 
 template <std::size_t _Size>
-static void __libcpp_contention_notify(__cxx_atomic_contention_t volatile* __global_contention_state,
-                                       void const volatile* __address_to_notify,
-                                       bool __notify_one) {
-  if (0 != __cxx_atomic_load(__global_contention_state, memory_order_seq_cst))
+static void __contention_notify(
+    __cxx_atomic_contention_t volatile* __waiter_count, void const volatile* __address_to_notify, bool __notify_one) {
+  if (0 != __cxx_atomic_load(__waiter_count, memory_order_seq_cst))
     // We only call 'wake' if we consumed a contention bit here.
     __libcpp_platform_wake_by_address<_Size>(__address_to_notify, __notify_one);
 }
 
 template <std::size_t _Size>
-static void __libcpp_contention_wait(__cxx_atomic_contention_t volatile* __contention_state,
-                                     void const volatile* __address_to_wait,
-                                     void const volatile* __old_value) {
-  __cxx_atomic_fetch_add(__contention_state, __cxx_contention_t(1), memory_order_relaxed);
+static void __contention_wait(__cxx_atomic_contention_t volatile* __waiter_count,
+                              void const volatile* __address_to_wait,
+                              void const volatile* __old_value) {
+  __cxx_atomic_fetch_add(__waiter_count, __cxx_contention_t(1), memory_order_relaxed);
   // https://llvm.org/PR109290
   // There are no platform guarantees of a memory barrier in the platform wait implementation
   __cxx_atomic_thread_fence(memory_order_seq_cst);
   // We sleep as long as the monitored value hasn't changed.
   __libcpp_platform_wait_on_address<_Size>(__address_to_wait, __old_value);
-  __cxx_atomic_fetch_sub(__contention_state, __cxx_contention_t(1), memory_order_release);
+  __cxx_atomic_fetch_sub(__waiter_count, __cxx_contention_t(1), memory_order_release);
+}
+
+#if defined(__APPLE__) && defined(__aarch64__)
+constexpr size_t __cache_line_size = 128;
+#else
+constexpr size_t __cache_line_size = 64;
+#endif
+
+static constexpr size_t __contention_table_size = (1 << 8); /* < there's no magic in this number */
+
+static constexpr hash<void const volatile*> __contention_hasher;
+
+// Waiter count table for all atomics with the correct size that use themselves as the wait/notify address.
+
+struct alignas(__cache_line_size) /*  aim to avoid false sharing */ __contention_state_native {
+  __cxx_atomic_contention_t __waiter_count;
+  constexpr __contention_state_native() : __waiter_count(0) {}
+};
+
+static __contention_state_native __contention_table_native[__contention_table_size];
+
+static __cxx_atomic_contention_t* __get_native_waiter_count(void const volatile* p) {
+  return &__contention_table_native[__contention_hasher(p) & (__contention_table_size - 1)].__waiter_count;
+}
+
+// Global contention table for all atomics with the wrong size that use the global table's atomic as wait/notify
+// address.
+
+struct alignas(__cache_line_size) /*  aim to avoid false sharing */ __contention_state_global {
+  __cxx_atomic_contention_t __waiter_count;
+  __cxx_atomic_contention_t __platform_state;
+  constexpr __contention_state_global() : __waiter_count(0), __platform_state(0) {}
+};
+
+static __contention_state_global __contention_table_global[__contention_table_size];
+
+static __contention_state_global* __get_global_contention_state(void const volatile* p) {
+  return &__contention_table_global[__contention_hasher(p) & (__contention_table_size - 1)];
 }
 
 /* When the incoming atomic is the wrong size for the platform wait size, need to
@@ -178,12 +205,15 @@ static void __atomic_notify_global_table(void const volatile* __location) {
   auto const __entry = __get_global_contention_state(__location);
   // The value sequence laundering happens on the next line below.
   __cxx_atomic_fetch_add(&__entry->__platform_state, __cxx_contention_t(1), memory_order_seq_cst);
-  __libcpp_contention_notify<sizeof(__cxx_atomic_contention_t)>(
-      &__entry->__contention_state,
-      &__entry->__platform_state,
-      false /* when laundering, we can't handle notify_one */);
+  __contention_notify<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__waiter_count, &__entry->__platform_state, false /* when laundering, we can't handle notify_one */);
 }
 
+// =============================
+// New dylib exported symbols
+// =============================
+
+// global
 _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor_global(void const volatile* __location) noexcept {
   auto const __entry = __get_global_contention_state(__location);
   return __cxx_atomic_load(&__entry->__platform_state, memory_order_acquire);
@@ -192,37 +222,37 @@ _LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor_global(void
 _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait_global_table(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
   auto const __entry = __get_global_contention_state(__location);
-  __libcpp_contention_wait<sizeof(__cxx_atomic_contention_t)>(
-      &__entry->__contention_state, &__entry->__platform_state, &__old_value);
+  __contention_wait<sizeof(__cxx_atomic_contention_t)>(
+      &__entry->__waiter_count, &__entry->__platform_state, &__old_value);
 }
 
-template <std::size_t _Size>
-_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) noexcept {
-  __libcpp_contention_wait<_Size>(
-      &__get_global_contention_state(__address)->__contention_state, __address, __old_value);
-}
-
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_global_table(void const volatile* __location) noexcept {
+_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_global_table(void const volatile* __location) noexcept {
   __atomic_notify_global_table(__location);
 }
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_global_table(void const volatile* __location) noexcept {
+
+_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_global_table(void const volatile* __location) noexcept {
   __atomic_notify_global_table(__location);
 }
 
-/* When the incoming atomic happens to be the platform wait size, we still need to use the
-   table for the contention detection, but we can use the atomic directly for the wait. */
+// native
+
+template <std::size_t _Size>
+_LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) noexcept {
+  __contention_wait<_Size>(__get_native_waiter_count(__address), __address, __old_value);
+}
 
 template <std::size_t _Size>
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native(void const volatile* __location) noexcept {
-  __libcpp_contention_notify<_Size>(&__get_global_contention_state(__location)->__contention_state, __location, true);
+_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native(void const volatile* __location) noexcept {
+  __contention_notify<_Size>(__get_native_waiter_count(__location), __location, true);
 }
 
 template <std::size_t _Size>
-_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native(void const volatile* __location) noexcept {
-  __libcpp_contention_notify<_Size>(&__get_global_contention_state(__location)->__contention_state, __location, false);
+_LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native(void const volatile* __location) noexcept {
+  __contention_notify<_Size>(__get_native_waiter_count(__location), __location, false);
 }
 
+// Instantiation of the templates with supported size
 #ifdef __linux__
 
 // TODO
@@ -235,13 +265,13 @@ __libcpp_atomic_wait_native<4>(void const volatile* __address, void const volati
 template _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_wait_native<8>(void const volatile* __address, void const volatile* __old_value) noexcept;
 
-template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
 
-template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
 
-template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native<4>(void const volatile* __location) noexcept;
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<4>(void const volatile* __location) noexcept;
 
-template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
 
 #elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
 
@@ -253,4 +283,44 @@ template _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all_native<8>(void c
 
 #endif // __linux__
 
+// =============================================================
+// Old dylib exported symbols, for backwards compatibility
+// =============================================================
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile* __location) noexcept {
+  __libcpp_atomic_notify_one_global_table(__location);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile* __location) noexcept {
+  __libcpp_atomic_notify_all_global_table(__location);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t __libcpp_atomic_monitor(void const volatile* __location) noexcept {
+  return __libcpp_atomic_monitor_global(__location);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait(void const volatile* __location, __cxx_contention_t __old_value) noexcept {
+  __libcpp_atomic_wait_global_table(__location, __old_value);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(__cxx_atomic_contention_t const volatile* __location) noexcept {
+  __libcpp_atomic_notify_one_native<sizeof(__cxx_atomic_contention_t)>(__location);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(__cxx_atomic_contention_t const volatile* __location) noexcept {
+  __libcpp_atomic_notify_all_native<sizeof(__cxx_atomic_contention_t)>(__location);
+}
+
+_LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait(__cxx_atomic_contention_t const volatile* __location, __cxx_contention_t __old_value) noexcept {
+  __libcpp_atomic_wait_native<sizeof(__cxx_atomic_contention_t)>(__location, &__old_value);
+}
+
+// This function is unused even in the old ABI.
+_LIBCPP_EXPORTED_FROM_ABI __cxx_contention_t
+__libcpp_atomic_monitor(__cxx_atomic_contention_t const volatile* __location) noexcept {
+  return __cxx_atomic_load(__location, memory_order_acquire);
+}
+
 _LIBCPP_END_NAMESPACE_STD

>From 99d1157fadf0456b2bb604dd3ba070b34ff91196 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 11 Oct 2025 22:17:07 +0100
Subject: [PATCH 4/7] fix volatile

---
 libcxx/src/atomic.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index 0134ba60ac081..d865288709310 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -79,7 +79,7 @@ extern "C" int __ulock_wake(uint32_t operation, void* addr, uint64_t wake_value)
 #  define ULF_WAKE_ALL 0x00000100
 
 template <std::size_t _Size>
-static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const volatile* __val) {
+static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const* __val) {
   static_assert(_Size == 8 || _Size == 4, "Can only wait on 8 bytes or 4 bytes value");
   char buffer[_Size];
   std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
@@ -150,7 +150,7 @@ static void __contention_notify(
 template <std::size_t _Size>
 static void __contention_wait(__cxx_atomic_contention_t volatile* __waiter_count,
                               void const volatile* __address_to_wait,
-                              void const volatile* __old_value) {
+                              void const* __old_value) {
   __cxx_atomic_fetch_add(__waiter_count, __cxx_contention_t(1), memory_order_relaxed);
   // https://llvm.org/PR109290
   // There are no platform guarantees of a memory barrier in the platform wait implementation
@@ -238,7 +238,7 @@ _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_global_table(void cons
 
 template <std::size_t _Size>
 _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) noexcept {
+__libcpp_atomic_wait_native(void const volatile* __address, void const* __old_value) noexcept {
   __contention_wait<_Size>(__get_native_waiter_count(__address), __address, __old_value);
 }
 
@@ -260,10 +260,10 @@ _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native(void const vola
 #elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK)
 
 template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<4>(void const volatile* __address, void const volatile* __old_value) noexcept;
+__libcpp_atomic_wait_native<4>(void const volatile* __address, void const* __old_value) noexcept;
 
 template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<8>(void const volatile* __address, void const volatile* __old_value) noexcept;
+__libcpp_atomic_wait_native<8>(void const volatile* __address, void const* __old_value) noexcept;
 
 template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
 

>From 03a1ddee2383fe74b25b5649e7833a641ea7ceb9 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 11 Oct 2025 22:48:01 +0100
Subject: [PATCH 5/7] add support for linux

---
 libcxx/src/atomic.cpp | 61 +++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index d865288709310..a05105cb753c1 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -56,14 +56,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #ifdef __linux__
 
-// TODO : update
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
+template <std::size_t _Size>
+static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const* __val) {
+  static_assert(_Size == 4, "Can only wait on 4 bytes value");
+  char buffer[_Size];
+  std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
   static constexpr timespec __timeout = {2, 0};
-  _LIBCPP_FUTEX(__ptr, FUTEX_WAIT_PRIVATE, __val, &__timeout, 0, 0);
+  _LIBCPP_FUTEX(__ptr, FUTEX_WAIT_PRIVATE, *reinterpret_cast<__cxx_contention_t const*>(&buffer), &__timeout, 0, 0);
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
+template <std::size_t _Size>
+static void __libcpp_platform_wake_by_address(void const volatile* __ptr, bool __notify_one) {
+  static_assert(_Size == 4, "Can only wake up on 4 bytes value");
   _LIBCPP_FUTEX(__ptr, FUTEX_WAKE_PRIVATE, __notify_one ? 1 : INT_MAX, 0, 0, 0);
 }
 
@@ -100,35 +104,39 @@ static void __libcpp_platform_wake_by_address(void const volatile* __ptr, bool _
 }
 
 #elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
-// TODO : update
 /*
  * Since __cxx_contention_t is int64_t even on 32bit FreeBSD
  * platforms, we have to use umtx ops that work on the long type, and
  * limit its use to architectures where long and int64_t are synonyms.
  */
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
-  _umtx_op(const_cast<__cxx_atomic_contention_t*>(__ptr), UMTX_OP_WAIT, __val, nullptr, nullptr);
+template <std::size_t _Size>
+static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const* __val) {
+  static_assert(_Size == 8, "Can only wait on 8 bytes value");
+  char buffer[_Size];
+  std::memcpy(&buffer, const_cast<const void*>(__val), _Size);
+  _umtx_op(const_cast<void*>(__ptr), UMTX_OP_WAIT, *reinterpret_cast<__cxx_contention_t*>(&buffer), nullptr, nullptr);
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile* __ptr, bool __notify_one) {
-  _umtx_op(const_cast<__cxx_atomic_contention_t*>(__ptr), UMTX_OP_WAKE, __notify_one ? 1 : INT_MAX, nullptr, nullptr);
+template <std::size_t _Size>
+static void __libcpp_platform_wake_by_address(void const volatile* __ptr, bool __notify_one) {
+  static_assert(_Size == 8, "Can only wake up on 8 bytes value");
+  _umtx_op(const_cast<void*>(__ptr), UMTX_OP_WAKE, __notify_one ? 1 : INT_MAX, nullptr, nullptr);
 }
 
 #else // <- Add other operating systems here
 
 // Baseline is just a timed backoff
-// TODO : update
 
-static void
-__libcpp_platform_wait_on_address(__cxx_atomic_contention_t const volatile* __ptr, __cxx_contention_t __val) {
+template <std::size_t _Size>
+static void __libcpp_platform_wait_on_address(void const volatile* __ptr, void const* __val) {
   __libcpp_thread_poll_with_backoff(
-      [=]() -> bool { return !__cxx_nonatomic_compare_equal(__cxx_atomic_load(__ptr, memory_order_relaxed), __val); },
+      [=]() -> bool { return std::memcmp(const_cast<const void*>(__ptr), __val, _Size) != 0; },
       __libcpp_timed_backoff_policy());
 }
 
-static void __libcpp_platform_wake_by_address(__cxx_atomic_contention_t const volatile*, bool) {}
+template <std::size_t _Size>
+static void __libcpp_platform_wake_by_address(void const volatile*, bool) {}
 
 #endif // __linux__
 
@@ -255,7 +263,12 @@ _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native(void const vola
 // Instantiation of the templates with supported size
 #ifdef __linux__
 
-// TODO
+template _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native<4>(void const volatile* __address, void const* __old_value) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<4>(void const volatile* __location) noexcept;
 
 #elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK)
 
@@ -275,11 +288,21 @@ template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(voi
 
 #elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
 
-// TODO
+template _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native<8>(void const volatile* __address, void const* __old_value) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
 
 #else // <- Add other operating systems here
 
-// TODO
+template _LIBCPP_EXPORTED_FROM_ABI void
+__libcpp_atomic_wait_native<8>(void const volatile* __address, void const* __old_value) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
+
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
 
 #endif // __linux__
 

>From 7c24d674f1a36cee419429c2921d172c47f09244 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sat, 11 Oct 2025 22:55:23 +0100
Subject: [PATCH 6/7] format

---
 libcxx/include/__atomic/atomic_sync.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index c8c5cf658550b..a5b7132f5c4f0 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -104,7 +104,7 @@ struct __atomic_wait_backoff_impl {
   memory_order __order_;
 
   using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >;
-  using __value_type _LIBCPP_NODEBUG = typename __waitable_traits::__value_type;
+  using __value_type _LIBCPP_NODEBUG      = typename __waitable_traits::__value_type;
 
   _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const {
     if (__elapsed > chrono::microseconds(4)) {

>From a8ed700dd57c09bbff31f092355f56985dd6451e Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 19 Oct 2025 10:29:10 +0100
Subject: [PATCH 7/7] CI

---
 libcxx/include/__atomic/atomic_sync.h  | 54 ++++++++++++++++++++++++--
 libcxx/include/__atomic/contention_t.h | 33 ----------------
 libcxx/src/atomic.cpp                  | 53 ++++++++-----------------
 3 files changed, 67 insertions(+), 73 deletions(-)

diff --git a/libcxx/include/__atomic/atomic_sync.h b/libcxx/include/__atomic/atomic_sync.h
index a5b7132f5c4f0..774df93315bf5 100644
--- a/libcxx/include/__atomic/atomic_sync.h
+++ b/libcxx/include/__atomic/atomic_sync.h
@@ -9,16 +9,20 @@
 #ifndef _LIBCPP___ATOMIC_ATOMIC_SYNC_H
 #define _LIBCPP___ATOMIC_ATOMIC_SYNC_H
 
+#include <__algorithm/ranges_find.h>
 #include <__atomic/contention_t.h>
 #include <__atomic/memory_order.h>
 #include <__atomic/to_gcc_order.h>
 #include <__chrono/duration.h>
 #include <__config>
 #include <__memory/addressof.h>
+#include <__ranges/access.h>
 #include <__thread/poll_with_backoff.h>
 #include <__type_traits/conjunction.h>
 #include <__type_traits/decay.h>
+#include <__type_traits/has_unique_object_representation.h>
 #include <__type_traits/invoke.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/void_t.h>
 #include <__utility/declval.h>
 #include <cstring>
@@ -88,7 +92,7 @@ __libcpp_atomic_notify_all_global_table(void const volatile*) _NOEXCEPT;
 template <std::size_t _Size>
 _LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
 
-__libcpp_atomic_wait_native(void const volatile* __address, void const volatile* __old_value) _NOEXCEPT;
+__libcpp_atomic_wait_native(void const volatile* __address, void const* __old_value) _NOEXCEPT;
 template <std::size_t _Size>
 _LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_notify_one_native(const volatile void*) _NOEXCEPT;
@@ -97,6 +101,48 @@ template <std::size_t _Size>
 _LIBCPP_AVAILABILITY_NEW_SYNC _LIBCPP_EXPORTED_FROM_ABI void
 __libcpp_atomic_notify_all_native(const volatile void*) _NOEXCEPT;
 
+// Concept that defines which types are supported natively by the platform's wait
+
+#    if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
+
+#      ifdef __linux__
+
+#        define _LIBCPP_ATOMIC_WAIT_SIZES_LIST(_APPLY) _APPLY(4)
+
+#      elif defined(__APPLE__)
+
+#        define _LIBCPP_ATOMIC_WAIT_SIZES_LIST(_APPLY)                                                                 \
+          _APPLY(4)                                                                                                    \
+          _APPLY(8)
+
+#      elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+
+#        define _LIBCPP_ATOMIC_WAIT_SIZES_LIST(_APPLY) _APPLY(8)
+
+#      else
+
+#        define _LIBCPP_ATOMIC_WAIT_SIZES_LIST(_APPLY) _APPLY(sizeof(__cxx_contention_t))
+
+#      endif // __linux__
+
+inline constexpr std::size_t __supported_native_wait_sizes[] = {
+#      define _IDENTITY(_SIZE) _SIZE,
+    _LIBCPP_ATOMIC_WAIT_SIZES_LIST(_IDENTITY)
+#      undef _IDENTITY
+};
+
+template <class _Tp>
+concept __atomic_wait_native_type =
+    has_unique_object_representations_v<_Tp> &&
+    std::ranges::find(__supported_native_wait_sizes, sizeof(_Tp)) != ranges::end(__supported_native_wait_sizes);
+
+#    else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
+template <class _Tp>
+concept __atomic_wait_native_type = is_same_v<_Tp, __cxx_contention_t>;
+
+#    endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
+
 template <class _AtomicWaitable, class _Poll>
 struct __atomic_wait_backoff_impl {
   const _AtomicWaitable& __a_;
@@ -110,7 +156,7 @@ struct __atomic_wait_backoff_impl {
     if (__elapsed > chrono::microseconds(4)) {
       auto __contention_address = __waitable_traits::__atomic_contention_address(__a_);
 
-      if constexpr (__is_atomic_wait_native_type<__value_type>::value) {
+      if constexpr (__atomic_wait_native_type<__value_type>) {
         auto __atomic_value = __waitable_traits::__atomic_load(__a_, __order_);
         if (__poll_(__atomic_value))
           return true;
@@ -153,7 +199,7 @@ template <class _AtomicWaitable>
 _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
   using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type;
-  if constexpr (__is_atomic_wait_native_type<__value_type>::value) {
+  if constexpr (__atomic_wait_native_type<__value_type>) {
     std::__libcpp_atomic_notify_one_native<sizeof(__value_type)>(
         __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
   } else {
@@ -166,7 +212,7 @@ template <class _AtomicWaitable>
 _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable& __a) {
   static_assert(__atomic_waitable<_AtomicWaitable>::value, "");
   using __value_type _LIBCPP_NODEBUG = typename __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__value_type;
-  if constexpr (__is_atomic_wait_native_type<__value_type>::value) {
+  if constexpr (__atomic_wait_native_type<__value_type>) {
     std::__libcpp_atomic_notify_all_native<sizeof(__value_type)>(
         __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a));
   } else {
diff --git a/libcxx/include/__atomic/contention_t.h b/libcxx/include/__atomic/contention_t.h
index 7a12d82850f7a..5b42a0125f875 100644
--- a/libcxx/include/__atomic/contention_t.h
+++ b/libcxx/include/__atomic/contention_t.h
@@ -11,11 +11,6 @@
 
 #include <__atomic/support.h>
 #include <__config>
-#include <__type_traits/enable_if.h>
-#include <__type_traits/has_unique_object_representation.h>
-#include <__type_traits/integral_constant.h>
-#include <__type_traits/is_same.h>
-#include <cstddef>
 #include <cstdint>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -24,38 +19,10 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-template <class _Tp, class = void>
-struct __is_atomic_wait_native_type : false_type {};
-
 #if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__))
 using __cxx_contention_t _LIBCPP_NODEBUG = int32_t;
-
-#  if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
-template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp,
-                                    __enable_if_t<has_unique_object_representations<_Tp>::value && sizeof(_Tp) == 4> >
-    : true_type {};
-#  else
-template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_same<_Tp, int32_t>::value && sizeof(_Tp) == 4> > : true_type {
-};
-#  endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
-
 #else
 using __cxx_contention_t _LIBCPP_NODEBUG = int64_t;
-
-#  if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
-template <class _Tp>
-struct __is_atomic_wait_native_type<
-    _Tp,
-    __enable_if_t<has_unique_object_representations<_Tp>::value && (sizeof(_Tp) == 4 || sizeof(_Tp) == 8)> >
-    : true_type {};
-#  else
-template <class _Tp>
-struct __is_atomic_wait_native_type<_Tp, __enable_if_t<is_same<_Tp, int64_t>::value && sizeof(_Tp) == 4> > : true_type {
-};
-#  endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
-
 #endif // __linux__ || (_AIX && !__64BIT__)
 
 using __cxx_atomic_contention_t _LIBCPP_NODEBUG = __cxx_atomic_impl<__cxx_contention_t>;
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index a05105cb753c1..4f1d766049928 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -5,15 +5,13 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-
-#include <__atomic/contention_t.h>
-#include <__thread/timed_backoff_policy.h>
 #include <atomic>
 #include <climits>
 #include <cstddef>
 #include <cstring>
 #include <functional>
 #include <thread>
+#include <type_traits>
 
 #include "include/apple_availability.h"
 
@@ -260,51 +258,34 @@ _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native(void const vola
   __contention_notify<_Size>(__get_native_waiter_count(__location), __location, false);
 }
 
+// ==================================================
 // Instantiation of the templates with supported size
-#ifdef __linux__
-
-template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<4>(void const volatile* __address, void const* __old_value) noexcept;
-
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
+// ==================================================
 
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<4>(void const volatile* __location) noexcept;
+#if defined(_LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE)
 
-#elif defined(__APPLE__) && defined(_LIBCPP_USE_ULOCK)
-
-template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<4>(void const volatile* __address, void const* __old_value) noexcept;
-
-template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<8>(void const volatile* __address, void const* __old_value) noexcept;
+#  define _INSTANTIATE(_SIZE)                                                                                          \
+    template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait_native<_SIZE>(                                        \
+        void const volatile*, void const*) noexcept;                                                                   \
+    template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<_SIZE>(void const volatile*) noexcept;   \
+    template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<_SIZE>(void const volatile*) noexcept;
 
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<4>(void const volatile* __location) noexcept;
+_LIBCPP_ATOMIC_WAIT_SIZES_LIST(_INSTANTIATE)
 
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
+#  undef _INSTANTIATE
 
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<4>(void const volatile* __location) noexcept;
+#else // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
-
-#elif defined(__FreeBSD__) && __SIZEOF_LONG__ == 8
+template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_wait_native<sizeof(__cxx_contention_t)>(
+    void const volatile* __address, void const* __old_value) noexcept;
 
 template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<8>(void const volatile* __address, void const* __old_value) noexcept;
-
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
-
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
-
-#else // <- Add other operating systems here
+__libcpp_atomic_notify_one_native<sizeof(__cxx_contention_t)>(void const volatile* __location) noexcept;
 
 template _LIBCPP_EXPORTED_FROM_ABI void
-__libcpp_atomic_wait_native<8>(void const volatile* __address, void const* __old_value) noexcept;
-
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_one_native<8>(void const volatile* __location) noexcept;
+__libcpp_atomic_notify_all_native<sizeof(__cxx_contention_t)>(void const volatile* __location) noexcept;
 
-template _LIBCPP_EXPORTED_FROM_ABI void __libcpp_atomic_notify_all_native<8>(void const volatile* __location) noexcept;
-
-#endif // __linux__
+#endif // _LIBCPP_ABI_ATOMIC_WAIT_NATIVE_BY_SIZE
 
 // =============================================================
 // Old dylib exported symbols, for backwards compatibility