[libcxx-commits] [libcxx] hxie/rcu (PR #175451)

via libcxx-commits libcxx-commits at lists.llvm.org
Sun Jan 11 11:00:05 PST 2026


https://github.com/huixie90 created https://github.com/llvm/llvm-project/pull/175451

- **init**
- **init impl**
- **callbacks**


>From 370e6177dfdf50378ef3963d7910c566adf75e73 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Fri, 5 Dec 2025 18:41:50 +0000
Subject: [PATCH 1/3] init

---
 libcxx/docs/FeatureTestMacroTable.rst         |  2 +-
 libcxx/docs/ReleaseNotes/22.rst               |  2 +
 libcxx/include/CMakeLists.txt                 |  3 +
 libcxx/include/__configuration/experimental.h |  1 +
 libcxx/include/__rcu/rcu_domain.h             | 27 ++++++++
 libcxx/include/__rcu/rcu_obj_base.h           | 27 ++++++++
 libcxx/include/module.modulemap.in            |  7 ++
 libcxx/include/rcu                            | 54 +++++++++++++++
 libcxx/include/version                        |  4 +-
 libcxx/modules/std.cppm.in                    |  1 +
 libcxx/modules/std/rcu.inc                    |  2 -
 .../rcu.version.compile.pass.cpp              | 68 +++++++++++++++++++
 .../version.version.compile.pass.cpp          |  4 +-
 .../generate_feature_test_macro_components.py |  3 +-
 libcxx/utils/libcxx/header_information.py     |  1 +
 15 files changed, 199 insertions(+), 7 deletions(-)
 create mode 100644 libcxx/include/__rcu/rcu_domain.h
 create mode 100644 libcxx/include/__rcu/rcu_obj_base.h
 create mode 100644 libcxx/include/rcu
 create mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/rcu.version.compile.pass.cpp

diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 756bdf71f8b22..5fe2686d2e120 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -500,7 +500,7 @@ Status
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_ratio``                                        ``202306L``
     ---------------------------------------------------------- -----------------
-    ``__cpp_lib_rcu``                                          *unimplemented*
+    ``__cpp_lib_rcu``                                          ``202306L``
     ---------------------------------------------------------- -----------------
     ``__cpp_lib_reference_wrapper``                            ``202403L``
     ---------------------------------------------------------- -----------------
diff --git a/libcxx/docs/ReleaseNotes/22.rst b/libcxx/docs/ReleaseNotes/22.rst
index 6ea323ced3003..9a428aa6dd4ff 100644
--- a/libcxx/docs/ReleaseNotes/22.rst
+++ b/libcxx/docs/ReleaseNotes/22.rst
@@ -49,6 +49,8 @@ Implemented Papers
 - P2835R7: Expose ``std::atomic_ref``'s object address (`Github <https://llvm.org/PR118377>`__)
 - P2944R3: Comparisons for ``reference_wrapper`` (`Github <https://llvm.org/PR105424>`__)
 - P3168R2: Give ``std::optional`` Range Support (`Github <https://llvm.org/PR105430>`__)
+- P2545R4: Read-Copy Update (RCU) (`Github <https://llvm.org/PRXXXXXX>`__) This feature is currently experimental and
+  therefore requires ``-fexperimental-library``.
 
 Improvements and New Features
 -----------------------------
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 85a2f8ac8ec4b..a54e6f198a926 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -743,6 +743,8 @@ set(files
   __ranges/views.h
   __ranges/zip_transform_view.h
   __ranges/zip_view.h
+  __rcu/rcu_domain.h
+  __rcu/rcu_obj_base.h
   __split_buffer
   __std_mbstate_t.h
   __stop_token/atomic_unique_lock.h
@@ -1054,6 +1056,7 @@ set(files
   ranges
   ratio
   regex
+  rcu
   scoped_allocator
   semaphore
   set
diff --git a/libcxx/include/__configuration/experimental.h b/libcxx/include/__configuration/experimental.h
index d14df3e5175f3..a71d03cfc571d 100644
--- a/libcxx/include/__configuration/experimental.h
+++ b/libcxx/include/__configuration/experimental.h
@@ -33,5 +33,6 @@
 #define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
 #define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
 #define _LIBCPP_HAS_EXPERIMENTAL_HARDENING_OBSERVE_SEMANTIC _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
+#define _LIBCPP_HAS_EXPERIMENTAL_RCU _LIBCPP_HAS_EXPERIMENTAL_LIBRARY
 
 #endif // _LIBCPP___CONFIGURATION_EXPERIMENTAL_H
diff --git a/libcxx/include/__rcu/rcu_domain.h b/libcxx/include/__rcu/rcu_domain.h
new file mode 100644
index 0000000000000..9b41cb7758458
--- /dev/null
+++ b/libcxx/include/__rcu/rcu_domain.h
@@ -0,0 +1,27 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RCU_RCU_DOMAIN_H
+#define _LIBCPP___RCU_RCU_DOMAIN_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+#endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RCU_RCU_DOMAIN_H
diff --git a/libcxx/include/__rcu/rcu_obj_base.h b/libcxx/include/__rcu/rcu_obj_base.h
new file mode 100644
index 0000000000000..faff897a1f159
--- /dev/null
+++ b/libcxx/include/__rcu/rcu_obj_base.h
@@ -0,0 +1,27 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RCU_RCU_OBJ_BASE_H
+#define _LIBCPP___RCU_RCU_OBJ_BASE_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+#endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RCU_RCU_OBJ_BASE_H
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 0ac5a1ade817f..e197d32f938d6 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -1955,6 +1955,13 @@ module std [system] {
     export *
   }
 
+  module rcu {
+    module rcu_domain      { header "__rcu/rcu_domain.h" }
+    module rcu_obj_base    { header "__rcu/rcu_obj_base.h" }
+    header "rcu"
+    export *
+  }
+
   module regex {
     header "regex"
     export *
diff --git a/libcxx/include/rcu b/libcxx/include/rcu
new file mode 100644
index 0000000000000..833aae4db6909
--- /dev/null
+++ b/libcxx/include/rcu
@@ -0,0 +1,54 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_RCU
+#define _LIBCPP_RCU
+
+/*
+
+namespace std {
+  // [saferecl.rcu.base], class template rcu_obj_base
+  template<class T, class D = default_delete<T>> class rcu_obj_base;
+
+  // [saferecl.rcu.domain], class rcu_domain
+  class rcu_domain;
+
+  // [saferecl.rcu.domain.func], non-member functions
+  rcu_domain& rcu_default_domain() noexcept;
+  void rcu_synchronize(rcu_domain& dom = rcu_default_domain()) noexcept;
+  void rcu_barrier(rcu_domain& dom = rcu_default_domain()) noexcept;
+  template<class T, class D = default_delete<T>>
+    void rcu_retire(T* p, D d = D(), rcu_domain& dom = rcu_default_domain());
+}
+
+*/
+
+#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+#  include <__cxx03/__config>
+#else
+#  include <__config>
+
+#  if _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+#    if _LIBCPP_STD_VER >= 26
+#      include <__rcu/rcu_domain.h>
+#      include <__rcu/rcu_obj_base.h>
+#    endif
+
+#    include <version>
+
+#    if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#      pragma GCC system_header
+#    endif
+
+#  endif // _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+#endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS)
+
+#endif // _LIBCPP_RCU
diff --git a/libcxx/include/version b/libcxx/include/version
index 05532ea731ff3..f6ba8083c4924 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -604,7 +604,9 @@ __cpp_lib_void_t                                        201411L <type_traits>
 // # define __cpp_lib_ranges_concat                        202403L
 # define __cpp_lib_ranges_indices                       202506L
 # define __cpp_lib_ratio                                202306L
-// # define __cpp_lib_rcu                                  202306L
+# if _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+#   define __cpp_lib_rcu                                202306L
+# endif
 # define __cpp_lib_reference_wrapper                    202403L
 # define __cpp_lib_saturation_arithmetic                202311L
 // # define __cpp_lib_senders                              202406L
diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in
index 984b18321923c..e1204f85fdd03 100644
--- a/libcxx/modules/std.cppm.in
+++ b/libcxx/modules/std.cppm.in
@@ -93,6 +93,7 @@ module;
 #include <random>
 #include <ranges>
 #include <ratio>
+#include <rcu>
 #include <regex>
 #include <scoped_allocator>
 #include <semaphore>
diff --git a/libcxx/modules/std/rcu.inc b/libcxx/modules/std/rcu.inc
index abf5afad30c13..0dbad726474e7 100644
--- a/libcxx/modules/std/rcu.inc
+++ b/libcxx/modules/std/rcu.inc
@@ -8,7 +8,6 @@
 //===----------------------------------------------------------------------===//
 
 export namespace std {
-#if 0
 #  if _LIBCPP_STD_VER >= 23
   // 2.2.3, class template rcu_obj_base using std::rcu_obj_base;
   // 2.2.4, class rcu_domain
@@ -18,5 +17,4 @@ export namespace std {
   using std::rcu_retire;
   using std::rcu_synchronize;
 #  endif // _LIBCPP_STD_VER >= 23
-#endif
 } // namespace std
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/rcu.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/rcu.version.compile.pass.cpp
new file mode 100644
index 0000000000000..62331b8564d48
--- /dev/null
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/rcu.version.compile.pass.cpp
@@ -0,0 +1,68 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// WARNING: This test was generated by generate_feature_test_macro_components.py
+// and should not be edited manually.
+
+// <rcu>
+
+// Test the feature test macros defined by <rcu>
+
+// clang-format off
+
+#include <rcu>
+#include "test_macros.h"
+
+#if TEST_STD_VER < 14
+
+#  ifdef __cpp_lib_rcu
+#    error "__cpp_lib_rcu should not be defined before c++26"
+#  endif
+
+#elif TEST_STD_VER == 14
+
+#  ifdef __cpp_lib_rcu
+#    error "__cpp_lib_rcu should not be defined before c++26"
+#  endif
+
+#elif TEST_STD_VER == 17
+
+#  ifdef __cpp_lib_rcu
+#    error "__cpp_lib_rcu should not be defined before c++26"
+#  endif
+
+#elif TEST_STD_VER == 20
+
+#  ifdef __cpp_lib_rcu
+#    error "__cpp_lib_rcu should not be defined before c++26"
+#  endif
+
+#elif TEST_STD_VER == 23
+
+#  ifdef __cpp_lib_rcu
+#    error "__cpp_lib_rcu should not be defined before c++26"
+#  endif
+
+#elif TEST_STD_VER > 23
+
+#  if !defined(_LIBCPP_VERSION) || (_LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU)
+#    ifndef __cpp_lib_rcu
+#      error "__cpp_lib_rcu should be defined in c++26"
+#    endif
+#    if __cpp_lib_rcu != 202306L
+#      error "__cpp_lib_rcu should have the value 202306L in c++26"
+#    endif
+#  else
+#    ifdef __cpp_lib_rcu
+#      error "__cpp_lib_rcu should not be defined when the requirement '!defined(_LIBCPP_VERSION) || (_LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU)' is not met!"
+#    endif
+#  endif
+
+#endif // TEST_STD_VER > 23
+
+// clang-format on
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index 996ec29dce697..07db7b229b172 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -7748,7 +7748,7 @@
 #    error "__cpp_lib_raw_memory_algorithms should have the value 201606L in c++26"
 #  endif
 
-#  if !defined(_LIBCPP_VERSION)
+#  if !defined(_LIBCPP_VERSION) || (_LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU)
 #    ifndef __cpp_lib_rcu
 #      error "__cpp_lib_rcu should be defined in c++26"
 #    endif
@@ -7757,7 +7757,7 @@
 #    endif
 #  else
 #    ifdef __cpp_lib_rcu
-#      error "__cpp_lib_rcu should not be defined because it is unimplemented in libc++!"
+#      error "__cpp_lib_rcu should not be defined when the requirement '!defined(_LIBCPP_VERSION) || (_LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU)' is not met!"
 #    endif
 #  endif
 
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 0802f865f9406..823ca7941e2c0 100644
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -1184,7 +1184,8 @@ def add_version_header(tc):
             "headers": [
                 "rcu"  # TODO verify this entry since the paper was underspecified.
             ],
-            "unimplemented": True,
+            "test_suite_guard": "!defined(_LIBCPP_VERSION) || (_LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU)",
+            "libcxx_guard": "_LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU",
         },
         {
             "name": "__cpp_lib_reference_from_temporary",
diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py
index d06271a7908cc..1d1c6a39c45a4 100644
--- a/libcxx/utils/libcxx/header_information.py
+++ b/libcxx/utils/libcxx/header_information.py
@@ -196,6 +196,7 @@ def __hash__(self) -> int:
     "latch": "// UNSUPPORTED: no-threads, c++03, c++11, c++14, c++17",
     "mutex": "// UNSUPPORTED: no-threads, c++03",
     "print": "// UNSUPPORTED: no-filesystem, c++03, c++11, c++14, c++17, c++20, availability-fp_to_chars-missing", # TODO PRINT investigate
+    "rcu": "// UNSUPPORTED: no-threads, c++03, c++11, c++14, c++17, c++20, c++23",
     "semaphore": "// UNSUPPORTED: no-threads, c++03, c++11, c++14, c++17",
     "shared_mutex": "// UNSUPPORTED: no-threads, c++03, c++11",
     "stdatomic.h": "// UNSUPPORTED: no-threads, c++03, c++11, c++14, c++17, c++20",

>From 30bcfd1934dd5a5ef88f07c25b42c304adfc977b Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Wed, 7 Jan 2026 21:05:23 +0000
Subject: [PATCH 2/3] init impl

---
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__rcu/rcu_domain.h             | 211 ++++++++++++++++++
 libcxx/include/__rcu/rcu_list.h               |  36 +++
 libcxx/include/__rcu/rcu_obj_base.h           |  20 ++
 libcxx/include/__rcu/readme.md                |  69 ++++++
 libcxx/include/module.modulemap.in            |   1 +
 .../saferecl/saferecl.rcu/general.pass.cpp    | 117 ++++++++++
 7 files changed, 455 insertions(+)
 create mode 100644 libcxx/include/__rcu/rcu_list.h
 create mode 100644 libcxx/include/__rcu/readme.md
 create mode 100644 libcxx/test/std/thread/saferecl/saferecl.rcu/general.pass.cpp

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index a54e6f198a926..062f487e3d492 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -744,6 +744,7 @@ set(files
   __ranges/zip_transform_view.h
   __ranges/zip_view.h
   __rcu/rcu_domain.h
+  __rcu/rcu_list.h
   __rcu/rcu_obj_base.h
   __split_buffer
   __std_mbstate_t.h
diff --git a/libcxx/include/__rcu/rcu_domain.h b/libcxx/include/__rcu/rcu_domain.h
index 9b41cb7758458..fbd19933e171d 100644
--- a/libcxx/include/__rcu/rcu_domain.h
+++ b/libcxx/include/__rcu/rcu_domain.h
@@ -12,6 +12,16 @@
 
 #include <__config>
 
+#include <__atomic/atomic.h>
+#include <atomic>
+#include <cstdint>
+#include <map>
+#include <mutex>
+#include <vector>
+
+// todo debug
+#include <cstdio>
+
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
 #endif
@@ -20,6 +30,207 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
 
+// Owns one _ThreadLocal value per thread and lets the owner visit the values of every thread that registered.
+template <class _ThreadLocal>
+class __thread_local_owner {
+  // todo put globals in experimental dylib
+  inline static thread_local map<const __thread_local_owner*, _ThreadLocal> __thread_local_instances;
+
+  // Keep track of all thread-local instances owned by this owner.
+  // Only emplaced the first time a thread is trying to access its thread-local instance.
+  // NOTE(review): entries are never removed, so a reference dangles once its thread exits and its map is destroyed.
+  vector<atomic_ref<_ThreadLocal>> __owned_instances_;
+  mutex __mtx_; // guards __owned_instances_
+
+  void __register(_ThreadLocal& __obj) {
+    lock_guard<std::mutex> __lg(__mtx_);
+    __owned_instances_.emplace_back(__obj);
+  }
+
+public:
+  __thread_local_owner()                       = default;
+  __thread_local_owner(__thread_local_owner&&) = delete;
+
+  // Returns a reference to the calling thread's instance, creating and registering it on first use.
+  atomic_ref<_ThreadLocal> __get_current_thread_instance() {
+    // try_emplace finds the slot or value-initializes a new one in a single lookup.
+    auto [__it, __inserted] = __thread_local_instances.try_emplace(this);
+    if (__inserted)
+      __register(__it->second);
+    return atomic_ref(__it->second);
+  }
+
+  // Calls __f on each registered instance while holding the registration mutex.
+  template <class _Func>
+  void __for_each_owned_instances(_Func&& __f) {
+    lock_guard<std::mutex> __lg(__mtx_);
+    for (auto __instance : __owned_instances_)
+      __f(__instance);
+  }
+};
+
+// Adopted the 2-phase implementation in the section
+// "3) General-Purpose RCU" of the paper
+// http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
+
+struct __reader_states {
+  // bit 15 is the grace period phase 0 or 1
+  // bits 0-14 is the reader nest level
+  //
+  // a thread can have nested reader locks, such as
+  // domain.lock();   // nest level = 1
+  // domain.lock();   // nest level = 2
+  // ...
+  // domain.unlock(); // nest level = 1
+  // domain.unlock(); // nest level = 0
+
+  static constexpr uint16_t __grace_period_phase_mask = 0b1000'0000'0000'0000;
+  static constexpr uint16_t __reader_nest_level_mask  = 0b0111'1111'1111'1111;
+
+  using __state_type = uint16_t;
+
+  __thread_local_owner<__state_type> __per_thread_states_;
+
+  static uint16_t __get_grace_period_phase(__state_type __state) { return __state & __grace_period_phase_mask; }
+
+  static uint16_t __get_reader_nest_level(__state_type __state) { return __state & __reader_nest_level_mask; }
+
+  static bool __is_quiescent_state(__state_type __state) { return __get_reader_nest_level(__state) == 0; }
+};
+
+// RCU domain: lock()/unlock() delimit read-side critical sections, __synchronize() waits for a grace period.
+class rcu_domain {
+  __reader_states __reader_states_;
+
+  // only the highest bit is used for the phase.
+  std::atomic<__reader_states::__state_type> __global_reader_phase_{};
+
+  // only one thread is allowed to update concurrently
+  std::mutex __reclamation_mutex_; // todo this is not noexcept
+
+  // Raised by the updater while it waits for readers; a reader leaving its outermost section clears it and notifies.
+  std::atomic<bool> __waiting_flag_ = false;
+
+  rcu_domain() = default;
+
+  friend struct __rcu_domain_access;
+
+  // todo put globals in dylib
+  static rcu_domain& __rcu_default_domain() noexcept {
+    static rcu_domain __default_domain;
+    return __default_domain;
+  }
+
+  void __synchronize() noexcept {
+    __cxx_atomic_thread_fence(memory_order_seq_cst);
+    std::unique_lock __lk(__reclamation_mutex_);
+    std::printf("rcu_domain::__synchronize() going through phase 1\n");
+    __update_phase_and_wait();
+    __barrier();
+    std::printf("rcu_domain::__synchronize() going through phase 2\n");
+    __update_phase_and_wait();
+    __lk.unlock();
+    __cxx_atomic_thread_fence(memory_order_seq_cst);
+  }
+
+  void __update_phase_and_wait() noexcept {
+    // Flip the global phase
+    auto __old_phase =
+        __global_reader_phase_.fetch_xor(__reader_states::__grace_period_phase_mask, std::memory_order_relaxed);
+    auto __new_phase = __old_phase ^ __reader_states::__grace_period_phase_mask;
+    std::printf("rcu_domain::__update_phase_and_wait() new phase: 0x%04x\n", __new_phase);
+    __barrier();
+    // Wait for all threads to quiesce in the old phase. The flag is raised *before* every scan so that a reader
+    // leaving between the scan and wait() is guaranteed to see it and notify; otherwise the wakeup could be lost.
+    __waiting_flag_.store(true, std::memory_order_seq_cst);
+    while (__any_reader_in_ongoing_grace_period(__new_phase)) {
+      __waiting_flag_.wait(true, std::memory_order_seq_cst);
+      __waiting_flag_.store(true, std::memory_order_seq_cst);
+    }
+    __waiting_flag_.store(false, std::memory_order_relaxed);
+  }
+
+  bool __any_reader_in_ongoing_grace_period(__reader_states::__state_type __global_phase) noexcept {
+    bool __any_ongoing = false;
+    __reader_states_.__per_thread_states_.__for_each_owned_instances(
+        [this, __global_phase, &__any_ongoing](atomic_ref<__reader_states::__state_type> __state) {
+          __any_ongoing |= __is_grace_period_ongoing(__state.load(memory_order_seq_cst), __global_phase);
+        });
+    return __any_ongoing;
+  }
+
+  bool __is_grace_period_ongoing(__reader_states::__state_type __thread_state,
+                                 __reader_states::__state_type __global_phase) const noexcept {
+    // https://lwn.net/Articles/323929/
+    // The phase is flipped at the beginning of a grace period.
+    // Any readers that started before the flip will have the old phase
+    // and we consider them as ongoing that we need to wait for before we can close the grace period.
+    return !__reader_states::__is_quiescent_state(__thread_state) &&
+           __reader_states::__get_grace_period_phase(__thread_state) != __global_phase;
+  }
+
+  void __barrier() noexcept { asm volatile("" : : : "memory"); }
+
+public:
+  rcu_domain(const rcu_domain&)            = delete;
+  rcu_domain& operator=(const rcu_domain&) = delete;
+
+  void printAllReaderStatesInHex() {
+    __reader_states_.__per_thread_states_.__for_each_owned_instances([](auto __state_ref) {
+      std::printf("Reader state: 0x%04x\n", __state_ref.load());
+    });
+  }
+
+  void lock() {
+    auto __current_thread_state_ref = __reader_states_.__per_thread_states_.__get_current_thread_instance();
+    if (__reader_states::__is_quiescent_state(__current_thread_state_ref.load(memory_order_relaxed))) {
+      // Entering a read-side critical section from a quiescent state.
+      __current_thread_state_ref.store(
+          __reader_states::__state_type(__global_reader_phase_.load(std::memory_order_relaxed) | uint16_t(1)),
+          memory_order_relaxed);
+      __cxx_atomic_thread_fence(memory_order_seq_cst);
+    } else {
+      // Already in read-side critical section, just increment the nest level.
+      __current_thread_state_ref.fetch_add(1, memory_order_relaxed);
+    }
+  }
+
+  bool try_lock() {
+    lock();
+    return true;
+  }
+
+  void unlock() {
+    auto __current_thread_state_ref = __reader_states_.__per_thread_states_.__get_current_thread_instance();
+    __cxx_atomic_thread_fence(memory_order_seq_cst);
+    // Decrement the nest level. This and the flag load below are seq_cst so that either the updater's scan sees
+    // the quiescent state or this thread sees the raised flag; with relaxed ordering both could be missed.
+    auto __old_state = __current_thread_state_ref.fetch_sub(1, memory_order_seq_cst);
+    if (__reader_states::__get_reader_nest_level(__old_state) == 1 && __waiting_flag_.load(memory_order_seq_cst)) {
+      // Transitioning to quiescent state, wake up waiters.
+      __waiting_flag_.store(false, std::memory_order_relaxed);
+      __waiting_flag_.notify_all();
+    }
+  }
+};
+
+struct __rcu_domain_access {
+  static rcu_domain& __rcu_default_domain() noexcept { return rcu_domain::__rcu_default_domain(); }
+  static void __rcu_synchronize(rcu_domain& __dom) noexcept { __dom.__synchronize(); }
+};
+
+// todo put these in the experimental dylib
+inline rcu_domain& rcu_default_domain() noexcept { return __rcu_domain_access::__rcu_default_domain(); }
+
+inline void rcu_synchronize(rcu_domain& __dom = rcu_default_domain()) noexcept {
+  __rcu_domain_access::__rcu_synchronize(__dom);
+}
+
+// Declared only; this header does not define them yet. Names are reserved identifiers like the rest of the file.
+void rcu_barrier(rcu_domain& __dom = rcu_default_domain()) noexcept;
+template <class _Tp, class _Dp = default_delete<_Tp>>
+void rcu_retire(_Tp* __p, _Dp __d = _Dp(), rcu_domain& __dom = rcu_default_domain());
+
 #endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__rcu/rcu_list.h b/libcxx/include/__rcu/rcu_list.h
new file mode 100644
index 0000000000000..65ce35f1cd7a1
--- /dev/null
+++ b/libcxx/include/__rcu/rcu_list.h
@@ -0,0 +1,39 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___RCU_RCU_LIST_H
+#define _LIBCPP___RCU_RCU_LIST_H
+
+#include <__config>
+#include <__functional/function.h>
+#include <__rcu/rcu_domain.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+// Node holding a type-erased callback and a pointer to the next node.
+// rcu_obj_base privately derives from it (see <__rcu/rcu_obj_base.h>).
+struct __rcu_node {
+  function<void()> __callback_{};
+  __rcu_node* __next_ = nullptr;
+};
+
+// Placeholder; currently has no members.
+struct __intrusive_linked_list_view {};
+
+#endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___RCU_RCU_LIST_H
diff --git a/libcxx/include/__rcu/rcu_obj_base.h b/libcxx/include/__rcu/rcu_obj_base.h
index faff897a1f159..7b58b0c8dfee7 100644
--- a/libcxx/include/__rcu/rcu_obj_base.h
+++ b/libcxx/include/__rcu/rcu_obj_base.h
@@ -11,6 +11,9 @@
 #define _LIBCPP___RCU_RCU_OBJ_BASE_H
 
 #include <__config>
+#include <__memory/unique_ptr.h> // for default_delete
+#include <__rcu/rcu_domain.h>
+#include <__rcu/rcu_list.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -20,6 +23,26 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
 
+// Base class template rcu_obj_base from [saferecl.rcu.base]. All template parameter, function parameter and
+// member names are reserved identifiers so that user-defined macros cannot interfere with this header.
+template <class _Tp, class _Dp = default_delete<_Tp>>
+class rcu_obj_base : private __rcu_node {
+public:
+  void retire(_Dp __d = _Dp(), rcu_domain& __dom = rcu_default_domain()) noexcept;
+
+protected:
+  // Special members are protected: only a derived class can create, copy, assign or destroy the base.
+  rcu_obj_base()                               = default;
+  rcu_obj_base(const rcu_obj_base&)            = default;
+  rcu_obj_base(rcu_obj_base&&)                 = default;
+  rcu_obj_base& operator=(const rcu_obj_base&) = default;
+  rcu_obj_base& operator=(rcu_obj_base&&)      = default;
+  ~rcu_obj_base()                              = default;
+
+private:
+  _Dp __deleter_;
+};
+
 #endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__rcu/readme.md b/libcxx/include/__rcu/readme.md
new file mode 100644
index 0000000000000..ae83baf44dc61
--- /dev/null
+++ b/libcxx/include/__rcu/readme.md
@@ -0,0 +1,69 @@
+# folly
+
+
+## domain
+
+- `lock` :
+  - increment the counter with the epoch number == `_version`
+
+- `unlock`:
+  - decrement the counter (without arguments)
+
+- `retire` :
+  - push `cb` to `_q`
+  - if a long time has passed since the last sync, `try_lock` the sync mutex and run `half_sync` without blocking (to collect some finished nodes)
+    - hand the finished `cb`s to the `executor` (executed immediately)
+
+- `half_sync` :
+  - `current` is the current `_version` number, `next` is +1
+  - move all nodes from `_q` to `queues[0]` (cannot just have a single `queue` and swap because of later readers) (thread safe with concurrent more push to `_q`)
+  - if is blocking, wait for zero for the epoch `next & 1`
+  - otherwise (non-blocking), if epoch `next & 1` still has readers, return
+  - at this stage epoch `next & 1` has zero readers; a late-arriving reader increments the `current` epoch's reader count instead (there is no concurrent `half_sync`, as it is mutex protected)
+  - move all nodes from `queue[1]` to `finished`, and move all nodes from `queue[0]` to `queue[1]`
+  - store `next` to `_version`
+  - notify the threads blocked in `turn_.waitForTurn(next)`
+
+- `sync`
+  - `current` is the current `_version` number, `target` is +2
+  - while true
+     - if current `work_` is smaller than `target` and cas to `target` succeeded,
+        `half_sync` until `version_` >= `target`, and run all the finished `cb`s, `return`
+     - else 
+       - if `version_` >= `target` , `return`
+       - else `turn_.waitForTurn(work)` (so if other's target >= our target, other's second epoch half sync will unblock us)
+
+
+
+## executor
+
+- immediately invoke
+- queue if the `f` schedule another `f2` in the `executor`
+
+
+## example
+
+version == 0
+
+  T1                            T2                       T3
+
+Reader1 lock (0, 1)
+read obj0
+                       obj = obj1
+                       retire(obj0)
+                       sync, target = 2
+                       half, next = 1
+                       wait_zero(epoch == 1)
+                       cb0 -> queue[1]
+                       version = 1
+                       notify(1)
+                                                   Read2 lock (1, 1)
+                                                   read obj1
+                      half, next = 2
+unlock (0,0)
+                      wait_zero(epoch==0)
+                      cb0 -> finished
+                      run cb0
+                      version = 2
+                      notify(2)
+                                                  unlock
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index e197d32f938d6..c998fe86a099f 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -1957,6 +1957,7 @@ module std [system] {
 
   module rcu {
     module rcu_domain      { header "__rcu/rcu_domain.h" }
+    module rcu_list        { header "__rcu/rcu_list.h" }
     module rcu_obj_base    { header "__rcu/rcu_obj_base.h" }
     header "rcu"
     export *
diff --git a/libcxx/test/std/thread/saferecl/saferecl.rcu/general.pass.cpp b/libcxx/test/std/thread/saferecl/saferecl.rcu/general.pass.cpp
new file mode 100644
index 0000000000000..88412976f7a62
--- /dev/null
+++ b/libcxx/test/std/thread/saferecl/saferecl.rcu/general.pass.cpp
@@ -0,0 +1,117 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: no-threads
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
+
+// <rcu>
+
+#include <rcu>
+#include <thread>
+#include <chrono>
+#include <iostream>
+#include <print>
+
+#include "__rcu/rcu_domain.h"
+#include "make_test_thread.h"
+#include "test_macros.h"
+
+// Prints `msg` to std::cout, prefixed with the time elapsed since `start`.
+void log(auto start, std::string_view msg) {
+  const auto elapsed = std::chrono::system_clock::now() - start;
+  std::println(std::cout, "[{:%H:%M:%S}] {}", elapsed, msg);
+}
+
+int loop_num = 1;
+
+// Runs four reader threads with staggered start times, each holding an RCU
+// read-side critical section for one second per iteration, while the main
+// thread repeatedly calls std::rcu_synchronize() and logs when each call returns.
+int main(int, char**) {
+  auto start = std::chrono::system_clock::now();
+
+  // Spawns a reader thread that waits `delay` and then, `loop_num` times, holds a
+  // read-side critical section for one second and prints all reader states.
+  auto make_reader = [start](std::string name, std::chrono::milliseconds delay) {
+    return support::make_test_thread([start, name, delay]() {
+      std::this_thread::sleep_for(delay);
+      std::rcu_domain& dom = std::rcu_default_domain();
+      for (int i = 0; i < loop_num; ++i) {
+        log(start, name + ": entering rcu read-side critical section " + std::to_string(i));
+        dom.lock();
+        log(start, name + ": sleeping");
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+        log(start, name + ": leaving rcu read-side critical section " + std::to_string(i));
+        dom.unlock();
+        log(start, name + ": printing all reader states");
+        dom.printAllReaderStatesInHex();
+      }
+    });
+  };
+
+  // The start offsets make the critical sections overlap the main thread's
+  // rcu_synchronize() calls at different points.
+  auto t1 = make_reader("t1", std::chrono::milliseconds(0));
+  auto t2 = make_reader("t2", std::chrono::milliseconds(200));
+  auto t3 = make_reader("t3", std::chrono::milliseconds(1100));
+  auto t4 = make_reader("t4", std::chrono::milliseconds(3000));
+
+  // Give t1 a head start so the first rcu_synchronize() finds an active reader.
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  for (int i = 0; i < loop_num + 5; ++i) {
+    log(start, "t0: calling rcu_synchronize" + std::to_string(i));
+    std::rcu_synchronize();
+    log(start, "t0: rcu_synchronize returned" + std::to_string(i));
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+  }
+
+  t1.join();
+  t2.join();
+  t3.join();
+  t4.join();
+
+  // A .pass.cpp test reports success through a zero exit status.
+  return 0;
+}

>From 4ede645061e762c54e5014e541ea48c3b2410975 Mon Sep 17 00:00:00 2001
From: Hui Xie <hui.xie1990 at gmail.com>
Date: Sun, 11 Jan 2026 18:59:42 +0000
Subject: [PATCH 3/3] callbacks

---
 libcxx/include/__rcu/rcu_domain.h             |  76 +++++++--
 libcxx/include/__rcu/rcu_list.h               |  51 +++++-
 libcxx/include/__rcu/rcu_obj_base.h           |   8 +-
 .../read_write_lock_compare.pass.cpp          | 154 ++++++++++++++++++
 .../saferecl/saferecl.rcu/retire.pass.cpp     | 150 +++++++++++++++++
 5 files changed, 417 insertions(+), 22 deletions(-)
 create mode 100644 libcxx/test/std/thread/saferecl/saferecl.rcu/read_write_lock_compare.pass.cpp
 create mode 100644 libcxx/test/std/thread/saferecl/saferecl.rcu/retire.pass.cpp

diff --git a/libcxx/include/__rcu/rcu_domain.h b/libcxx/include/__rcu/rcu_domain.h
index fbd19933e171d..8863849f09362 100644
--- a/libcxx/include/__rcu/rcu_domain.h
+++ b/libcxx/include/__rcu/rcu_domain.h
@@ -13,6 +13,9 @@
 #include <__config>
 
 #include <__atomic/atomic.h>
+#include <__rcu/rcu_list.h>
+
+// todo replace with internal headers
 #include <atomic>
 #include <cstdint>
 #include <map>
@@ -45,6 +48,8 @@ class __thread_local_owner {
     __owned_instances_.emplace_back(__obj);
   }
 
+  // todo: deregister on thread exit?
+
 public:
   __thread_local_owner()                       = default;
   __thread_local_owner(__thread_local_owner&&) = delete;
@@ -99,19 +104,32 @@ struct __reader_states {
 };
 
 class rcu_domain {
+  // todo optimize the layout
+
   __reader_states __reader_states_;
 
   // only the highest bit is used for the phase.
   std::atomic<__reader_states::__state_type> __global_reader_phase_{};
 
   // only one thread is allowed to update concurrently
-  std::mutex __reclamation_mutex_; // todo this is not noexcept
+  std::mutex __grace_period_mutex_; // todo this is not noexcept
+
+  std::atomic<bool> __grace_period_waiting_flag_ = false;
 
-  std::atomic<bool> __waiting_flag_ = false;
+  // todo: maybe use a lock-free queue
+  std::mutex __retire_queue_mutex_; // todo this is not noexcept
+  __rcu_singly_list_view __retired_callback_queue_;
+
+  // these two queues do not need extra synchronization
+  // as they are always processed under the grace period mutex
+  __rcu_singly_list_view __callbacks_phase_1_;
+  __rcu_singly_list_view __callbacks_phase_2_;
 
   rcu_domain() = default;
 
   friend struct __rcu_domain_access;
+  template <class, class>
+  friend class rcu_obj_base;
 
   // todo put globals in dylib
   static rcu_domain& __rcu_default_domain() noexcept {
@@ -119,32 +137,58 @@ class rcu_domain {
     return __default_domain;
   }
 
+  // Stores `__cb` in a newly allocated `__rcu_node` and appends that node to
+  // `__retired_callback_queue_` while holding `__retire_queue_mutex_`.
+  // NOTE(review): the node allocated here is not freed in this function;
+  // ownership presumably passes to whoever drains the queue - confirm.
+  template <class Callback>
+  void __retire_callback(Callback&& __cb) noexcept {
+    auto* __retired        = new __rcu_node();
+    __retired->__callback_ = std::forward<Callback>(__cb);
+
+    std::unique_lock __queue_guard(__retire_queue_mutex_);
+    __retired_callback_queue_.__push_back(__retired);
+  }
+
   void __synchronize() noexcept {
     __cxx_atomic_thread_fence(memory_order_seq_cst);
-    std::unique_lock __lk(__reclamation_mutex_);
-    std::printf("rcu_domain::__synchronize() going through phase 1\n");
-    __update_phase_and_wait();
+    std::unique_lock __lk(__grace_period_mutex_);
+    //std::printf("rcu_domain::__synchronize() going through phase 1\n");
+    auto __ready_callbacks = __update_phase_and_wait();
+
+    // Invoke the ready callbacks outside of the grace period mutex
+    __lk.unlock();
+    __ready_callbacks.__for_each([](auto* __node) { __node->__callback_(); });
+    __lk.lock();
+
     __barrier();
-    std::printf("rcu_domain::__synchronize() going through phase 2\n");
-    __update_phase_and_wait();
+    //std::printf("rcu_domain::__synchronize() going through phase 2\n");
+    __ready_callbacks = __update_phase_and_wait();
+
+    // Invoke the ready callbacks outside of the grace period mutex
     __lk.unlock();
+    __ready_callbacks.__for_each([](auto* __node) { __node->__callback_(); });
     __cxx_atomic_thread_fence(memory_order_seq_cst);
   }
 
-  void __update_phase_and_wait() noexcept {
+  // Runs one half of a grace period and returns the callbacks that became ready.
+  //
+  // Newly retired callbacks are first moved into `__callbacks_phase_1_` (under
+  // `__retire_queue_mutex_`). The global phase bit is then flipped and the call
+  // blocks until `__any_reader_in_ongoing_grace_period` reports no reader.
+  // Finally the previous contents of `__callbacks_phase_2_` are returned and the
+  // phase-1 callbacks take their place, so a callback collected by one call is
+  // returned by the next call.
+  __rcu_singly_list_view __update_phase_and_wait() noexcept {
+    std::unique_lock __retire_lk(__retire_queue_mutex_);
+    __callbacks_phase_1_.__splice_back(__retired_callback_queue_);
+    __retire_lk.unlock();
+
     // Flip the global phase
     auto __old_phase =
         __global_reader_phase_.fetch_xor(__reader_states::__grace_period_phase_mask, std::memory_order_relaxed);
     auto __new_phase = __old_phase ^ __reader_states::__grace_period_phase_mask;
-    std::printf("rcu_domain::__update_phase_and_wait() new phase: 0x%04x\n", __new_phase);
+    //std::printf("rcu_domain::__update_phase_and_wait() new phase: 0x%04x\n", __new_phase);
 
     __barrier();
     // Wait for all threads to quiesce in the old phase
     while (__any_reader_in_ongoing_grace_period(__new_phase)) {
-      __waiting_flag_.store(true, std::memory_order_relaxed);
-      __waiting_flag_.wait(true, std::memory_order_relaxed);
+      // Announce that a waiter exists, then block until the flag is no longer true;
+      // the reader-side exit path clears the flag and calls notify_all().
+      __grace_period_waiting_flag_.store(true, std::memory_order_relaxed);
+      __grace_period_waiting_flag_.wait(true, std::memory_order_relaxed);
     }
-    __waiting_flag_.store(false, std::memory_order_relaxed);
+    __grace_period_waiting_flag_.store(false, std::memory_order_relaxed);
+
+    // Hand out what waited in phase 2 and promote phase 1 into its place.
+    __rcu_singly_list_view __ready_callbacks;
+    __ready_callbacks.__splice_back(__callbacks_phase_2_);
+    __callbacks_phase_2_.__splice_back(__callbacks_phase_1_);
+    return __ready_callbacks;
   }
 
   bool __any_reader_in_ongoing_grace_period(__reader_states::__state_type __global_phase) noexcept {
@@ -206,10 +250,11 @@ class rcu_domain {
     // Decrement the nest level.
     auto __old_state = __current_thread_state_ref.fetch_sub(1, memory_order_relaxed);
 
-    if (__reader_states::__get_reader_nest_level(__old_state) == 1 && __waiting_flag_.load(memory_order_relaxed)) {
+    if (__reader_states::__get_reader_nest_level(__old_state) == 1 &&
+        __grace_period_waiting_flag_.load(memory_order_relaxed)) {
       // Transitioning to quiescent state, wake up waiters.
-      __waiting_flag_.store(false, std::memory_order_relaxed);
-      __waiting_flag_.notify_all();
+      __grace_period_waiting_flag_.store(false, std::memory_order_relaxed);
+      __grace_period_waiting_flag_.notify_all();
     }
   }
 };
@@ -228,6 +273,7 @@ inline void rcu_synchronize(rcu_domain& __dom = rcu_default_domain()) noexcept {
 }
 
 void rcu_barrier(rcu_domain& dom = rcu_default_domain()) noexcept;
+
 template <class T, class D = default_delete<T>>
 void rcu_retire(T* p, D d = D(), rcu_domain& dom = rcu_default_domain());
 
diff --git a/libcxx/include/__rcu/rcu_list.h b/libcxx/include/__rcu/rcu_list.h
index 65ce35f1cd7a1..2547d7223a7cc 100644
--- a/libcxx/include/__rcu/rcu_list.h
+++ b/libcxx/include/__rcu/rcu_list.h
@@ -7,8 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef _LIBCPP___RCU_RCU_OBJ_BASE_H
-#define _LIBCPP___RCU_RCU_OBJ_BASE_H
+#ifndef _LIBCPP___RCU_RCU_LIST_H
+#define _LIBCPP___RCU_RCU_LIST_H
 
 #include <__config>
 #include <__functional/function.h>
@@ -27,10 +27,53 @@ struct __rcu_node {
   __rcu_node* __next_ = nullptr;
 };
 
-struct __intrusive_linked_list_view {};
+// A view over a singly linked chain of `__rcu_node`s, tracked through pointers to
+// its first and last node. The view itself never allocates or frees nodes.
+class __rcu_singly_list_view {
+  __rcu_node* __head_ = nullptr;
+  __rcu_node* __tail_ = nullptr;
+
+public:
+  // Moves every node of `__other` to the end of this list and leaves `__other`
+  // empty. Does nothing when `__other` is empty.
+  void __splice_back(__rcu_singly_list_view& __other) noexcept {
+    if (__other.__head_ == nullptr)
+      return;
+
+    if (__head_ != nullptr)
+      __tail_->__next_ = __other.__head_;
+    else
+      __head_ = __other.__head_;
+    __tail_ = __other.__tail_;
+
+    __other.__head_ = nullptr;
+    __other.__tail_ = nullptr;
+  }
+
+  // Appends `__node` to the end of the list. `__node->__next_` is left untouched,
+  // so the caller is expected to pass a node whose `__next_` is null.
+  void __push_back(__rcu_node* __node) noexcept {
+    if (__head_ != nullptr)
+      __tail_->__next_ = __node;
+    else
+      __head_ = __node;
+    __tail_ = __node;
+  }
+
+  // Calls `__f(node)` for each node, front to back.
+  template <class _Func>
+  void __for_each(_Func&& __f) noexcept {
+    // `__f` may delete the node it receives, so read the successor before the call.
+    for (__rcu_node *__node = __head_, *__next = nullptr; __node != nullptr; __node = __next) {
+      __next = __node->__next_;
+      __f(__node);
+    }
+  }
+};
+
 
 #endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
 
 _LIBCPP_END_NAMESPACE_STD
 
-#endif // _LIBCPP___RCU_RCU_OBJ_BASE_H
+#endif // _LIBCPP___RCU_RCU_LIST_H
diff --git a/libcxx/include/__rcu/rcu_obj_base.h b/libcxx/include/__rcu/rcu_obj_base.h
index 7b58b0c8dfee7..3d1de353613a5 100644
--- a/libcxx/include/__rcu/rcu_obj_base.h
+++ b/libcxx/include/__rcu/rcu_obj_base.h
@@ -26,7 +26,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class T, class D = default_delete<T>>
 class rcu_obj_base : private __rcu_node {
 public:
-  void retire(D d = D(), rcu_domain& dom = rcu_default_domain()) noexcept;
+  // Hands `dom` a callback that invokes `d` on this object, viewed as a `T*`.
+  void retire(D d = D(), rcu_domain& dom = rcu_default_domain()) noexcept {
+    T* const self = static_cast<T*>(this);
+    auto reclaim  = [self, d = std::move(d)]() mutable { d(self); };
+    dom.__retire_callback(std::move(reclaim));
+  }
 
 protected:
   rcu_obj_base()                               = default;
@@ -36,8 +40,6 @@ class rcu_obj_base : private __rcu_node {
   rcu_obj_base& operator=(rcu_obj_base&&)      = default;
   ~rcu_obj_base()                              = default;
 
-private:
-  D deleter; // exposition only
 };
 
 #endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_THREADS && _LIBCPP_HAS_EXPERIMENTAL_RCU
diff --git a/libcxx/test/std/thread/saferecl/saferecl.rcu/read_write_lock_compare.pass.cpp b/libcxx/test/std/thread/saferecl/saferecl.rcu/read_write_lock_compare.pass.cpp
new file mode 100644
index 0000000000000..2fc79013994e1
--- /dev/null
+++ b/libcxx/test/std/thread/saferecl/saferecl.rcu/read_write_lock_compare.pass.cpp
@@ -0,0 +1,154 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: no-threads
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
+
+// <rcu>
+
+#include <atomic>
+#include <rcu>
+#include <shared_mutex>
+#include <stop_token>
+#include <thread>
+#include <chrono>
+#include <iostream>
+#include <print>
+#include <string>
+
+#include "__rcu/rcu_domain.h"
+#include "make_test_thread.h"
+#include "test_macros.h"
+
+constexpr int num_reader = 4;
+const std::chrono::seconds test_time(10);
+
+// Payload shared between the reader and writer threads of both benchmarks.
+struct alignas(128) MyObject : public std::rcu_obj_base<MyObject> {
+  std::string data_;
+
+  inline static int instance_count = 0;
+  MyObject() : data_(std::to_string(instance_count++) + " instance very very very long string") {}
+
+  // Simulates a unit of read-side work by busy-waiting for 10 microseconds.
+  void doWork() {
+    using clock = std::chrono::high_resolution_clock;
+    const std::chrono::microseconds budget(10);
+    const auto begin = clock::now();
+    while (clock::now() - begin < budget) {
+    }
+  }
+};
+
+
+// Baseline for the RCU comparison: `num_reader` threads call doWork() on a shared
+// object under a std::shared_lock while one writer replaces the object under a
+// std::unique_lock. Runs for `test_time`, then each thread prints its count.
+void test_read_write_lock() {
+  MyObject* globalObjRWLock = new MyObject();
+  std::shared_mutex globalObjMutex;
+
+  std::vector<std::jthread> readers;
+  readers.reserve(num_reader);
+
+  auto reader_func = [&globalObjRWLock, &globalObjMutex](std::stop_token token) {
+    int read_count = 0;
+    while (!token.stop_requested()) {
+      std::shared_lock<std::shared_mutex> lock(globalObjMutex);
+      globalObjRWLock->doWork();
+      ++read_count;
+    }
+    std::println("Reader thread read {} times", read_count);
+  };
+
+  auto writer_func = [&globalObjRWLock, &globalObjMutex](std::stop_token token) {
+    int write_count = 0;
+    while (!token.stop_requested()) {
+      // Allocate before taking the lock and free after releasing it, so the
+      // exclusive section only covers the pointer swap.
+      auto newObj = new MyObject();
+      std::unique_lock<std::shared_mutex> lock(globalObjMutex);
+      auto oldObj     = globalObjRWLock;
+      globalObjRWLock = newObj;
+      lock.unlock();
+      delete oldObj;
+      ++write_count;
+      std::this_thread::sleep_for(std::chrono::microseconds(100));
+    }
+    std::println("Writer thread wrote {} times", write_count);
+  };
+
+  for (int i = 0; i < num_reader; ++i) {
+    readers.emplace_back(reader_func);
+  }
+  std::jthread writer(writer_func);
+
+  std::this_thread::sleep_for(test_time);
+
+  for (auto& reader : readers) {
+    reader.request_stop();
+  }
+  writer.request_stop();
+
+  // Join explicitly: the last published object may only be freed once no thread
+  // can touch it any more. Without this delete it would leak.
+  writer.join();
+  for (auto& reader : readers) {
+    reader.join();
+  }
+  delete globalObjRWLock;
+}
+
+// RCU counterpart of test_read_write_lock(): readers bracket doWork() with
+// dom.lock()/dom.unlock(), the writer swaps in a new object and retires the old
+// one, and a separate thread calls std::rcu_synchronize() every 50ms. Runs for
+// `test_time`, then each reader/writer thread prints its count.
+void test_rcu() {
+  std::rcu_domain& dom                  = std::rcu_default_domain();
+  std::atomic<MyObject*> global_obj_rcu = new MyObject();
+
+  std::vector<std::jthread> readers;
+  readers.reserve(num_reader);
+
+  auto reader_func = [&dom, &global_obj_rcu](std::stop_token token) {
+    int read_count = 0;
+    while (!token.stop_requested()) {
+      dom.lock();
+      // acquire pairs with the writer's release so the object is seen fully constructed.
+      auto obj = global_obj_rcu.load(std::memory_order_acquire);
+      obj->doWork();
+      dom.unlock();
+      ++read_count;
+    }
+    std::println("RCU Reader thread read {} times", read_count);
+  };
+
+  auto writer_func = [&global_obj_rcu](std::stop_token token) {
+    int write_count = 0;
+    while (!token.stop_requested()) {
+      auto newObj = new MyObject();
+      // release publishes the constructed object; acquire lets us safely hand the
+      // previous one to retire().
+      auto oldObj = global_obj_rcu.exchange(newObj, std::memory_order_acq_rel);
+      oldObj->retire();
+      ++write_count;
+      std::this_thread::sleep_for(std::chrono::microseconds(100));
+    }
+    std::println("RCU Writer thread wrote {} times", write_count);
+  };
+
+  auto syncer_func = [&dom](std::stop_token token) {
+    while (!token.stop_requested()) {
+      std::rcu_synchronize(dom);
+      std::this_thread::sleep_for(std::chrono::milliseconds(50));
+    }
+  };
+
+  for (int i = 0; i < num_reader; ++i) {
+    readers.emplace_back(reader_func);
+  }
+  std::jthread writer(writer_func);
+  std::jthread syncer(syncer_func);
+
+  std::this_thread::sleep_for(test_time);
+
+  for (auto& reader : readers) {
+    reader.request_stop();
+  }
+  writer.request_stop();
+  syncer.request_stop();
+
+  // Wait for every thread before the final grace period: otherwise the writer
+  // could still retire an object after the last rcu_synchronize(), and that
+  // object's deleter would never run.
+  writer.join();
+  for (auto& reader : readers) {
+    reader.join();
+  }
+  syncer.join();
+  std::rcu_synchronize(dom);
+
+  // The object that is still published was never retired; free it directly.
+  delete global_obj_rcu.load(std::memory_order_relaxed);
+}
+
+// Runs the shared_mutex baseline and then the RCU variant; each prints its
+// per-thread read/write counts.
+int main(int, char**) {
+  std::println("Testing read-write lock:");
+  test_read_write_lock();
+  std::println("Testing RCU:");
+  test_rcu();
+  // A .pass.cpp test reports success through a zero exit status.
+  return 0;
+}
diff --git a/libcxx/test/std/thread/saferecl/saferecl.rcu/retire.pass.cpp b/libcxx/test/std/thread/saferecl/saferecl.rcu/retire.pass.cpp
new file mode 100644
index 0000000000000..dde6cdd323084
--- /dev/null
+++ b/libcxx/test/std/thread/saferecl/saferecl.rcu/retire.pass.cpp
@@ -0,0 +1,150 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// UNSUPPORTED: no-threads
+// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20, c++23
+
+// <rcu>
+
+#include <rcu>
+#include <thread>
+#include <chrono>
+#include <iostream>
+#include <print>
+#include <string>
+
+#include "__rcu/rcu_domain.h"
+#include "make_test_thread.h"
+#include "test_macros.h"
+
+// Prints `msg` to std::cout, prefixed with the time elapsed since `start`.
+void log(auto start, std::string_view msg) {
+  const auto elapsed = std::chrono::system_clock::now() - start;
+  std::println(std::cout, "[{:%H:%M:%S}] {}", elapsed, msg);
+}
+
+int loop_num = 10;
+
+// Test payload: carries a string identifying the instance and announces its own
+// destruction on std::cout.
+struct MyObject : public std::rcu_obj_base<MyObject> {
+  std::string data_;
+
+  inline static int instance_count = 0;
+
+  MyObject() : data_(std::to_string(instance_count++) + " instance very very very long string") {}
+
+  ~MyObject() { std::println(std::cout, "MyObject {} destructor called", data_); }
+};
+
+std::atomic<MyObject*> global_obj = nullptr;
+
+// Four staggered readers repeatedly read `global_obj` inside an RCU read-side
+// critical section while a writer (t5) publishes new objects and retires the old
+// ones; the main thread drives reclamation by calling std::rcu_synchronize().
+int main(int, char**) {
+  auto start = std::chrono::system_clock::now();
+
+  // Spawns a reader that waits `delay` and then, `loop_num` times, enters a
+  // critical section, loads `global_obj`, holds the section for one second and
+  // logs the object's data again before leaving.
+  auto make_reader = [start](std::string name, std::chrono::milliseconds delay) {
+    return support::make_test_thread([start, name, delay]() {
+      std::this_thread::sleep_for(delay);
+      std::rcu_domain& dom = std::rcu_default_domain();
+      for (int i = 0; i < loop_num; ++i) {
+        log(start, name + ": entering rcu read-side critical section " + std::to_string(i));
+        dom.lock();
+        auto obj = global_obj.load();
+        log(start, name + ": reading: " + (obj ? obj->data_ : "nullptr"));
+        std::this_thread::sleep_for(std::chrono::seconds(1));
+        log(start, name + ": leaving rcu read-side critical section " + std::to_string(i) + " with object " +
+                       (obj ? obj->data_ : "nullptr"));
+        dom.unlock();
+        log(start, name + ": printing all reader states");
+        dom.printAllReaderStatesInHex();
+      }
+    });
+  };
+
+  auto t1 = make_reader("t1", std::chrono::milliseconds(0));
+  auto t2 = make_reader("t2", std::chrono::milliseconds(200));
+  auto t3 = make_reader("t3", std::chrono::milliseconds(1100));
+  auto t4 = make_reader("t4", std::chrono::milliseconds(3000));
+
+  // Writer: once per second, publish a new object and retire the previous one.
+  auto t5 = support::make_test_thread([start]() {
+    std::this_thread::sleep_for(std::chrono::milliseconds(3000));
+    for (int i = 0; i < loop_num; ++i) {
+      auto new_obj = new MyObject();
+
+      log(start, "t5: updating global to : " + new_obj->data_);
+      auto old = global_obj.exchange(new_obj);
+      log(start, "t5: retiring old object " + (old ? old->data_ : "nullptr"));
+      // `global_obj` starts out null, so the first exchange has nothing to retire.
+      if (old != nullptr)
+        old->retire();
+      std::this_thread::sleep_for(std::chrono::seconds(1));
+    }
+  });
+
+  std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  for (int i = 0; i < loop_num + 5; ++i) {
+    log(start, "t0: calling rcu_synchronize" + std::to_string(i));
+    std::rcu_synchronize();
+    log(start, "t0: rcu_synchronize returned" + std::to_string(i));
+    std::this_thread::sleep_for(std::chrono::seconds(1));
+  }
+
+  t1.join();
+  t2.join();
+  t3.join();
+  t4.join();
+  // t5 must be joined as well: destroying a joinable std::thread calls std::terminate.
+  t5.join();
+
+  // With all threads finished, one more grace period runs any deleters still
+  // pending; the object that is still published was never retired, so free it here.
+  std::rcu_synchronize();
+  delete global_obj.exchange(nullptr);
+
+  // A .pass.cpp test reports success through a zero exit status.
+  return 0;
+}



More information about the libcxx-commits mailing list