[libc-commits] [libc] [libc] add rwlock (PR #94156)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Sun Jun 9 01:59:43 PDT 2024


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/94156

>From 32190b039eec473a23c9bfb03fdc8a6cbb8fcaf3 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 03:48:26 -0700
Subject: [PATCH 01/26] [libc] add rwlock implementation

---
 libc/config/config.json                       |   4 +
 libc/docs/configure.rst                       |   1 +
 .../__support/threads/linux/CMakeLists.txt    |  21 +-
 libc/src/__support/threads/linux/rwlock.h     | 519 ++++++++++++++++++
 4 files changed, 540 insertions(+), 5 deletions(-)
 create mode 100644 libc/src/__support/threads/linux/rwlock.h

diff --git a/libc/config/config.json b/libc/config/config.json
index d3d1ff1e28716..8d6a84e732597 100644
--- a/libc/config/config.json
+++ b/libc/config/config.json
@@ -49,6 +49,10 @@
     "LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT": {
       "value": 100,
       "doc": "Default number of spins before blocking if a mutex is in contention (default to 100)."
+    },
+    "LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT": {
+      "value": 100,
+      "doc": "Default number of spins before blocking if a rwlock is in contention (default to 100)."
     }
   }
 }
diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst
index 77ade07714fdf..bdae6c54052f2 100644
--- a/libc/docs/configure.rst
+++ b/libc/docs/configure.rst
@@ -36,6 +36,7 @@ to learn about the defaults for your platform and target.
     - ``LIBC_CONF_PRINTF_FLOAT_TO_STR_USE_MEGA_LONG_DOUBLE_TABLE``: Use large table for better printf long double performance.
 * **"pthread" options**
     - ``LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a mutex is in contention (default to 100).
+    - ``LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT``: Default number of spins before blocking if a rwlock is in contention (default to 100).
     - ``LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY``: Automatically adjust timeout to CLOCK_MONOTONIC (default to true). POSIX API may require CLOCK_REALTIME, which can be unstable and leading to unexpected behavior. This option will convert the real-time timestamp to monotonic timestamp relative to the time of call.
 * **"string" options**
     - ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 9bf88ccc84557..bac8073a66049 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -22,11 +22,11 @@ add_header_library(
     libc.src.__support.time.linux.abs_timeout
 )
 
-set(raw_mutex_additional_flags)
+set(monotonicity_flags)
 if (LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY)
-  set(raw_mutex_additional_flags -DLIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY=1)
+  set(monotonicity_flags -DLIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY=1)
 else()
-  set(raw_mutex_additional_flags -DLIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY=0)
+  set(monotonicity_flags -DLIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY=0)
 endif()
 
 add_header_library(
@@ -42,8 +42,19 @@ add_header_library(
     libc.hdr.types.pid_t
   COMPILE_OPTIONS
     -DLIBC_COPT_RAW_MUTEX_DEFAULT_SPIN_COUNT=${LIBC_CONF_RAW_MUTEX_DEFAULT_SPIN_COUNT}
-    ${raw_mutex_additional_flags}
-  
+    ${monotonicity_flags}
+)
+
+add_header_library(
+  rwlock
+  HDRS
+    rwlock.h
+  DEPENDS
+    .futex_utils
+    .raw_mutex
+  COMPILE_OPTIONS
+    -DLIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT=${LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT}
+    ${monotonicity_flags}
 )
 
 add_header_library(
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
new file mode 100644
index 0000000000000..e8da17681ddfe
--- /dev/null
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -0,0 +1,519 @@
+//===--- Implementation of a Linux RwLock class ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_SUPPORT_THREADS_LINUX_RWLOCK_H
+#define LLVM_LIBC_SRC_SUPPORT_THREADS_LINUX_RWLOCK_H
+
+#include "hdr/errno_macros.h"
+#include "hdr/types/pid_t.h"
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/CPP/expected.h"
+#include "src/__support/CPP/new.h"
+#include "src/__support/CPP/optional.h"
+#include "src/__support/CPP/type_traits/make_signed.h"
+#include "src/__support/OSUtil/linux/x86_64/syscall.h"
+#include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/threads/linux/futex_utils.h"
+#include "src/__support/threads/linux/futex_word.h"
+#include "src/__support/threads/linux/raw_mutex.h"
+#include "src/__support/threads/sleep.h"
+
+#ifndef LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT
+#define LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT 100
+#endif
+
+#ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
+#warning "LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY is not defined, defaulting to 1"
+#endif
+
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#include "src/__support/time/linux/monotonicity.h"
+#endif
+
+namespace LIBC_NAMESPACE {
+class RwLock {
+private:
+  class WaitingQueue final : private RawMutex {
+    FutexWordType pending_reader;
+    FutexWordType pending_writer;
+    Futex reader_serialization;
+    Futex writer_serialization;
+
+  public:
+    class Guard {
+      WaitingQueue &queue;
+
+      LIBC_INLINE constexpr Guard(WaitingQueue &queue) : queue(queue) {}
+
+    public:
+      LIBC_INLINE ~Guard() { queue.unlock(); }
+      LIBC_INLINE FutexWordType &pending_reader() {
+        return queue.pending_reader;
+      }
+      LIBC_INLINE FutexWordType &pending_writer() {
+        return queue.pending_writer;
+      }
+      LIBC_INLINE FutexWordType &reader_serialization() {
+        return queue.reader_serialization.val;
+      }
+      LIBC_INLINE FutexWordType &writer_serialization() {
+        return queue.writer_serialization.val;
+      }
+      friend RwLock;
+    };
+
+  public:
+    LIBC_INLINE constexpr WaitingQueue()
+        : RawMutex(), pending_reader(0), pending_writer(0),
+          reader_serialization(0), writer_serialization(0) {}
+    LIBC_INLINE Guard acquire() {
+      this->lock();
+      return Guard(*this);
+    }
+    LIBC_INLINE long reader_wait(FutexWordType expected,
+                                 cpp::optional<Futex::Timeout> timeout,
+                                 bool is_pshared) {
+      return reader_serialization.wait(expected, timeout, is_pshared);
+    }
+    LIBC_INLINE long reader_notify_all(bool is_pshared) {
+      return reader_serialization.notify_all(is_pshared);
+    }
+    LIBC_INLINE long writer_wait(FutexWordType expected,
+                                 cpp::optional<Futex::Timeout> timeout,
+                                 bool is_pshared) {
+      return writer_serialization.wait(expected, timeout, is_pshared);
+    }
+    LIBC_INLINE long writer_notify_one(bool is_pshared) {
+      return writer_serialization.notify_one(is_pshared);
+    }
+  };
+
+public:
+  enum class Preference : char { Reader, Writer };
+  enum class LockResult {
+    Success = 0,
+    Timeout = ETIMEDOUT,
+    Overflow = EAGAIN,
+    Busy = EBUSY,
+    Deadlock = EDEADLOCK,
+    PermissionDenied = EPERM,
+  };
+
+private:
+  // The State of the RwLock is stored in a 32-bit word, consisting of the
+  // following components:
+  // -----------------------------------------------
+  // | Range |           Description               |
+  // ===============================================
+  // | 0     | Pending Reader Bit                  |
+  // -----------------------------------------------
+  // | 1     | Pending Writer Bit                  |
+  // -----------------------------------------------
+  // | 2-30  | Active Reader Count                 |
+  // -----------------------------------------------
+  // | 31    | Active Writer Bit                   |
+  // -----------------------------------------------
+  class State {
+    // We use a signed integer as the state type. It is easier
+    // to handle state transitions and detections using signed integers.
+    using Type = int32_t;
+
+    // Shift amounts to access the components of the state.
+    LIBC_INLINE_VAR static constexpr Type PENDING_READER_SHIFT = 0;
+    LIBC_INLINE_VAR static constexpr Type PENDING_WRITER_SHIFT = 1;
+    LIBC_INLINE_VAR static constexpr Type ACTIVE_READER_SHIFT = 2;
+    LIBC_INLINE_VAR static constexpr Type ACTIVE_WRITER_SHIFT = 31;
+
+    // Bitmasks to access the components of the state.
+    LIBC_INLINE_VAR static constexpr Type PENDING_READER_BIT =
+        1 << PENDING_READER_SHIFT;
+    LIBC_INLINE_VAR static constexpr Type PENDING_WRITER_BIT =
+        1 << PENDING_WRITER_SHIFT;
+    LIBC_INLINE_VAR static constexpr Type ACTIVE_READER_COUNT_UNIT =
+        1 << ACTIVE_READER_SHIFT;
+    LIBC_INLINE_VAR static constexpr Type ACTIVE_WRITER_BIT =
+        1 << ACTIVE_WRITER_SHIFT;
+    LIBC_INLINE_VAR static constexpr Type PENDING_MASK =
+        PENDING_READER_BIT | PENDING_WRITER_BIT;
+
+  private:
+    Type state;
+
+  public:
+    // Construction and conversion functions.
+    LIBC_INLINE constexpr State(Type state = 0) : state(state) {}
+    LIBC_INLINE constexpr operator Type() const { return state; }
+
+    // Utilities to check the state of the RwLock.
+    LIBC_INLINE constexpr bool has_active_writer() const { return state < 0; }
+    LIBC_INLINE constexpr bool has_active_reader() const {
+      return state >= ACTIVE_READER_COUNT_UNIT; // one reader == one unit
+    }
+    LIBC_INLINE constexpr bool has_acitve_owner() const {
+      return has_active_reader() || has_active_writer();
+    }
+    LIBC_INLINE constexpr bool has_last_reader() const {
+      return (state >> ACTIVE_READER_SHIFT) == 1;
+    }
+    LIBC_INLINE constexpr bool has_pending_writer() const {
+      return state & PENDING_WRITER_BIT;
+    }
+    LIBC_INLINE constexpr bool has_pending() const {
+      return state & PENDING_MASK;
+    }
+    LIBC_INLINE constexpr State set_writer_bit() const {
+      return State(state | ACTIVE_WRITER_BIT);
+    }
+    // The preference parameter changes the behavior of the lock acquisition
+    // if there are both readers and writers waiting for the lock. If writers
+    // are preferred, reader acquisition will be blocked until all pending
+    // writers are served.
+    LIBC_INLINE bool can_acquire_reader(Preference preference) const {
+      // An active writer always blocks readers; under writer preference a
+      // pending writer blocks them too. Every path returns a value, so an
+      // out-of-range Preference can no longer fall off the end (UB).
+      if (has_active_writer())
+        return false;
+      return preference != Preference::Writer || !has_pending_writer();
+    }
+    LIBC_INLINE bool can_acquire_writer(Preference /*unused*/) const {
+      return !has_acitve_owner();
+    }
+    // This function checks whether it is possible to grow the reader count
+    // without overflowing the state.
+    LIBC_INLINE cpp::optional<State> try_increase_reader_count() const {
+      LIBC_ASSERT(!has_active_writer() &&
+                  "try_increase_reader_count shall only be called when there "
+                  "is no active writer.");
+      State res;
+      if (LIBC_UNLIKELY(__builtin_sadd_overflow(state, ACTIVE_READER_COUNT_UNIT,
+                                                &res.state)))
+        return cpp::nullopt;
+      return res;
+    }
+
+    // Utilities to do atomic operations on the state.
+    LIBC_INLINE static State
+    fetch_sub_reader_count(cpp::Atomic<Type> &target,
+                           cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_sub(ACTIVE_READER_COUNT_UNIT, order));
+    }
+    LIBC_INLINE static State
+    load(cpp::Atomic<Type> &target,
+         cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.load(order));
+    }
+    LIBC_INLINE static State fetch_set_pending_reader(
+        cpp::Atomic<Type> &target,
+        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_or(PENDING_READER_BIT, order));
+    }
+    LIBC_INLINE static State fetch_clear_pending_reader(
+        cpp::Atomic<Type> &target,
+        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_and(~PENDING_READER_BIT, order));
+    }
+    LIBC_INLINE static State fetch_set_pending_writer(
+        cpp::Atomic<Type> &target,
+        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_or(PENDING_WRITER_BIT, order));
+    }
+    LIBC_INLINE static State fetch_clear_pending_writer(
+        cpp::Atomic<Type> &target,
+        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_and(~PENDING_WRITER_BIT, order));
+    }
+    LIBC_INLINE static State fetch_set_active_writer(
+        cpp::Atomic<Type> &target,
+        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_or(ACTIVE_WRITER_BIT, order));
+    }
+    LIBC_INLINE static State fetch_clear_active_writer(
+        cpp::Atomic<Type> &target,
+        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      return State(target.fetch_and(~ACTIVE_WRITER_BIT, order));
+    }
+
+    LIBC_INLINE bool
+    compare_exchange_weak_with(cpp::Atomic<Type> &target, State desired,
+                               cpp::MemoryOrder success_order,
+                               cpp::MemoryOrder failure_order) {
+      return target.compare_exchange_weak(state, desired, success_order,
+                                          failure_order);
+    }
+
+    // Utilities to spin and reload the state.
+  private:
+    template <class F>
+    LIBC_INLINE static State spin_reload_until(cpp::Atomic<Type> &target,
+                                               F &&func, unsigned spin_count) {
+      for (;;) {
+        auto state = State::load(target);
+        if (func(state) || spin_count == 0)
+          return state;
+        sleep_briefly();
+        spin_count--;
+      }
+    }
+
+  public:
+    // Return the reader state if either the lock is available or there is any
+    // ongoing contention.
+    LIBC_INLINE static State spin_reload_for_reader(cpp::Atomic<Type> &target,
+                                                    Preference preference,
+                                                    unsigned spin_count) {
+      return spin_reload_until(
+          target,
+          [=](State state) {
+            return state.can_acquire_reader(preference) || state.has_pending();
+          },
+          spin_count);
+    }
+    // Return the writer state if either the lock is available or there is any
+    // contention *between writers*. Since there can be far fewer writers than
+    // readers, we allow them to spin more to improve fairness.
+    LIBC_INLINE static State spin_reload_for_writer(cpp::Atomic<Type> &target,
+                                                    Preference preference,
+                                                    unsigned spin_count) {
+      return spin_reload_until(
+          target,
+          [=](State state) {
+            return state.can_acquire_writer(preference) ||
+                   state.has_pending_writer();
+          },
+          spin_count);
+    }
+  };
+
+private:
+  // Whether the RwLock is shared between processes.
+  bool is_pshared;
+  // Reader/Writer preference.
+  Preference preference;
+  // State to keep track of the RwLock.
+  cpp::Atomic<int32_t> state;
+  // writer_tid is used to keep track of the thread id of the writer. Notice
+  // that TLS address is not a good idea here since it may remain the same
+  // across forked processes.
+  cpp::Atomic<pid_t> writer_tid;
+  // Waiting queue to keep track of the pending readers and writers.
+  WaitingQueue queue;
+
+private:
+  // TODO: use cached thread id once implemented.
+  LIBC_INLINE static pid_t gettid() { return syscall_impl<pid_t>(SYS_gettid); }
+
+  LIBC_INLINE LockResult try_read_lock(State &old) {
+    while (LIBC_LIKELY(old.can_acquire_reader(preference))) {
+      cpp::optional<State> next = old.try_increase_reader_count();
+      if (!next)
+        return LockResult::Overflow;
+      if (LIBC_LIKELY(old.compare_exchange_weak_with(
+              state, *next, cpp::MemoryOrder::ACQUIRE,
+              cpp::MemoryOrder::RELAXED)))
+        return LockResult::Success;
+      // Notice that old is updated by the compare_exchange_weak_with function.
+    }
+    return LockResult::Busy;
+  }
+
+  LIBC_INLINE LockResult try_write_lock(State &old) {
+    // This while loop should terminate quickly
+    while (LIBC_LIKELY(old.can_acquire_writer(preference))) {
+      if (LIBC_LIKELY(old.compare_exchange_weak_with(
+              state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE,
+              cpp::MemoryOrder::RELAXED))) {
+        writer_tid.store(gettid(), cpp::MemoryOrder::RELAXED);
+        return LockResult::Success;
+      }
+      // Notice that old is updated by the compare_exchange_weak_with function.
+    }
+    return LockResult::Busy;
+  }
+
+public:
+  LIBC_INLINE constexpr RwLock(Preference preference = Preference::Reader,
+                               bool is_pshared = false)
+      : is_pshared(is_pshared), preference(preference), state(0), writer_tid(0),
+        queue() {}
+
+  LIBC_INLINE LockResult try_read_lock() {
+    State old = State::load(state, cpp::MemoryOrder::RELAXED);
+    return try_read_lock(old);
+  }
+  LIBC_INLINE LockResult try_write_lock() {
+    State old = State::load(state, cpp::MemoryOrder::RELAXED);
+    return try_write_lock(old);
+  }
+
+private:
+  template <State (&SpinReload)(cpp::Atomic<int32_t> &, Preference, unsigned),
+            State (&SetPending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder),
+            State (&ClearPending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder),
+            FutexWordType &(WaitingQueue::Guard::*Serialization)(),
+            FutexWordType &(WaitingQueue::Guard::*PendingCount)(),
+            LockResult (RwLock::*TryLock)(State &),
+            long (WaitingQueue::*Wait)(FutexWordType,
+                                       cpp::optional<Futex::Timeout>, bool),
+            bool (State::*CanAcquire)(Preference) const>
+  LIBC_INLINE LockResult
+  lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
+       unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
+    // Phase 1: deadlock detection.
+    // A deadlock happens if this is a RAW/WAW lock in the same thread.
+    if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid())
+      return LockResult::Deadlock;
+
+    // Phase 2: spin to get the initial state. We ignore the timing due to spin
+    // since it should end quickly.
+    State old = SpinReload(state, preference, spin_count);
+
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+    // Phase 3: convert the timeout if necessary.
+    if (timeout)
+      ensure_monotonicity(*timeout);
+#endif
+
+    // Enter the main acquisition loop.
+    for (;;) {
+      // Phase 4: if the lock can be acquired, try to acquire it.
+      LockResult result = (this->*TryLock)(old);
+      if (result != LockResult::Busy)
+        return result;
+
+      // Phase 5: register ourselves as a pending reader.
+      int serial_number;
+      {
+        // The queue needs to be protected by a mutex since the operations in
+        // this block must be executed as a whole transaction. It is possible
+        // that this lock will make the timeout imprecise, but this is the best
+        // we can do. The transaction is small and everyone should make
+        // progress rather quickly.
+        WaitingQueue::Guard guard = queue.acquire();
+        (guard.*PendingCount)()++;
+
+        // Use atomic operation to guarantee the total order of the operations
+        // on the state. The pending flag update should be visible to any
+        // succeeding unlock events. Or, if an unlock does happen before we
+        // sleep on the futex, we can avoid such waiting.
+        old = SetPending(state, cpp::MemoryOrder::RELAXED);
+        // no need to use atomic since it is already protected by the mutex.
+        serial_number = (guard.*Serialization)();
+      }
+
+      // Phase 6: do futex wait until the lock is available or timeout is
+      // reached.
+      bool timeout_flag = false;
+      if (!(old.*CanAcquire)(preference)) {
+        timeout_flag =
+            ((queue.*Wait)(serial_number, timeout, is_pshared) == -ETIMEDOUT);
+
+        // Phase 7: unregister ourselves as a pending reader.
+        {
+          // Similarly, the unregister operation should also be an atomic
+          // transaction.
+          WaitingQueue::Guard guard = queue.acquire();
+          (guard.*PendingCount)()--;
+          // Clear the flag if we are the last pending waiter of this kind. The
+          // flag must be cleared; otherwise operations like trylock may fail
+          // even though there are no competitors.
+          if ((guard.*PendingCount)() == 0)
+            ClearPending(state, cpp::MemoryOrder::RELAXED);
+        }
+
+        // Phase 8: exit the loop is timeout is reached.
+        if (timeout_flag)
+          return LockResult::Timeout;
+
+        // Phase 9: reload the state and retry the acquisition.
+        old = SpinReload(state, preference, spin_count);
+      }
+    }
+  }
+
+public:
+  LIBC_INLINE LockResult
+  read_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
+            unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
+    return lock<State::spin_reload_for_reader, State::fetch_set_pending_reader,
+                State::fetch_clear_pending_reader,
+                &WaitingQueue::Guard::reader_serialization,
+                &WaitingQueue::Guard::pending_reader, &RwLock::try_read_lock,
+                &WaitingQueue::reader_wait, &State::can_acquire_reader>(
+        timeout, spin_count);
+  }
+  LIBC_INLINE LockResult
+  write_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
+             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
+    return lock<State::spin_reload_for_writer, State::fetch_set_pending_writer,
+                State::fetch_clear_pending_writer,
+                &WaitingQueue::Guard::writer_serialization,
+                &WaitingQueue::Guard::pending_writer, &RwLock::try_write_lock,
+                &WaitingQueue::writer_wait, &State::can_acquire_writer>(
+        timeout, spin_count);
+  }
+  LIBC_INLINE LockResult unlock() {
+    State old = State::load(state, cpp::MemoryOrder::RELAXED);
+
+    if (old.has_active_writer()) {
+      // The lock is held by a writer.
+
+      // Check if we are the owner of the lock.
+      if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid())
+        return LockResult::PermissionDenied;
+
+      // clear writer tid.
+      writer_tid.store(0, cpp::MemoryOrder::RELAXED);
+
+      // clear the writer bit.
+      old = State::fetch_clear_active_writer(state);
+
+      // If there is no pending readers or writers, we are done.
+      if (!old.has_pending())
+        return LockResult::Success;
+    } else if (old.has_active_reader()) {
+      // The lock is held by readers.
+
+      // Decrease the reader count.
+      old = State::fetch_sub_reader_count(state);
+
+      // If there is no pending readers or writers, we are done.
+      if (!old.has_last_reader() || !old.has_pending())
+        return LockResult::Success;
+    } else
+      return LockResult::PermissionDenied;
+
+    enum class WakeTarget { Readers, Writers, None };
+    WakeTarget status;
+
+    {
+      WaitingQueue::Guard guard = queue.acquire();
+      if (guard.pending_writer() != 0) {
+        guard.writer_serialization()++;
+        status = WakeTarget::Writers;
+      } else if (guard.pending_reader() != 0) {
+        guard.reader_serialization()++;
+        status = WakeTarget::Readers;
+      } else
+        status = WakeTarget::None;
+    }
+
+    if (status == WakeTarget::Readers)
+      queue.reader_notify_all(is_pshared);
+    else if (status == WakeTarget::Writers)
+      queue.writer_notify_one(is_pshared);
+
+    return LockResult::Success;
+  }
+};
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_SUPPORT_THREADS_LINUX_RWLOCK_H

>From 10cccb5ac80183bb25c204756a6784594ec6e15f Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 11:33:19 -0700
Subject: [PATCH 02/26] [libc] clean up headers

---
 libc/src/__support/threads/linux/rwlock.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index e8da17681ddfe..6a9b2bed16b0d 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -11,11 +11,8 @@
 #include "hdr/errno_macros.h"
 #include "hdr/types/pid_t.h"
 #include "src/__support/CPP/atomic.h"
-#include "src/__support/CPP/expected.h"
-#include "src/__support/CPP/new.h"
 #include "src/__support/CPP/optional.h"
-#include "src/__support/CPP/type_traits/make_signed.h"
-#include "src/__support/OSUtil/linux/x86_64/syscall.h"
+#include "src/__support/OSUtil/syscall.h"
 #include "src/__support/common.h"
 #include "src/__support/libc_assert.h"
 #include "src/__support/macros/attributes.h"
@@ -100,7 +97,7 @@ class RwLock {
   enum class Preference : char { Reader, Writer };
   enum class LockResult {
     Success = 0,
-    Timeout = ETIMEDOUT,
+    TimedOut = ETIMEDOUT,
     Overflow = EAGAIN,
     Busy = EBUSY,
     Deadlock = EDEADLOCK,
@@ -431,7 +428,7 @@ class RwLock {
 
         // Phase 8: exit the loop is timeout is reached.
         if (timeout_flag)
-          return LockResult::Timeout;
+          return LockResult::TimedOut;
 
         // Phase 9: reload the state and retry the acquisition.
         old = SpinReload(state, preference, spin_count);

>From 41a8334288d40b657e9af30e98f9fc9cb247fa20 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 11:37:44 -0700
Subject: [PATCH 03/26] [libc] add another trylock before operating on the
 timestamp

---
 libc/src/__support/threads/linux/rwlock.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 6a9b2bed16b0d..168d17e3c42ff 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -372,6 +372,11 @@ class RwLock {
     // Phase 2: spin to get the initial state. We ignore the timing due to spin
     // since it should end quickly.
     State old = SpinReload(state, preference, spin_count);
+    {
+      LockResult result = (this->*TryLock)(old);
+      if (result != LockResult::Busy)
+        return result;
+    }
 
 #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
     // Phase 3: convert the timeout if necessary.

>From f5b778c342cf892e7351d3227df1b679fd8e63e8 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 11:43:18 -0700
Subject: [PATCH 04/26] [libc] more clean ups

---
 libc/src/__support/threads/linux/CMakeLists.txt | 2 ++
 libc/src/__support/threads/linux/rwlock.h       | 2 --
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index bac8073a66049..249aca0545e38 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -52,6 +52,8 @@ add_header_library(
   DEPENDS
     .futex_utils
     .raw_mutex
+    libc.src.__support.common
+    libc.src.__support.OSUtil.osutil
   COMPILE_OPTIONS
     -DLIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT=${LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT}
     ${monotonicity_flags}
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 168d17e3c42ff..c5a3498054f4f 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -15,8 +15,6 @@
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/common.h"
 #include "src/__support/libc_assert.h"
-#include "src/__support/macros/attributes.h"
-#include "src/__support/macros/optimization.h"
 #include "src/__support/threads/linux/futex_utils.h"
 #include "src/__support/threads/linux/futex_word.h"
 #include "src/__support/threads/linux/raw_mutex.h"

>From e143ee63ea4f1f0b2586a6dd85a8fcd567331718 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 11:47:51 -0700
Subject: [PATCH 05/26] [libc] separate out the notification part

---
 libc/src/__support/threads/linux/rwlock.h | 53 +++++++++++------------
 1 file changed, 26 insertions(+), 27 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index c5a3498054f4f..7f3644adcc2bf 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -460,57 +460,56 @@ class RwLock {
                 &WaitingQueue::writer_wait, &State::can_acquire_writer>(
         timeout, spin_count);
   }
+
+private:
+  LIBC_INLINE void notify_pending_threads() {
+    enum class WakeTarget { Readers, Writers, None };
+    WakeTarget status;
+
+    {
+      WaitingQueue::Guard guard = queue.acquire();
+      if (guard.pending_writer() != 0) {
+        guard.writer_serialization()++;
+        status = WakeTarget::Writers;
+      } else if (guard.pending_reader() != 0) {
+        guard.reader_serialization()++;
+        status = WakeTarget::Readers;
+      } else
+        status = WakeTarget::None;
+    }
+
+    if (status == WakeTarget::Readers)
+      queue.reader_notify_all(is_pshared);
+    else if (status == WakeTarget::Writers)
+      queue.writer_notify_one(is_pshared);
+  }
+
+public:
   LIBC_INLINE LockResult unlock() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
-
     if (old.has_active_writer()) {
       // The lock is held by a writer.
-
       // Check if we are the owner of the lock.
       if (writer_tid.load(cpp::MemoryOrder::RELAXED) != gettid())
         return LockResult::PermissionDenied;
-
       // clear writer tid.
       writer_tid.store(0, cpp::MemoryOrder::RELAXED);
-
       // clear the writer bit.
       old = State::fetch_clear_active_writer(state);
-
       // If there is no pending readers or writers, we are done.
       if (!old.has_pending())
         return LockResult::Success;
     } else if (old.has_active_reader()) {
       // The lock is held by readers.
-
       // Decrease the reader count.
       old = State::fetch_sub_reader_count(state);
-
       // If there is no pending readers or writers, we are done.
       if (!old.has_last_reader() || !old.has_pending())
         return LockResult::Success;
     } else
       return LockResult::PermissionDenied;
 
-    enum class WakeTarget { Readers, Writers, None };
-    WakeTarget status;
-
-    {
-      WaitingQueue::Guard guard = queue.acquire();
-      if (guard.pending_writer() != 0) {
-        guard.writer_serialization()++;
-        status = WakeTarget::Writers;
-      } else if (guard.pending_reader() != 0) {
-        guard.reader_serialization()++;
-        status = WakeTarget::Readers;
-      } else
-        status = WakeTarget::None;
-    }
-
-    if (status == WakeTarget::Readers)
-      queue.reader_notify_all(is_pshared);
-    else if (status == WakeTarget::Writers)
-      queue.writer_notify_one(is_pshared);
-
+    notify_pending_threads();
     return LockResult::Success;
   }
 };

>From a7e2f5041d461307a070d42fc0df8dad464233f3 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 11:50:28 -0700
Subject: [PATCH 06/26] [libc] correct permission of Guard

---
 libc/src/__support/threads/linux/rwlock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 7f3644adcc2bf..5dc6e3459079f 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -62,7 +62,7 @@ class RwLock {
       LIBC_INLINE FutexWordType &writer_serialization() {
         return queue.writer_serialization.val;
       }
-      friend RwLock;
+      friend WaitingQueue;
     };
 
   public:

>From 5b04d51ce4e10d1cd6462830bda485f9f4c0b7b6 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 14:07:57 -0700
Subject: [PATCH 07/26] [libc] address CRs

---
 .../llvm-libc-types/pthread_rwlock_t.h        |  26 +++++
 libc/src/__support/threads/linux/rwlock.h     | 102 ++++++++++--------
 2 files changed, 84 insertions(+), 44 deletions(-)
 create mode 100644 libc/include/llvm-libc-types/pthread_rwlock_t.h

diff --git a/libc/include/llvm-libc-types/pthread_rwlock_t.h b/libc/include/llvm-libc-types/pthread_rwlock_t.h
new file mode 100644
index 0000000000000..b7ba2821a9994
--- /dev/null
+++ b/libc/include/llvm-libc-types/pthread_rwlock_t.h
@@ -0,0 +1,26 @@
+//===-- Definition of pthread_rwlock_t type -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H
+#define LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H
+
+#include "llvm-libc-types/__futex_word.h"
+#include "llvm-libc-types/pid_t.h"
+typedef struct {
+  bool __is_pshared;
+  char __preference;
+  int __state;
+  pid_t __writier_tid;
+  __futex_word __wait_queue_lock;
+  __futex_word __pending_reader;
+  __futex_word __pending_writer;
+  __futex_word __reader_serialization;
+  __futex_word __writer_serialization;
+} pthread_rwlock_t;
+
+#endif // LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 5dc6e3459079f..300aaaf031316 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -15,6 +15,7 @@
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/common.h"
 #include "src/__support/libc_assert.h"
+#include "src/__support/macros/attributes.h"
 #include "src/__support/threads/linux/futex_utils.h"
 #include "src/__support/threads/linux/futex_word.h"
 #include "src/__support/threads/linux/raw_mutex.h"
@@ -350,28 +351,51 @@ class RwLock {
   }
 
 private:
-  template <State (&SpinReload)(cpp::Atomic<int32_t> &, Preference, unsigned),
-            State (&SetPending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder),
-            State (&ClearPending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder),
-            FutexWordType &(WaitingQueue::Guard::*Serialization)(),
-            FutexWordType &(WaitingQueue::Guard::*PendingCount)(),
-            LockResult (RwLock::*TryLock)(State &),
-            long (WaitingQueue::*Wait)(FutexWordType,
-                                       cpp::optional<Futex::Timeout>, bool),
-            bool (State::*CanAcquire)(Preference) const>
+  struct Proxy {
+    State (&spin_reload)(cpp::Atomic<int32_t> &, Preference, unsigned);
+    State (&set_pending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder);
+    State (&clear_pending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder);
+    FutexWordType &(WaitingQueue::Guard::*serialization)();
+    FutexWordType &(WaitingQueue::Guard::*pending_count)();
+    LockResult (RwLock::*try_lock)(State &);
+    long (WaitingQueue::*wait)(FutexWordType, cpp::optional<Futex::Timeout>,
+                               bool);
+    bool (State::*can_acquire)(Preference) const;
+  };
+
+  LIBC_INLINE_VAR static constexpr Proxy READER = {
+      State::spin_reload_for_reader,
+      State::fetch_set_pending_reader,
+      State::fetch_clear_pending_reader,
+      &WaitingQueue::Guard::reader_serialization,
+      &WaitingQueue::Guard::pending_reader,
+      &RwLock::try_read_lock,
+      &WaitingQueue::reader_wait,
+      &State::can_acquire_reader};
+
+  LIBC_INLINE_VAR static constexpr Proxy WRITER = {
+      State::spin_reload_for_writer,
+      State::fetch_set_pending_writer,
+      State::fetch_clear_pending_writer,
+      &WaitingQueue::Guard::writer_serialization,
+      &WaitingQueue::Guard::pending_writer,
+      &RwLock::try_write_lock,
+      &WaitingQueue::writer_wait,
+      &State::can_acquire_writer};
+
   LIBC_INLINE LockResult
-  lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
+  lock(const Proxy &proxy, cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
        unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     // Phase 1: deadlock detection.
     // A deadlock happens if this is a RAW/WAW lock in the same thread.
     if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid())
       return LockResult::Deadlock;
 
-    // Phase 2: spin to get the initial state. We ignore the timing due to spin
-    // since it should end quickly.
-    State old = SpinReload(state, preference, spin_count);
+    // Phase 2: spin to get the initial state. We ignore the timing due to
+    // spin since it should end quickly.
+    State old = proxy.spin_reload(state, preference, spin_count);
     {
-      LockResult result = (this->*TryLock)(old);
+      LockResult result = (this->*proxy.try_lock)(old);
       if (result != LockResult::Busy)
         return result;
     }
@@ -385,7 +409,7 @@ class RwLock {
     // Enter the main acquisition loop.
     for (;;) {
       // Phase 4: if the lock can be acquired, try to acquire it.
-      LockResult result = (this->*TryLock)(old);
+      LockResult result = (this->*proxy.try_lock)(old);
       if (result != LockResult::Busy)
         return result;
 
@@ -394,39 +418,39 @@ class RwLock {
       {
         // The queue need to be protected by a mutex since the operations in
         // this block must be executed as a whole transaction. It is possible
-        // that this lock will make the timeout imprecise, but this is the best
-        // we can do. The transaction is small and everyone should make
+        // that this lock will make the timeout imprecise, but this is the
+        // best we can do. The transaction is small and everyone should make
         // progress rather quickly.
         WaitingQueue::Guard guard = queue.acquire();
-        (guard.*PendingCount)()++;
+        (guard.*proxy.pending_count)()++;
 
         // Use atomic operation to guarantee the total order of the operations
         // on the state. The pending flag update should be visible to any
-        // succeeding unlock events. Or, if a unlock does happen before we sleep
-        // on the futex, we can avoid such waiting.
-        old = SetPending(state, cpp::MemoryOrder::RELAXED);
+        // succeeding unlock events. Or, if a unlock does happen before we
+        // sleep on the futex, we can avoid such waiting.
+        old = proxy.set_pending(state, cpp::MemoryOrder::RELAXED);
         // no need to use atomic since it is already protected by the mutex.
-        serial_number = (guard.*Serialization)();
+        serial_number = (guard.*proxy.serialization)();
       }
 
       // Phase 6: do futex wait until the lock is available or timeout is
       // reached.
       bool timeout_flag = false;
-      if (!(old.*CanAcquire)(preference)) {
-        timeout_flag =
-            ((queue.*Wait)(serial_number, timeout, is_pshared) == -ETIMEDOUT);
+      if (!(old.*proxy.can_acquire)(preference)) {
+        timeout_flag = ((queue.*proxy.wait)(serial_number, timeout,
+                                            is_pshared) == -ETIMEDOUT);
 
         // Phase 7: unregister ourselves as a pending reader.
         {
           // Similarly, the unregister operation should also be an atomic
           // transaction.
           WaitingQueue::Guard guard = queue.acquire();
-          (guard.*PendingCount)()--;
-          // Clear the flag if we are the last reader. The flag must be cleared
-          // otherwise operations like trylock may fail even though there is no
-          // competitors.
-          if ((guard.*PendingCount)() == 0)
-            ClearPending(state, cpp::MemoryOrder::RELAXED);
+          (guard.*proxy.pending_count)()--;
+          // Clear the flag if we are the last reader. The flag must be
+          // cleared otherwise operations like trylock may fail even though
+          // there is no competitors.
+          if ((guard.*proxy.pending_count)() == 0)
+            proxy.clear_pending(state, cpp::MemoryOrder::RELAXED);
         }
 
         // Phase 8: exit the loop is timeout is reached.
@@ -434,7 +458,7 @@ class RwLock {
           return LockResult::TimedOut;
 
         // Phase 9: reload the state and retry the acquisition.
-        old = SpinReload(state, preference, spin_count);
+        old = proxy.spin_reload(state, preference, spin_count);
       }
     }
   }
@@ -443,22 +467,12 @@ class RwLock {
   LIBC_INLINE LockResult
   read_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-    return lock<State::spin_reload_for_reader, State::fetch_set_pending_reader,
-                State::fetch_clear_pending_reader,
-                &WaitingQueue::Guard::reader_serialization,
-                &WaitingQueue::Guard::pending_reader, &RwLock::try_read_lock,
-                &WaitingQueue::reader_wait, &State::can_acquire_reader>(
-        timeout, spin_count);
+    return lock(READER, timeout, spin_count);
   }
   LIBC_INLINE LockResult
   write_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
              unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-    return lock<State::spin_reload_for_writer, State::fetch_set_pending_writer,
-                State::fetch_clear_pending_writer,
-                &WaitingQueue::Guard::writer_serialization,
-                &WaitingQueue::Guard::pending_writer, &RwLock::try_write_lock,
-                &WaitingQueue::writer_wait, &State::can_acquire_writer>(
-        timeout, spin_count);
+    return lock(WRITER, timeout, spin_count);
   }
 
 private:

>From 847772cf38baf7835f18f1edf3bb647309924be5 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 14:09:59 -0700
Subject: [PATCH 08/26] [libc] address CRs

---
 libc/src/__support/threads/linux/rwlock.h | 32 +++++++++++------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 300aaaf031316..6b13c5270be4c 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -364,24 +364,24 @@ class RwLock {
   };
 
   LIBC_INLINE_VAR static constexpr Proxy READER = {
-      State::spin_reload_for_reader,
-      State::fetch_set_pending_reader,
-      State::fetch_clear_pending_reader,
-      &WaitingQueue::Guard::reader_serialization,
-      &WaitingQueue::Guard::pending_reader,
-      &RwLock::try_read_lock,
-      &WaitingQueue::reader_wait,
-      &State::can_acquire_reader};
+      /*spin_reload=*/State::spin_reload_for_reader,
+      /*set_pending=*/State::fetch_set_pending_reader,
+      /*clear_pending=*/State::fetch_clear_pending_reader,
+      /*serialization=*/&WaitingQueue::Guard::reader_serialization,
+      /*pending_count=*/&WaitingQueue::Guard::pending_reader,
+      /*try_lock=*/&RwLock::try_read_lock,
+      /*wait=*/&WaitingQueue::reader_wait,
+      /*can_acquire=*/&State::can_acquire_reader};
 
   LIBC_INLINE_VAR static constexpr Proxy WRITER = {
-      State::spin_reload_for_writer,
-      State::fetch_set_pending_writer,
-      State::fetch_clear_pending_writer,
-      &WaitingQueue::Guard::writer_serialization,
-      &WaitingQueue::Guard::pending_writer,
-      &RwLock::try_write_lock,
-      &WaitingQueue::writer_wait,
-      &State::can_acquire_writer};
+      /*spin_reload=*/State::spin_reload_for_writer,
+      /*set_pending=*/State::fetch_set_pending_writer,
+      /*clear_pending=*/State::fetch_clear_pending_writer,
+      /*serialization=*/&WaitingQueue::Guard::writer_serialization,
+      /*pending_count=*/&WaitingQueue::Guard::pending_writer,
+      /*try_lock=*/&RwLock::try_write_lock,
+      /*wait=*/&WaitingQueue::writer_wait,
+      /*can_acquire=*/&State::can_acquire_writer};
 
   LIBC_INLINE LockResult
   lock(const Proxy &proxy, cpp::optional<Futex::Timeout> timeout = cpp::nullopt,

>From ff183ca67f49c8893206a855f067ba29e7d9d613 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 15:05:26 -0700
Subject: [PATCH 09/26] [libc] add pthread_rwlock_init

---
 libc/config/linux/api.td                      |  2 +
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/include/CMakeLists.txt                   |  1 +
 libc/include/llvm-libc-types/CMakeLists.txt   |  1 +
 .../llvm-libc-types/pthread_rwlock_t.h        |  8 +--
 libc/include/pthread.h.def                    |  1 +
 libc/spec/posix.td                            | 11 +++
 libc/src/__support/threads/linux/rwlock.h     |  7 ++
 libc/src/pthread/CMakeLists.txt               | 11 +++
 libc/src/pthread/pthread_rwlock_init.cpp      | 69 +++++++++++++++++++
 libc/src/pthread/pthread_rwlock_init.h        | 21 ++++++
 11 files changed, 129 insertions(+), 4 deletions(-)
 create mode 100644 libc/src/pthread/pthread_rwlock_init.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_init.h

diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 902839b3e5b8f..eb0090c80b0da 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -181,6 +181,7 @@ def PThreadAPI : PublicAPI<"pthread.h"> {
       "pthread_mutexattr_t",
       "pthread_once_t",
       "pthread_rwlockattr_t",
+      "pthread_rwlock_t",
       "pthread_t",
   ];
 }
@@ -270,6 +271,7 @@ def SysTypesAPI : PublicAPI<"sys/types.h"> {
     "pthread_mutexattr_t",
     "pthread_once_t",
     "pthread_rwlockattr_t",
+    "pthread_rwlock_t",
     "pthread_t",
     "size_t",
     "ssize_t",
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 367db7d384d23..1cac4ea2d28b7 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -680,6 +680,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_mutexattr_setrobust
     libc.src.pthread.pthread_mutexattr_settype
     libc.src.pthread.pthread_once
+    libc.src.pthread.pthread_rwlock_init
     libc.src.pthread.pthread_rwlockattr_destroy
     libc.src.pthread.pthread_rwlockattr_getkind_np
     libc.src.pthread.pthread_rwlockattr_getpshared
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 2a41ec46abdab..bb10fd4c94703 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -332,6 +332,7 @@ add_gen_header(
     .llvm-libc-types.pthread_mutex_t
     .llvm-libc-types.pthread_mutexattr_t
     .llvm-libc-types.pthread_once_t
+    .llvm-libc-types.pthread_rwlock_t
     .llvm-libc-types.pthread_rwlockattr_t
     .llvm-libc-types.pthread_t
 )
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index ee2c910b85b00..c9646253aad54 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -54,6 +54,7 @@ add_header(pthread_key_t HDR pthread_key_t.h)
 add_header(pthread_mutex_t HDR pthread_mutex_t.h DEPENDS .__futex_word .__mutex_type)
 add_header(pthread_mutexattr_t HDR pthread_mutexattr_t.h)
 add_header(pthread_once_t HDR pthread_once_t.h DEPENDS .__futex_word)
+add_header(pthread_rwlock_t HDR pthread_rwlock_t.h DEPENDS .__futex_word .pid_t)
 add_header(pthread_rwlockattr_t HDR pthread_rwlockattr_t.h)
 add_header(pthread_t HDR pthread_t.h DEPENDS .__thread_type)
 add_header(rlim_t HDR rlim_t.h)
diff --git a/libc/include/llvm-libc-types/pthread_rwlock_t.h b/libc/include/llvm-libc-types/pthread_rwlock_t.h
index b7ba2821a9994..4950547004632 100644
--- a/libc/include/llvm-libc-types/pthread_rwlock_t.h
+++ b/libc/include/llvm-libc-types/pthread_rwlock_t.h
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H
-#define LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H
+#ifndef LLVM_LIBC_TYPES_PTHREAD_RWLOCK_T_H
+#define LLVM_LIBC_TYPES_PTHREAD_RWLOCK_T_H
 
 #include "llvm-libc-types/__futex_word.h"
 #include "llvm-libc-types/pid_t.h"
@@ -16,11 +16,11 @@ typedef struct {
   char __preference;
   int __state;
   pid_t __writier_tid;
-  __futex_word __wait_queue_lock;
+  __futex_word __wait_queue_mutex;
   __futex_word __pending_reader;
   __futex_word __pending_writer;
   __futex_word __reader_serialization;
   __futex_word __writer_serialization;
 } pthread_rwlock_t;
 
-#endif // LLVM_LIBC_TYPES_PTHREAD_MUTEX_T_H
+#endif // LLVM_LIBC_TYPES_PTHREAD_RWLOCK_T_H
diff --git a/libc/include/pthread.h.def b/libc/include/pthread.h.def
index d41273b5590ea..33bd0060a5b4d 100644
--- a/libc/include/pthread.h.def
+++ b/libc/include/pthread.h.def
@@ -17,6 +17,7 @@
 #define PTHREAD_STACK_MIN (1 << 14) // 16KB
 
 #define PTHREAD_MUTEX_INITIALIZER {0}
+#define PTHREAD_RWLOCK_INITIALIZER {0}
 #define PTHREAD_ONCE_INIT {0}
 
 enum {
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index e16353b8142de..ce772a6e43482 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -113,6 +113,7 @@ def POSIX : StandardSpec<"POSIX"> {
   NamedType PThreadRWLockAttrTType = NamedType<"pthread_rwlockattr_t">;
   PtrType PThreadRWLockAttrTPtr = PtrType<PThreadRWLockAttrTType>;
   ConstType ConstPThreadRWLockAttrTPtr = ConstType<PThreadRWLockAttrTPtr>;
+  ConstType ConstRestrictedPThreadRWLockAttrTPtr = ConstType<RestrictedPtrType<PThreadRWLockAttrTType>>;
 
   NamedType PThreadMutexAttrTType = NamedType<"pthread_mutexattr_t">;
   PtrType PThreadMutexAttrTPtr = PtrType<PThreadMutexAttrTType>;
@@ -126,6 +127,9 @@ def POSIX : StandardSpec<"POSIX"> {
   ConstType ConstPThreadMutexTPtr = ConstType<PThreadMutexTPtr>;
   ConstType ConstRestrictedPThreadMutexTPtr = ConstType<RestrictedPThreadMutexTPtr>;
 
+  NamedType PThreadRWLockTType = NamedType<"pthread_rwlock_t">;
+  PtrType PThreadRWLockTPtr = PtrType<PThreadRWLockTType>;
+
   PtrType PThreadTPtr = PtrType<PThreadTType>;
   RestrictedPtrType RestrictedPThreadTPtr = RestrictedPtrType<PThreadTType>;
 
@@ -1003,6 +1007,7 @@ def POSIX : StandardSpec<"POSIX"> {
         PThreadOnceCallback,
         PThreadOnceT,
         PThreadRWLockAttrTType,
+        PThreadRWLockTType,
         PThreadStartT,
         PThreadTSSDtorT,
         PThreadTType,
@@ -1259,6 +1264,11 @@ def POSIX : StandardSpec<"POSIX"> {
           RetValSpec<IntType>,
           [ArgSpec<PThreadRWLockAttrTPtr>, ArgSpec<IntType>]
       >,
+      FunctionSpec<
+        "pthread_rwlock_init",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>, ArgSpec<ConstRestrictedPThreadRWLockAttrTPtr>]
+      >
     ]
   >;
 
@@ -1616,6 +1626,7 @@ def POSIX : StandardSpec<"POSIX"> {
       PThreadMutexTType,
       PThreadOnceT,
       PThreadRWLockAttrTType,
+      PThreadRWLockTType,
       PThreadTType,
       PidT,
       SSizeTType,
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 6b13c5270be4c..f3eaada748f41 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -526,6 +526,13 @@ class RwLock {
     notify_pending_threads();
     return LockResult::Success;
   }
+
+  LIBC_INLINE LockResult check_for_destroy() {
+    State old = State::load(state, cpp::MemoryOrder::RELAXED);
+    if (old.has_acitve_owner())
+      return LockResult::Busy;
+    return LockResult::Success;
+  }
 };
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index e5bebb63c6401..8225bc30a566b 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -522,6 +522,17 @@ add_entrypoint_object(
     libc.include.errno
 )
 
+add_entrypoint_object(
+  pthread_rwlock_init
+  SRCS
+    pthread_rwlock_init.cpp
+  HDRS
+    pthread_rwlock_init.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_once
   SRCS
diff --git a/libc/src/pthread/pthread_rwlock_init.cpp b/libc/src/pthread/pthread_rwlock_init.cpp
new file mode 100644
index 0000000000000..cc71df9b38165
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_init.cpp
@@ -0,0 +1,69 @@
+//===-- Linux implementation of the pthread_rwlock_init function ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_init.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+LIBC_INLINE void *operator new(size_t, pthread_rwlock_t *addr) noexcept {
+  return addr;
+}
+
+namespace LIBC_NAMESPACE {
+
+static_assert(
+    sizeof(RwLock) == sizeof(pthread_rwlock_t) &&
+        alignof(RwLock) == alignof(pthread_rwlock_t),
+    "The public pthread_rwlock_t type must be of the same size and alignment "
+    "as the internal rwlock type.");
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_init,
+                   (pthread_rwlock_t * rwlock,
+                    const pthread_rwlockattr_t *__restrict attr)) {
+  pthread_rwlockattr_t rwlockattr{
+      /*pshared=*/PTHREAD_PROCESS_PRIVATE,
+      /*pref*/ PTHREAD_RWLOCK_PREFER_READER_NP,
+  };
+  if (attr)
+    rwlockattr = *attr;
+
+  ::new (rwlock) RwLock();
+
+  // PTHREAD_RWLOCK_PREFER_WRITER_NP is not supported.
+  RwLock::Preference preference;
+  switch (rwlockattr.pref) {
+  case PTHREAD_RWLOCK_PREFER_READER_NP:
+    preference = RwLock::Preference::Reader;
+    break;
+  case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
+    preference = RwLock::Preference::Writer;
+    break;
+  default:
+    return EINVAL;
+  }
+  bool is_pshared;
+  switch (rwlockattr.pshared) {
+  case PTHREAD_PROCESS_PRIVATE:
+    is_pshared = false;
+    break;
+  case PTHREAD_PROCESS_SHARED:
+    is_pshared = true;
+    break;
+  default:
+    return EINVAL;
+  }
+
+  new (rwlock) RwLock(preference, is_pshared);
+  return 0;
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_init.h b/libc/src/pthread/pthread_rwlock_init.h
new file mode 100644
index 0000000000000..59a4abe1abbb7
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_init.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for pthread_rwlock_init function ---*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_INIT_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_INIT_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_init(pthread_rwlock_t *mutex,
+                        const pthread_rwlockattr_t *__restrict attr);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_INIT_H

>From 2557a400d0214df39bfef8257071aa96f8152806 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 15:21:43 -0700
Subject: [PATCH 10/26] [libc] add pthread_rwlock_tryrdlock

---
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/include/pthread.h.def                    |  2 +-
 libc/spec/posix.td                            |  5 ++++
 libc/src/__support/threads/linux/rwlock.h     |  2 +-
 libc/src/pthread/CMakeLists.txt               | 11 +++++++
 libc/src/pthread/pthread_rwlock_init.h        |  2 +-
 libc/src/pthread/pthread_rwlock_tryrdlock.cpp | 30 +++++++++++++++++++
 libc/src/pthread/pthread_rwlock_tryrdlock.h   | 20 +++++++++++++
 8 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 libc/src/pthread/pthread_rwlock_tryrdlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_tryrdlock.h

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 1cac4ea2d28b7..606a67edc9889 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -681,6 +681,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_mutexattr_settype
     libc.src.pthread.pthread_once
     libc.src.pthread.pthread_rwlock_init
+    libc.src.pthread.pthread_rwlock_tryrdlock
     libc.src.pthread.pthread_rwlockattr_destroy
     libc.src.pthread.pthread_rwlockattr_getkind_np
     libc.src.pthread.pthread_rwlockattr_getpshared
diff --git a/libc/include/pthread.h.def b/libc/include/pthread.h.def
index 33bd0060a5b4d..4dbeed6b5f321 100644
--- a/libc/include/pthread.h.def
+++ b/libc/include/pthread.h.def
@@ -17,7 +17,7 @@
 #define PTHREAD_STACK_MIN (1 << 14) // 16KB
 
 #define PTHREAD_MUTEX_INITIALIZER {0}
-#define PTHREAD_RWLOCK_INITIALIZER {0}
+#define PTHREAD_RWLOCK_INITIALIZER {}
 #define PTHREAD_ONCE_INIT {0}
 
 enum {
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index ce772a6e43482..d909c0d1981ae 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -1268,6 +1268,11 @@ def POSIX : StandardSpec<"POSIX"> {
         "pthread_rwlock_init",
         RetValSpec<IntType>,
         [ArgSpec<PThreadRWLockTPtr>, ArgSpec<ConstRestrictedPThreadRWLockAttrTPtr>]
+      >,
+      FunctionSpec<
+        "pthread_rwlock_tryrdlock",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>]
       >
     ]
   >;
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index f3eaada748f41..b454a794cda87 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -94,7 +94,7 @@ class RwLock {
 
 public:
   enum class Preference : char { Reader, Writer };
-  enum class LockResult {
+  enum class LockResult : int {
     Success = 0,
     TimedOut = ETIMEDOUT,
     Overflow = EAGAIN,
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index 8225bc30a566b..30c27d4336fbe 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -533,6 +533,17 @@ add_entrypoint_object(
     libc.src.__support.threads.linux.rwlock
 )
 
+add_entrypoint_object(
+  pthread_rwlock_tryrdlock
+  SRCS
+    pthread_rwlock_tryrdlock.cpp
+  HDRS
+    pthread_rwlock_tryrdlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_once
   SRCS
diff --git a/libc/src/pthread/pthread_rwlock_init.h b/libc/src/pthread/pthread_rwlock_init.h
index 59a4abe1abbb7..78d2934882c1d 100644
--- a/libc/src/pthread/pthread_rwlock_init.h
+++ b/libc/src/pthread/pthread_rwlock_init.h
@@ -13,7 +13,7 @@
 
 namespace LIBC_NAMESPACE {
 
-int pthread_rwlock_init(pthread_rwlock_t *mutex,
+int pthread_rwlock_init(pthread_rwlock_t *rwlock,
                         const pthread_rwlockattr_t *__restrict attr);
 
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_tryrdlock.cpp b/libc/src/pthread/pthread_rwlock_tryrdlock.cpp
new file mode 100644
index 0000000000000..a2101d2f4714f
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_tryrdlock.cpp
@@ -0,0 +1,30 @@
+//===-- Implementation of the Rwlock's tryrdlock function -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_tryrdlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+static_assert(
+    sizeof(RwLock) == sizeof(pthread_rwlock_t) &&
+        alignof(RwLock) == alignof(pthread_rwlock_t),
+    "The public pthread_rwlock_t type must be of the same size and alignment "
+    "as the internal rwlock type.");
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_tryrdlock, (pthread_rwlock_t * rwlock)) {
+  RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
+  return static_cast<int>(rw->try_read_lock());
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_tryrdlock.h b/libc/src/pthread/pthread_rwlock_tryrdlock.h
new file mode 100644
index 0000000000000..b07ab5b152b1a
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_tryrdlock.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for Rwlock's tryrdlock function ----*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TRYRDLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TRYRDLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TRYRDLOCK_H

>From 4222eac1d879d4adcc20ec8d3105ea798d54f319 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 18:03:36 -0700
Subject: [PATCH 11/26] [libc] clean up again

---
 libc/src/__support/threads/linux/rwlock.h | 296 ++++++++++------------
 libc/src/pthread/pthread_rwlock_init.cpp  |   6 +-
 2 files changed, 141 insertions(+), 161 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index b454a794cda87..2ec2c5977bf85 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -36,6 +36,9 @@
 
 namespace LIBC_NAMESPACE {
 class RwLock {
+public:
+  enum class Role : char { Reader, Writer };
+
 private:
   class WaitingQueue final : private RawMutex {
     FutexWordType pending_reader;
@@ -51,17 +54,17 @@ class RwLock {
 
     public:
       LIBC_INLINE ~Guard() { queue.unlock(); }
-      LIBC_INLINE FutexWordType &pending_reader() {
-        return queue.pending_reader;
-      }
-      LIBC_INLINE FutexWordType &pending_writer() {
-        return queue.pending_writer;
+      template <Role role> LIBC_INLINE FutexWordType &pending_count() {
+        if constexpr (role == Role::Reader)
+          return queue.pending_reader;
+        else
+          return queue.pending_writer;
       }
-      LIBC_INLINE FutexWordType &reader_serialization() {
-        return queue.reader_serialization.val;
-      }
-      LIBC_INLINE FutexWordType &writer_serialization() {
-        return queue.writer_serialization.val;
+      template <Role role> LIBC_INLINE FutexWordType &serialization() {
+        if constexpr (role == Role::Reader)
+          return queue.reader_serialization.val;
+        else
+          return queue.writer_serialization.val;
       }
       friend WaitingQueue;
     };
@@ -74,26 +77,26 @@ class RwLock {
       this->lock();
       return Guard(*this);
     }
-    LIBC_INLINE long reader_wait(FutexWordType expected,
-                                 cpp::optional<Futex::Timeout> timeout,
-                                 bool is_pshared) {
-      return reader_serialization.wait(expected, timeout, is_pshared);
-    }
-    LIBC_INLINE long reader_notify_all(bool is_pshared) {
-      return reader_serialization.notify_all(is_pshared);
-    }
-    LIBC_INLINE long writer_wait(FutexWordType expected,
-                                 cpp::optional<Futex::Timeout> timeout,
-                                 bool is_pshared) {
-      return writer_serialization.wait(expected, timeout, is_pshared);
+
+    template <Role role>
+    LIBC_INLINE long wait(FutexWordType expected,
+                          cpp::optional<Futex::Timeout> timeout,
+                          bool is_pshared) {
+      if constexpr (role == Role::Reader)
+        return reader_serialization.wait(expected, timeout, is_pshared);
+      else
+        return writer_serialization.wait(expected, timeout, is_pshared);
     }
-    LIBC_INLINE long writer_notify_one(bool is_pshared) {
-      return writer_serialization.notify_one(is_pshared);
+
+    template <Role role> LIBC_INLINE long notify(bool is_pshared) {
+      if constexpr (role == Role::Reader)
+        return reader_serialization.notify_all(is_pshared);
+      else
+        return writer_serialization.notify_one(is_pshared);
     }
   };
 
 public:
-  enum class Preference : char { Reader, Writer };
   enum class LockResult : int {
     Success = 0,
     TimedOut = ETIMEDOUT,
@@ -168,21 +171,23 @@ class RwLock {
     LIBC_INLINE constexpr State set_writer_bit() const {
       return State(state | ACTIVE_WRITER_BIT);
     }
+
     // The preference parameter changes the behavior of the lock acquisition
     // if there are both readers and writers waiting for the lock. If writers
     // are preferred, reader acquisition will be blocked until all pending
     // writers are served.
-    LIBC_INLINE bool can_acquire_reader(Preference preference) const {
-      switch (preference) {
-      case Preference::Reader:
-        return !has_active_writer();
-      case Preference::Writer:
-        return !has_active_writer() && !has_pending_writer();
-      }
-    }
-    LIBC_INLINE bool can_acquire_writer(Preference /*unused*/) const {
-      return !has_acitve_owner();
+    template <Role role> LIBC_INLINE bool can_acquire(Role preference) const {
+      if constexpr (role == Role::Reader) {
+        switch (preference) {
+        case Role::Reader:
+          return !has_active_writer();
+        case Role::Writer:
+          return !has_active_writer() && !has_pending_writer();
+        }
+      } else
+        return !has_acitve_owner();
     }
+
     // This function check if it is possible to grow the reader count without
     // overflowing the state.
     LIBC_INLINE cpp::optional<State> try_increase_reader_count() const {
@@ -202,30 +207,30 @@ class RwLock {
                            cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
       return State(target.fetch_sub(ACTIVE_READER_COUNT_UNIT, order));
     }
+
     LIBC_INLINE static State
     load(cpp::Atomic<Type> &target,
          cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
       return State(target.load(order));
     }
-    LIBC_INLINE static State fetch_set_pending_reader(
-        cpp::Atomic<Type> &target,
-        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_or(PENDING_READER_BIT, order));
-    }
-    LIBC_INLINE static State fetch_clear_pending_reader(
-        cpp::Atomic<Type> &target,
-        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_and(~PENDING_READER_BIT, order));
-    }
-    LIBC_INLINE static State fetch_set_pending_writer(
-        cpp::Atomic<Type> &target,
-        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_or(PENDING_WRITER_BIT, order));
+
+    template <Role role>
+    LIBC_INLINE static State
+    fetch_set_pending_bit(cpp::Atomic<Type> &target,
+                          cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+      if constexpr (role == Role::Reader)
+        return State(target.fetch_or(PENDING_READER_BIT, order));
+      else
+        return State(target.fetch_or(PENDING_WRITER_BIT, order));
     }
-    LIBC_INLINE static State fetch_clear_pending_writer(
+    template <Role role>
+    LIBC_INLINE static State fetch_clear_pending_bit(
         cpp::Atomic<Type> &target,
         cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_and(~PENDING_WRITER_BIT, order));
+      if constexpr (role == Role::Reader)
+        return State(target.fetch_and(~PENDING_READER_BIT, order));
+      else
+        return State(target.fetch_and(~PENDING_WRITER_BIT, order));
     }
     LIBC_INLINE static State fetch_set_active_writer(
         cpp::Atomic<Type> &target,
@@ -261,31 +266,33 @@ class RwLock {
     }
 
   public:
-    // Return the reader state if either the lock is available or there is any
-    // ongoing contention.
-    LIBC_INLINE static State spin_reload_for_reader(cpp::Atomic<Type> &target,
-                                                    Preference preference,
-                                                    unsigned spin_count) {
-      return spin_reload_until(
-          target,
-          [=](State state) {
-            return state.can_acquire_reader(preference) || state.has_pending();
-          },
-          spin_count);
-    }
-    // Return the writer state if either the lock is available or there is any
-    // contention *between writers*. Since writers can be way less than readers,
-    // we allow them to spin more to improve the fairness.
-    LIBC_INLINE static State spin_reload_for_writer(cpp::Atomic<Type> &target,
-                                                    Preference preference,
-                                                    unsigned spin_count) {
-      return spin_reload_until(
-          target,
-          [=](State state) {
-            return state.can_acquire_writer(preference) ||
-                   state.has_pending_writer();
-          },
-          spin_count);
+    template <Role role>
+    LIBC_INLINE static State spin_reload(cpp::Atomic<Type> &target,
+                                         Role preference, unsigned spin_count) {
+      if constexpr (role == Role::Reader) {
+        // Stop spinning and return the reader state as soon as either the
+        // lock is available to readers or there is any ongoing contention
+        // (state.has_pending()).
+        return spin_reload_until(
+            target,
+            [=](State state) {
+              return state.can_acquire<Role::Reader>(preference) ||
+                     state.has_pending();
+            },
+            spin_count);
+      } else {
+        // Stop spinning and return the writer state as soon as either the
+        // lock is available or there is any contention *between writers*.
+        // Since there can be far fewer writers than readers, we allow them
+        // to spin more to improve fairness.
+        return spin_reload_until(
+            target,
+            [=](State state) {
+              return state.can_acquire<Role::Writer>(preference) ||
+                     state.has_pending_writer();
+            },
+            spin_count);
+      }
     }
   };
 
@@ -293,7 +300,7 @@ class RwLock {
   // Whether the RwLock is shared between processes.
   bool is_pshared;
   // Reader/Writer preference.
-  Preference preference;
+  Role preference;
   // State to keep track of the RwLock.
   cpp::Atomic<int32_t> state;
   // writer_tid is used to keep track of the thread id of the writer. Notice
@@ -307,84 +314,55 @@ class RwLock {
   // TODO: use cached thread id once implemented.
   LIBC_INLINE static pid_t gettid() { return syscall_impl<pid_t>(SYS_gettid); }
 
-  LIBC_INLINE LockResult try_read_lock(State &old) {
-    while (LIBC_LIKELY(old.can_acquire_reader(preference))) {
-      cpp::optional<State> next = old.try_increase_reader_count();
-      if (!next)
-        return LockResult::Overflow;
-      if (LIBC_LIKELY(old.compare_exchange_weak_with(
-              state, *next, cpp::MemoryOrder::ACQUIRE,
-              cpp::MemoryOrder::RELAXED)))
-        return LockResult::Success;
-      // Notice that old is updated by the compare_exchange_weak_with function.
-    }
-    return LockResult::Busy;
-  }
-
-  LIBC_INLINE LockResult try_write_lock(State &old) {
-    // This while loop should terminate quickly
-    while (LIBC_LIKELY(old.can_acquire_writer(preference))) {
-      if (LIBC_LIKELY(old.compare_exchange_weak_with(
-              state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE,
-              cpp::MemoryOrder::RELAXED))) {
-        writer_tid.store(gettid(), cpp::MemoryOrder::RELAXED);
-        return LockResult::Success;
+  template <Role role> LIBC_INLINE LockResult try_lock(State &old) {
+    if constexpr (role == Role::Reader) {
+      while (LIBC_LIKELY(old.can_acquire<Role::Reader>(preference))) {
+        cpp::optional<State> next = old.try_increase_reader_count();
+        if (!next)
+          return LockResult::Overflow;
+        if (LIBC_LIKELY(old.compare_exchange_weak_with(
+                state, *next, cpp::MemoryOrder::ACQUIRE,
+                cpp::MemoryOrder::RELAXED)))
+          return LockResult::Success;
+        // Notice that old is updated by the compare_exchange_weak_with
+        // function.
+      }
+      return LockResult::Busy;
+    } else {
+      // This while loop should terminate quickly
+      while (LIBC_LIKELY(old.can_acquire<Role::Writer>(preference))) {
+        if (LIBC_LIKELY(old.compare_exchange_weak_with(
+                state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE,
+                cpp::MemoryOrder::RELAXED))) {
+          writer_tid.store(gettid(), cpp::MemoryOrder::RELAXED);
+          return LockResult::Success;
+        }
+        // Notice that old is updated by the compare_exchange_weak_with
+        // function.
       }
-      // Notice that old is updated by the compare_exchange_weak_with function.
+      return LockResult::Busy;
     }
-    return LockResult::Busy;
   }
 
 public:
-  LIBC_INLINE constexpr RwLock(Preference preference = Preference::Reader,
+  LIBC_INLINE constexpr RwLock(Role preference = Role::Reader,
                                bool is_pshared = false)
       : is_pshared(is_pshared), preference(preference), state(0), writer_tid(0),
         queue() {}
 
   LIBC_INLINE LockResult try_read_lock() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
-    return try_read_lock(old);
+    return try_lock<Role::Reader>(old);
   }
   LIBC_INLINE LockResult try_write_lock() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
-    return try_write_lock(old);
+    return try_lock<Role::Writer>(old);
   }
 
 private:
-  struct Proxy {
-    State (&spin_reload)(cpp::Atomic<int32_t> &, Preference, unsigned);
-    State (&set_pending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder);
-    State (&clear_pending)(cpp::Atomic<int32_t> &, cpp::MemoryOrder);
-    FutexWordType &(WaitingQueue::Guard::*serialization)();
-    FutexWordType &(WaitingQueue::Guard::*pending_count)();
-    LockResult (RwLock::*try_lock)(State &);
-    long (WaitingQueue::*wait)(FutexWordType, cpp::optional<Futex::Timeout>,
-                               bool);
-    bool (State::*can_acquire)(Preference) const;
-  };
-
-  LIBC_INLINE_VAR static constexpr Proxy READER = {
-      /*spin_reload=*/State::spin_reload_for_reader,
-      /*set_pending=*/State::fetch_set_pending_reader,
-      /*clear_pending=*/State::fetch_clear_pending_reader,
-      /*serialization=*/&WaitingQueue::Guard::reader_serialization,
-      /*pending_count=*/&WaitingQueue::Guard::pending_reader,
-      /*try_lock=*/&RwLock::try_read_lock,
-      /*wait=*/&WaitingQueue::reader_wait,
-      /*can_acquire=*/&State::can_acquire_reader};
-
-  LIBC_INLINE_VAR static constexpr Proxy WRITER = {
-      /*spin_reload=*/State::spin_reload_for_writer,
-      /*set_pending=*/State::fetch_set_pending_writer,
-      /*clear_pending=*/State::fetch_clear_pending_writer,
-      /*serialization=*/&WaitingQueue::Guard::writer_serialization,
-      /*pending_count=*/&WaitingQueue::Guard::pending_writer,
-      /*try_lock=*/&RwLock::try_write_lock,
-      /*wait=*/&WaitingQueue::writer_wait,
-      /*can_acquire=*/&State::can_acquire_writer};
-
+  template <Role role>
   LIBC_INLINE LockResult
-  lock(const Proxy &proxy, cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
+  lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
        unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     // Phase 1: deadlock detection.
     // A deadlock happens if this is a RAW/WAW lock in the same thread.
@@ -393,9 +371,9 @@ class RwLock {
 
     // Phase 2: spin to get the initial state. We ignore the timing due to
     // spin since it should end quickly.
-    State old = proxy.spin_reload(state, preference, spin_count);
+    State old = State::spin_reload<role>(state, preference, spin_count);
     {
-      LockResult result = (this->*proxy.try_lock)(old);
+      LockResult result = try_lock<role>(old);
       if (result != LockResult::Busy)
         return result;
     }
@@ -409,7 +387,7 @@ class RwLock {
     // Enter the main acquisition loop.
     for (;;) {
       // Phase 4: if the lock can be acquired, try to acquire it.
-      LockResult result = (this->*proxy.try_lock)(old);
+      LockResult result = try_lock<role>(old);
       if (result != LockResult::Busy)
         return result;
 
@@ -422,35 +400,37 @@ class RwLock {
         // best we can do. The transaction is small and everyone should make
         // progress rather quickly.
         WaitingQueue::Guard guard = queue.acquire();
-        (guard.*proxy.pending_count)()++;
+        guard.template pending_count<role>()++;
 
         // Use atomic operation to guarantee the total order of the operations
         // on the state. The pending flag update should be visible to any
         // succeeding unlock events. Or, if a unlock does happen before we
         // sleep on the futex, we can avoid such waiting.
-        old = proxy.set_pending(state, cpp::MemoryOrder::RELAXED);
+        old = State::fetch_set_pending_bit<role>(state,
+                                                 cpp::MemoryOrder::RELAXED);
         // no need to use atomic since it is already protected by the mutex.
-        serial_number = (guard.*proxy.serialization)();
+        serial_number = guard.serialization<role>();
       }
 
       // Phase 6: do futex wait until the lock is available or timeout is
       // reached.
       bool timeout_flag = false;
-      if (!(old.*proxy.can_acquire)(preference)) {
-        timeout_flag = ((queue.*proxy.wait)(serial_number, timeout,
-                                            is_pshared) == -ETIMEDOUT);
+      if (!old.can_acquire<role>(preference)) {
+        timeout_flag = (queue.wait<role>(serial_number, timeout, is_pshared) ==
+                        -ETIMEDOUT);
 
         // Phase 7: unregister ourselves as a pending reader.
         {
           // Similarly, the unregister operation should also be an atomic
           // transaction.
           WaitingQueue::Guard guard = queue.acquire();
-          (guard.*proxy.pending_count)()--;
+          guard.pending_count<role>()--;
           // Clear the flag if we are the last reader. The flag must be
           // cleared otherwise operations like trylock may fail even though
           // there is no competitors.
-          if ((guard.*proxy.pending_count)() == 0)
-            proxy.clear_pending(state, cpp::MemoryOrder::RELAXED);
+          if (guard.pending_count<role>() == 0)
+            State::fetch_clear_pending_bit<role>(state,
+                                                 cpp::MemoryOrder::RELAXED);
         }
 
         // Phase 8: exit the loop is timeout is reached.
@@ -458,7 +438,7 @@ class RwLock {
           return LockResult::TimedOut;
 
         // Phase 9: reload the state and retry the acquisition.
-        old = proxy.spin_reload(state, preference, spin_count);
+        old = State::spin_reload<role>(state, preference, spin_count);
       }
     }
   }
@@ -467,12 +447,12 @@ class RwLock {
   LIBC_INLINE LockResult
   read_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-    return lock(READER, timeout, spin_count);
+    return lock<Role::Reader>(timeout, spin_count);
   }
   LIBC_INLINE LockResult
   write_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
              unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-    return lock(WRITER, timeout, spin_count);
+    return lock<Role::Writer>(timeout, spin_count);
   }
 
 private:
@@ -482,20 +462,20 @@ class RwLock {
 
     {
       WaitingQueue::Guard guard = queue.acquire();
-      if (guard.pending_writer() != 0) {
-        guard.writer_serialization()++;
+      if (guard.pending_count<Role::Writer>() != 0) {
+        guard.serialization<Role::Writer>()++;
         status = WakeTarget::Writers;
-      } else if (guard.pending_reader() != 0) {
-        guard.reader_serialization()++;
+      } else if (guard.pending_count<Role::Reader>() != 0) {
+        guard.serialization<Role::Reader>()++;
         status = WakeTarget::Readers;
       } else
         status = WakeTarget::None;
     }
 
     if (status == WakeTarget::Readers)
-      queue.reader_notify_all(is_pshared);
+      queue.notify<Role::Reader>(is_pshared);
     else if (status == WakeTarget::Writers)
-      queue.writer_notify_one(is_pshared);
+      queue.notify<Role::Writer>(is_pshared);
   }
 
 public:
diff --git a/libc/src/pthread/pthread_rwlock_init.cpp b/libc/src/pthread/pthread_rwlock_init.cpp
index cc71df9b38165..dddc7ec655745 100644
--- a/libc/src/pthread/pthread_rwlock_init.cpp
+++ b/libc/src/pthread/pthread_rwlock_init.cpp
@@ -39,13 +39,13 @@ LLVM_LIBC_FUNCTION(int, pthread_rwlock_init,
   ::new (rwlock) RwLock();
 
   // PTHREAD_RWLOCK_PREFER_WRITER_NP is not supported.
-  RwLock::Preference preference;
+  RwLock::Role preference;
   switch (rwlockattr.pref) {
   case PTHREAD_RWLOCK_PREFER_READER_NP:
-    preference = RwLock::Preference::Reader;
+    preference = RwLock::Role::Reader;
     break;
   case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
-    preference = RwLock::Preference::Writer;
+    preference = RwLock::Role::Writer;
     break;
   default:
     return EINVAL;

>From 11bfdd08a8b9a68dd20a23ff53277b97c2e1224e Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 20:42:20 -0700
Subject: [PATCH 12/26] [libc] add pthread_rwlock_rdlock

---
 libc/config/linux/x86_64/entrypoints.txt   |  1 +
 libc/spec/posix.td                         |  5 ++++
 libc/src/__support/threads/linux/rwlock.h  |  4 +--
 libc/src/pthread/CMakeLists.txt            | 11 ++++++++
 libc/src/pthread/pthread_rwlock_rdlock.cpp | 30 ++++++++++++++++++++++
 libc/src/pthread/pthread_rwlock_rdlock.h   | 20 +++++++++++++++
 6 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 libc/src/pthread/pthread_rwlock_rdlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_rdlock.h

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 606a67edc9889..bc7c9e836b8e6 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -682,6 +682,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_once
     libc.src.pthread.pthread_rwlock_init
     libc.src.pthread.pthread_rwlock_tryrdlock
+    libc.src.pthread.pthread_rwlock_rdlock
     libc.src.pthread.pthread_rwlockattr_destroy
     libc.src.pthread.pthread_rwlockattr_getkind_np
     libc.src.pthread.pthread_rwlockattr_getpshared
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index d909c0d1981ae..de0eec24683f5 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -1273,6 +1273,11 @@ def POSIX : StandardSpec<"POSIX"> {
         "pthread_rwlock_tryrdlock",
         RetValSpec<IntType>,
         [ArgSpec<PThreadRWLockTPtr>]
+      >,
+      FunctionSpec<
+        "pthread_rwlock_rdlock",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>]
       >
     ]
   >;
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 2ec2c5977bf85..ccf9378286098 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -361,7 +361,7 @@ class RwLock {
 
 private:
   template <Role role>
-  LIBC_INLINE LockResult
+  [[gnu::always_inline]] LIBC_INLINE LockResult
   lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
        unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     // Phase 1: deadlock detection.
@@ -444,7 +444,7 @@ class RwLock {
   }
 
 public:
-  LIBC_INLINE LockResult
+  [[gnu::always_inline]] LIBC_INLINE LockResult
   read_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     return lock<Role::Reader>(timeout, spin_count);
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index 30c27d4336fbe..e09e3e2e42359 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -544,6 +544,17 @@ add_entrypoint_object(
     libc.src.__support.threads.linux.rwlock
 )
 
+add_entrypoint_object(
+  pthread_rwlock_rdlock
+  SRCS
+    pthread_rwlock_rdlock.cpp
+  HDRS
+    pthread_rwlock_rdlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_once
   SRCS
diff --git a/libc/src/pthread/pthread_rwlock_rdlock.cpp b/libc/src/pthread/pthread_rwlock_rdlock.cpp
new file mode 100644
index 0000000000000..cb7bc439c1b1e
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_rdlock.cpp
@@ -0,0 +1,30 @@
+//===-- Implementation of the Rwlock's rdlock function --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_rdlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+static_assert(
+    sizeof(RwLock) == sizeof(pthread_rwlock_t) &&
+        alignof(RwLock) == alignof(pthread_rwlock_t),
+    "The public pthread_rwlock_t type must be of the same size and alignment "
+    "as the internal rwlock type.");
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_rdlock, (pthread_rwlock_t * rwlock)) {
+  RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
+  return static_cast<int>(rw->read_lock());
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_rdlock.h b/libc/src/pthread/pthread_rwlock_rdlock.h
new file mode 100644
index 0000000000000..79027739f4b7c
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_rdlock.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for Rwlock's rdlock function -------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_RDLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_RDLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_RDLOCK_H

>From 88fcb9dfb0f04d9f91e1049f0f6cb8701e0f4143 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 20:47:41 -0700
Subject: [PATCH 13/26] [libc] adjust phase order

---
 libc/src/__support/threads/linux/rwlock.h | 33 ++++++++++++-----------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index ccf9378286098..72db6ed47adbb 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -361,29 +361,24 @@ class RwLock {
 
 private:
   template <Role role>
-  [[gnu::always_inline]] LIBC_INLINE LockResult
-  lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
-       unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
+  LIBC_INLINE LockResult
+  lock_slow(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
+            unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
     // Phase 1: deadlock detection.
     // A deadlock happens if this is a RAW/WAW lock in the same thread.
     if (writer_tid.load(cpp::MemoryOrder::RELAXED) == gettid())
       return LockResult::Deadlock;
 
-    // Phase 2: spin to get the initial state. We ignore the timing due to
-    // spin since it should end quickly.
-    State old = State::spin_reload<role>(state, preference, spin_count);
-    {
-      LockResult result = try_lock<role>(old);
-      if (result != LockResult::Busy)
-        return result;
-    }
-
 #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
-    // Phase 3: convert the timeout if necessary.
+    // Phase 2: convert the timeout if necessary.
     if (timeout)
       ensure_monotonicity(*timeout);
 #endif
 
+    // Phase 3: spin to get the initial state. We ignore the timing due to
+    // spin since it should end quickly.
+    State old = State::spin_reload<role>(state, preference, spin_count);
+
     // Enter the main acquisition loop.
     for (;;) {
       // Phase 4: if the lock can be acquired, try to acquire it.
@@ -444,15 +439,21 @@ class RwLock {
   }
 
 public:
-  [[gnu::always_inline]] LIBC_INLINE LockResult
+  LIBC_INLINE LockResult
   read_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-    return lock<Role::Reader>(timeout, spin_count);
+    LockResult result = try_read_lock();
+    if (LIBC_LIKELY(result != LockResult::Busy))
+      return result;
+    return lock_slow<Role::Reader>(timeout, spin_count);
   }
   LIBC_INLINE LockResult
   write_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
              unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-    return lock<Role::Writer>(timeout, spin_count);
+    LockResult result = try_write_lock();
+    if (LIBC_LIKELY(result != LockResult::Busy))
+      return result;
+    return lock_slow<Role::Writer>(timeout, spin_count);
   }
 
 private:

>From a6c01e1d61a4753ef9ffe53cb15b4923267c3e10 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 21:33:21 -0700
Subject: [PATCH 14/26] [libc] add pthread_rwlock_timedrdlock

---
 libc/config/linux/x86_64/entrypoints.txt      |  1 +
 libc/spec/posix.td                            |  6 +++
 libc/spec/spec.td                             |  1 +
 libc/src/pthread/CMakeLists.txt               | 11 +++++
 .../pthread/pthread_rwlock_timedrdlock.cpp    | 47 +++++++++++++++++++
 libc/src/pthread/pthread_rwlock_timedrdlock.h | 21 +++++++++
 6 files changed, 87 insertions(+)
 create mode 100644 libc/src/pthread/pthread_rwlock_timedrdlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_timedrdlock.h

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index bc7c9e836b8e6..703202e910a77 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -683,6 +683,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_rwlock_init
     libc.src.pthread.pthread_rwlock_tryrdlock
     libc.src.pthread.pthread_rwlock_rdlock
+    libc.src.pthread.pthread_rwlock_timedrdlock
     libc.src.pthread.pthread_rwlockattr_destroy
     libc.src.pthread.pthread_rwlockattr_getkind_np
     libc.src.pthread.pthread_rwlockattr_getpshared
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index de0eec24683f5..5f240acfc9a15 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -129,6 +129,7 @@ def POSIX : StandardSpec<"POSIX"> {
 
   NamedType PThreadRWLockTType = NamedType<"pthread_rwlock_t">;
   PtrType PThreadRWLockTPtr = PtrType<PThreadRWLockTType>;
+  RestrictedPtrType RestrictedPThreadRWLockTPtr = RestrictedPtrType<PThreadRWLockTType>;
 
   PtrType PThreadTPtr = PtrType<PThreadTType>;
   RestrictedPtrType RestrictedPThreadTPtr = RestrictedPtrType<PThreadTType>;
@@ -1274,6 +1275,11 @@ def POSIX : StandardSpec<"POSIX"> {
         RetValSpec<IntType>,
         [ArgSpec<PThreadRWLockTPtr>]
       >,
+      FunctionSpec<
+        "pthread_rwlock_timedrdlock",
+        RetValSpec<IntType>,
+        [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestructTimeSpecPtr>]
+      >,
       FunctionSpec<
         "pthread_rwlock_rdlock",
         RetValSpec<IntType>,
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index 056a3143c5a71..b216718774cfb 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -125,6 +125,7 @@ def TimeTType : NamedType<"time_t">;
 def StructTimeSpec : NamedType<"struct timespec">;
 def StructTimeSpecPtr : PtrType<StructTimeSpec>;
 def ConstStructTimeSpecPtr : ConstType<StructTimeSpecPtr>;
+def ConstRestructTimeSpecPtr : ConstType<RestrictedPtrType<StructTimeSpec>>;
 
 def BSearchCompareT : NamedType<"__bsearchcompare_t">;
 def QSortCompareT : NamedType<"__qsortcompare_t">;
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index e09e3e2e42359..f035ff00c295b 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -544,6 +544,17 @@ add_entrypoint_object(
     libc.src.__support.threads.linux.rwlock
 )
 
+add_entrypoint_object(
+  pthread_rwlock_timedrdlock
+  SRCS
+    pthread_rwlock_timedrdlock.cpp
+  HDRS
+    pthread_rwlock_timedrdlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_rwlock_rdlock
   SRCS
diff --git a/libc/src/pthread/pthread_rwlock_timedrdlock.cpp b/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
new file mode 100644
index 0000000000000..b0d83dafea069
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
@@ -0,0 +1,47 @@
+//===-- Implementation of the Rwlock's timedrdlock function ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_timedrdlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/threads/linux/futex_utils.h"
+#include "src/__support/threads/linux/rwlock.h"
+#include "src/__support/time/linux/abs_timeout.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+static_assert(
+    sizeof(RwLock) == sizeof(pthread_rwlock_t) &&
+        alignof(RwLock) == alignof(pthread_rwlock_t),
+    "The public pthread_rwlock_t type must be of the same size and alignment "
+    "as the internal rwlock type.");
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_timedrdlock,
+                   (pthread_rwlock_t * rwlock,
+                    const struct timespec *abstime)) {
+  RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
+  LIBC_ASSERT(abstime && "timedrdlock called with a null timeout");
+  auto timeout =
+      internal::AbsTimeout::from_timespec(*abstime, /*is_realtime=*/true);
+  if (LIBC_LIKELY(timeout.has_value()))
+    return static_cast<int>(rw->read_lock(timeout.value()));
+
+  switch (timeout.error()) {
+  case internal::AbsTimeout::Error::Invalid:
+    return EINVAL;
+  case internal::AbsTimeout::Error::BeforeEpoch:
+    return ETIMEDOUT;
+  }
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_timedrdlock.h b/libc/src/pthread/pthread_rwlock_timedrdlock.h
new file mode 100644
index 0000000000000..dfa43f25ba706
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_timedrdlock.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for Rwlock's timedrdlock function --*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TIMEDRDLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TIMEDRDLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_timedrdlock(pthread_rwlock_t *__restrict rwlock,
+                               const struct timespec *__restrict abs_timeout);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TIMEDRDLOCK_H

>From 8858c4001be71e800e5f5e07303da187d0ab52a3 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 22:27:14 -0700
Subject: [PATCH 15/26] [libc] add remaining entrypoint functions

---
 libc/config/linux/x86_64/entrypoints.txt      |  5 ++
 libc/spec/posix.td                            | 27 ++++++++-
 libc/src/__support/threads/linux/rwlock.h     |  5 ++
 libc/src/pthread/CMakeLists.txt               | 55 +++++++++++++++++++
 libc/src/pthread/pthread_rwlock_destroy.cpp   | 31 +++++++++++
 libc/src/pthread/pthread_rwlock_destroy.h     | 20 +++++++
 .../pthread/pthread_rwlock_timedrdlock.cpp    |  1 -
 .../pthread/pthread_rwlock_timedwrlock.cpp    | 40 ++++++++++++++
 libc/src/pthread/pthread_rwlock_timedwrlock.h | 21 +++++++
 libc/src/pthread/pthread_rwlock_trywrlock.cpp | 30 ++++++++++
 libc/src/pthread/pthread_rwlock_trywrlock.h   | 20 +++++++
 libc/src/pthread/pthread_rwlock_unlock.cpp    | 24 ++++++++
 libc/src/pthread/pthread_rwlock_unlock.h      | 20 +++++++
 libc/src/pthread/pthread_rwlock_wrlock.cpp    | 30 ++++++++++
 libc/src/pthread/pthread_rwlock_wrlock.h      | 20 +++++++
 15 files changed, 347 insertions(+), 2 deletions(-)
 create mode 100644 libc/src/pthread/pthread_rwlock_destroy.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_destroy.h
 create mode 100644 libc/src/pthread/pthread_rwlock_timedwrlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_timedwrlock.h
 create mode 100644 libc/src/pthread/pthread_rwlock_trywrlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_trywrlock.h
 create mode 100644 libc/src/pthread/pthread_rwlock_unlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_unlock.h
 create mode 100644 libc/src/pthread/pthread_rwlock_wrlock.cpp
 create mode 100644 libc/src/pthread/pthread_rwlock_wrlock.h

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 703202e910a77..9c1ca0ae39c3e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -684,6 +684,11 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.pthread.pthread_rwlock_tryrdlock
     libc.src.pthread.pthread_rwlock_rdlock
     libc.src.pthread.pthread_rwlock_timedrdlock
+    libc.src.pthread.pthread_rwlock_trywrlock
+    libc.src.pthread.pthread_rwlock_wrlock
+    libc.src.pthread.pthread_rwlock_timedwrlock
+    libc.src.pthread.pthread_rwlock_unlock
+    libc.src.pthread.pthread_rwlock_destroy
     libc.src.pthread.pthread_rwlockattr_destroy
     libc.src.pthread.pthread_rwlockattr_getkind_np
     libc.src.pthread.pthread_rwlockattr_getpshared
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index 5f240acfc9a15..dfb67b578237f 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -1275,16 +1275,41 @@ def POSIX : StandardSpec<"POSIX"> {
         RetValSpec<IntType>,
         [ArgSpec<PThreadRWLockTPtr>]
       >,
+      FunctionSpec<
+        "pthread_rwlock_trywrlock",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>]
+      >,
       FunctionSpec<
         "pthread_rwlock_timedrdlock",
         RetValSpec<IntType>,
         [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestructTimeSpecPtr>]
       >,
+      FunctionSpec<
+        "pthread_rwlock_timedwrlock",
+        RetValSpec<IntType>,
+        [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestructTimeSpecPtr>]
+      >,
       FunctionSpec<
         "pthread_rwlock_rdlock",
         RetValSpec<IntType>,
         [ArgSpec<PThreadRWLockTPtr>]
-      >
+      >,
+      FunctionSpec<
+        "pthread_rwlock_wrlock",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>]
+      >,
+      FunctionSpec<
+        "pthread_rwlock_unlock",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>]
+      >,
+      FunctionSpec<
+        "pthread_rwlock_destroy",
+        RetValSpec<IntType>,
+        [ArgSpec<PThreadRWLockTPtr>]
+      >,
     ]
   >;
 
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 72db6ed47adbb..8baff57389123 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -16,6 +16,7 @@
 #include "src/__support/common.h"
 #include "src/__support/libc_assert.h"
 #include "src/__support/macros/attributes.h"
+#include "src/__support/macros/optimization.h"
 #include "src/__support/threads/linux/futex_utils.h"
 #include "src/__support/threads/linux/futex_word.h"
 #include "src/__support/threads/linux/raw_mutex.h"
@@ -457,6 +458,10 @@ class RwLock {
   }
 
 private:
+  // Compiler somehow decides that this function may be inlined, which leads to
+  // a larger unlock function that is infeasible to be inlined. Since
+  // the notification routine is colder, we mark it as noinline explicitly.
+  [[gnu::noinline]]
   LIBC_INLINE void notify_pending_threads() {
     enum class WakeTarget { Readers, Writers, None };
     WakeTarget status;
diff --git a/libc/src/pthread/CMakeLists.txt b/libc/src/pthread/CMakeLists.txt
index f035ff00c295b..8ebe58c2193be 100644
--- a/libc/src/pthread/CMakeLists.txt
+++ b/libc/src/pthread/CMakeLists.txt
@@ -544,6 +544,17 @@ add_entrypoint_object(
     libc.src.__support.threads.linux.rwlock
 )
 
+add_entrypoint_object(
+  pthread_rwlock_trywrlock
+  SRCS
+    pthread_rwlock_trywrlock.cpp
+  HDRS
+    pthread_rwlock_trywrlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_rwlock_timedrdlock
   SRCS
@@ -555,6 +566,17 @@ add_entrypoint_object(
     libc.src.__support.threads.linux.rwlock
 )
 
+add_entrypoint_object(
+  pthread_rwlock_timedwrlock
+  SRCS
+    pthread_rwlock_timedwrlock.cpp
+  HDRS
+    pthread_rwlock_timedwrlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_rwlock_rdlock
   SRCS
@@ -566,6 +588,39 @@ add_entrypoint_object(
     libc.src.__support.threads.linux.rwlock
 )
 
+add_entrypoint_object(
+  pthread_rwlock_wrlock
+  SRCS
+    pthread_rwlock_wrlock.cpp
+  HDRS
+    pthread_rwlock_wrlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
+add_entrypoint_object(
+  pthread_rwlock_unlock
+  SRCS
+    pthread_rwlock_unlock.cpp
+  HDRS
+    pthread_rwlock_unlock.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
+add_entrypoint_object(
+  pthread_rwlock_destroy
+  SRCS
+    pthread_rwlock_destroy.cpp
+  HDRS
+    pthread_rwlock_destroy.h
+  DEPENDS
+    libc.include.pthread
+    libc.src.__support.threads.linux.rwlock
+)
+
 add_entrypoint_object(
   pthread_once
   SRCS
diff --git a/libc/src/pthread/pthread_rwlock_destroy.cpp b/libc/src/pthread/pthread_rwlock_destroy.cpp
new file mode 100644
index 0000000000000..4f97ba3b0b5c3
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_destroy.cpp
@@ -0,0 +1,31 @@
+//===-- Implementation for Rwlock's destroy function ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_destroy.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_destroy, (pthread_rwlock_t * rwlock)) {
+  RwLock *const lock = reinterpret_cast<RwLock *>(rwlock);
+  const RwLock::LockResult status = lock->check_for_destroy();
+
+  // The destructor is a no-op today. It is still run, and only once the check
+  // succeeds, so that teardown stays symmetric with the constructor call.
+  if (status == RwLock::LockResult::Success)
+    lock->~RwLock();
+
+  return static_cast<int>(status);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_destroy.h b/libc/src/pthread/pthread_rwlock_destroy.h
new file mode 100644
index 0000000000000..f845e806d6e60
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_destroy.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for Rwlock's destroy function -------*-C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_DESTROY_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_DESTROY_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_DESTROY_H
diff --git a/libc/src/pthread/pthread_rwlock_timedrdlock.cpp b/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
index b0d83dafea069..0c56663c754aa 100644
--- a/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
@@ -11,7 +11,6 @@
 #include "src/__support/common.h"
 #include "src/__support/libc_assert.h"
 #include "src/__support/macros/optimization.h"
-#include "src/__support/threads/linux/futex_utils.h"
 #include "src/__support/threads/linux/rwlock.h"
 #include "src/__support/time/linux/abs_timeout.h"
 
diff --git a/libc/src/pthread/pthread_rwlock_timedwrlock.cpp b/libc/src/pthread/pthread_rwlock_timedwrlock.cpp
new file mode 100644
index 0000000000000..9c86dcddd4b4f
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_timedwrlock.cpp
@@ -0,0 +1,40 @@
+//===-- Implementation for Rwlock's timedwrlock function ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_timedwrlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/macros/optimization.h"
+#include "src/__support/threads/linux/rwlock.h"
+#include "src/__support/time/linux/abs_timeout.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+// Takes the write lock, handing abstime (treated as realtime) on as a deadline.
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_timedwrlock,
+                   (pthread_rwlock_t *__restrict rwlock,
+                    const struct timespec *__restrict abstime)) {
+  RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
+  LIBC_ASSERT(abstime && "timedwrlock called with a null timeout");
+  auto timeout =
+      internal::AbsTimeout::from_timespec(*abstime, /*is_realtime=*/true);
+  if (LIBC_LIKELY(timeout.has_value()))
+    return static_cast<int>(rw->write_lock(timeout.value()));
+  // Conversion failed: Invalid maps to EINVAL, BeforeEpoch to ETIMEDOUT. The
+  // unconditional final return leaves no path that falls off the function.
+  if (timeout.error() == internal::AbsTimeout::Error::Invalid)
+    return EINVAL;
+  return ETIMEDOUT;
+}
+
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_timedwrlock.h b/libc/src/pthread/pthread_rwlock_timedwrlock.h
new file mode 100644
index 0000000000000..a39d8de8d330f
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_timedwrlock.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for Rwlock's timedwrlock function --*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TIMEDWRLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TIMEDWRLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_timedwrlock(pthread_rwlock_t *__restrict rwlock,
+                               const struct timespec *__restrict abs_timeout);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TIMEDWRLOCK_H
diff --git a/libc/src/pthread/pthread_rwlock_trywrlock.cpp b/libc/src/pthread/pthread_rwlock_trywrlock.cpp
new file mode 100644
index 0000000000000..cfd44ac60fcbd
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_trywrlock.cpp
@@ -0,0 +1,30 @@
+//===-- Implementation for Rwlock's trywrlock function -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_trywrlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+static_assert(
+    sizeof(RwLock) == sizeof(pthread_rwlock_t) &&
+        alignof(RwLock) == alignof(pthread_rwlock_t),
+    "The public pthread_rwlock_t type must be of the same size and alignment "
+    "as the internal rwlock type.");
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_trywrlock, (pthread_rwlock_t * rwlock)) {
+  // View the public handle as the internal RwLock and forward its result.
+  return static_cast<int>(reinterpret_cast<RwLock *>(rwlock)->try_write_lock());
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_trywrlock.h b/libc/src/pthread/pthread_rwlock_trywrlock.h
new file mode 100644
index 0000000000000..fc146c6db859f
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_trywrlock.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for Rwlock's trywrlock function ----*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TRYWRLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TRYWRLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_TRYWRLOCK_H
diff --git a/libc/src/pthread/pthread_rwlock_unlock.cpp b/libc/src/pthread/pthread_rwlock_unlock.cpp
new file mode 100644
index 0000000000000..67f32a3a6a8b3
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_unlock.cpp
@@ -0,0 +1,24 @@
+//===-- Implementation for Rwlock's unlock function -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_unlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_unlock, (pthread_rwlock_t * rwlock)) {
+  // View the public handle as the internal RwLock and forward its result.
+  return static_cast<int>(reinterpret_cast<RwLock *>(rwlock)->unlock());
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_unlock.h b/libc/src/pthread/pthread_rwlock_unlock.h
new file mode 100644
index 0000000000000..b9a72f1e06992
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_unlock.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for Rwlock's unlock function -------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_UNLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_UNLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_UNLOCK_H
diff --git a/libc/src/pthread/pthread_rwlock_wrlock.cpp b/libc/src/pthread/pthread_rwlock_wrlock.cpp
new file mode 100644
index 0000000000000..276a08245b6ac
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_wrlock.cpp
@@ -0,0 +1,30 @@
+//===-- Implementation for Rwlock's wrlock function -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_wrlock.h"
+
+#include "src/__support/common.h"
+#include "src/__support/threads/linux/rwlock.h"
+
+#include <errno.h>
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+static_assert(
+    sizeof(RwLock) == sizeof(pthread_rwlock_t) &&
+        alignof(RwLock) == alignof(pthread_rwlock_t),
+    "The public pthread_rwlock_t type must be of the same size and alignment "
+    "as the internal rwlock type.");
+
+LLVM_LIBC_FUNCTION(int, pthread_rwlock_wrlock, (pthread_rwlock_t * rwlock)) {
+  // View the public handle as the internal RwLock and forward its result.
+  return static_cast<int>(reinterpret_cast<RwLock *>(rwlock)->write_lock());
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/pthread/pthread_rwlock_wrlock.h b/libc/src/pthread/pthread_rwlock_wrlock.h
new file mode 100644
index 0000000000000..ba77c1f1f09ac
--- /dev/null
+++ b/libc/src/pthread/pthread_rwlock_wrlock.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for Rwlock's wrlock function -------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_WRLOCK_H
+#define LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_WRLOCK_H
+
+#include <pthread.h>
+
+namespace LIBC_NAMESPACE {
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_PTHREAD_PTHREAD_RWLOCK_WRLOCK_H

>From f7c395c1e6a990f6421320ac8fe4308f812083e7 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 22:37:09 -0700
Subject: [PATCH 16/26] [libc] add missing explicit ordering

---
 libc/src/__support/threads/linux/rwlock.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 8baff57389123..95ab5f336fb91 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -258,7 +258,7 @@ class RwLock {
     LIBC_INLINE static State spin_reload_until(cpp::Atomic<Type> &target,
                                                F &&func, unsigned spin_count) {
       for (;;) {
-        auto state = State::load(target);
+        auto state = State::load(target, cpp::MemoryOrder::RELAXED);
         if (func(state) || spin_count == 0)
           return state;
         sleep_briefly();
@@ -495,14 +495,14 @@ class RwLock {
       // clear writer tid.
       writer_tid.store(0, cpp::MemoryOrder::RELAXED);
       // clear the writer bit.
-      old = State::fetch_clear_active_writer(state);
+      old = State::fetch_clear_active_writer(state, cpp::MemoryOrder::RELEASE);
       // If there is no pending readers or writers, we are done.
       if (!old.has_pending())
         return LockResult::Success;
     } else if (old.has_active_reader()) {
       // The lock is held by readers.
       // Decrease the reader count.
-      old = State::fetch_sub_reader_count(state);
+      old = State::fetch_sub_reader_count(state, cpp::MemoryOrder::RELEASE);
       // If there is no pending readers or writers, we are done.
       if (!old.has_last_reader() || !old.has_pending())
         return LockResult::Success;

>From 54d4bfc68d3cfd480a446edd3d642864b222e98c Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 2 Jun 2024 22:52:36 -0700
Subject: [PATCH 17/26] [libc] remove extra new expression

---
 libc/src/pthread/pthread_rwlock_init.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libc/src/pthread/pthread_rwlock_init.cpp b/libc/src/pthread/pthread_rwlock_init.cpp
index dddc7ec655745..6fd4aa0b6357b 100644
--- a/libc/src/pthread/pthread_rwlock_init.cpp
+++ b/libc/src/pthread/pthread_rwlock_init.cpp
@@ -36,8 +36,6 @@ LLVM_LIBC_FUNCTION(int, pthread_rwlock_init,
   if (attr)
     rwlockattr = *attr;
 
-  ::new (rwlock) RwLock();
-
   // PTHREAD_RWLOCK_PREFER_WRITER_NP is not supported.
   RwLock::Role preference;
   switch (rwlockattr.pref) {

>From dc39e518e2a445be39dd452d54d063177e194550 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sat, 8 Jun 2024 21:47:30 -0700
Subject: [PATCH 18/26] address CRs

---
 .../llvm-libc-types/pthread_rwlock_t.h        |   8 +-
 libc/spec/posix.td                            |   7 +-
 libc/spec/spec.td                             |   3 +-
 libc/src/__support/macros/attributes.h        |   6 +
 .../__support/threads/linux/CMakeLists.txt    |   1 +
 libc/src/__support/threads/linux/rwlock.h     | 527 ++++++++++--------
 libc/src/pthread/pthread_rwlock_init.cpp      |   6 +-
 7 files changed, 302 insertions(+), 256 deletions(-)

diff --git a/libc/include/llvm-libc-types/pthread_rwlock_t.h b/libc/include/llvm-libc-types/pthread_rwlock_t.h
index 4950547004632..fefa9a2611166 100644
--- a/libc/include/llvm-libc-types/pthread_rwlock_t.h
+++ b/libc/include/llvm-libc-types/pthread_rwlock_t.h
@@ -12,13 +12,13 @@
 #include "llvm-libc-types/__futex_word.h"
 #include "llvm-libc-types/pid_t.h"
 typedef struct {
-  bool __is_pshared;
-  char __preference;
+  unsigned __is_pshared : 1;
+  unsigned __preference : 1;
   int __state;
   pid_t __writier_tid;
   __futex_word __wait_queue_mutex;
-  __futex_word __pending_reader;
-  __futex_word __pending_writer;
+  __futex_word __pending_readers;
+  __futex_word __pending_writers;
   __futex_word __reader_serialization;
   __futex_word __writer_serialization;
 } pthread_rwlock_t;
diff --git a/libc/spec/posix.td b/libc/spec/posix.td
index dfb67b578237f..40b9fa09dd61c 100644
--- a/libc/spec/posix.td
+++ b/libc/spec/posix.td
@@ -113,7 +113,8 @@ def POSIX : StandardSpec<"POSIX"> {
   NamedType PThreadRWLockAttrTType = NamedType<"pthread_rwlockattr_t">;
   PtrType PThreadRWLockAttrTPtr = PtrType<PThreadRWLockAttrTType>;
   ConstType ConstPThreadRWLockAttrTPtr = ConstType<PThreadRWLockAttrTPtr>;
-  ConstType ConstRestrictedPThreadRWLockAttrTPtr = ConstType<RestrictedPtrType<PThreadRWLockAttrTType>>;
+  RestrictedPtrType RestrictedPThreadRWLockAttrTPtr = RestrictedPtrType<PThreadRWLockAttrTType>;
+  ConstType ConstRestrictedPThreadRWLockAttrTPtr = ConstType<RestrictedPThreadRWLockAttrTPtr>;
 
   NamedType PThreadMutexAttrTType = NamedType<"pthread_mutexattr_t">;
   PtrType PThreadMutexAttrTPtr = PtrType<PThreadMutexAttrTType>;
@@ -1283,12 +1284,12 @@ def POSIX : StandardSpec<"POSIX"> {
       FunctionSpec<
         "pthread_rwlock_timedrdlock",
         RetValSpec<IntType>,
-        [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestructTimeSpecPtr>]
+        [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestrictStructTimeSpecPtr>]
       >,
       FunctionSpec<
         "pthread_rwlock_timedwrlock",
         RetValSpec<IntType>,
-        [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestructTimeSpecPtr>]
+        [ArgSpec<RestrictedPThreadRWLockTPtr>, ArgSpec<ConstRestrictStructTimeSpecPtr>]
       >,
       FunctionSpec<
         "pthread_rwlock_rdlock",
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index b216718774cfb..189247d446670 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -125,7 +125,8 @@ def TimeTType : NamedType<"time_t">;
 def StructTimeSpec : NamedType<"struct timespec">;
 def StructTimeSpecPtr : PtrType<StructTimeSpec>;
 def ConstStructTimeSpecPtr : ConstType<StructTimeSpecPtr>;
-def ConstRestructTimeSpecPtr : ConstType<RestrictedPtrType<StructTimeSpec>>;
+def RestrictStructTimeSpecPtr : RestrictedPtrType<StructTimeSpec>;
+def ConstRestrictStructTimeSpecPtr : ConstType<RestrictStructTimeSpecPtr>;
 
 def BSearchCompareT : NamedType<"__bsearchcompare_t">;
 def QSortCompareT : NamedType<"__qsortcompare_t">;
diff --git a/libc/src/__support/macros/attributes.h b/libc/src/__support/macros/attributes.h
index 8637e165fe3bc..ddab623e5779f 100644
--- a/libc/src/__support/macros/attributes.h
+++ b/libc/src/__support/macros/attributes.h
@@ -30,4 +30,10 @@
 #define LIBC_THREAD_LOCAL thread_local
 #endif
 
+#ifdef __clang__
+#define LIBC_PREFERED_TYPE(TYPE) [[clang::preferred_type(TYPE)]]
+#else
+#define LIBC_PREFERED_TYPE(TYPE)
+#endif
+
 #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_ATTRIBUTES_H
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 249aca0545e38..aefd5ae006bd1 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -54,6 +54,7 @@ add_header_library(
     .raw_mutex
     libc.src.__support.common
     libc.src.__support.OSUtil.osutil
+    libc.src.__support.CPP.limits
   COMPILE_OPTIONS
     -DLIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT=${LIBC_CONF_RWLOCK_DEFAULT_SPIN_COUNT}
     ${monotonicity_flags}
diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 95ab5f336fb91..2ad64629921a7 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -11,6 +11,7 @@
 #include "hdr/errno_macros.h"
 #include "hdr/types/pid_t.h"
 #include "src/__support/CPP/atomic.h"
+#include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/optional.h"
 #include "src/__support/OSUtil/syscall.h"
 #include "src/__support/common.h"
@@ -36,274 +37,298 @@
 #endif
 
 namespace LIBC_NAMESPACE {
-class RwLock {
-public:
-  enum class Role : char { Reader, Writer };
+// Forward declaration of the RwLock class.
+class RwLock;
+// A namespace for rwlock-specific utilities.
+namespace rwlock {
+// The role of the thread in the RwLock.
+enum class Role { Reader = 0, Writer = 1 };
+
+// A waiting queue to keep track of the pending readers and writers.
+class WaitingQueue final : private RawMutex {
+  /* FutexWordType raw_mutex;  (from base class) */
+
+  // Pending reader count (protected by the mutex)
+  FutexWordType pending_readers;
+  // Pending writer count (protected by the mutex)
+  FutexWordType pending_writers;
+  // Reader serialization (increases on each reader-waking operation)
+  Futex reader_serialization;
+  // Writer serialization (increases on each writer-waking operation)
+  Futex writer_serialization;
 
-private:
-  class WaitingQueue final : private RawMutex {
-    FutexWordType pending_reader;
-    FutexWordType pending_writer;
-    Futex reader_serialization;
-    Futex writer_serialization;
+public:
+  // RAII guard to lock and unlock the waiting queue.
+  class Guard {
+    WaitingQueue &queue;
 
-  public:
-    class Guard {
-      WaitingQueue &queue;
-
-      LIBC_INLINE constexpr Guard(WaitingQueue &queue) : queue(queue) {}
-
-    public:
-      LIBC_INLINE ~Guard() { queue.unlock(); }
-      template <Role role> LIBC_INLINE FutexWordType &pending_count() {
-        if constexpr (role == Role::Reader)
-          return queue.pending_reader;
-        else
-          return queue.pending_writer;
-      }
-      template <Role role> LIBC_INLINE FutexWordType &serialization() {
-        if constexpr (role == Role::Reader)
-          return queue.reader_serialization.val;
-        else
-          return queue.writer_serialization.val;
-      }
-      friend WaitingQueue;
-    };
+    LIBC_INLINE constexpr Guard(WaitingQueue &queue) : queue(queue) {}
 
   public:
-    LIBC_INLINE constexpr WaitingQueue()
-        : RawMutex(), pending_reader(0), pending_writer(0),
-          reader_serialization(0), writer_serialization(0) {}
-    LIBC_INLINE Guard acquire() {
-      this->lock();
-      return Guard(*this);
-    }
-
-    template <Role role>
-    LIBC_INLINE long wait(FutexWordType expected,
-                          cpp::optional<Futex::Timeout> timeout,
-                          bool is_pshared) {
+    LIBC_INLINE ~Guard() { queue.unlock(); }
+    template <Role role> LIBC_INLINE FutexWordType &pending_count() {
       if constexpr (role == Role::Reader)
-        return reader_serialization.wait(expected, timeout, is_pshared);
+        return queue.pending_readers;
       else
-        return writer_serialization.wait(expected, timeout, is_pshared);
+        return queue.pending_writers;
     }
-
-    template <Role role> LIBC_INLINE long notify(bool is_pshared) {
+    template <Role role> LIBC_INLINE FutexWordType &serialization() {
       if constexpr (role == Role::Reader)
-        return reader_serialization.notify_all(is_pshared);
+        return queue.reader_serialization.val;
       else
-        return writer_serialization.notify_one(is_pshared);
+        return queue.writer_serialization.val;
     }
+    friend WaitingQueue;
   };
 
 public:
-  enum class LockResult : int {
-    Success = 0,
-    TimedOut = ETIMEDOUT,
-    Overflow = EAGAIN,
-    Busy = EBUSY,
-    Deadlock = EDEADLOCK,
-    PermissionDenied = EPERM,
-  };
+  LIBC_INLINE constexpr WaitingQueue()
+      : RawMutex(), pending_readers(0), pending_writers(0),
+        reader_serialization(0), writer_serialization(0) {}
+
+  LIBC_INLINE Guard acquire() {
+    this->lock();
+    return Guard(*this);
+  }
+
+  template <Role role>
+  LIBC_INLINE long wait(FutexWordType expected,
+                        cpp::optional<Futex::Timeout> timeout,
+                        bool is_pshared) {
+    if constexpr (role == Role::Reader)
+      return reader_serialization.wait(expected, timeout, is_pshared);
+    else
+      return writer_serialization.wait(expected, timeout, is_pshared);
+  }
+
+  template <Role role> LIBC_INLINE long notify(bool is_pshared) {
+    if constexpr (role == Role::Reader)
+      return reader_serialization.notify_all(is_pshared);
+    else
+      return writer_serialization.notify_one(is_pshared);
+  }
+};
+
+// The State of the RwLock is stored in an integer word, consisting of the
+// following components:
+// -----------------------------------------------
+// | Range    |           Description            |
+// ===============================================
+// | 0        | Pending Reader Bit               |
+// -----------------------------------------------
+// | 1        | Pending Writer Bit               |
+// -----------------------------------------------
+// | [2, MSB) | Active Reader Count              |
+// -----------------------------------------------
+// | MSB      | Active Writer Bit                |
+// -----------------------------------------------
+class State {
+
+  // Shift amounts to access the components of the state.
+  LIBC_INLINE_VAR static constexpr int PENDING_READER_SHIFT = 0;
+  LIBC_INLINE_VAR static constexpr int PENDING_WRITER_SHIFT = 1;
+  LIBC_INLINE_VAR static constexpr int ACTIVE_READER_SHIFT = 2;
+  LIBC_INLINE_VAR static constexpr int ACTIVE_WRITER_SHIFT =
+      cpp::numeric_limits<int>::digits - 1;
+
+  // Bitmasks to access the components of the state.
+  LIBC_INLINE_VAR static constexpr int PENDING_READER_BIT =
+      1 << PENDING_READER_SHIFT;
+  LIBC_INLINE_VAR static constexpr int PENDING_WRITER_BIT =
+      1 << PENDING_WRITER_SHIFT;
+  LIBC_INLINE_VAR static constexpr int ACTIVE_READER_COUNT_UNIT =
+      1 << ACTIVE_READER_SHIFT;
+  LIBC_INLINE_VAR static constexpr int ACTIVE_WRITER_BIT =
+      1 << ACTIVE_WRITER_SHIFT;
+  LIBC_INLINE_VAR static constexpr int PENDING_MASK =
+      PENDING_READER_BIT | PENDING_WRITER_BIT;
 
 private:
-  // The State of the RwLock is stored in a 32-bit word, consisting of the
-  // following components:
-  // -----------------------------------------------
-  // | Range |           Description               |
-  // ===============================================
-  // | 0     | Pending Reader Bit                  |
-  // -----------------------------------------------
-  // | 1     | Pending Writer Bit                  |
-  // -----------------------------------------------
-  // | 2-30  | Active Reader Count                 |
-  // -----------------------------------------------
-  // | 31    | Active Writer Bit                   |
-  // -----------------------------------------------
-  class State {
-    // We use the signed interger as the state type. It is easier
-    // to handle state trasitions and detections using signed integers.
-    using Type = int32_t;
-
-    // Shift amounts to access the components of the state.
-    LIBC_INLINE_VAR static constexpr Type PENDING_READER_SHIFT = 0;
-    LIBC_INLINE_VAR static constexpr Type PENDING_WRITER_SHIFT = 1;
-    LIBC_INLINE_VAR static constexpr Type ACTIVE_READER_SHIFT = 2;
-    LIBC_INLINE_VAR static constexpr Type ACTIVE_WRITER_SHIFT = 31;
-
-    // Bitmasks to access the components of the state.
-    LIBC_INLINE_VAR static constexpr Type PENDING_READER_BIT =
-        1 << PENDING_READER_SHIFT;
-    LIBC_INLINE_VAR static constexpr Type PENDING_WRITER_BIT =
-        1 << PENDING_WRITER_SHIFT;
-    LIBC_INLINE_VAR static constexpr Type ACTIVE_READER_COUNT_UNIT =
-        1 << ACTIVE_READER_SHIFT;
-    LIBC_INLINE_VAR static constexpr Type ACTIVE_WRITER_BIT =
-        1 << ACTIVE_WRITER_SHIFT;
-    LIBC_INLINE_VAR static constexpr Type PENDING_MASK =
-        PENDING_READER_BIT | PENDING_WRITER_BIT;
-
-  private:
-    Type state;
+  // We use the signed integer as the state type. It is easier
+  // to reason about the state transitions using signedness.
+  int state;
 
-  public:
-    // Construction and conversion functions.
-    LIBC_INLINE constexpr State(Type state = 0) : state(state) {}
-    LIBC_INLINE constexpr operator Type() const { return state; }
-
-    // Utilities to check the state of the RwLock.
-    LIBC_INLINE constexpr bool has_active_writer() const { return state < 0; }
-    LIBC_INLINE constexpr bool has_active_reader() const {
-      return state > ACTIVE_READER_COUNT_UNIT;
-    }
-    LIBC_INLINE constexpr bool has_acitve_owner() const {
-      return has_active_reader() || has_active_writer();
-    }
-    LIBC_INLINE constexpr bool has_last_reader() const {
-      return (state >> ACTIVE_READER_SHIFT) == 1;
-    }
-    LIBC_INLINE constexpr bool has_pending_writer() const {
-      return state & PENDING_WRITER_BIT;
-    }
-    LIBC_INLINE constexpr bool has_pending() const {
-      return state & PENDING_MASK;
-    }
-    LIBC_INLINE constexpr State set_writer_bit() const {
-      return State(state | ACTIVE_WRITER_BIT);
-    }
+public:
+  // Construction and conversion functions.
+  LIBC_INLINE constexpr State(int state = 0) : state(state) {}
+  LIBC_INLINE constexpr operator int() const { return state; }
+
+  // Utilities to check the state of the RwLock.
+  LIBC_INLINE constexpr bool has_active_writer() const { return state < 0; }
+  LIBC_INLINE constexpr bool has_active_reader() const {
+    return state > ACTIVE_READER_COUNT_UNIT;
+  }
+  LIBC_INLINE constexpr bool has_acitve_owner() const {
+    return has_active_reader() || has_active_writer();
+  }
+  LIBC_INLINE constexpr bool has_last_reader() const {
+    return (state >> ACTIVE_READER_SHIFT) == 1;
+  }
+  LIBC_INLINE constexpr bool has_pending_writer() const {
+    return state & PENDING_WRITER_BIT;
+  }
+  LIBC_INLINE constexpr bool has_pending() const {
+    return state & PENDING_MASK;
+  }
 
-    // The preference parameter changes the behavior of the lock acquisition
-    // if there are both readers and writers waiting for the lock. If writers
-    // are preferred, reader acquisition will be blocked until all pending
-    // writers are served.
-    template <Role role> LIBC_INLINE bool can_acquire(Role preference) const {
-      if constexpr (role == Role::Reader) {
-        switch (preference) {
-        case Role::Reader:
-          return !has_active_writer();
-        case Role::Writer:
-          return !has_active_writer() && !has_pending_writer();
-        }
-      } else
-        return !has_acitve_owner();
-    }
+  LIBC_INLINE constexpr State set_writer_bit() const {
+    return State(state | ACTIVE_WRITER_BIT);
+  }
 
-    // This function check if it is possible to grow the reader count without
-    // overflowing the state.
-    LIBC_INLINE cpp::optional<State> try_increase_reader_count() const {
-      LIBC_ASSERT(!has_active_writer() &&
-                  "try_increase_reader_count shall only be called when there "
-                  "is no active writer.");
-      State res;
-      if (LIBC_UNLIKELY(__builtin_sadd_overflow(state, ACTIVE_READER_COUNT_UNIT,
-                                                &res.state)))
-        return cpp::nullopt;
-      return res;
-    }
+  // The preference parameter changes the behavior of the lock acquisition
+  // if there are both readers and writers waiting for the lock. If writers
+  // are preferred, reader acquisition will be blocked until all pending
+  // writers are served.
+  template <Role role> LIBC_INLINE bool can_acquire(Role preference) const {
+    if constexpr (role == Role::Reader) {
+      switch (preference) {
+      case Role::Reader:
+        return !has_active_writer();
+      case Role::Writer:
+        return !has_active_writer() && !has_pending_writer();
+      }
+    } else
+      return !has_acitve_owner();
+  }
 
-    // Utilities to do atomic operations on the state.
-    LIBC_INLINE static State
-    fetch_sub_reader_count(cpp::Atomic<Type> &target,
-                           cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_sub(ACTIVE_READER_COUNT_UNIT, order));
-    }
+  // This function checks if it is possible to grow the reader count without
+  // overflowing the state.
+  LIBC_INLINE cpp::optional<State> try_increase_reader_count() const {
+    LIBC_ASSERT(!has_active_writer() &&
+                "try_increase_reader_count shall only be called when there "
+                "is no active writer.");
+    State res;
+    if (LIBC_UNLIKELY(__builtin_sadd_overflow(state, ACTIVE_READER_COUNT_UNIT,
+                                              &res.state)))
+      return cpp::nullopt;
+    return res;
+  }
 
-    LIBC_INLINE static State
-    load(cpp::Atomic<Type> &target,
-         cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.load(order));
-    }
+  // Utilities to do atomic operations on the state.
+  LIBC_INLINE static State
+  fetch_sub_reader_count(cpp::Atomic<int> &target,
+                         cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+    return State(target.fetch_sub(ACTIVE_READER_COUNT_UNIT, order));
+  }
+
+  LIBC_INLINE static State
+  load(cpp::Atomic<int> &target,
+       cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+    return State(target.load(order));
+  }
 
-    template <Role role>
-    LIBC_INLINE static State
-    fetch_set_pending_bit(cpp::Atomic<Type> &target,
+  template <Role role>
+  LIBC_INLINE static State
+  fetch_set_pending_bit(cpp::Atomic<int> &target,
+                        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+    if constexpr (role == Role::Reader)
+      return State(target.fetch_or(PENDING_READER_BIT, order));
+    else
+      return State(target.fetch_or(PENDING_WRITER_BIT, order));
+  }
+  template <Role role>
+  LIBC_INLINE static State
+  fetch_clear_pending_bit(cpp::Atomic<int> &target,
                           cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      if constexpr (role == Role::Reader)
-        return State(target.fetch_or(PENDING_READER_BIT, order));
-      else
-        return State(target.fetch_or(PENDING_WRITER_BIT, order));
-    }
-    template <Role role>
-    LIBC_INLINE static State fetch_clear_pending_bit(
-        cpp::Atomic<Type> &target,
-        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      if constexpr (role == Role::Reader)
-        return State(target.fetch_and(~PENDING_READER_BIT, order));
-      else
-        return State(target.fetch_and(~PENDING_WRITER_BIT, order));
-    }
-    LIBC_INLINE static State fetch_set_active_writer(
-        cpp::Atomic<Type> &target,
-        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_or(ACTIVE_WRITER_BIT, order));
-    }
-    LIBC_INLINE static State fetch_clear_active_writer(
-        cpp::Atomic<Type> &target,
-        cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
-      return State(target.fetch_and(~ACTIVE_WRITER_BIT, order));
-    }
+    if constexpr (role == Role::Reader)
+      return State(target.fetch_and(~PENDING_READER_BIT, order));
+    else
+      return State(target.fetch_and(~PENDING_WRITER_BIT, order));
+  }
+  LIBC_INLINE static State
+  fetch_set_active_writer(cpp::Atomic<int> &target,
+                          cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+    return State(target.fetch_or(ACTIVE_WRITER_BIT, order));
+  }
+  LIBC_INLINE static State fetch_clear_active_writer(
+      cpp::Atomic<int> &target,
+      cpp::MemoryOrder order = cpp::MemoryOrder::SEQ_CST) {
+    return State(target.fetch_and(~ACTIVE_WRITER_BIT, order));
+  }
 
-    LIBC_INLINE bool
-    compare_exchange_weak_with(cpp::Atomic<Type> &target, State desired,
-                               cpp::MemoryOrder success_order,
-                               cpp::MemoryOrder failure_order) {
-      return target.compare_exchange_weak(state, desired, success_order,
-                                          failure_order);
-    }
+  LIBC_INLINE bool compare_exchange_weak_with(cpp::Atomic<int> &target,
+                                              State desired,
+                                              cpp::MemoryOrder success_order,
+                                              cpp::MemoryOrder failure_order) {
+    return target.compare_exchange_weak(state, desired, success_order,
+                                        failure_order);
+  }
 
-    // Utilities to spin and reload the state.
-  private:
-    template <class F>
-    LIBC_INLINE static State spin_reload_until(cpp::Atomic<Type> &target,
-                                               F &&func, unsigned spin_count) {
-      for (;;) {
-        auto state = State::load(target, cpp::MemoryOrder::RELAXED);
-        if (func(state) || spin_count == 0)
-          return state;
-        sleep_briefly();
-        spin_count--;
-      }
+  // Utilities to spin and reload the state.
+private:
+  template <class F>
+  LIBC_INLINE static State spin_reload_until(cpp::Atomic<int> &target, F &&func,
+                                             unsigned spin_count) {
+    for (;;) {
+      auto state = State::load(target, cpp::MemoryOrder::RELAXED);
+      if (func(state) || spin_count == 0)
+        return state;
+      sleep_briefly();
+      spin_count--;
     }
+  }
 
-  public:
-    template <Role role>
-    LIBC_INLINE static State spin_reload(cpp::Atomic<Type> &target,
-                                         Role preference, unsigned spin_count) {
-      if constexpr (role == Role::Reader) {
-        // Return the reader state if either the lock is available or there is
-        // any
-        // ongoing contention.
-        return spin_reload_until(
-            target,
-            [=](State state) {
-              return state.can_acquire<Role::Reader>(preference) ||
-                     state.has_pending();
-            },
-            spin_count);
-      } else {
-        // Return the writer state if either the lock is available or there is
-        // any
-        // contention *between writers*. Since writers can be way less than
-        // readers, we allow them to spin more to improve the fairness.
-        return spin_reload_until(
-            target,
-            [=](State state) {
-              return state.can_acquire<Role::Writer>(preference) ||
-                     state.has_pending_writer();
-            },
-            spin_count);
-      }
+public:
+  template <Role role>
+  LIBC_INLINE static State spin_reload(cpp::Atomic<int> &target,
+                                       Role preference, unsigned spin_count) {
+    if constexpr (role == Role::Reader) {
+      // Return the reader state if either the lock is available or there is
+      // any
+      // ongoing contention.
+      return spin_reload_until(
+          target,
+          [=](State state) {
+            return state.can_acquire<Role::Reader>(preference) ||
+                   state.has_pending();
+          },
+          spin_count);
+    } else {
+      // Return the writer state if either the lock is available or there is
+      // any
+      // contention *between writers*. Since writers can be way less than
+      // readers, we allow them to spin more to improve the fairness.
+      return spin_reload_until(
+          target,
+          [=](State state) {
+            return state.can_acquire<Role::Writer>(preference) ||
+                   state.has_pending_writer();
+          },
+          spin_count);
     }
+  }
+};
+} // namespace rwlock
+
+class RwLock {
+  using State = rwlock::State;
+  using Role = rwlock::Role;
+  using WaitingQueue = rwlock::WaitingQueue;
+
+public:
+  // Return types for the lock functions.
+  // All the locking routines returning this type are marked as [[nodiscard]]
+  // because it is a common error to assume the lock success without checking
+  // the return value, which can lead to undefined behaviors or other subtle
+  // bugs that are hard to reason about.
+  enum class LockResult : int {
+    Success = 0,
+    TimedOut = ETIMEDOUT,
+    Overflow = EAGAIN, /* EAGAIN is specified in the standard for overflow. */
+    Busy = EBUSY,
+    Deadlock = EDEADLOCK,
+    PermissionDenied = EPERM,
   };
 
 private:
   // Whether the RwLock is shared between processes.
-  bool is_pshared;
+  LIBC_PREFERED_TYPE(bool)
+  unsigned is_pshared : 1;
   // Reader/Writer preference.
-  Role preference;
+  LIBC_PREFERED_TYPE(Role)
+  unsigned preference : 1;
   // State to keep track of the RwLock.
-  cpp::Atomic<int32_t> state;
+  cpp::Atomic<int> state;
   // writer_tid is used to keep track of the thread id of the writer. Notice
   // that TLS address is not a good idea here since it may remains the same
   // across forked processes.
@@ -312,12 +337,16 @@ class RwLock {
   WaitingQueue queue;
 
 private:
+  // Load the bitfield preference.
+  LIBC_INLINE Role get_preference() const {
+    return static_cast<Role>(preference);
+  }
   // TODO: use cached thread id once implemented.
   LIBC_INLINE static pid_t gettid() { return syscall_impl<pid_t>(SYS_gettid); }
 
   template <Role role> LIBC_INLINE LockResult try_lock(State &old) {
     if constexpr (role == Role::Reader) {
-      while (LIBC_LIKELY(old.can_acquire<Role::Reader>(preference))) {
+      while (LIBC_LIKELY(old.can_acquire<Role::Reader>(get_preference()))) {
         cpp::optional<State> next = old.try_increase_reader_count();
         if (!next)
           return LockResult::Overflow;
@@ -331,7 +360,7 @@ class RwLock {
       return LockResult::Busy;
     } else {
       // This while loop should terminate quickly
-      while (LIBC_LIKELY(old.can_acquire<Role::Writer>(preference))) {
+      while (LIBC_LIKELY(old.can_acquire<Role::Writer>(get_preference()))) {
         if (LIBC_LIKELY(old.compare_exchange_weak_with(
                 state, old.set_writer_bit(), cpp::MemoryOrder::ACQUIRE,
                 cpp::MemoryOrder::RELAXED))) {
@@ -348,13 +377,15 @@ class RwLock {
 public:
   LIBC_INLINE constexpr RwLock(Role preference = Role::Reader,
                                bool is_pshared = false)
-      : is_pshared(is_pshared), preference(preference), state(0), writer_tid(0),
-        queue() {}
+      : is_pshared(is_pshared), preference(static_cast<unsigned>(preference)),
+        state(0), writer_tid(0), queue() {}
 
+  [[nodiscard]]
   LIBC_INLINE LockResult try_read_lock() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
     return try_lock<Role::Reader>(old);
   }
+  [[nodiscard]]
   LIBC_INLINE LockResult try_write_lock() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
     return try_lock<Role::Writer>(old);
@@ -378,7 +409,7 @@ class RwLock {
 
     // Phase 3: spin to get the initial state. We ignore the timing due to
     // spin since it should end quickly.
-    State old = State::spin_reload<role>(state, preference, spin_count);
+    State old = State::spin_reload<role>(state, get_preference(), spin_count);
 
     // Enter the main acquisition loop.
     for (;;) {
@@ -411,7 +442,7 @@ class RwLock {
       // Phase 6: do futex wait until the lock is available or timeout is
       // reached.
       bool timeout_flag = false;
-      if (!old.can_acquire<role>(preference)) {
+      if (!old.can_acquire<role>(get_preference())) {
         timeout_flag = (queue.wait<role>(serial_number, timeout, is_pshared) ==
                         -ETIMEDOUT);
 
@@ -434,12 +465,13 @@ class RwLock {
           return LockResult::TimedOut;
 
         // Phase 9: reload the state and retry the acquisition.
-        old = State::spin_reload<role>(state, preference, spin_count);
+        old = State::spin_reload<role>(state, get_preference(), spin_count);
       }
     }
   }
 
 public:
+  [[nodiscard]]
   LIBC_INLINE LockResult
   read_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
@@ -448,6 +480,7 @@ class RwLock {
       return result;
     return lock_slow<Role::Reader>(timeout, spin_count);
   }
+  [[nodiscard]]
   LIBC_INLINE LockResult
   write_lock(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
              unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
@@ -458,9 +491,9 @@ class RwLock {
   }
 
 private:
-  // Compiler somehow decides that this function may be inlined, which leads to
-  // a larger unlock function that is infeasible to be inlined. Since
-  // notifcation routine is colder we mark it as noinline explicitly.
+  // Compiler (clang 19.0) somehow decides that this function may be inlined,
+  // which leads to a larger unlock function that is infeasible to be inlined.
+  // Since notification routine is colder we mark it as noinline explicitly.
   [[gnu::noinline]]
   LIBC_INLINE void notify_pending_threads() {
     enum class WakeTarget { Readers, Writers, None };
@@ -485,6 +518,7 @@ class RwLock {
   }
 
 public:
+  [[nodiscard]]
   LIBC_INLINE LockResult unlock() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
     if (old.has_active_writer()) {
@@ -513,6 +547,9 @@ class RwLock {
     return LockResult::Success;
   }
 
+  // We do not allocate any special resources for the RwLock, so this function
+  // will only check if the lock is currently held by any thread.
+  [[nodiscard]]
   LIBC_INLINE LockResult check_for_destroy() {
     State old = State::load(state, cpp::MemoryOrder::RELAXED);
     if (old.has_acitve_owner())
diff --git a/libc/src/pthread/pthread_rwlock_init.cpp b/libc/src/pthread/pthread_rwlock_init.cpp
index 6fd4aa0b6357b..b04316ff402f7 100644
--- a/libc/src/pthread/pthread_rwlock_init.cpp
+++ b/libc/src/pthread/pthread_rwlock_init.cpp
@@ -37,13 +37,13 @@ LLVM_LIBC_FUNCTION(int, pthread_rwlock_init,
     rwlockattr = *attr;
 
   // PTHREAD_RWLOCK_PREFER_WRITER_NP is not supported.
-  RwLock::Role preference;
+  rwlock::Role preference;
   switch (rwlockattr.pref) {
   case PTHREAD_RWLOCK_PREFER_READER_NP:
-    preference = RwLock::Role::Reader;
+    preference = rwlock::Role::Reader;
     break;
   case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
-    preference = RwLock::Role::Writer;
+    preference = rwlock::Role::Writer;
     break;
   default:
     return EINVAL;

>From 92d930fb375087e97727a5373032e2b7eb07b909 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sat, 8 Jun 2024 22:32:07 -0700
Subject: [PATCH 19/26] test and some fix (WIP)

---
 libc/src/__support/threads/linux/rwlock.h     |  4 +-
 .../integration/src/pthread/CMakeLists.txt    | 25 ++++++++++
 .../src/pthread/pthread_rwlock_test.cpp       | 48 +++++++++++++++++++
 3 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 libc/test/integration/src/pthread/pthread_rwlock_test.cpp

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 2ad64629921a7..63239b9b11052 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -129,7 +129,7 @@ class State {
   LIBC_INLINE_VAR static constexpr int PENDING_WRITER_SHIFT = 1;
   LIBC_INLINE_VAR static constexpr int ACTIVE_READER_SHIFT = 2;
   LIBC_INLINE_VAR static constexpr int ACTIVE_WRITER_SHIFT =
-      cpp::numeric_limits<int>::digits - 1;
+      cpp::numeric_limits<int>::digits;
 
   // Bitmasks to access the components of the state.
   LIBC_INLINE_VAR static constexpr int PENDING_READER_BIT =
@@ -156,7 +156,7 @@ class State {
   // Utilities to check the state of the RwLock.
   LIBC_INLINE constexpr bool has_active_writer() const { return state < 0; }
   LIBC_INLINE constexpr bool has_active_reader() const {
-    return state > ACTIVE_READER_COUNT_UNIT;
+    return state >= ACTIVE_READER_COUNT_UNIT;
   }
   LIBC_INLINE constexpr bool has_acitve_owner() const {
     return has_active_reader() || has_active_writer();
diff --git a/libc/test/integration/src/pthread/CMakeLists.txt b/libc/test/integration/src/pthread/CMakeLists.txt
index a10dc256200d9..b83a7ecf98961 100644
--- a/libc/test/integration/src/pthread/CMakeLists.txt
+++ b/libc/test/integration/src/pthread/CMakeLists.txt
@@ -17,6 +17,31 @@ add_integration_test(
     libc.src.pthread.pthread_join
 )
 
+add_integration_test(
+  pthread_rwlock_test
+  SUITE
+    libc-pthread-integration-tests
+  SRCS
+    pthread_rwlock_test.cpp
+  DEPENDS
+    libc.include.pthread
+    libc.include.time
+    libc.include.errno
+    libc.src.pthread.pthread_rwlock_destroy
+    libc.src.pthread.pthread_rwlock_init
+    libc.src.pthread.pthread_rwlock_rdlock
+    libc.src.pthread.pthread_rwlock_tryrdlock
+    libc.src.pthread.pthread_rwlock_timedrdlock
+    libc.src.pthread.pthread_rwlock_wrlock
+    libc.src.pthread.pthread_rwlock_trywrlock
+    libc.src.pthread.pthread_rwlock_timedwrlock
+    libc.src.pthread.pthread_rwlock_unlock
+    libc.src.pthread.pthread_create
+    libc.src.pthread.pthread_join
+    libc.src.time.clock_gettime
+    libc.src.unistd.fork
+)
+
 add_integration_test(
   pthread_test
   SUITE
diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
new file mode 100644
index 0000000000000..8e716367144de
--- /dev/null
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -0,0 +1,48 @@
+//===-- Tests for pthread_rwlock ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/pthread/pthread_rwlock_destroy.h"
+#include "src/pthread/pthread_rwlock_init.h"
+#include "src/pthread/pthread_rwlock_rdlock.h"
+#include "src/pthread/pthread_rwlock_timedrdlock.h"
+#include "src/pthread/pthread_rwlock_timedwrlock.h"
+#include "src/pthread/pthread_rwlock_tryrdlock.h"
+#include "src/pthread/pthread_rwlock_trywrlock.h"
+#include "src/pthread/pthread_rwlock_unlock.h"
+#include "src/pthread/pthread_rwlock_wrlock.h"
+
+#include "src/pthread/pthread_create.h"
+#include "src/pthread/pthread_join.h"
+
+#include "test/IntegrationTest/test.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h> // uintptr_t
+
+static void smoke_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, nullptr), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), EDEADLK);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), EDEADLK);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+}
+
+TEST_MAIN() {
+  smoke_test();
+  return 0;
+}

>From 00ad32931aa1c52a8092d69900482954c98243aa Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 00:17:15 -0700
Subject: [PATCH 20/26] add many tests

---
 libc/src/pthread/pthread_rwlock_destroy.cpp   |   2 +
 libc/src/pthread/pthread_rwlock_init.cpp      |   3 +
 libc/src/pthread/pthread_rwlock_rdlock.cpp    |   2 +
 .../pthread/pthread_rwlock_timedrdlock.cpp    |   2 +
 .../pthread/pthread_rwlock_timedwrlock.cpp    |   2 +
 libc/src/pthread/pthread_rwlock_tryrdlock.cpp |   2 +
 libc/src/pthread/pthread_rwlock_trywrlock.cpp |   2 +
 libc/src/pthread/pthread_rwlock_unlock.cpp    |   2 +
 libc/src/pthread/pthread_rwlock_wrlock.cpp    |   2 +
 .../integration/src/pthread/CMakeLists.txt    |  11 +
 .../src/pthread/pthread_rwlock_test.cpp       | 389 +++++++++++++++++-
 11 files changed, 413 insertions(+), 6 deletions(-)

diff --git a/libc/src/pthread/pthread_rwlock_destroy.cpp b/libc/src/pthread/pthread_rwlock_destroy.cpp
index 4f97ba3b0b5c3..d82bb376bda2c 100644
--- a/libc/src/pthread/pthread_rwlock_destroy.cpp
+++ b/libc/src/pthread/pthread_rwlock_destroy.cpp
@@ -17,6 +17,8 @@
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_destroy, (pthread_rwlock_t * rwlock)) {
+  if (!rwlock)
+    return EINVAL;
   auto *rw = reinterpret_cast<RwLock *>(rwlock);
   RwLock::LockResult res = rw->check_for_destroy();
 
diff --git a/libc/src/pthread/pthread_rwlock_init.cpp b/libc/src/pthread/pthread_rwlock_init.cpp
index b04316ff402f7..81815e8b4296d 100644
--- a/libc/src/pthread/pthread_rwlock_init.cpp
+++ b/libc/src/pthread/pthread_rwlock_init.cpp
@@ -9,6 +9,7 @@
 #include "src/pthread/pthread_rwlock_init.h"
 
 #include "src/__support/common.h"
+#include "src/__support/libc_assert.h"
 #include "src/__support/threads/linux/rwlock.h"
 
 #include <errno.h>
@@ -33,6 +34,8 @@ LLVM_LIBC_FUNCTION(int, pthread_rwlock_init,
       /*pshared=*/PTHREAD_PROCESS_PRIVATE,
       /*pref*/ PTHREAD_RWLOCK_PREFER_READER_NP,
   };
+  // POSIX does not specify this check, so we add an assertion to catch it.
+  LIBC_ASSERT(rwlock && "rwlock is null");
   if (attr)
     rwlockattr = *attr;
 
diff --git a/libc/src/pthread/pthread_rwlock_rdlock.cpp b/libc/src/pthread/pthread_rwlock_rdlock.cpp
index cb7bc439c1b1e..e9aee5da4e7e4 100644
--- a/libc/src/pthread/pthread_rwlock_rdlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_rdlock.cpp
@@ -23,6 +23,8 @@ static_assert(
     "as the internal rwlock type.");
 
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_rdlock, (pthread_rwlock_t * rwlock)) {
+  if (!rwlock)
+    return EINVAL;
   RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
   return static_cast<int>(rw->read_lock());
 }
diff --git a/libc/src/pthread/pthread_rwlock_timedrdlock.cpp b/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
index 0c56663c754aa..2e57732eaca37 100644
--- a/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_timedrdlock.cpp
@@ -28,6 +28,8 @@ static_assert(
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_timedrdlock,
                    (pthread_rwlock_t * rwlock,
                     const struct timespec *abstime)) {
+  if (!rwlock)
+    return EINVAL;
   RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
   LIBC_ASSERT(abstime && "timedrdlock called with a null timeout");
   auto timeout =
diff --git a/libc/src/pthread/pthread_rwlock_timedwrlock.cpp b/libc/src/pthread/pthread_rwlock_timedwrlock.cpp
index 9c86dcddd4b4f..55fc669de27a9 100644
--- a/libc/src/pthread/pthread_rwlock_timedwrlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_timedwrlock.cpp
@@ -22,6 +22,8 @@ namespace LIBC_NAMESPACE {
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_timedwrlock,
                    (pthread_rwlock_t *__restrict rwlock,
                     const struct timespec *__restrict abstime)) {
+  if (!rwlock)
+    return EINVAL;
   RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
   LIBC_ASSERT(abstime && "timedwrlock called with a null timeout");
   auto timeout =
diff --git a/libc/src/pthread/pthread_rwlock_tryrdlock.cpp b/libc/src/pthread/pthread_rwlock_tryrdlock.cpp
index a2101d2f4714f..9dc1bf09bc830 100644
--- a/libc/src/pthread/pthread_rwlock_tryrdlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_tryrdlock.cpp
@@ -23,6 +23,8 @@ static_assert(
     "as the internal rwlock type.");
 
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_tryrdlock, (pthread_rwlock_t * rwlock)) {
+  if (!rwlock)
+    return EINVAL;
   RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
   return static_cast<int>(rw->try_read_lock());
 }
diff --git a/libc/src/pthread/pthread_rwlock_trywrlock.cpp b/libc/src/pthread/pthread_rwlock_trywrlock.cpp
index cfd44ac60fcbd..e4ace3cb350af 100644
--- a/libc/src/pthread/pthread_rwlock_trywrlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_trywrlock.cpp
@@ -23,6 +23,8 @@ static_assert(
     "as the internal rwlock type.");
 
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_trywrlock, (pthread_rwlock_t * rwlock)) {
+  if (!rwlock)
+    return EINVAL;
   RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
   return static_cast<int>(rw->try_write_lock());
 }
diff --git a/libc/src/pthread/pthread_rwlock_unlock.cpp b/libc/src/pthread/pthread_rwlock_unlock.cpp
index 67f32a3a6a8b3..21cedf42a8d50 100644
--- a/libc/src/pthread/pthread_rwlock_unlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_unlock.cpp
@@ -17,6 +17,8 @@
 namespace LIBC_NAMESPACE {
 
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_unlock, (pthread_rwlock_t * rwlock)) {
+  if (!rwlock)
+    return EINVAL;
   auto *rw = reinterpret_cast<RwLock *>(rwlock);
   return static_cast<int>(rw->unlock());
 }
diff --git a/libc/src/pthread/pthread_rwlock_wrlock.cpp b/libc/src/pthread/pthread_rwlock_wrlock.cpp
index 276a08245b6ac..5d3868a58f4e9 100644
--- a/libc/src/pthread/pthread_rwlock_wrlock.cpp
+++ b/libc/src/pthread/pthread_rwlock_wrlock.cpp
@@ -23,6 +23,8 @@ static_assert(
     "as the internal rwlock type.");
 
 LLVM_LIBC_FUNCTION(int, pthread_rwlock_wrlock, (pthread_rwlock_t * rwlock)) {
+  if (!rwlock)
+    return EINVAL;
   RwLock *rw = reinterpret_cast<RwLock *>(rwlock);
   return static_cast<int>(rw->write_lock());
 }
diff --git a/libc/test/integration/src/pthread/CMakeLists.txt b/libc/test/integration/src/pthread/CMakeLists.txt
index b83a7ecf98961..fa9c38d6c76c9 100644
--- a/libc/test/integration/src/pthread/CMakeLists.txt
+++ b/libc/test/integration/src/pthread/CMakeLists.txt
@@ -38,8 +38,19 @@ add_integration_test(
     libc.src.pthread.pthread_rwlock_unlock
     libc.src.pthread.pthread_create
     libc.src.pthread.pthread_join
+    libc.src.pthread.pthread_rwlockattr_init
+    libc.src.pthread.pthread_rwlockattr_destroy
+    libc.src.pthread.pthread_rwlockattr_setpshared
+    libc.src.pthread.pthread_rwlockattr_setkind_np
+    libc.src.sys.mman.mmap
+    libc.src.sys.mman.munmap
     libc.src.time.clock_gettime
+    libc.src.sys.random.getrandom
     libc.src.unistd.fork
+    libc.src.sys.wait.waitpid
+    libc.src.stdlib.exit
+    libc.src.__support.CPP.atomic
+    libc.src.__support.threads.sleep
 )
 
 add_integration_test(
diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
index 8e716367144de..ee7be41605461 100644
--- a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -6,6 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "src/__support/CPP/atomic.h"
+#include "src/__support/threads/sleep.h"
+#include "src/pthread/pthread_create.h"
+#include "src/pthread/pthread_join.h"
 #include "src/pthread/pthread_rwlock_destroy.h"
 #include "src/pthread/pthread_rwlock_init.h"
 #include "src/pthread/pthread_rwlock_rdlock.h"
@@ -15,15 +19,21 @@
 #include "src/pthread/pthread_rwlock_trywrlock.h"
 #include "src/pthread/pthread_rwlock_unlock.h"
 #include "src/pthread/pthread_rwlock_wrlock.h"
-
-#include "src/pthread/pthread_create.h"
-#include "src/pthread/pthread_join.h"
-
+#include "src/pthread/pthread_rwlockattr_destroy.h"
+#include "src/pthread/pthread_rwlockattr_init.h"
+#include "src/pthread/pthread_rwlockattr_setkind_np.h"
+#include "src/pthread/pthread_rwlockattr_setpshared.h"
+#include "src/stdlib/exit.h"
+#include "src/sys/mman/mmap.h"
+#include "src/sys/mman/munmap.h"
+#include "src/sys/random/getrandom.h"
+#include "src/sys/wait/waitpid.h"
+#include "src/time/clock_gettime.h"
+#include "src/unistd/fork.h"
 #include "test/IntegrationTest/test.h"
-
 #include <errno.h>
 #include <pthread.h>
-#include <stdint.h> // uintptr_t
+#include <time.h>
 
 static void smoke_test() {
   pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
@@ -42,7 +52,374 @@ static void smoke_test() {
   ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
 }
 
+static void deadlock_detection_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, nullptr), 0);
+  // Only read-after-write and write-after-write deadlocks are detected.
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), EDEADLK);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+}
+
+static void try_lock_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, nullptr), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+}
+
+static void destroy_before_unlock_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, nullptr), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), EBUSY);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+}
+
+static void nullptr_test() {
+  timespec ts = {};
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(nullptr), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(nullptr), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedrdlock(nullptr, &ts), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedwrlock(nullptr, &ts), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(nullptr), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(nullptr), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(nullptr), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(nullptr), EINVAL);
+}
+
+// If you are a user reading this code, please do not do something like this.
+// We manually modify the internal state of the rwlock to test high reader
+// counts.
+static void high_reader_count_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  rwlock.__state = 0b01111111'11111111'11111111'11111100;
+  //                 ^                                ^^
+  //                 |                                ||
+  //                 +-- writer bit                   ++-- pending bits
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), EAGAIN);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), EAGAIN);
+  // allocate 4 reader slots.
+  rwlock.__state -= 4 * 4;
+  pthread_t threads[20];
+  for (auto &i : threads) {
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_create(
+                  &i, nullptr,
+                  [](void *arg) -> void * {
+                    pthread_rwlock_t *rwlock =
+                        reinterpret_cast<pthread_rwlock_t *>(arg);
+                    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(rwlock),
+                              EBUSY);
+                    while (LIBC_NAMESPACE::pthread_rwlock_rdlock(rwlock) ==
+                           EAGAIN)
+                      LIBC_NAMESPACE::sleep_briefly();
+                    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(rwlock), 0);
+                    return nullptr;
+                  },
+                  &rwlock),
+              0);
+  }
+  for (auto &i : threads) {
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_join(i, nullptr), 0);
+  }
+}
+
+static void unusual_timespec_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  timespec ts = {0, -1};
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedrdlock(&rwlock, &ts), EINVAL);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedwrlock(&rwlock, &ts), EINVAL);
+  ts.tv_nsec = 1'000'000'000;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedrdlock(&rwlock, &ts), EINVAL);
+  ts.tv_nsec += 1;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedwrlock(&rwlock, &ts), EINVAL);
+  ts.tv_nsec = 0;
+  ts.tv_sec = -1;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedrdlock(&rwlock, &ts),
+            ETIMEDOUT);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedwrlock(&rwlock, &ts),
+            ETIMEDOUT);
+}
+
+static void timedlock_with_deadlock_test() {
+  pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
+  timespec ts{};
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), 0);
+  LIBC_NAMESPACE::clock_gettime(CLOCK_REALTIME, &ts);
+  ts.tv_sec += 1;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedwrlock(&rwlock, &ts),
+            ETIMEDOUT);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_timedrdlock(&rwlock, &ts), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  // Note that ts has already expired, but the following should still succeed.
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_trywrlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_wrlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+}
+
+static void attributed_initialization_test() {
+  pthread_rwlockattr_t attr{};
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_init(&attr), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(
+                &attr, PTHREAD_RWLOCK_PREFER_READER_NP),
+            0);
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), 0);
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+  }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(
+                &attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP),
+            0);
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), 0);
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+  }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(
+                &attr, PTHREAD_RWLOCK_PREFER_WRITER_NP),
+            0);
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), EINVAL);
+  }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(
+                &attr, PTHREAD_RWLOCK_PREFER_READER_NP),
+            0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setpshared(
+                &attr, PTHREAD_PROCESS_PRIVATE),
+            0);
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), 0);
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+  }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setpshared(
+                &attr, PTHREAD_PROCESS_SHARED),
+            0);
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), 0);
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&rwlock), 0);
+  }
+  attr.pref = -1;
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), EINVAL);
+  }
+  attr.pref = PTHREAD_RWLOCK_PREFER_READER_NP;
+  attr.pshared = -1;
+  {
+    pthread_rwlock_t rwlock{};
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&rwlock, &attr), EINVAL);
+  }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_destroy(&attr), 0);
+}
+
+struct SharedData {
+  pthread_rwlock_t lock;
+  int data;
+  int reader_count;
+  bool writer_flag;
+  LIBC_NAMESPACE::cpp::Atomic<int> total_writer_count;
+};
+
+enum class Operation : int {
+  READ,
+  WRITE,
+  TIMED_READ,
+  TIMED_WRITE,
+  TRY_READ,
+  TRY_WRITE,
+  COUNT
+};
+
+static void randomized_thread_operation(SharedData *data) {
+  int buffer;
+  // Thread ordering cannot be reasoned about anyway, so pick the operation at
+  // random using getrandom.
+  LIBC_NAMESPACE::getrandom(&buffer, sizeof(buffer), 0);
+  Operation op =
+      static_cast<Operation>(buffer % static_cast<int>(Operation::COUNT));
+  auto read_ops = [data]() {
+    ASSERT_FALSE(data->writer_flag);
+    ++data->reader_count;
+    for (int i = 0; i < 10; ++i) {
+      LIBC_NAMESPACE::sleep_briefly();
+    }
+    --data->reader_count;
+  };
+  auto write_ops = [data]() {
+    ASSERT_FALSE(data->writer_flag);
+    data->data += 1;
+    data->writer_flag = true;
+    for (int i = 0; i < 10; ++i) {
+      LIBC_NAMESPACE::sleep_briefly();
+    }
+    data->writer_flag = false;
+    data->total_writer_count.fetch_add(1);
+  };
+  auto get_ts = []() {
+    timespec ts{};
+    LIBC_NAMESPACE::clock_gettime(CLOCK_REALTIME, &ts);
+    ts.tv_nsec += 5'000;
+    if (ts.tv_nsec >= 1'000'000'000) {
+      ts.tv_nsec -= 1'000'000'000;
+      ts.tv_sec += 1;
+    }
+    return ts;
+  };
+  switch (op) {
+  case Operation::READ: {
+    LIBC_NAMESPACE::pthread_rwlock_rdlock(&data->lock);
+    read_ops();
+    LIBC_NAMESPACE::pthread_rwlock_unlock(&data->lock);
+    break;
+  }
+  case Operation::WRITE: {
+    LIBC_NAMESPACE::pthread_rwlock_wrlock(&data->lock);
+    write_ops();
+    LIBC_NAMESPACE::pthread_rwlock_unlock(&data->lock);
+    break;
+  }
+  case Operation::TIMED_READ: {
+    timespec ts = get_ts();
+    if (LIBC_NAMESPACE::pthread_rwlock_timedrdlock(&data->lock, &ts) == 0) {
+      read_ops();
+      LIBC_NAMESPACE::pthread_rwlock_unlock(&data->lock);
+    }
+    break;
+  }
+  case Operation::TIMED_WRITE: {
+    timespec ts = get_ts();
+    if (LIBC_NAMESPACE::pthread_rwlock_timedwrlock(&data->lock, &ts) == 0) {
+      write_ops();
+      LIBC_NAMESPACE::pthread_rwlock_unlock(&data->lock);
+    }
+    break;
+  }
+  case Operation::TRY_READ: {
+    if (LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&data->lock) == 0) {
+      read_ops();
+      LIBC_NAMESPACE::pthread_rwlock_unlock(&data->lock);
+    }
+    break;
+  }
+  case Operation::TRY_WRITE: {
+    if (LIBC_NAMESPACE::pthread_rwlock_trywrlock(&data->lock) == 0) {
+      write_ops();
+      LIBC_NAMESPACE::pthread_rwlock_unlock(&data->lock);
+    }
+    break;
+  }
+  case Operation::COUNT:
+    __builtin_trap();
+  }
+}
+
+static void
+randomized_process_operation(SharedData &data,
+                             LIBC_NAMESPACE::cpp::Atomic<int> &finish_count,
+                             int expected_count) {
+  pthread_t threads[32];
+  for (auto &i : threads) {
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_create(
+                  &i, nullptr,
+                  [](void *arg) -> void * {
+                    randomized_thread_operation(
+                        reinterpret_cast<SharedData *>(arg));
+                    return nullptr;
+                  },
+                  &data),
+              0);
+  }
+  for (auto &i : threads) {
+    ASSERT_EQ(LIBC_NAMESPACE::pthread_join(i, nullptr), 0);
+  }
+  finish_count.fetch_add(1);
+  while (finish_count.load() != expected_count) {
+    LIBC_NAMESPACE::sleep_briefly();
+  }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&data.lock), 0);
+  ASSERT_EQ(data.total_writer_count.load(), data.data);
+  ASSERT_FALSE(data.writer_flag);
+  ASSERT_EQ(data.reader_count, 0);
+}
+
+static void single_process_test(int preference) {
+  SharedData data{};
+  data.data = 0;
+  data.reader_count = 0;
+  data.writer_flag = false;
+  data.total_writer_count.store(0);
+  pthread_rwlockattr_t attr{};
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_init(&attr), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(&attr, preference),
+            0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&data.lock, nullptr), 0);
+  LIBC_NAMESPACE::cpp::Atomic<int> finish_count{0};
+  randomized_process_operation(data, finish_count, 1);
+}
+
+static void multiple_process_test(int preference) {
+  struct PShared {
+    SharedData data;
+    LIBC_NAMESPACE::cpp::Atomic<int> finish_count;
+  };
+  PShared *shared_data = reinterpret_cast<PShared *>(
+      LIBC_NAMESPACE::mmap(nullptr, sizeof(PShared), PROT_READ | PROT_WRITE,
+                           MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+  pthread_rwlockattr_t attr{};
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_init(&attr), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(&attr, preference),
+            0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setpshared(
+                &attr, PTHREAD_PROCESS_SHARED),
+            0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&shared_data->data.lock, &attr),
+            0);
+  int pid = LIBC_NAMESPACE::fork();
+  randomized_process_operation(shared_data->data, shared_data->finish_count, 2);
+  if (pid == 0) {
+    LIBC_NAMESPACE::exit(0);
+  } else {
+    LIBC_NAMESPACE::waitpid(pid, nullptr, 0);
+  }
+  LIBC_NAMESPACE::munmap(shared_data, sizeof(PShared));
+}
+
 TEST_MAIN() {
   smoke_test();
+  deadlock_detection_test();
+  try_lock_test();
+  destroy_before_unlock_test();
+  nullptr_test();
+  high_reader_count_test();
+  unusual_timespec_test();
+  timedlock_with_deadlock_test();
+  attributed_initialization_test();
+  single_process_test(PTHREAD_RWLOCK_PREFER_READER_NP);
+  single_process_test(PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+  multiple_process_test(PTHREAD_RWLOCK_PREFER_READER_NP);
+  multiple_process_test(PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
   return 0;
 }

>From d065e98830b8f7b65e2823fea96d825d90ac4005 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 00:19:49 -0700
Subject: [PATCH 21/26] check status

---
 libc/test/integration/src/pthread/pthread_rwlock_test.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
index ee7be41605461..1060fd22bd9fd 100644
--- a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -402,7 +402,9 @@ static void multiple_process_test(int preference) {
   if (pid == 0) {
     LIBC_NAMESPACE::exit(0);
   } else {
-    LIBC_NAMESPACE::waitpid(pid, nullptr, 0);
+    int status;
+    LIBC_NAMESPACE::waitpid(pid, &status, 0);
+    ASSERT_EQ(status, 0);
   }
   LIBC_NAMESPACE::munmap(shared_data, sizeof(PShared));
 }

>From 37031668a6618cec0d8a3647eaca74995b59c95b Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 00:52:39 -0700
Subject: [PATCH 22/26] fix tests

---
 .../src/pthread/pthread_rwlock_test.cpp       | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
index 1060fd22bd9fd..32f23b7578200 100644
--- a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -112,7 +112,11 @@ static void high_reader_count_test() {
   ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_rdlock(&rwlock), EAGAIN);
   ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_tryrdlock(&rwlock), EAGAIN);
   // allocate 4 reader slots.
-  rwlock.__state -= 4 * 4;
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_unlock(&rwlock), 0);
+
   pthread_t threads[20];
   for (auto &i : threads) {
     ASSERT_EQ(LIBC_NAMESPACE::pthread_create(
@@ -345,8 +349,9 @@ randomized_process_operation(SharedData &data,
     ASSERT_EQ(LIBC_NAMESPACE::pthread_create(
                   &i, nullptr,
                   [](void *arg) -> void * {
-                    randomized_thread_operation(
-                        reinterpret_cast<SharedData *>(arg));
+                    for (int i = 0; i < 5; ++i)
+                      randomized_thread_operation(
+                          reinterpret_cast<SharedData *>(arg));
                     return nullptr;
                   },
                   &data),
@@ -359,7 +364,6 @@ randomized_process_operation(SharedData &data,
   while (finish_count.load() != expected_count) {
     LIBC_NAMESPACE::sleep_briefly();
   }
-  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&data.lock), 0);
   ASSERT_EQ(data.total_writer_count.load(), data.data);
   ASSERT_FALSE(data.writer_flag);
   ASSERT_EQ(data.reader_count, 0);
@@ -378,6 +382,7 @@ static void single_process_test(int preference) {
   ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_init(&data.lock, nullptr), 0);
   LIBC_NAMESPACE::cpp::Atomic<int> finish_count{0};
   randomized_process_operation(data, finish_count, 1);
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&data.lock), 0);
 }
 
 static void multiple_process_test(int preference) {
@@ -388,6 +393,11 @@ static void multiple_process_test(int preference) {
   PShared *shared_data = reinterpret_cast<PShared *>(
       LIBC_NAMESPACE::mmap(nullptr, sizeof(PShared), PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_ANONYMOUS, -1, 0));
+  shared_data->data.data = 0;
+  shared_data->data.reader_count = 0;
+  shared_data->data.writer_flag = false;
+  shared_data->data.total_writer_count.store(0);
+  shared_data->finish_count.store(0);
   pthread_rwlockattr_t attr{};
   ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_init(&attr), 0);
   ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlockattr_setkind_np(&attr, preference),
@@ -406,6 +416,7 @@ static void multiple_process_test(int preference) {
     LIBC_NAMESPACE::waitpid(pid, &status, 0);
     ASSERT_EQ(status, 0);
   }
+  ASSERT_EQ(LIBC_NAMESPACE::pthread_rwlock_destroy(&shared_data->data.lock), 0);
   LIBC_NAMESPACE::munmap(shared_data, sizeof(PShared));
 }
 

>From 99d7685a7eaa63dad8e5b1217b2cf5d78957bac6 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 00:58:17 -0700
Subject: [PATCH 23/26] fix tests

---
 libc/test/integration/src/pthread/pthread_rwlock_test.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
index 32f23b7578200..0088d3adf170c 100644
--- a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -279,6 +279,7 @@ static void randomized_thread_operation(SharedData *data) {
     for (int i = 0; i < 10; ++i) {
       LIBC_NAMESPACE::sleep_briefly();
     }
+    ASSERT_EQ(data->reader_count, 0);
     data->writer_flag = false;
     data->total_writer_count.fetch_add(1);
   };

>From f89210bacec9992bdfdf2dc01124fba35bce3f68 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 01:02:08 -0700
Subject: [PATCH 24/26] fix silly bug

---
 libc/test/integration/src/pthread/pthread_rwlock_test.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
index 0088d3adf170c..01ad0f1bce748 100644
--- a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -242,7 +242,7 @@ static void attributed_initialization_test() {
 struct SharedData {
   pthread_rwlock_t lock;
   int data;
-  int reader_count;
+  LIBC_NAMESPACE::cpp::Atomic<int> reader_count;
   bool writer_flag;
   LIBC_NAMESPACE::cpp::Atomic<int> total_writer_count;
 };
@@ -266,11 +266,11 @@ static void randomized_thread_operation(SharedData *data) {
       static_cast<Operation>(buffer % static_cast<int>(Operation::COUNT));
   auto read_ops = [data]() {
     ASSERT_FALSE(data->writer_flag);
-    ++data->reader_count;
+    data->reader_count.fetch_add(1, LIBC_NAMESPACE::cpp::MemoryOrder::RELAXED);
     for (int i = 0; i < 10; ++i) {
       LIBC_NAMESPACE::sleep_briefly();
     }
-    --data->reader_count;
+    data->reader_count.fetch_sub(1, LIBC_NAMESPACE::cpp::MemoryOrder::RELAXED);
   };
   auto write_ops = [data]() {
     ASSERT_FALSE(data->writer_flag);

>From a1c1aee36165161f7c0a5626516166a32ac9ee0c Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 01:42:31 -0700
Subject: [PATCH 25/26] fix missed pshared flags

---
 libc/src/__support/threads/linux/rwlock.h | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 63239b9b11052..124b6e0cefcc4 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -61,11 +61,15 @@ class WaitingQueue final : private RawMutex {
   // RAII guard to lock and unlock the waiting queue.
   class Guard {
     WaitingQueue &queue;
+    bool is_pshared;
 
-    LIBC_INLINE constexpr Guard(WaitingQueue &queue) : queue(queue) {}
+    LIBC_INLINE constexpr Guard(WaitingQueue &queue, bool is_pshared)
+        : queue(queue), is_pshared(is_pshared) {
+      queue.lock();
+    }
 
   public:
-    LIBC_INLINE ~Guard() { queue.unlock(); }
+    LIBC_INLINE ~Guard() { queue.unlock(is_pshared); }
     template <Role role> LIBC_INLINE FutexWordType &pending_count() {
       if constexpr (role == Role::Reader)
         return queue.pending_readers;
@@ -86,9 +90,9 @@ class WaitingQueue final : private RawMutex {
       : RawMutex(), pending_readers(0), pending_writers(0),
         reader_serialization(0), writer_serialization(0) {}
 
-  LIBC_INLINE Guard acquire() {
+  LIBC_INLINE Guard acquire(bool is_pshared) {
     this->lock();
-    return Guard(*this);
+    return Guard(*this, is_pshared);
   }
 
   template <Role role>
@@ -426,7 +430,7 @@ class RwLock {
         // that this lock will make the timeout imprecise, but this is the
         // best we can do. The transaction is small and everyone should make
         // progress rather quickly.
-        WaitingQueue::Guard guard = queue.acquire();
+        WaitingQueue::Guard guard = queue.acquire(is_pshared);
         guard.template pending_count<role>()++;
 
         // Use atomic operation to guarantee the total order of the operations
@@ -450,7 +454,7 @@ class RwLock {
         {
           // Similarly, the unregister operation should also be an atomic
           // transaction.
-          WaitingQueue::Guard guard = queue.acquire();
+          WaitingQueue::Guard guard = queue.acquire(is_pshared);
           guard.pending_count<role>()--;
           // Clear the flag if we are the last reader. The flag must be
           // cleared otherwise operations like trylock may fail even though
@@ -500,7 +504,7 @@ class RwLock {
     WakeTarget status;
 
     {
-      WaitingQueue::Guard guard = queue.acquire();
+      WaitingQueue::Guard guard = queue.acquire(is_pshared);
       if (guard.pending_count<Role::Writer>() != 0) {
         guard.serialization<Role::Writer>()++;
         status = WakeTarget::Writers;

>From 52ee9b5e91ea387ce04b350788c47e6f3387dd11 Mon Sep 17 00:00:00 2001
From: Yifan Zhu <yifzhu at nvidia.com>
Date: Sun, 9 Jun 2024 01:59:18 -0700
Subject: [PATCH 26/26] fix yet another bug

---
 libc/src/__support/threads/linux/rwlock.h                 | 3 +--
 libc/test/integration/src/pthread/pthread_rwlock_test.cpp | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/threads/linux/rwlock.h b/libc/src/__support/threads/linux/rwlock.h
index 124b6e0cefcc4..991a7821b40d2 100644
--- a/libc/src/__support/threads/linux/rwlock.h
+++ b/libc/src/__support/threads/linux/rwlock.h
@@ -65,7 +65,7 @@ class WaitingQueue final : private RawMutex {
 
     LIBC_INLINE constexpr Guard(WaitingQueue &queue, bool is_pshared)
         : queue(queue), is_pshared(is_pshared) {
-      queue.lock();
+      queue.lock(cpp::nullopt, is_pshared);
     }
 
   public:
@@ -91,7 +91,6 @@ class WaitingQueue final : private RawMutex {
         reader_serialization(0), writer_serialization(0) {}
 
   LIBC_INLINE Guard acquire(bool is_pshared) {
-    this->lock();
     return Guard(*this, is_pshared);
   }
 
diff --git a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
index 01ad0f1bce748..5d0f2768e4e1f 100644
--- a/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
+++ b/libc/test/integration/src/pthread/pthread_rwlock_test.cpp
@@ -350,7 +350,7 @@ randomized_process_operation(SharedData &data,
     ASSERT_EQ(LIBC_NAMESPACE::pthread_create(
                   &i, nullptr,
                   [](void *arg) -> void * {
-                    for (int i = 0; i < 5; ++i)
+                    for (int i = 0; i < 10; ++i)
                       randomized_thread_operation(
                           reinterpret_cast<SharedData *>(arg));
                     return nullptr;



More information about the libc-commits mailing list