[libc-commits] [libc] [llvm] [libc][CndVar] reimplement conditional variable with FIFO ordering (PR #192748)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Tue Apr 21 13:58:51 PDT 2026


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/192748

>From e462c5865aa8aa10240b274281222c52f7d21269 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 17 Apr 2026 17:53:55 -0400
Subject: [PATCH 01/22] wip

---
 .codex                                        |   0
 libc/src/__support/threads/CMakeLists.txt     |  13 +-
 libc/src/__support/threads/CndVar.h           | 249 ++++++++++++++++--
 .../__support/threads/linux/CMakeLists.txt    |  16 --
 libc/src/__support/threads/linux/CndVar.cpp   | 106 --------
 libc/src/__support/threads/linux/barrier.cpp  |  15 +-
 libc/src/__support/threads/raw_mutex.h        |   1 +
 libc/src/__support/threads/unix_mutex.h       |   6 +
 libc/src/threads/linux/cnd_destroy.cpp        |   2 +-
 libc/src/threads/linux/cnd_init.cpp           |   5 +-
 libc/src/threads/linux/cnd_wait.cpp           |   3 +-
 11 files changed, 251 insertions(+), 165 deletions(-)
 create mode 100644 .codex
 delete mode 100644 libc/src/__support/threads/linux/CndVar.cpp

diff --git a/.codex b/.codex
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index 290c27fa46d4f..0623a8743ad7a 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -147,12 +147,17 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.callonce)
   )
 endif()
 
-if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.CndVar)
-  add_object_library(
+if(TARGET libc.src.__support.threads.futex_utils)
+  add_header_library(
     CndVar
-    ALIAS
+    HDRS
+      CndVar.h
     DEPENDS
-    .${LIBC_TARGET_OS}.CndVar
+      .futex_utils
+      .mutex
+      .raw_mutex
+      libc.hdr.stdint_proxy
+      libc.src.__support.CPP.mutex
   )
 endif()
 
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index a423b65bfb0b5..8d32f77b6df47 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -6,49 +6,248 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_CNDVAR_H
-#define LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_CNDVAR_H
+#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
+#define LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
 
 #include "hdr/stdint_proxy.h" // uint32_t
+#include "src/__support/CPP/mutex.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/threads/futex_utils.h"       // Futex
-#include "src/__support/threads/mutex.h"             // Mutex
-#include "src/__support/threads/raw_mutex.h"         // RawMutex
+#include "src/__support/threads/futex_utils.h" // Futex
+#include "src/__support/threads/mutex.h"       // Mutex
+#include "src/__support/threads/raw_mutex.h"   // RawMutex
+
+#ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
+#endif
 
 namespace LIBC_NAMESPACE_DECL {
 
 class CndVar {
-  enum CndWaiterStatus : uint32_t {
-    WS_Waiting = 0xE,
-    WS_Signalled = 0x5,
+  enum WaiterState : uint8_t {
+    // Initial state after entering the wait queue.
+    Waiting = 0,
+    // A signal has been received.
+    Signalled = 1,
+    // A cancellation has been requested.
+    Cancelled = 2,
+  };
+
+  struct WaiterHeader {
+    WaiterHeader *prev;
+    WaiterHeader *next;
+
+    LIBC_INLINE void ensure_queue_initialization() {
+      if (LIBC_UNLIKELY(prev == nullptr)) {
+        prev = next = this;
+      }
+    }
+
+    LIBC_INLINE void push_back(WaiterHeader *waiter) {
+      ensure_queue_initialization();
+      waiter->next = this;
+      waiter->prev = prev;
+      waiter->next->prev = waiter;
+      waiter->prev->next = waiter;
+    }
+
+    LIBC_INLINE static void remove(WaiterHeader *waiter) {
+      waiter->next->prev = waiter->prev;
+      waiter->prev->next = waiter->next;
+      waiter->prev = waiter->next = waiter;
+    }
+
+    LIBC_INLINE WaiterHeader *pop_front() {
+      ensure_queue_initialization();
+      if (next == this)
+        return nullptr;
+      WaiterHeader *first = next;
+      remove(first);
+      return first;
+    }
   };
+  struct CndWaiter : WaiterHeader {
+    cpp::Atomic<Futex *> sender_futex;
+    RawMutex barrier;
+    cpp::Atomic<uint8_t> state;
+
+    LIBC_INLINE CndWaiter()
+        : WaiterHeader{}, sender_futex(nullptr), barrier{}, state{Waiting} {
+      // this lock should always succeed as no contention is possible
+      (void)barrier.try_lock();
+    }
 
-  struct CndWaiter {
-    Futex futex_word = WS_Waiting;
-    CndWaiter *next = nullptr;
+    LIBC_INLINE void confirm_cancellation() {
+      Futex *sender = sender_futex.load();
+      if (sender && sender->fetch_sub(1) == 1)
+        sender->notify_one();
+    }
   };
 
-  CndWaiter *waitq_front;
-  CndWaiter *waitq_back;
-  RawMutex qmtx;
+  union {
+    struct {
+      RawMutex queue_lock;
+      WaiterHeader waiter_queue;
+    };
+    struct {
+      Futex shared_futex;
+      cpp::Atomic<size_t> shared_waiters;
+    };
+  };
 
 public:
-  LIBC_INLINE static int init(CndVar *cv) {
-    cv->waitq_front = cv->waitq_back = nullptr;
-    RawMutex::init(&cv->qmtx);
-    return 0;
+  enum class CndVarResult {
+    Success,
+    MutexError,
+    Timeout,
+  };
+
+  using Timeout = internal::AbsTimeout;
+
+  LIBC_INLINE constexpr CndVar() : queue_lock{}, waiter_queue{} {}
+
+  LIBC_INLINE void reset() {
+    queue_lock.reset();
+    waiter_queue.prev = nullptr;
+    waiter_queue.next = nullptr;
+  }
+
+  // TODO: register callback for pthread cancellation
+  LIBC_INLINE CndVarResult wait(Mutex *mutex,
+                                cpp::optional<Timeout> timeout = cpp::nullopt,
+                                bool is_shared = false) {
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+    if (timeout)
+      ensure_monotonicity(*timeout);
+#endif
+
+    if (is_shared) {
+      shared_waiters.fetch_add(1);
+      FutexWordType old_val = shared_futex.load();
+      mutex->unlock();
+      ErrorOr<int> result =
+          shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
+      shared_waiters.fetch_sub(1);
+      MutexError mutex_result = mutex->lock();
+      if (!result.has_value() && result.error() == ETIMEDOUT)
+        return CndVarResult::Timeout;
+      return mutex_result == MutexError::NONE ? CndVarResult::Success
+                                              : CndVarResult::MutexError;
+    }
+
+    CndWaiter waiter{};
+    // Register the waiter to the queue.
+    {
+      cpp::lock_guard lock(queue_lock);
+      waiter_queue.push_back(&waiter);
+    }
+
+    // Unlock the mutex and wait for the signal.
+    mutex->unlock();
+    bool locked = waiter.barrier.lock(timeout, /*is_shared=*/false);
+
+    // if we wake up and find that we are still waiting, this means
+    // timeout has been reached.
+    uint8_t old_state = Waiting;
+    if (waiter.state.compare_exchange_strong(old_state, Cancelled,
+                                             cpp::MemoryOrder::ACQ_REL)) {
+      // the timeout was reached before we consumed a signal.
+      {
+        cpp::lock_guard lock(queue_lock);
+        WaiterHeader::remove(&waiter);
+      }
+      waiter.confirm_cancellation();
+    } else if (!locked) {
+      // Whenever a signal is already consumed, we compete for the mutex
+      // in the FIFO order of the queue.
+      waiter.barrier.lock();
+    }
+
+    MutexError mutex_result = mutex->lock();
+    if (waiter.next != &waiter) {
+      auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
+      WaiterHeader::remove(&waiter);
+      auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
+      auto &mutex_futex = mutex->get_raw_futex();
+      FutexWordType prev = next_barrier_futex.exchange(
+          RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
+      if (prev == RawMutex::IN_CONTENTION)
+        if (!mutex->can_be_requeued() ||
+            !next_barrier_futex
+                 .requeue_to(mutex_futex, cpp::nullopt, 0, 1,
+                             /*is_shared=*/false)
+                 .has_value())
+          next_waiter->barrier.wake(/*is_shared=*/false);
+    }
+    if (mutex_result != MutexError::NONE)
+      return CndVarResult::MutexError;
+    return old_state == Waiting ? CndVarResult::Timeout : CndVarResult::Success;
   }
 
-  LIBC_INLINE static void destroy(CndVar *cv) {
-    cv->waitq_front = cv->waitq_back = nullptr;
+private:
+  LIBC_INLINE void notify(bool broadcast, bool is_shared = false) {
+    if (is_shared) {
+      if (shared_waiters.load() == 0)
+        return;
+      shared_futex.fetch_add(1);
+      if (broadcast)
+        shared_futex.notify_all();
+      else
+        shared_futex.notify_one();
+      return;
+    }
+
+    Futex sender_futex{0};
+    auto wait_unregisteration_finish = [&]() {
+      constexpr size_t LIMIT = 100;
+      for (size_t i = 0; i < LIMIT; ++i) {
+        if (sender_futex.load(cpp::MemoryOrder::RELAXED) == 0)
+          break;
+        sleep_briefly();
+      }
+      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED))
+        sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
+    };
+    CndWaiter *head = nullptr;
+    CndWaiter *cursor = nullptr;
+    size_t limit = broadcast ? cpp::numeric_limits<size_t>::max() : 1;
+    {
+      cpp::lock_guard lock(queue_lock);
+      for (cursor = static_cast<CndWaiter *>(waiter_queue.next);
+           cursor != &waiter_queue;
+           cursor = static_cast<CndWaiter *>(cursor->next)) {
+        if (limit == 0)
+          break;
+        uint8_t expected = Waiting;
+        if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
+          sender_futex.fetch_add(1);
+          cursor->sender_futex.store(&sender_futex);
+          continue;
+        }
+        if (!head)
+          head = cursor;
+        limit--;
+      }
+      // remove everything before cursor
+      auto removed_head = waiter_queue.next;
+      auto removed_tail = cursor->prev;
+      waiter_queue.next = cursor;
+      cursor->prev = &waiter_queue;
+      removed_tail->next = removed_head;
+      removed_head->prev = removed_tail;
+    }
+    wait_unregisteration_finish();
+    if (head)
+      head->barrier.unlock();
   }
 
-  // Returns 0 on success, -1 on error.
-  int wait(Mutex *m);
-  void notify_one();
-  void broadcast();
+public:
+  LIBC_INLINE void notify_one(bool is_shared = false) { notify(1, is_shared); }
+
+  LIBC_INLINE void broadcast(bool is_shared = false) {
+    notify(cpp::numeric_limits<size_t>::max(), is_shared);
+  }
 };
 
 } // namespace LIBC_NAMESPACE_DECL
 
-#endif // LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_CNDVAR_H
+#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 5ada10ed04b88..bb19588884087 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -65,22 +65,6 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
-add_object_library(
-  CndVar
-  SRCS
-    CndVar.cpp
-  HDRS
-    ../CndVar.h
-  DEPENDS
-    libc.hdr.stdint_proxy
-    libc.include.sys_syscall
-    libc.src.__support.OSUtil.osutil
-    libc.src.__support.threads.linux.futex_word_type
-    libc.src.__support.threads.mutex
-    libc.src.__support.threads.raw_mutex
-    libc.src.__support.CPP.mutex
-)
-
 add_object_library(
   barrier
   HDRS
diff --git a/libc/src/__support/threads/linux/CndVar.cpp b/libc/src/__support/threads/linux/CndVar.cpp
deleted file mode 100644
index 60424673e819c..0000000000000
--- a/libc/src/__support/threads/linux/CndVar.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- Utility condition variable class ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/threads/CndVar.h"
-#include "src/__support/CPP/mutex.h"
-#include "src/__support/OSUtil/syscall.h" // syscall_impl
-#include "src/__support/macros/config.h"
-#include "src/__support/threads/linux/futex_word.h" // FutexWordType
-#include "src/__support/threads/mutex.h"            // Mutex
-#include "src/__support/threads/raw_mutex.h"        // RawMutex
-
-#include <sys/syscall.h> // For syscall numbers.
-
-namespace LIBC_NAMESPACE_DECL {
-
-int CndVar::wait(Mutex *m) {
-  // The goal is to perform "unlock |m| and wait" in an
-  // atomic operation. However, it is not possible to do it
-  // in the true sense so we do it in spirit. Before unlocking
-  // |m|, a new waiter object is added to the waiter queue with
-  // the waiter queue locked. Iff a signalling thread signals
-  // the waiter before the waiter actually starts waiting, the
-  // wait operation will not begin at all and the waiter immediately
-  // returns.
-
-  CndWaiter waiter;
-  {
-    cpp::lock_guard ml(qmtx);
-    CndWaiter *old_back = nullptr;
-    if (waitq_front == nullptr) {
-      waitq_front = waitq_back = &waiter;
-    } else {
-      old_back = waitq_back;
-      waitq_back->next = &waiter;
-      waitq_back = &waiter;
-    }
-
-    if (m->unlock() != MutexError::NONE) {
-      // If we do not remove the queued up waiter before returning,
-      // then another thread can potentially signal a non-existing
-      // waiter. Note also that we do this with |qmtx| locked. This
-      // ensures that another thread will not signal the withdrawing
-      // waiter.
-      waitq_back = old_back;
-      if (waitq_back == nullptr)
-        waitq_front = nullptr;
-      else
-        waitq_back->next = nullptr;
-
-      return -1;
-    }
-  }
-
-  waiter.futex_word.wait(WS_Waiting, cpp::nullopt, true);
-
-  // At this point, if locking |m| fails, we can simply return as the
-  // queued up waiter would have been removed from the queue.
-  auto err = m->lock();
-  return err == MutexError::NONE ? 0 : -1;
-}
-
-void CndVar::notify_one() {
-  // We don't use an RAII locker in this method as we want to unlock
-  // |qmtx| and signal the waiter using a single FUTEX_WAKE_OP signal.
-  qmtx.lock();
-  if (waitq_front == nullptr)
-    qmtx.unlock();
-
-  CndWaiter *first = waitq_front;
-  waitq_front = waitq_front->next;
-  if (waitq_front == nullptr)
-    waitq_back = nullptr;
-
-  qmtx.reset();
-
-  // this is a special WAKE_OP, so we use syscall directly
-  LIBC_NAMESPACE::syscall_impl<long>(
-      FUTEX_SYSCALL_ID, &qmtx.get_raw_futex(), FUTEX_WAKE_OP, 1, 1,
-      &first->futex_word.val,
-      FUTEX_OP(FUTEX_OP_SET, WS_Signalled, FUTEX_OP_CMP_EQ, WS_Waiting));
-}
-
-void CndVar::broadcast() {
-  cpp::lock_guard ml(qmtx);
-  uint32_t dummy_futex_word;
-  CndWaiter *waiter = waitq_front;
-  waitq_front = waitq_back = nullptr;
-  while (waiter != nullptr) {
-    // FUTEX_WAKE_OP is used instead of just FUTEX_WAKE as it allows us to
-    // atomically update the waiter status to WS_Signalled before waking
-    // up the waiter. A dummy location is used for the other futex of
-    // FUTEX_WAKE_OP.
-    LIBC_NAMESPACE::syscall_impl<long>(
-        FUTEX_SYSCALL_ID, &dummy_futex_word, FUTEX_WAKE_OP, 1, 1,
-        &waiter->futex_word.val,
-        FUTEX_OP(FUTEX_OP_SET, WS_Signalled, FUTEX_OP_CMP_EQ, WS_Waiting));
-    waiter = waiter->next;
-  }
-}
-
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp
index cf7207b53094b..175652a70fac2 100644
--- a/libc/src/__support/threads/linux/barrier.cpp
+++ b/libc/src/__support/threads/linux/barrier.cpp
@@ -8,6 +8,7 @@
 
 #include "src/__support/threads/linux/barrier.h"
 #include "hdr/errno_macros.h"
+#include "src/__support/CPP/new.h"
 #include "src/__support/threads/CndVar.h"
 #include "src/__support/threads/mutex.h"
 
@@ -24,14 +25,8 @@ int Barrier::init(Barrier *b,
   b->waiting = 0;
   b->blocking = true;
 
-  int err;
-  err = CndVar::init(&b->entering);
-  if (err != 0)
-    return err;
-
-  err = CndVar::init(&b->exiting);
-  if (err != 0)
-    return err;
+  new (&b->entering) CndVar();
+  new (&b->exiting) CndVar();
 
   auto mutex_err = Mutex::init(&b->m, false, false, false, false);
   if (mutex_err != MutexError::NONE)
@@ -76,8 +71,8 @@ int Barrier::wait() {
 }
 
 int Barrier::destroy(Barrier *b) {
-  CndVar::destroy(&b->entering);
-  CndVar::destroy(&b->exiting);
+  b->entering.reset();
+  b->exiting.reset();
   Mutex::destroy(&b->m);
   return 0;
 }
diff --git a/libc/src/__support/threads/raw_mutex.h b/libc/src/__support/threads/raw_mutex.h
index 616e4129e87dd..12dec56084e0e 100644
--- a/libc/src/__support/threads/raw_mutex.h
+++ b/libc/src/__support/threads/raw_mutex.h
@@ -45,6 +45,7 @@ class RawMutex {
   LIBC_INLINE_VAR static constexpr FutexWordType UNLOCKED = 0b00;
   LIBC_INLINE_VAR static constexpr FutexWordType LOCKED = 0b01;
   LIBC_INLINE_VAR static constexpr FutexWordType IN_CONTENTION = 0b10;
+  friend class CndVar;
 
 private:
   LIBC_INLINE FutexWordType spin(unsigned spin_count) {
diff --git a/libc/src/__support/threads/unix_mutex.h b/libc/src/__support/threads/unix_mutex.h
index aaaa931efe310..ebe5de2c6e7a7 100644
--- a/libc/src/__support/threads/unix_mutex.h
+++ b/libc/src/__support/threads/unix_mutex.h
@@ -30,6 +30,12 @@ class Mutex final : private RawMutex {
   pid_t owner;
   unsigned long long lock_count;
 
+  friend class CndVar;
+
+  LIBC_INLINE bool can_be_requeued() const {
+    return !this->pshared && !this->robust;
+  }
+
 public:
   LIBC_INLINE constexpr Mutex(bool is_timed, bool is_recursive, bool is_robust,
                               bool is_pshared)
diff --git a/libc/src/threads/linux/cnd_destroy.cpp b/libc/src/threads/linux/cnd_destroy.cpp
index 963991bddfe45..e51728615da9c 100644
--- a/libc/src/threads/linux/cnd_destroy.cpp
+++ b/libc/src/threads/linux/cnd_destroy.cpp
@@ -19,7 +19,7 @@ static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(void, cnd_destroy, (cnd_t * cond)) {
   CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
-  CndVar::destroy(cndvar);
+  cndvar->reset();
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/threads/linux/cnd_init.cpp b/libc/src/threads/linux/cnd_init.cpp
index 478011a5255e8..a27c0a92a67db 100644
--- a/libc/src/threads/linux/cnd_init.cpp
+++ b/libc/src/threads/linux/cnd_init.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/threads/cnd_init.h"
+#include "src/__support/CPP/new.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/CndVar.h"
@@ -18,8 +19,8 @@ namespace LIBC_NAMESPACE_DECL {
 static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_init, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
-  return CndVar::init(cndvar) ? thrd_error : thrd_success;
+  new (cond) CndVar();
+  return thrd_success;
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/threads/linux/cnd_wait.cpp b/libc/src/threads/linux/cnd_wait.cpp
index 3633cc85277b9..da7d549af3ca2 100644
--- a/libc/src/threads/linux/cnd_wait.cpp
+++ b/libc/src/threads/linux/cnd_wait.cpp
@@ -21,7 +21,8 @@ static_assert(sizeof(CndVar) == sizeof(cnd_t));
 LLVM_LIBC_FUNCTION(int, cnd_wait, (cnd_t * cond, mtx_t *mtx)) {
   CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
   Mutex *mutex = reinterpret_cast<Mutex *>(mtx);
-  return cndvar->wait(mutex) ? thrd_error : thrd_success;
+  return cndvar->wait(mutex) == CndVar::CndVarResult::Success ? thrd_success
+                                                              : thrd_error;
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From cfc43823a1a72d88b39760836d57b5c4083d44b2 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 20:41:23 -0400
Subject: [PATCH 02/22] comments

---
 libc/src/__support/threads/CndVar.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 8d32f77b6df47..e8dee53f80292 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -143,6 +143,8 @@ class CndVar {
 
     // Unlock the mutex and wait for the signal.
     mutex->unlock();
+    // Notice that lock is already initialized as lock. We abuse the LOCKED
+    // state to indicate that the waiter is pending.
     bool locked = waiter.barrier.lock(timeout, /*is_shared=*/false);
 
     // if we wake up and find that we are still waiting, this means
@@ -158,7 +160,9 @@ class CndVar {
       waiter.confirm_cancellation();
     } else if (!locked) {
       // Whenever a signal is already consumed, we compete for the mutex
-      // in the FIFO order of the queue.
+      // in the FIFO order of the queue. We only relock if we previously
+      // woke up due to a timeout. Otherwise, it means that our turn has
+      // come, so we don't need to relock.
       waiter.barrier.lock();
     }
 
@@ -168,12 +172,15 @@ class CndVar {
       WaiterHeader::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
       auto &mutex_futex = mutex->get_raw_futex();
+      // the following is basically an inlined version of mutex::unlock
+      // but with requeue instead of wake if it is possible.
       FutexWordType prev = next_barrier_futex.exchange(
           RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
       if (prev == RawMutex::IN_CONTENTION)
         if (!mutex->can_be_requeued() ||
             !next_barrier_futex
-                 .requeue_to(mutex_futex, cpp::nullopt, 0, 1,
+                 .requeue_to(mutex_futex, cpp::nullopt, /*wake_limit=*/0,
+                             /*requeue_limit=*/1,
                              /*is_shared=*/false)
                  .has_value())
           next_waiter->barrier.wake(/*is_shared=*/false);

>From d3ac8a9541035e96c3302e430361e2cc6782292b Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:04:22 -0400
Subject: [PATCH 03/22] fix

---
 libc/src/__support/threads/CndVar.h | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index e8dee53f80292..77958d8467da6 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -37,9 +37,8 @@ class CndVar {
     WaiterHeader *next;
 
     LIBC_INLINE void ensure_queue_initialization() {
-      if (LIBC_UNLIKELY(prev == nullptr)) {
+      if (LIBC_UNLIKELY(prev == nullptr))
         prev = next = this;
-      }
     }
 
     LIBC_INLINE void push_back(WaiterHeader *waiter) {
@@ -143,7 +142,7 @@ class CndVar {
 
     // Unlock the mutex and wait for the signal.
     mutex->unlock();
-    // Notice that lock is already initialized as lock. We abuse the LOCKED
+    // Notice that lock is already initialized as LOCKED. We abuse the LOCKED
     // state to indicate that the waiter is pending.
     bool locked = waiter.barrier.lock(timeout, /*is_shared=*/false);
 
@@ -176,14 +175,21 @@ class CndVar {
       // but with requeue instead of wake if it is possible.
       FutexWordType prev = next_barrier_futex.exchange(
           RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
-      if (prev == RawMutex::IN_CONTENTION)
-        if (!mutex->can_be_requeued() ||
-            !next_barrier_futex
-                 .requeue_to(mutex_futex, cpp::nullopt, /*wake_limit=*/0,
-                             /*requeue_limit=*/1,
-                             /*is_shared=*/false)
-                 .has_value())
+      if (prev == RawMutex::IN_CONTENTION) {
+        if (mutex->can_be_requeued()) {
+          ErrorOr<int> res = next_barrier_futex.requeue_to(
+              mutex_futex, cpp::nullopt, /*wake_limit=*/0,
+              /*requeue_limit=*/1,
+              /*is_shared=*/false);
+          if (!res.has_value()) // cannot requeue on this system
+            next_waiter->barrier.wake(/*is_shared=*/false);
+          else if (res.value() >
+                   0) // requeued: mark the mutex contended so unlock wakes it
+            mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
+        } else { // cannot requeue under special lock mode
           next_waiter->barrier.wake(/*is_shared=*/false);
+        }
+      }
     }
     if (mutex_result != MutexError::NONE)
       return CndVarResult::MutexError;

>From 694723887f790965ddf1be6835c6acf52b703233 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:10:45 -0400
Subject: [PATCH 04/22] fix

---
 libc/src/__support/threads/CndVar.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 77958d8467da6..fc16119da0ae8 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -225,6 +225,9 @@ class CndVar {
     size_t limit = broadcast ? cpp::numeric_limits<size_t>::max() : 1;
     {
       cpp::lock_guard lock(queue_lock);
+      waiter_queue.ensure_queue_initialization();
+      if (waiter_queue.next == &waiter_queue)
+        return;
       for (cursor = static_cast<CndWaiter *>(waiter_queue.next);
            cursor != &waiter_queue;
            cursor = static_cast<CndWaiter *>(cursor->next)) {
@@ -254,7 +257,9 @@ class CndVar {
   }
 
 public:
-  LIBC_INLINE void notify_one(bool is_shared = false) { notify(1, is_shared); }
+  LIBC_INLINE void notify_one(bool is_shared = false) {
+    notify(/*broadcast=*/false, is_shared);
+  }
 
   LIBC_INLINE void broadcast(bool is_shared = false) {
     notify(cpp::numeric_limits<size_t>::max(), is_shared);

>From 402f973092b80092a521302fb9e95a9e5db11187 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:11:06 -0400
Subject: [PATCH 05/22] fix

---
 .codex | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .codex

diff --git a/.codex b/.codex
deleted file mode 100644
index e69de29bb2d1d..0000000000000

>From d6d144c95ec327f881ab09a7a456fd3a3a26f148 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:15:53 -0400
Subject: [PATCH 06/22] fix

---
 libc/src/__support/threads/CndVar.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index fc16119da0ae8..fc5f2473b01ce 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -262,7 +262,7 @@ class CndVar {
   }
 
   LIBC_INLINE void broadcast(bool is_shared = false) {
-    notify(cpp::numeric_limits<size_t>::max(), is_shared);
+    notify(/*broadcast=*/true, is_shared);
   }
 };
 

>From d9e0f022f22016639926387068f6810607846ac1 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 23:02:05 -0400
Subject: [PATCH 07/22] adjust loop

---
 libc/src/__support/threads/CndVar.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index fc5f2473b01ce..96923381a68d1 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -23,6 +23,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 class CndVar {
+  LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
     Waiting = 0,
@@ -211,14 +212,16 @@ class CndVar {
 
     Futex sender_futex{0};
     auto wait_unregisteration_finish = [&]() {
-      constexpr size_t LIMIT = 100;
-      for (size_t i = 0; i < LIMIT; ++i) {
-        if (sender_futex.load(cpp::MemoryOrder::RELAXED) == 0)
-          break;
+      size_t spin = 0;
+      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
+        if (spin > SPIN_LIMIT) {
+          sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
+          spin = 0;
+          continue;
+        }
         sleep_briefly();
+        spin++;
       }
-      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED))
-        sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
     };
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;

>From a982c95bb7f2287d36212c65cc0179e435c6b62a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 23:10:38 -0400
Subject: [PATCH 08/22] adjust loop again

---
 libc/src/__support/threads/CndVar.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 96923381a68d1..6e238424101e5 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -24,6 +24,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 class CndVar {
   LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
+  LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
     Waiting = 0,
@@ -78,8 +79,11 @@ class CndVar {
 
     LIBC_INLINE void confirm_cancellation() {
       Futex *sender = sender_futex.load();
-      if (sender && sender->fetch_sub(1) == 1)
-        sender->notify_one();
+      if (sender) {
+        FutexWordType res = sender->fetch_sub(CANCEL_STEP);
+        if (res <= CANCEL_STEP + 1 && (res & 1) != 0)
+          sender->notify_one();
+      }
     }
   };
 
@@ -215,7 +219,9 @@ class CndVar {
       size_t spin = 0;
       while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
         if (spin > SPIN_LIMIT) {
+          sender_futex.fetch_add(1);
           sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
+          sender_futex.fetch_sub(1);
           spin = 0;
           continue;
         }
@@ -238,7 +244,7 @@ class CndVar {
           break;
         uint8_t expected = Waiting;
         if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
-          sender_futex.fetch_add(1);
+          sender_futex.fetch_add(CANCEL_STEP);
           cursor->sender_futex.store(&sender_futex);
           continue;
         }

>From 8faff0489475fdcdda1c04efb8eaa699bb5989b7 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 23:12:50 -0400
Subject: [PATCH 09/22] adjust loop again

---
 libc/src/__support/threads/CndVar.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 6e238424101e5..9f00b380ccc2c 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -219,7 +219,7 @@ class CndVar {
       size_t spin = 0;
       while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
         if (spin > SPIN_LIMIT) {
-          sender_futex.fetch_add(1);
+          remaining = sender_futex.fetch_add(1) + 1;
           sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
           sender_futex.fetch_sub(1);
           spin = 0;

>From 079f1a80f37aaf873487a8b61f61c6ab438f290f Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:11:31 -0400
Subject: [PATCH 10/22] change layout

---
 libc/src/__support/threads/CndVar.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 9f00b380ccc2c..1d606268b75ae 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -89,12 +89,12 @@ class CndVar {
 
   union {
     struct {
-      RawMutex queue_lock;
       WaiterHeader waiter_queue;
+      RawMutex queue_lock;
     };
     struct {
-      Futex shared_futex;
       cpp::Atomic<size_t> shared_waiters;
+      Futex shared_futex;
     };
   };
 
@@ -107,7 +107,7 @@ class CndVar {
 
   using Timeout = internal::AbsTimeout;
 
-  LIBC_INLINE constexpr CndVar() : queue_lock{}, waiter_queue{} {}
+  LIBC_INLINE constexpr CndVar() : waiter_queue{}, queue_lock{} {}
 
   LIBC_INLINE void reset() {
     queue_lock.reset();

>From 7d995b195ac0c4cc38fe31bfe697717b38ad94fc Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:37:06 -0400
Subject: [PATCH 11/22] separate private CndVar and shared version

---
 libc/include/llvm-libc-types/__barrier_type.h |   4 +-
 libc/src/__support/threads/CndVar.h           | 174 +++++++++++++-----
 libc/src/__support/threads/linux/barrier.cpp  |   7 +-
 libc/src/__support/threads/linux/barrier.h    |   4 +-
 libc/src/__support/threads/raw_mutex.h        |   2 +-
 libc/src/__support/threads/unix_mutex.h       |   2 +-
 libc/src/threads/linux/cnd_broadcast.cpp      |   4 +-
 libc/src/threads/linux/cnd_destroy.cpp        |   4 +-
 libc/src/threads/linux/cnd_init.cpp           |   4 +-
 libc/src/threads/linux/cnd_signal.cpp         |   4 +-
 libc/src/threads/linux/cnd_wait.cpp           |   8 +-
 11 files changed, 145 insertions(+), 72 deletions(-)

diff --git a/libc/include/llvm-libc-types/__barrier_type.h b/libc/include/llvm-libc-types/__barrier_type.h
index 5752f832f04b9..627f87767f20b 100644
--- a/libc/include/llvm-libc-types/__barrier_type.h
+++ b/libc/include/llvm-libc-types/__barrier_type.h
@@ -15,8 +15,8 @@ typedef struct __attribute__((aligned(8 /* alignof (Barrier) */))) {
   unsigned expected;
   unsigned waiting;
   bool blocking;
-  char entering[24 /* sizeof (CndVar) */];
-  char exiting[24 /* sizeof (CndVar) */];
+  char entering[32 /* sizeof (CndVar) */];
+  char exiting[32 /* sizeof (CndVar) */];
   char mutex[24 /* sizeof (Mutex) */];
 } __barrier_type;
 
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 1d606268b75ae..8c3c673816b9a 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -11,6 +11,7 @@
 
 #include "hdr/stdint_proxy.h" // uint32_t
 #include "src/__support/CPP/mutex.h"
+#include "src/__support/CPP/new.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/futex_utils.h" // Futex
 #include "src/__support/threads/mutex.h"       // Mutex
@@ -22,9 +23,16 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-class CndVar {
+enum class CndVarResult {
+  Success,
+  MutexError,
+  Timeout,
+};
+
+class PrivateCndVar {
   LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
   LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
+
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
     Waiting = 0,
@@ -87,27 +95,22 @@ class CndVar {
     }
   };
 
-  union {
-    struct {
-      WaiterHeader waiter_queue;
-      RawMutex queue_lock;
-    };
-    struct {
-      cpp::Atomic<size_t> shared_waiters;
-      Futex shared_futex;
-    };
-  };
+  /*
+  Layout:
 
-public:
-  enum class CndVarResult {
-    Success,
-    MutexError,
-    Timeout,
-  };
+  struct {
+    void * __wait_queue_prev;
+    void * __wait_queue_next;
+    __futex_word __futex;
+  }
+  */
+  WaiterHeader waiter_queue;
+  RawMutex queue_lock;
 
+public:
   using Timeout = internal::AbsTimeout;
 
-  LIBC_INLINE constexpr CndVar() : waiter_queue{}, queue_lock{} {}
+  LIBC_INLINE constexpr PrivateCndVar() : waiter_queue{}, queue_lock{} {}
 
   LIBC_INLINE void reset() {
     queue_lock.reset();
@@ -117,27 +120,12 @@ class CndVar {
 
   // TODO: register callback for pthread cancellation
   LIBC_INLINE CndVarResult wait(Mutex *mutex,
-                                cpp::optional<Timeout> timeout = cpp::nullopt,
-                                bool is_shared = false) {
+                                cpp::optional<Timeout> timeout = cpp::nullopt) {
 #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
     if (timeout)
       ensure_monotonicity(*timeout);
 #endif
 
-    if (is_shared) {
-      shared_waiters.fetch_add(1);
-      FutexWordType old_val = shared_futex.load();
-      mutex->unlock();
-      ErrorOr<int> result =
-          shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
-      shared_waiters.fetch_sub(1);
-      MutexError mutex_result = mutex->lock();
-      if (!result.has_value() && result.error() == ETIMEDOUT)
-        return CndVarResult::Timeout;
-      return mutex_result == MutexError::NONE ? CndVarResult::Success
-                                              : CndVarResult::MutexError;
-    }
-
     CndWaiter waiter{};
     // Register the waiter to the queue.
     {
@@ -202,18 +190,7 @@ class CndVar {
   }
 
 private:
-  LIBC_INLINE void notify(bool broadcast, bool is_shared = false) {
-    if (is_shared) {
-      if (shared_waiters.load() == 0)
-        return;
-      shared_futex.fetch_add(1);
-      if (broadcast)
-        shared_futex.notify_all();
-      else
-        shared_futex.notify_one();
-      return;
-    }
-
+  LIBC_INLINE void notify(size_t limit) {
     Futex sender_futex{0};
     auto wait_unregisteration_finish = [&]() {
       size_t spin = 0;
@@ -231,7 +208,6 @@ class CndVar {
     };
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;
-    size_t limit = broadcast ? cpp::numeric_limits<size_t>::max() : 1;
     {
       cpp::lock_guard lock(queue_lock);
       waiter_queue.ensure_queue_initialization();
@@ -266,12 +242,108 @@ class CndVar {
   }
 
 public:
-  LIBC_INLINE void notify_one(bool is_shared = false) {
-    notify(/*broadcast=*/false, is_shared);
+  LIBC_INLINE void notify_one() { notify(1); }
+  LIBC_INLINE void broadcast() { notify(cpp::numeric_limits<size_t>::max()); }
+};
+
+class SharedCndVar {
+  /*
+  Layout:
+    struct {
+      cpp::Atomic<size_t> shared_waiters;
+      Futex shared_futex;
+    };
+  */
+  cpp::Atomic<size_t> shared_waiters;
+  Futex shared_futex;
+
+public:
+  using Timeout = internal::AbsTimeout;
+
+  LIBC_INLINE constexpr SharedCndVar() : shared_waiters(0), shared_futex{0} {}
+
+  LIBC_INLINE void reset() {
+    shared_waiters.store(0);
+    shared_futex.store(0);
+  }
+
+  // TODO: register callback for pthread cancellation
+  LIBC_INLINE CndVarResult wait(Mutex *mutex,
+                                cpp::optional<Timeout> timeout = cpp::nullopt) {
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+    if (timeout)
+      ensure_monotonicity(*timeout);
+#endif
+
+    shared_waiters.fetch_add(1);
+    FutexWordType old_val = shared_futex.load();
+    mutex->unlock();
+    ErrorOr<int> result =
+        shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
+    shared_waiters.fetch_sub(1);
+    MutexError mutex_result = mutex->lock();
+    if (!result.has_value() && result.error() == ETIMEDOUT)
+      return CndVarResult::Timeout;
+    return mutex_result == MutexError::NONE ? CndVarResult::Success
+                                            : CndVarResult::MutexError;
   }
 
-  LIBC_INLINE void broadcast(bool is_shared = false) {
-    notify(/*broadcast=*/true, is_shared);
+private:
+  LIBC_INLINE void notify(bool broadcast) {
+    if (shared_waiters.load() == 0)
+      return;
+    shared_futex.fetch_add(1);
+    if (broadcast)
+      shared_futex.notify_all();
+    else
+      shared_futex.notify_one();
+    return;
+  }
+
+public:
+  LIBC_INLINE void notify_one() { notify(/*broadcast=*/false); }
+  LIBC_INLINE void broadcast() { notify(/*broadcast=*/true); }
+};
+
+class CndVar {
+  union {
+    PrivateCndVar private_cnd_var{};
+    SharedCndVar shared_cnd_var;
+  } storage;
+  bool is_shared;
+
+public:
+  using Timeout = internal::AbsTimeout;
+  LIBC_INLINE constexpr CndVar(bool is_shared) : is_shared(is_shared) {
+    if (is_shared)
+      new (&storage.shared_cnd_var) SharedCndVar();
+    else
+      new (&storage.private_cnd_var) PrivateCndVar();
+  }
+  LIBC_INLINE void reset() {
+    if (is_shared)
+      storage.shared_cnd_var.reset();
+    else
+      storage.private_cnd_var.reset();
+  }
+  LIBC_INLINE CndVarResult wait(Mutex *mutex,
+                                cpp::optional<Timeout> timeout = cpp::nullopt) {
+    if (is_shared)
+      return storage.shared_cnd_var.wait(mutex, timeout);
+    else
+      return storage.private_cnd_var.wait(mutex, timeout);
+  }
+  LIBC_INLINE void notify_one() {
+    if (is_shared)
+      storage.shared_cnd_var.notify_one();
+    else
+      storage.private_cnd_var.notify_one();
+  }
+  LIBC_INLINE void broadcast() {
+    if (is_shared)
+      storage.shared_cnd_var.broadcast();
+    else
+      storage.private_cnd_var.broadcast();
   }
 };
 
diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp
index 175652a70fac2..80f25f8ac7a21 100644
--- a/libc/src/__support/threads/linux/barrier.cpp
+++ b/libc/src/__support/threads/linux/barrier.cpp
@@ -25,10 +25,11 @@ int Barrier::init(Barrier *b,
   b->waiting = 0;
   b->blocking = true;
 
-  new (&b->entering) CndVar();
-  new (&b->exiting) CndVar();
+  new (&b->entering) CndVar(attr ? attr->pshared : false);
+  new (&b->exiting) CndVar(attr ? attr->pshared : false);
 
-  auto mutex_err = Mutex::init(&b->m, false, false, false, false);
+  auto mutex_err = Mutex::init(&b->m, false, false, false,
+                               /*pshared=*/attr ? attr->pshared : false);
   if (mutex_err != MutexError::NONE)
     return EAGAIN;
 
diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h
index a632aa45b2aa2..eb34e2e268e10 100644
--- a/libc/src/__support/threads/linux/barrier.h
+++ b/libc/src/__support/threads/linux/barrier.h
@@ -44,10 +44,10 @@ static_assert(alignof(Barrier) <= alignof(pthread_barrier_t),
               "The public pthread_barrier_t type has insufficient alignment "
               "for the internal barrier type.");
 
-static_assert(sizeof(CndVar) <= 24,
+static_assert(sizeof(CndVar) <= 32,
               "CndVar size exceeds the size in __barrier_type.h");
 
-static_assert(sizeof(Mutex) <= 24,
+static_assert(sizeof(Mutex) <= 32,
               "Mutex size exceeds the size in __barrier_type.h");
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/raw_mutex.h b/libc/src/__support/threads/raw_mutex.h
index 12dec56084e0e..7495374513a43 100644
--- a/libc/src/__support/threads/raw_mutex.h
+++ b/libc/src/__support/threads/raw_mutex.h
@@ -45,7 +45,7 @@ class RawMutex {
   LIBC_INLINE_VAR static constexpr FutexWordType UNLOCKED = 0b00;
   LIBC_INLINE_VAR static constexpr FutexWordType LOCKED = 0b01;
   LIBC_INLINE_VAR static constexpr FutexWordType IN_CONTENTION = 0b10;
-  friend class CndVar;
+  friend class PrivateCndVar;
 
 private:
   LIBC_INLINE FutexWordType spin(unsigned spin_count) {
diff --git a/libc/src/__support/threads/unix_mutex.h b/libc/src/__support/threads/unix_mutex.h
index ebe5de2c6e7a7..501a330349ca5 100644
--- a/libc/src/__support/threads/unix_mutex.h
+++ b/libc/src/__support/threads/unix_mutex.h
@@ -30,7 +30,7 @@ class Mutex final : private RawMutex {
   pid_t owner;
   unsigned long long lock_count;
 
-  friend class CndVar;
+  friend class PrivateCndVar;
 
   LIBC_INLINE bool can_be_requeued() const {
     return !this->pshared && !this->robust;
diff --git a/libc/src/threads/linux/cnd_broadcast.cpp b/libc/src/threads/linux/cnd_broadcast.cpp
index 5c5187461bbed..280b666041db2 100644
--- a/libc/src/threads/linux/cnd_broadcast.cpp
+++ b/libc/src/threads/linux/cnd_broadcast.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_broadcast, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   cndvar->broadcast();
   return thrd_success;
 }
diff --git a/libc/src/threads/linux/cnd_destroy.cpp b/libc/src/threads/linux/cnd_destroy.cpp
index e51728615da9c..298250d69a235 100644
--- a/libc/src/threads/linux/cnd_destroy.cpp
+++ b/libc/src/threads/linux/cnd_destroy.cpp
@@ -15,10 +15,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(void, cnd_destroy, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   cndvar->reset();
 }
 
diff --git a/libc/src/threads/linux/cnd_init.cpp b/libc/src/threads/linux/cnd_init.cpp
index a27c0a92a67db..33fa85a00cc26 100644
--- a/libc/src/threads/linux/cnd_init.cpp
+++ b/libc/src/threads/linux/cnd_init.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_init, (cnd_t * cond)) {
-  new (cond) CndVar();
+  new (cond) PrivateCndVar();
   return thrd_success;
 }
 
diff --git a/libc/src/threads/linux/cnd_signal.cpp b/libc/src/threads/linux/cnd_signal.cpp
index 0d218405d3ac5..53cd0bb445f15 100644
--- a/libc/src/threads/linux/cnd_signal.cpp
+++ b/libc/src/threads/linux/cnd_signal.cpp
@@ -15,10 +15,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_signal, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   cndvar->notify_one();
   return thrd_success;
 }
diff --git a/libc/src/threads/linux/cnd_wait.cpp b/libc/src/threads/linux/cnd_wait.cpp
index da7d549af3ca2..2c2e9ad11030c 100644
--- a/libc/src/threads/linux/cnd_wait.cpp
+++ b/libc/src/threads/linux/cnd_wait.cpp
@@ -16,13 +16,13 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_wait, (cnd_t * cond, mtx_t *mtx)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   Mutex *mutex = reinterpret_cast<Mutex *>(mtx);
-  return cndvar->wait(mutex) == CndVar::CndVarResult::Success ? thrd_success
-                                                              : thrd_error;
+  return cndvar->wait(mutex) == CndVarResult::Success ? thrd_success
+                                                      : thrd_error;
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From c06b841627c6a9456e40402232f383502f3c4f06 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:49:49 -0400
Subject: [PATCH 12/22] separate cancellation barrier into its own class

---
 libc/src/__support/threads/CndVar.h | 72 ++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 8c3c673816b9a..675095ef7fe7a 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -30,8 +30,44 @@ enum class CndVarResult {
 };
 
 class PrivateCndVar {
-  LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
-  LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
+  class CancellationBarrier {
+    LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
+    LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
+    LIBC_INLINE_VAR static constexpr size_t SLEEPING_BIT = 1;
+
+    // LSB indicates whether the waiter is in sleeping state.
+    Futex futex;
+
+  public:
+    LIBC_INLINE CancellationBarrier() : futex(0) {}
+    // Add one more notification request.
+    LIBC_INLINE void add_one() {
+      futex.fetch_add(CANCEL_STEP, cpp::MemoryOrder::RELAXED);
+    }
+    // Send notification to one waiter.
+    LIBC_INLINE void notify() {
+      FutexWordType res = futex.fetch_sub(CANCEL_STEP);
+      // Only need a syscall if the waiter is asleep and we are the last one.
+      if (res <= (CANCEL_STEP | SLEEPING_BIT) && (res & SLEEPING_BIT) != 0)
+        futex.notify_one();
+    }
+    LIBC_INLINE void wait() {
+      size_t spin = 0;
+      while (auto remaining = futex.load(cpp::MemoryOrder::RELAXED)) {
+        if (spin > SPIN_LIMIT) {
+          // Set LSB to 1 to indicate that the waiter is entering sleeping
+          // state.
+          remaining = futex.fetch_add(1) + 1;
+          futex.wait(remaining, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
+          futex.fetch_sub(1);
+          spin = 0;
+          continue;
+        }
+        sleep_briefly();
+        spin++;
+      }
+    }
+  };
 
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
@@ -75,7 +111,7 @@ class PrivateCndVar {
     }
   };
   struct CndWaiter : WaiterHeader {
-    cpp::Atomic<Futex *> sender_futex;
+    cpp::Atomic<CancellationBarrier *> sender_futex;
     RawMutex barrier;
     cpp::Atomic<uint8_t> state;
 
@@ -86,12 +122,8 @@ class PrivateCndVar {
     }
 
     LIBC_INLINE void confirm_cancellation() {
-      Futex *sender = sender_futex.load();
-      if (sender) {
-        FutexWordType res = sender->fetch_sub(CANCEL_STEP);
-        if (res <= CANCEL_STEP + 1 && (res & 1) != 0)
-          sender->notify_one();
-      }
+      if (CancellationBarrier *sender = sender_futex.load())
+        sender->notify();
     }
   };
 
@@ -191,21 +223,7 @@ class PrivateCndVar {
 
 private:
   LIBC_INLINE void notify(size_t limit) {
-    Futex sender_futex{0};
-    auto wait_unregisteration_finish = [&]() {
-      size_t spin = 0;
-      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
-        if (spin > SPIN_LIMIT) {
-          remaining = sender_futex.fetch_add(1) + 1;
-          sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
-          sender_futex.fetch_sub(1);
-          spin = 0;
-          continue;
-        }
-        sleep_briefly();
-        spin++;
-      }
-    };
+    CancellationBarrier cancellation_barrier{};
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;
     {
@@ -220,8 +238,8 @@ class PrivateCndVar {
           break;
         uint8_t expected = Waiting;
         if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
-          sender_futex.fetch_add(CANCEL_STEP);
-          cursor->sender_futex.store(&sender_futex);
+          cancellation_barrier.add_one();
+          cursor->sender_futex.store(&cancellation_barrier);
           continue;
         }
         if (!head)
@@ -236,7 +254,7 @@ class PrivateCndVar {
       removed_tail->next = removed_head;
       removed_head->prev = removed_tail;
     }
-    wait_unregisteration_finish();
+    cancellation_barrier.wait();
     if (head)
       head->barrier.unlock();
   }

>From 4a66090b593502c27f4e4be8fa43f7f745f1b912 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:59:40 -0400
Subject: [PATCH 13/22] fix cancellation wake up condition

---
 libc/src/__support/threads/CndVar.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 675095ef7fe7a..43c2d9d0dc56f 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -191,7 +191,9 @@ class PrivateCndVar {
     }
 
     MutexError mutex_result = mutex->lock();
-    if (waiter.next != &waiter) {
+    // If we did consume the signal (old_state != Waiting) and there
+    // are other in the queue after us, we need to wake the next waiter.
+    if (old_state != Waiting && waiter.next != &waiter) {
       auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
       WaiterHeader::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();

>From f48f4e6428bda33e1ae0d1bea3af77bc0b99b8af Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 13:09:00 -0400
Subject: [PATCH 14/22] add stress test and fix bugs

---
 libc/src/__support/threads/CndVar.h           |  15 +-
 .../src/__support/threads/CMakeLists.txt      |  16 +++
 .../src/__support/threads/cndvar_test.cpp     | 129 ++++++++++++++++++
 3 files changed, 154 insertions(+), 6 deletions(-)
 create mode 100644 libc/test/integration/src/__support/threads/cndvar_test.cpp

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 43c2d9d0dc56f..b596f77f29aa8 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -191,9 +191,13 @@ class PrivateCndVar {
     }
 
     MutexError mutex_result = mutex->lock();
-    // If we did consume the signal (old_state != Waiting) and there
-    // are other in the queue after us, we need to wake the next waiter.
-    if (old_state != Waiting && waiter.next != &waiter) {
+    // We need to establish contention after lock, otherwise
+    // requeued thread may clear the contention bit even though
+    // there are still waiters behind it.
+    mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
+    // If others are queued after us, we need to wake the next waiter.
+    // If we cancelled, we should naturally have waiter.next == &waiter
+    if (waiter.next != &waiter) {
       auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
       WaiterHeader::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
@@ -202,6 +206,8 @@ class PrivateCndVar {
       // but with requeue instead of wake if it is possible.
       FutexWordType prev = next_barrier_futex.exchange(
           RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
+      // If the next waiter in the queue sleeps, it will establish contention on
+      // its own barrier.
       if (prev == RawMutex::IN_CONTENTION) {
         if (mutex->can_be_requeued()) {
           ErrorOr<int> res = next_barrier_futex.requeue_to(
@@ -210,9 +216,6 @@ class PrivateCndVar {
               /*is_shared=*/false);
           if (!res.has_value()) // cannot requeue on this system
             next_waiter->barrier.wake(/*is_shared=*/false);
-          else if (res.value() >
-                   0) // requeue succeeded, the lock needs to be waked up
-            mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
         } else { // cannot requeue under special lock mode
           next_waiter->barrier.wake(/*is_shared=*/false);
         }
diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt
index 3ce9742cc2712..a89a29d053b5f 100644
--- a/libc/test/integration/src/__support/threads/CMakeLists.txt
+++ b/libc/test/integration/src/__support/threads/CMakeLists.txt
@@ -59,3 +59,19 @@ add_integration_test(
     libc.src.pthread.pthread_create
     libc.src.pthread.pthread_join
 )
+
+add_integration_test(
+  cndvar_test
+  SUITE
+    libc-support-threads-integration-tests
+  SRCS
+    cndvar_test.cpp
+  DEPENDS
+    libc.hdr.time_macros
+    libc.src.__support.threads.CndVar
+    libc.src.__support.threads.mutex
+    libc.src.__support.threads.sleep
+    libc.src.__support.time.clock_gettime
+    libc.src.threads.thrd_create
+    libc.src.threads.thrd_join
+)
diff --git a/libc/test/integration/src/__support/threads/cndvar_test.cpp b/libc/test/integration/src/__support/threads/cndvar_test.cpp
new file mode 100644
index 0000000000000..a1ee8ecc1dfe7
--- /dev/null
+++ b/libc/test/integration/src/__support/threads/cndvar_test.cpp
@@ -0,0 +1,129 @@
+//===-- Integration test for PrivateCndVar with C11 threads --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/time_macros.h"
+#include "src/__support/CPP/expected.h"
+#include "src/__support/threads/CndVar.h"
+#include "src/__support/threads/mutex.h"
+#include "src/__support/threads/mutex_common.h"
+#include "src/__support/threads/sleep.h"
+#include "src/__support/time/clock_gettime.h"
+#include "src/threads/thrd_create.h"
+#include "src/threads/thrd_join.h"
+#include "test/IntegrationTest/test.h"
+
+namespace {
+
+constexpr int THREAD_COUNT = 16;
+constexpr int NUM_ITERATIONS = 250;
+
+struct QueueState {
+  LIBC_NAMESPACE::PrivateCndVar cnd{};
+  LIBC_NAMESPACE::Mutex m{false, false, false, false};
+  size_t consumed = 0;
+  size_t produced = 0;
+  LIBC_NAMESPACE::cpp::Atomic<size_t> exited_consumers = 0;
+  bool use_broadcast;
+  bool allow_timeout;
+};
+
+void stress_test(bool use_broadcast, bool allow_timeout) {
+  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
+    QueueState state{};
+    state.use_broadcast = use_broadcast;
+    state.allow_timeout = allow_timeout;
+    constexpr size_t PRODUCER = THREAD_COUNT / 2;
+    constexpr size_t CONSUMER = THREAD_COUNT - PRODUCER;
+    constexpr size_t ITEMS_PER_PRODUCER = 200;
+    thrd_t producer_threads[PRODUCER];
+    thrd_t consumer_threads[CONSUMER];
+    using TimeoutOpt =
+        LIBC_NAMESPACE::cpp::optional<LIBC_NAMESPACE::PrivateCndVar::Timeout>;
+    for (size_t i = 0; i < CONSUMER; ++i)
+      ASSERT_EQ(
+          LIBC_NAMESPACE::thrd_create(
+              &consumer_threads[i],
+              [](void *arg) {
+                auto *state = static_cast<QueueState *>(arg);
+                state->m.lock();
+                while (state->consumed != PRODUCER * ITEMS_PER_PRODUCER) {
+                  TimeoutOpt timeout = LIBC_NAMESPACE::cpp::nullopt;
+                  if (state->allow_timeout) {
+                    timespec now{};
+                    LIBC_NAMESPACE::internal::clock_gettime(CLOCK_MONOTONIC,
+                                                            &now);
+                    size_t sleep_ns = 1000;
+                    now.tv_nsec += sleep_ns;
+                    if (now.tv_nsec >= 1'000'000'000) {
+                      now.tv_sec++;
+                      now.tv_nsec -= 1'000'000'000;
+                    }
+                    timeout = TimeoutOpt(
+                        LIBC_NAMESPACE::PrivateCndVar::Timeout::from_timespec(
+                            now,
+                            /*realtime=*/false)
+                            .value());
+                  }
+                  ASSERT_NE(state->cnd.wait(&state->m, timeout),
+                            LIBC_NAMESPACE::CndVarResult::MutexError);
+                  if (state->produced == 0)
+                    continue;
+                  state->produced--;
+                  state->consumed++;
+                }
+                state->m.unlock();
+                state->exited_consumers.fetch_add(1);
+                return 0;
+              },
+              &state),
+          int(thrd_success));
+    for (size_t i = 0; i < PRODUCER; ++i)
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_create(
+                    &producer_threads[i],
+                    [](void *arg) {
+                      auto *state = static_cast<QueueState *>(arg);
+                      for (size_t j = 0; j < ITEMS_PER_PRODUCER; ++j) {
+                        state->m.lock();
+                        state->produced++;
+                        if (state->use_broadcast)
+                          state->cnd.broadcast();
+                        else
+                          state->cnd.notify_one();
+                        state->m.unlock();
+                      }
+                      return 0;
+                    },
+                    &state),
+                int(thrd_success));
+
+    // join producers
+    for (size_t i = 0; i < PRODUCER; ++i)
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_join(producer_threads[i], nullptr),
+                int(thrd_success));
+    // keep signalling until all consumers have consumed all items
+    while (state.exited_consumers != CONSUMER) {
+      if (state.use_broadcast)
+        state.cnd.broadcast();
+      else
+        state.cnd.notify_one();
+    }
+    // join consumers
+    for (size_t i = 0; i < CONSUMER; ++i)
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_join(consumer_threads[i], nullptr),
+                int(thrd_success));
+  }
+}
+} // namespace
+
+TEST_MAIN() {
+  stress_test(false, false);
+  stress_test(true, false);
+  stress_test(false, true);
+  stress_test(true, true);
+  return 0;
+}

>From 488f63428bbb99c63bdc8e9761fbbb4d6e35968c Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 14:24:46 -0400
Subject: [PATCH 15/22] fix wrong barrier

---
 libc/src/__support/threads/CndVar.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index b596f77f29aa8..3136557165fa9 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -54,14 +54,14 @@ class PrivateCndVar {
     LIBC_INLINE void wait() {
       size_t spin = 0;
       while (auto remaining = futex.load(cpp::MemoryOrder::RELAXED)) {
-        if (spin > SPIN_LIMIT) {
-          // Set LSB to 1 to indicate that the waiter is entering sleeping
-          // state.
-          remaining = futex.fetch_add(1) + 1;
-          futex.wait(remaining, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
+        // Set LSB to 1 to indicate that the waiter is entering sleeping
+        // state.
+        FutexWordType new_val = remaining | SLEEPING_BIT;
+        if (spin > SPIN_LIMIT &&
+            futex.compare_exchange_strong(remaining, new_val)) {
+          futex.wait(new_val, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
           futex.fetch_sub(1);
           spin = 0;
-          continue;
         }
         sleep_briefly();
         spin++;

>From 92832889870b65dba8a64f1c5f58ad78fddc68ce Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 18:07:36 -0400
Subject: [PATCH 16/22] comments

---
 libc/src/__support/threads/CndVar.h | 37 +++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 3136557165fa9..273d3e8207470 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -24,12 +24,19 @@
 namespace LIBC_NAMESPACE_DECL {
 
 enum class CndVarResult {
+  // The waiter successfully received a signal.
   Success,
+  // Error occurs during mutex acquisition.
   MutexError,
+  // Timeout occurs.
   Timeout,
 };
 
 class PrivateCndVar {
+  // A single-waiter multiple-notifier barrier used to keep
+  // track of cancelling threads. We use this barrier to
+  // ensure that in-queue threads that have posted their cancellation
+  // requests have finished dequeuing themselves.
   class CancellationBarrier {
     LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
     LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
@@ -82,11 +89,13 @@ class PrivateCndVar {
     WaiterHeader *prev;
     WaiterHeader *next;
 
+    // We use a cyclic dummy node to avoid handling corner cases.
     LIBC_INLINE void ensure_queue_initialization() {
       if (LIBC_UNLIKELY(prev == nullptr))
         prev = next = this;
     }
 
+    // Assume `this` is the queue's dummy node. Push `waiter` to the back.
     LIBC_INLINE void push_back(WaiterHeader *waiter) {
       ensure_queue_initialization();
       waiter->next = this;
@@ -95,12 +104,15 @@ class PrivateCndVar {
       waiter->prev->next = waiter;
     }
 
+    // Remove `waiter` from the queue.
     LIBC_INLINE static void remove(WaiterHeader *waiter) {
       waiter->next->prev = waiter->prev;
       waiter->prev->next = waiter->next;
       waiter->prev = waiter->next = waiter;
     }
 
+    // Assume `this` the dummy node of queue. Pop the first waiter from the
+    // queue.
     LIBC_INLINE WaiterHeader *pop_front() {
       ensure_queue_initialization();
       if (next == this)
@@ -110,19 +122,22 @@ class PrivateCndVar {
       return first;
     }
   };
+
+  // This node will be on the per-thread stack.
   struct CndWaiter : WaiterHeader {
-    cpp::Atomic<CancellationBarrier *> sender_futex;
+    cpp::Atomic<CancellationBarrier *> cancellation_barrier;
     RawMutex barrier;
     cpp::Atomic<uint8_t> state;
 
     LIBC_INLINE CndWaiter()
-        : WaiterHeader{}, sender_futex(nullptr), barrier{}, state{Waiting} {
+        : WaiterHeader{}, cancellation_barrier(nullptr), barrier{},
+          state{Waiting} {
       // this lock should always success as no contention is possible
       (void)barrier.try_lock();
     }
 
     LIBC_INLINE void confirm_cancellation() {
-      if (CancellationBarrier *sender = sender_futex.load())
+      if (CancellationBarrier *sender = cancellation_barrier.load())
         sender->notify();
     }
   };
@@ -231,6 +246,13 @@ class PrivateCndVar {
     CancellationBarrier cancellation_barrier{};
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;
+    // Go through the queue, try send signal to waiters.
+    // 1. if signal is sent, we reduce the number of pending signals
+    // 2. if waiter cancelled before signal is sent, we add it
+    //    to cancellation barrier and continue
+    // Notice that cancelled sender will not continue before
+    // we release the queue lock, because they also need to
+    // acquire the lock and dequeue themselves.
     {
       cpp::lock_guard lock(queue_lock);
       waiter_queue.ensure_queue_initialization();
@@ -244,7 +266,7 @@ class PrivateCndVar {
         uint8_t expected = Waiting;
         if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
           cancellation_barrier.add_one();
-          cursor->sender_futex.store(&cancellation_barrier);
+          cursor->cancellation_barrier.store(&cancellation_barrier);
           continue;
         }
         if (!head)
@@ -259,7 +281,14 @@ class PrivateCndVar {
       removed_tail->next = removed_head;
       removed_head->prev = removed_tail;
     }
+    // We want to make sure the propagation queue contain only threads
+    // that have consumed the signal. So we wait until all cancelled
+    // finishing their dequeue operation.
     cancellation_barrier.wait();
+    // Start propagate notification to the first waiter in the queue.
+    // Waiters in the queue will acquire the lock in strict FIFO order:
+    // Only when the predecessor has acquired the lock can the successor
+    // be waken up to compete for the mutex.
     if (head)
       head->barrier.unlock();
   }

>From b113d6a2326152da09b1797762963be284adcf78 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sun, 19 Apr 2026 10:27:38 -0400
Subject: [PATCH 17/22] avoid always setting the contention bit

---
 libc/src/__support/threads/CndVar.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 273d3e8207470..63dd6859dd263 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -83,6 +83,8 @@ class PrivateCndVar {
     Signalled = 1,
     // A cancellation has been requested.
     Cancelled = 2,
+    // The thread has been requeued to the mutex.
+    Requeued = 3,
   };
 
   struct WaiterHeader {
@@ -206,10 +208,11 @@ class PrivateCndVar {
     }
 
     MutexError mutex_result = mutex->lock();
-    // We need to establish contention after lock, otherwise
+    // If we are requeued, we need to establish contention after lock, otherwise
     // requeued thread may clear the contention bit even though
     // there are still waiters behind it.
-    mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
+    if (waiter.state.load(cpp::MemoryOrder::RELAXED) == Requeued)
+      mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
     // If there is other in the queue after us, we need to wake the next waiter.
     // If we cancelled, we should naturally have waiter.next == &waiter
     if (waiter.next != &waiter) {
@@ -231,6 +234,10 @@ class PrivateCndVar {
               /*is_shared=*/false);
           if (!res.has_value()) // cannot requeue on this system
             next_waiter->barrier.wake(/*is_shared=*/false);
+          else if (res.value() > 0) {
+            next_waiter->state.store(Requeued, cpp::MemoryOrder::RELAXED);
+            mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
+          }
         } else { // cannot requeue under special lock mode
           next_waiter->barrier.wake(/*is_shared=*/false);
         }

>From 18b70a1cb7cb484950838966515c431ee8662dfe Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 20 Apr 2026 18:55:41 -0400
Subject: [PATCH 18/22] address comments

Assisted-by: Codex with gpt-5.4 high fast
---
 libc/include/llvm-libc-types/__barrier_type.h |   4 +-
 libc/src/__support/threads/CMakeLists.txt     |   6 +
 libc/src/__support/threads/CndVar.h           | 316 +++++++-----------
 libc/src/__support/threads/linux/barrier.h    |   2 +-
 libc/src/__support/threads/raw_mutex.h        |   2 +-
 libc/src/__support/threads/unix_mutex.h       |   5 +-
 libc/src/threads/linux/CMakeLists.txt         |   1 +
 .../src/__support/threads/CMakeLists.txt      |   2 +
 .../integration/src/pthread/CMakeLists.txt    |   2 +
 .../integration/src/threads/CMakeLists.txt    |   1 +
 10 files changed, 148 insertions(+), 193 deletions(-)

diff --git a/libc/include/llvm-libc-types/__barrier_type.h b/libc/include/llvm-libc-types/__barrier_type.h
index 627f87767f20b..5752f832f04b9 100644
--- a/libc/include/llvm-libc-types/__barrier_type.h
+++ b/libc/include/llvm-libc-types/__barrier_type.h
@@ -15,8 +15,8 @@ typedef struct __attribute__((aligned(8 /* alignof (Barrier) */))) {
   unsigned expected;
   unsigned waiting;
   bool blocking;
-  char entering[32 /* sizeof (CndVar) */];
-  char exiting[32 /* sizeof (CndVar) */];
+  char entering[24 /* sizeof (CndVar) */];
+  char exiting[24 /* sizeof (CndVar) */];
   char mutex[24 /* sizeof (Mutex) */];
 } __barrier_type;
 
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index 0623a8743ad7a..4cc6ae80d4a10 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -155,9 +155,15 @@ if(TARGET libc.src.__support.threads.futex_utils)
     DEPENDS
       .futex_utils
       .mutex
+      .mutex_common
       .raw_mutex
       libc.hdr.stdint_proxy
+      libc.src.__support.CPP.expected
+      libc.src.__support.CPP.limits
       libc.src.__support.CPP.mutex
+      libc.src.__support.CPP.new
+      libc.src.__support.threads.sleep
+      libc.src.__support.time.monotonicity
   )
 endif()
 
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 63dd6859dd263..e61158ff68485 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -10,12 +10,18 @@
 #define LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
 
 #include "hdr/stdint_proxy.h" // uint32_t
+#include "src/__support/CPP/limits.h"
 #include "src/__support/CPP/mutex.h"
 #include "src/__support/CPP/new.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/futex_utils.h" // Futex
 #include "src/__support/threads/mutex.h"       // Mutex
 #include "src/__support/threads/raw_mutex.h"   // RawMutex
+#include "src/__support/threads/sleep.h"
+
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#include "src/__support/time/monotonicity.h"
+#endif
 
 #ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
 #define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
@@ -24,21 +30,21 @@
 namespace LIBC_NAMESPACE_DECL {
 
 enum class CndVarResult {
-  // The waiter successfully received a signal.
   Success,
-  // Error occurs during mutex acquisition.
   MutexError,
-  // Timeout occurs.
   Timeout,
 };
 
-class PrivateCndVar {
+class CndVar {
+public:
+  using Timeout = internal::AbsTimeout;
+
+private:
   // A single-waiter multiple-notifier barrier used to keep
   // track of cancellation threads. We use this barrier to
   // ensure in-queue threads that have posted their cancellation
   // request have finished dequeue themselves.
   class CancellationBarrier {
-    LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
     LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
     LIBC_INLINE_VAR static constexpr size_t SLEEPING_BIT = 1;
 
@@ -64,7 +70,7 @@ class PrivateCndVar {
         // Set LSB to 1 to indicate that the waiter is entering sleeping
         // state.
         FutexWordType new_val = remaining | SLEEPING_BIT;
-        if (spin > SPIN_LIMIT &&
+        if (spin > LIBC_COPT_RAW_MUTEX_DEFAULT_SPIN_COUNT &&
             futex.compare_exchange_strong(remaining, new_val)) {
           futex.wait(new_val, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
           futex.fetch_sub(1);
@@ -77,19 +83,15 @@ class PrivateCndVar {
   };
 
   enum WaiterState : uint8_t {
-    // Initial state after entering the wait queue.
     Waiting = 0,
-    // A signal has been received.
     Signalled = 1,
-    // A cancellation has been requested.
     Cancelled = 2,
-    // The thread has been requeued to the mutex.
     Requeued = 3,
   };
 
-  struct WaiterHeader {
-    WaiterHeader *prev;
-    WaiterHeader *next;
+  struct QueueNode {
+    QueueNode *prev;
+    QueueNode *next;
 
     // We use cyclic dummy node to avoid handing corner cases.
     LIBC_INLINE void ensure_queue_initialization() {
@@ -98,7 +100,7 @@ class PrivateCndVar {
     }
 
     // Assume `this` the dummy node of queue. Push back `waiter` to the queue.
-    LIBC_INLINE void push_back(WaiterHeader *waiter) {
+    LIBC_INLINE void push_back(QueueNode *waiter) {
       ensure_queue_initialization();
       waiter->next = this;
       waiter->prev = prev;
@@ -107,7 +109,7 @@ class PrivateCndVar {
     }
 
     // Remove `waiter` from the queue.
-    LIBC_INLINE static void remove(WaiterHeader *waiter) {
+    LIBC_INLINE static void remove(QueueNode *waiter) {
       waiter->next->prev = waiter->prev;
       waiter->prev->next = waiter->next;
       waiter->prev = waiter->next = waiter;
@@ -115,27 +117,28 @@ class PrivateCndVar {
 
     // Assume `this` the dummy node of queue. Pop the first waiter from the
     // queue.
-    LIBC_INLINE WaiterHeader *pop_front() {
+    LIBC_INLINE QueueNode *pop_front() {
       ensure_queue_initialization();
       if (next == this)
         return nullptr;
-      WaiterHeader *first = next;
+      QueueNode *first = next;
       remove(first);
       return first;
     }
   };
 
   // This node will be on the per-thread stack.
-  struct CndWaiter : WaiterHeader {
+  struct CndWaiter : QueueNode {
     cpp::Atomic<CancellationBarrier *> cancellation_barrier;
     RawMutex barrier;
     cpp::Atomic<uint8_t> state;
 
     LIBC_INLINE CndWaiter()
-        : WaiterHeader{}, cancellation_barrier(nullptr), barrier{},
+        : QueueNode{}, cancellation_barrier(nullptr), barrier{},
           state{Waiting} {
       // this lock should always success as no contention is possible
-      (void)barrier.try_lock();
+      [[maybe_unused]] bool locked = barrier.try_lock();
+      LIBC_ASSERT(locked);
     }
 
     LIBC_INLINE void confirm_cancellation() {
@@ -144,24 +147,98 @@ class PrivateCndVar {
     }
   };
 
-  /*
-  Layout:
+  union {
+    QueueNode waiter_queue;
+    cpp::Atomic<size_t> shared_waiters;
+  };
 
-  struct {
-    void * __wait_queue_prev;
-    void * __wait_queue_next;
-    __futex_word __futex;
+  union {
+    RawMutex queue_lock;
+    Futex shared_futex;
+  };
+
+  bool is_shared;
+
+  LIBC_INLINE void notify(bool is_broadcast) {
+    if (LIBC_UNLIKELY(is_shared)) {
+      if (shared_waiters.load() == 0)
+        return;
+      // increase the sequence number
+      shared_futex.fetch_add(1);
+      if (is_broadcast)
+        shared_futex.notify_all();
+      else
+        shared_futex.notify_one();
+      return;
+    }
+
+    size_t limit =
+        is_broadcast ? cpp::numeric_limits<size_t>::max() : size_t{1};
+    CancellationBarrier cancellation_barrier{};
+    CndWaiter *head = nullptr;
+    CndWaiter *cursor = nullptr;
+    // Go through the queue, try send signal to waiters.
+    // 1. if signal is sent, we reduce the number of pending signals
+    // 2. if waiter cancelled before signal is sent, we add it
+    //    to cancellation barrier and continue
+    // Notice that cancelled sender will not continue before
+    // we release the queue lock, because they also need to
+    // acquire the lock and dequeue themselves.
+    {
+      cpp::lock_guard lock(queue_lock);
+      waiter_queue.ensure_queue_initialization();
+      if (waiter_queue.next == &waiter_queue)
+        return;
+      for (cursor = static_cast<CndWaiter *>(waiter_queue.next);
+           cursor != &waiter_queue;
+           cursor = static_cast<CndWaiter *>(cursor->next)) {
+        if (limit == 0)
+          break;
+        uint8_t expected = Waiting;
+        if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
+          cancellation_barrier.add_one();
+          cursor->cancellation_barrier.store(&cancellation_barrier);
+          continue;
+        }
+        if (!head)
+          head = cursor;
+        limit--;
+      }
+      // remove everything before cursor
+      auto removed_head = waiter_queue.next;
+      auto removed_tail = cursor->prev;
+      waiter_queue.next = cursor;
+      cursor->prev = &waiter_queue;
+      removed_tail->next = removed_head;
+      removed_head->prev = removed_tail;
+    }
+    // We want to make sure the propagation queue contain only threads
+    // that have consumed the signal. So we wait until all cancelled
+    // finishing their dequeue operation.
+    cancellation_barrier.wait();
+    // Start propagate notification to the first waiter in the queue.
+    // Waiters in the queue will acquire the lock in strict FIFO order:
+    // Only when the predecessor has acquired the lock can the successor
+    // be waken up to compete for the mutex.
+    if (head)
+      head->barrier.unlock();
   }
-  */
-  WaiterHeader waiter_queue;
-  RawMutex queue_lock;
 
 public:
-  using Timeout = internal::AbsTimeout;
-
-  LIBC_INLINE constexpr PrivateCndVar() : waiter_queue{}, queue_lock{} {}
+  LIBC_INLINE constexpr CndVar(bool is_shared)
+      : waiter_queue{}, queue_lock{}, is_shared(is_shared) {
+    if (is_shared) {
+      new (&shared_waiters) cpp::Atomic<size_t>(0);
+      new (&shared_futex) Futex(0);
+    }
+  }
 
   LIBC_INLINE void reset() {
+    if (is_shared) {
+      shared_waiters.store(0);
+      shared_futex.store(0);
+      return;
+    }
     queue_lock.reset();
     waiter_queue.prev = nullptr;
     waiter_queue.next = nullptr;
@@ -175,6 +252,20 @@ class PrivateCndVar {
       ensure_monotonicity(*timeout);
 #endif
 
+    if (LIBC_UNLIKELY(is_shared)) {
+      shared_waiters.fetch_add(1);
+      FutexWordType old_val = shared_futex.load();
+      mutex->unlock();
+      ErrorOr<int> result =
+          shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
+      shared_waiters.fetch_sub(1);
+      MutexError mutex_result = mutex->lock();
+      if (!result.has_value() && result.error() == ETIMEDOUT)
+        return CndVarResult::Timeout;
+      return mutex_result == MutexError::NONE ? CndVarResult::Success
+                                              : CndVarResult::MutexError;
+    }
+
     CndWaiter waiter{};
     // Register the waiter to the queue.
     {
@@ -196,7 +287,7 @@ class PrivateCndVar {
       // we haven't consumed the signal before timeout reaches.
       {
         cpp::lock_guard lock(queue_lock);
-        WaiterHeader::remove(&waiter);
+        QueueNode::remove(&waiter);
       }
       waiter.confirm_cancellation();
     } else if (!locked) {
@@ -217,7 +308,7 @@ class PrivateCndVar {
     // If we cancelled, we should naturally have waiter.next == &waiter
     if (waiter.next != &waiter) {
       auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
-      WaiterHeader::remove(&waiter);
+      QueueNode::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
       auto &mutex_futex = mutex->get_raw_futex();
       // the following is basically an inlined version of mutex::unlock
@@ -248,162 +339,13 @@ class PrivateCndVar {
     return old_state == Waiting ? CndVarResult::Timeout : CndVarResult::Success;
   }
 
-private:
-  LIBC_INLINE void notify(size_t limit) {
-    CancellationBarrier cancellation_barrier{};
-    CndWaiter *head = nullptr;
-    CndWaiter *cursor = nullptr;
-    // Go through the queue, try send signal to waiters.
-    // 1. if signal is sent, we reduce the number of pending signals
-    // 2. if waiter cancelled before signal is sent, we add it
-    //    to cancellation barrier and continue
-    // Notice that cancelled sender will not continue before
-    // we release the queue lock, because they also need to
-    // acquire the lock and dequeue themselves.
-    {
-      cpp::lock_guard lock(queue_lock);
-      waiter_queue.ensure_queue_initialization();
-      if (waiter_queue.next == &waiter_queue)
-        return;
-      for (cursor = static_cast<CndWaiter *>(waiter_queue.next);
-           cursor != &waiter_queue;
-           cursor = static_cast<CndWaiter *>(cursor->next)) {
-        if (limit == 0)
-          break;
-        uint8_t expected = Waiting;
-        if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
-          cancellation_barrier.add_one();
-          cursor->cancellation_barrier.store(&cancellation_barrier);
-          continue;
-        }
-        if (!head)
-          head = cursor;
-        limit--;
-      }
-      // remove everything before cursor
-      auto removed_head = waiter_queue.next;
-      auto removed_tail = cursor->prev;
-      waiter_queue.next = cursor;
-      cursor->prev = &waiter_queue;
-      removed_tail->next = removed_head;
-      removed_head->prev = removed_tail;
-    }
-    // We want to make sure the propagation queue contain only threads
-    // that have consumed the signal. So we wait until all cancelled
-    // finishing their dequeue operation.
-    cancellation_barrier.wait();
-    // Start propagate notification to the first waiter in the queue.
-    // Waiters in the queue will acquire the lock in strict FIFO order:
-    // Only when the predecessor has acquired the lock can the successor
-    // be waken up to compete for the mutex.
-    if (head)
-      head->barrier.unlock();
-  }
-
-public:
-  LIBC_INLINE void notify_one() { notify(1); }
-  LIBC_INLINE void broadcast() { notify(cpp::numeric_limits<size_t>::max()); }
+  LIBC_INLINE void notify_one() { notify(/*is_broadcast=*/false); }
+  LIBC_INLINE void broadcast() { notify(/*is_broadcast=*/true); }
 };
 
-class SharedCndVar {
-  /*
-  Layout:
-    struct {
-      cpp::Atomic<size_t> shared_waiters;
-      Futex shared_futex;
-    };
-  */
-  cpp::Atomic<size_t> shared_waiters;
-  Futex shared_futex;
-
+class PrivateCndVar final : public CndVar {
 public:
-  using Timeout = internal::AbsTimeout;
-
-  LIBC_INLINE constexpr SharedCndVar() : shared_waiters(0), shared_futex{0} {}
-
-  LIBC_INLINE void reset() {
-    shared_waiters.store(0);
-    shared_futex.store(0);
-  }
-
-  // TODO: register callback for pthread cancellation
-  LIBC_INLINE CndVarResult wait(Mutex *mutex,
-                                cpp::optional<Timeout> timeout = cpp::nullopt) {
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
-    if (timeout)
-      ensure_monotonicity(*timeout);
-#endif
-
-    shared_waiters.fetch_add(1);
-    FutexWordType old_val = shared_futex.load();
-    mutex->unlock();
-    ErrorOr<int> result =
-        shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
-    shared_waiters.fetch_sub(1);
-    MutexError mutex_result = mutex->lock();
-    if (!result.has_value() && result.error() == ETIMEDOUT)
-      return CndVarResult::Timeout;
-    return mutex_result == MutexError::NONE ? CndVarResult::Success
-                                            : CndVarResult::MutexError;
-  }
-
-private:
-  LIBC_INLINE void notify(bool broadcast) {
-    if (shared_waiters.load() == 0)
-      return;
-    shared_futex.fetch_add(1);
-    if (broadcast)
-      shared_futex.notify_all();
-    else
-      shared_futex.notify_one();
-    return;
-  }
-
-public:
-  LIBC_INLINE void notify_one() { notify(/*broadcast=*/false); }
-  LIBC_INLINE void broadcast() { notify(/*broadcast=*/true); }
-};
-
-class CndVar {
-  union {
-    PrivateCndVar private_cnd_var{};
-    SharedCndVar shared_cnd_var;
-  } storage;
-  bool is_shared;
-
-public:
-  using Timeout = internal::AbsTimeout;
-  LIBC_INLINE constexpr CndVar(bool is_shared) : is_shared(is_shared) {
-    if (is_shared)
-      new (&storage.shared_cnd_var) SharedCndVar();
-    else
-      new (&storage.private_cnd_var) PrivateCndVar();
-  }
-  LIBC_INLINE void reset() {
-    if (is_shared)
-      storage.shared_cnd_var.reset();
-    else
-      storage.private_cnd_var.reset();
-  }
-  LIBC_INLINE CndVarResult wait(Mutex *mutex,
-                                cpp::optional<Timeout> timeout = cpp::nullopt) {
-    if (is_shared)
-      return storage.shared_cnd_var.wait(mutex, timeout);
-    else
-      return storage.private_cnd_var.wait(mutex, timeout);
-  }
-  LIBC_INLINE void notify_one() {
-    if (is_shared)
-      storage.shared_cnd_var.notify_one();
-    else
-      storage.private_cnd_var.notify_one();
-  }
-  LIBC_INLINE void broadcast() {
-    if (is_shared)
-      storage.shared_cnd_var.broadcast();
-    else
-      storage.private_cnd_var.broadcast();
-  }
+  LIBC_INLINE constexpr PrivateCndVar() : CndVar(false) {}
 };
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h
index eb34e2e268e10..efb452c573e4a 100644
--- a/libc/src/__support/threads/linux/barrier.h
+++ b/libc/src/__support/threads/linux/barrier.h
@@ -44,7 +44,7 @@ static_assert(alignof(Barrier) <= alignof(pthread_barrier_t),
               "The public pthread_barrier_t type has insufficient alignment "
               "for the internal barrier type.");
 
-static_assert(sizeof(CndVar) <= 32,
+static_assert(sizeof(CndVar) <= 24,
               "CndVar size exceeds the size in __barrier_type.h");
 
 static_assert(sizeof(Mutex) <= 32,
diff --git a/libc/src/__support/threads/raw_mutex.h b/libc/src/__support/threads/raw_mutex.h
index 7495374513a43..12dec56084e0e 100644
--- a/libc/src/__support/threads/raw_mutex.h
+++ b/libc/src/__support/threads/raw_mutex.h
@@ -45,7 +45,7 @@ class RawMutex {
   LIBC_INLINE_VAR static constexpr FutexWordType UNLOCKED = 0b00;
   LIBC_INLINE_VAR static constexpr FutexWordType LOCKED = 0b01;
   LIBC_INLINE_VAR static constexpr FutexWordType IN_CONTENTION = 0b10;
-  friend class PrivateCndVar;
+  friend class CndVar;
 
 private:
   LIBC_INLINE FutexWordType spin(unsigned spin_count) {
diff --git a/libc/src/__support/threads/unix_mutex.h b/libc/src/__support/threads/unix_mutex.h
index 501a330349ca5..d38d94d07d5e8 100644
--- a/libc/src/__support/threads/unix_mutex.h
+++ b/libc/src/__support/threads/unix_mutex.h
@@ -30,13 +30,14 @@ class Mutex final : private RawMutex {
   pid_t owner;
   unsigned long long lock_count;
 
-  friend class PrivateCndVar;
+  // CndVar needs to access Mutex as RawMutex
+  friend class CndVar;
 
+public:
   LIBC_INLINE bool can_be_requeued() const {
     return !this->pshared && !this->robust;
   }
 
-public:
   LIBC_INLINE constexpr Mutex(bool is_timed, bool is_recursive, bool is_robust,
                               bool is_pshared)
       : RawMutex(), timed(is_timed), recursive(is_recursive), robust(is_robust),
diff --git a/libc/src/threads/linux/CMakeLists.txt b/libc/src/threads/linux/CMakeLists.txt
index 3cbf2f85f3f9d..c33c48919be2c 100644
--- a/libc/src/threads/linux/CMakeLists.txt
+++ b/libc/src/threads/linux/CMakeLists.txt
@@ -21,6 +21,7 @@ add_entrypoint_object(
     ../cnd_init.h
   DEPENDS
     libc.include.threads
+    libc.src.__support.CPP.new
     libc.src.__support.threads.CndVar
 )
 
diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt
index a89a29d053b5f..2ab77287f85c5 100644
--- a/libc/test/integration/src/__support/threads/CMakeLists.txt
+++ b/libc/test/integration/src/__support/threads/CMakeLists.txt
@@ -68,8 +68,10 @@ add_integration_test(
     cndvar_test.cpp
   DEPENDS
     libc.hdr.time_macros
+    libc.src.__support.CPP.expected
     libc.src.__support.threads.CndVar
     libc.src.__support.threads.mutex
+    libc.src.__support.threads.mutex_common
     libc.src.__support.threads.sleep
     libc.src.__support.time.clock_gettime
     libc.src.threads.thrd_create
diff --git a/libc/test/integration/src/pthread/CMakeLists.txt b/libc/test/integration/src/pthread/CMakeLists.txt
index 1cfac7aadf111..eaef5013d3c94 100644
--- a/libc/test/integration/src/pthread/CMakeLists.txt
+++ b/libc/test/integration/src/pthread/CMakeLists.txt
@@ -28,11 +28,13 @@ add_integration_test(
   DEPENDS
     libc.include.pthread
     libc.src.errno.errno
+    libc.src.__support.CPP.atomic
     libc.src.pthread.pthread_barrier_destroy
     libc.src.pthread.pthread_barrier_wait
     libc.src.pthread.pthread_barrier_init
     libc.src.pthread.pthread_create
     libc.src.pthread.pthread_join
+    libc.src.string.memset
     libc.src.stdio.printf
 )
 
diff --git a/libc/test/integration/src/threads/CMakeLists.txt b/libc/test/integration/src/threads/CMakeLists.txt
index d74b923751ad6..68e346a26457a 100644
--- a/libc/test/integration/src/threads/CMakeLists.txt
+++ b/libc/test/integration/src/threads/CMakeLists.txt
@@ -104,6 +104,7 @@ add_integration_test(
     cnd_test.cpp
   DEPENDS
     libc.include.threads
+    libc.src.__support.CPP.atomic
     libc.src.threads.cnd_init
     libc.src.threads.cnd_broadcast
     libc.src.threads.cnd_signal

>From 7b51b4d66c8f443f00c629f84ebb2923f7290a31 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 20 Apr 2026 20:39:36 -0400
Subject: [PATCH 19/22] remove PrivateCndVar

---
 libc/include/llvm-libc-types/cnd_t.h          |  1 +
 libc/src/__support/threads/CndVar.h           |  5 --
 libc/src/threads/linux/cnd_broadcast.cpp      |  4 +-
 libc/src/threads/linux/cnd_destroy.cpp        |  4 +-
 libc/src/threads/linux/cnd_init.cpp           |  4 +-
 libc/src/threads/linux/cnd_signal.cpp         |  4 +-
 libc/src/threads/linux/cnd_wait.cpp           |  4 +-
 .../src/__support/threads/cndvar_test.cpp     | 79 +++++++++----------
 8 files changed, 50 insertions(+), 55 deletions(-)

diff --git a/libc/include/llvm-libc-types/cnd_t.h b/libc/include/llvm-libc-types/cnd_t.h
index 77ec58352f074..f2c39d40688e0 100644
--- a/libc/include/llvm-libc-types/cnd_t.h
+++ b/libc/include/llvm-libc-types/cnd_t.h
@@ -15,6 +15,7 @@ typedef struct {
   void *__qfront;
   void *__qback;
   __futex_word __qmtx;
+  char __is_shared;
 } cnd_t;
 
 #endif // LLVM_LIBC_TYPES_CND_T_H
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index e61158ff68485..4b2a08726d598 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -343,11 +343,6 @@ class CndVar {
   LIBC_INLINE void broadcast() { notify(/*is_broadcast=*/true); }
 };
 
-class PrivateCndVar final : public CndVar {
-public:
-  LIBC_INLINE constexpr PrivateCndVar() : CndVar(false) {}
-};
-
 } // namespace LIBC_NAMESPACE_DECL
 
 #endif // LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
diff --git a/libc/src/threads/linux/cnd_broadcast.cpp b/libc/src/threads/linux/cnd_broadcast.cpp
index 280b666041db2..5c5187461bbed 100644
--- a/libc/src/threads/linux/cnd_broadcast.cpp
+++ b/libc/src/threads/linux/cnd_broadcast.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
+static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_broadcast, (cnd_t * cond)) {
-  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
+  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
   cndvar->broadcast();
   return thrd_success;
 }
diff --git a/libc/src/threads/linux/cnd_destroy.cpp b/libc/src/threads/linux/cnd_destroy.cpp
index 298250d69a235..e51728615da9c 100644
--- a/libc/src/threads/linux/cnd_destroy.cpp
+++ b/libc/src/threads/linux/cnd_destroy.cpp
@@ -15,10 +15,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
+static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(void, cnd_destroy, (cnd_t * cond)) {
-  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
+  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
   cndvar->reset();
 }
 
diff --git a/libc/src/threads/linux/cnd_init.cpp b/libc/src/threads/linux/cnd_init.cpp
index 33fa85a00cc26..5bac8e39e3afb 100644
--- a/libc/src/threads/linux/cnd_init.cpp
+++ b/libc/src/threads/linux/cnd_init.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
+static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_init, (cnd_t * cond)) {
-  new (cond) PrivateCndVar();
+  new (cond) CndVar(false);
   return thrd_success;
 }
 
diff --git a/libc/src/threads/linux/cnd_signal.cpp b/libc/src/threads/linux/cnd_signal.cpp
index 53cd0bb445f15..0d218405d3ac5 100644
--- a/libc/src/threads/linux/cnd_signal.cpp
+++ b/libc/src/threads/linux/cnd_signal.cpp
@@ -15,10 +15,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
+static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_signal, (cnd_t * cond)) {
-  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
+  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
   cndvar->notify_one();
   return thrd_success;
 }
diff --git a/libc/src/threads/linux/cnd_wait.cpp b/libc/src/threads/linux/cnd_wait.cpp
index 2c2e9ad11030c..179822f476dd9 100644
--- a/libc/src/threads/linux/cnd_wait.cpp
+++ b/libc/src/threads/linux/cnd_wait.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
+static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_wait, (cnd_t * cond, mtx_t *mtx)) {
-  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
+  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
   Mutex *mutex = reinterpret_cast<Mutex *>(mtx);
   return cndvar->wait(mutex) == CndVarResult::Success ? thrd_success
                                                       : thrd_error;
diff --git a/libc/test/integration/src/__support/threads/cndvar_test.cpp b/libc/test/integration/src/__support/threads/cndvar_test.cpp
index a1ee8ecc1dfe7..30e97f2745eb8 100644
--- a/libc/test/integration/src/__support/threads/cndvar_test.cpp
+++ b/libc/test/integration/src/__support/threads/cndvar_test.cpp
@@ -1,4 +1,4 @@
-//===-- Integration test for PrivateCndVar with C11 threads --------------===//
+//===-- Integration test for CndVar with C11 threads ----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -23,7 +23,7 @@ constexpr int THREAD_COUNT = 16;
 constexpr int NUM_ITERATIONS = 250;
 
 struct QueueState {
-  LIBC_NAMESPACE::PrivateCndVar cnd{};
+  LIBC_NAMESPACE::CndVar cnd{false};
   LIBC_NAMESPACE::Mutex m{false, false, false, false};
   size_t consumed = 0;
   size_t produced = 0;
@@ -43,45 +43,44 @@ void stress_test(bool use_broadcast, bool allow_timeout) {
     thrd_t producer_threads[PRODUCER];
     thrd_t consumer_threads[CONSUMER];
     using TimeoutOpt =
-        LIBC_NAMESPACE::cpp::optional<LIBC_NAMESPACE::PrivateCndVar::Timeout>;
+        LIBC_NAMESPACE::cpp::optional<LIBC_NAMESPACE::CndVar::Timeout>;
     for (size_t i = 0; i < CONSUMER; ++i)
-      ASSERT_EQ(
-          LIBC_NAMESPACE::thrd_create(
-              &consumer_threads[i],
-              [](void *arg) {
-                auto *state = static_cast<QueueState *>(arg);
-                state->m.lock();
-                while (state->consumed != PRODUCER * ITEMS_PER_PRODUCER) {
-                  TimeoutOpt timeout = LIBC_NAMESPACE::cpp::nullopt;
-                  if (state->allow_timeout) {
-                    timespec now{};
-                    LIBC_NAMESPACE::internal::clock_gettime(CLOCK_MONOTONIC,
-                                                            &now);
-                    size_t sleep_ns = 1000;
-                    now.tv_nsec += sleep_ns;
-                    if (now.tv_nsec >= 1'000'000'000) {
-                      now.tv_sec++;
-                      now.tv_nsec -= 1'000'000'000;
-                    }
-                    timeout = TimeoutOpt(
-                        LIBC_NAMESPACE::PrivateCndVar::Timeout::from_timespec(
-                            now,
-                            /*realtime=*/false)
-                            .value());
-                  }
-                  ASSERT_NE(state->cnd.wait(&state->m, timeout),
-                            LIBC_NAMESPACE::CndVarResult::MutexError);
-                  if (state->produced == 0)
-                    continue;
-                  state->produced--;
-                  state->consumed++;
-                }
-                state->m.unlock();
-                state->exited_consumers.fetch_add(1);
-                return 0;
-              },
-              &state),
-          int(thrd_success));
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_create(
+                    &consumer_threads[i],
+                    [](void *arg) {
+                      auto *state = static_cast<QueueState *>(arg);
+                      state->m.lock();
+                      while (state->consumed != PRODUCER * ITEMS_PER_PRODUCER) {
+                        TimeoutOpt timeout = LIBC_NAMESPACE::cpp::nullopt;
+                        if (state->allow_timeout) {
+                          timespec now{};
+                          LIBC_NAMESPACE::internal::clock_gettime(
+                              CLOCK_MONOTONIC, &now);
+                          size_t sleep_ns = 1000;
+                          now.tv_nsec += sleep_ns;
+                          if (now.tv_nsec >= 1'000'000'000) {
+                            now.tv_sec++;
+                            now.tv_nsec -= 1'000'000'000;
+                          }
+                          timeout = TimeoutOpt(
+                              LIBC_NAMESPACE::CndVar::Timeout::from_timespec(
+                                  now,
+                                  /*realtime=*/false)
+                                  .value());
+                        }
+                        ASSERT_NE(state->cnd.wait(&state->m, timeout),
+                                  LIBC_NAMESPACE::CndVarResult::MutexError);
+                        if (state->produced == 0)
+                          continue;
+                        state->produced--;
+                        state->consumed++;
+                      }
+                      state->m.unlock();
+                      state->exited_consumers.fetch_add(1);
+                      return 0;
+                    },
+                    &state),
+                int(thrd_success));
     for (size_t i = 0; i < PRODUCER; ++i)
       ASSERT_EQ(LIBC_NAMESPACE::thrd_create(
                     &producer_threads[i],

>From c9367739a1660ec9fab50e9f87bcb9e57ddebe47 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Mon, 20 Apr 2026 20:41:14 -0400
Subject: [PATCH 20/22] do not expose __is_shared

---
 libc/include/llvm-libc-types/cnd_t.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/include/llvm-libc-types/cnd_t.h b/libc/include/llvm-libc-types/cnd_t.h
index f2c39d40688e0..88b67ba1c9b1c 100644
--- a/libc/include/llvm-libc-types/cnd_t.h
+++ b/libc/include/llvm-libc-types/cnd_t.h
@@ -15,7 +15,7 @@ typedef struct {
   void *__qfront;
   void *__qback;
   __futex_word __qmtx;
-  char __is_shared;
+  char __padding[4];
 } cnd_t;
 
 #endif // LLVM_LIBC_TYPES_CND_T_H

>From f401cb265a42d8e208a05ac29d6206ab97d559af Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 21 Apr 2026 16:56:29 -0400
Subject: [PATCH 21/22] fix

---
 libc/cmake/modules/LLVMLibCCompileOptionRules.cmake | 4 ++++
 libc/src/__support/threads/CMakeLists.txt           | 3 ---
 libc/src/__support/threads/CndVar.h                 | 8 ++------
 libc/src/__support/threads/linux/CMakeLists.txt     | 7 -------
 libc/src/__support/threads/raw_mutex.h              | 8 ++------
 libc/src/__support/threads/raw_rwlock.h             | 9 ++-------
 6 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 426fcef7e36f4..ea86659cd5590 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -154,6 +154,10 @@ function(_get_compile_options_from_config output_var)
     libc_add_definition(config_options "LIBC_THREAD_MODE=${LIBC_CONF_THREAD_MODE}")
   endif()
 
+  if(LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY)
+    libc_add_definition(config_options "LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY")
+  endif()
+
   if(LIBC_CONF_TRAP_ON_RAISE_FP_EXCEPT)
     libc_add_definition(config_options "LIBC_TRAP_ON_RAISE_FP_EXCEPT")
   endif()
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index 4cc6ae80d4a10..8ddd33878bd97 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -38,8 +38,6 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.futex_utils)
     raw_mutex
     HDRS
       raw_mutex.h
-    COMPILE_OPTIONS
-      ${monotonicity_flags}
     DEPENDS
       .futex_utils
       libc.src.__support.threads.sleep
@@ -55,7 +53,6 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.futex_utils)
       raw_rwlock.h
     COMPILE_OPTIONS
       ${rwlock_default_spin_count}
-      ${monotonicity_flags}
     DEPENDS
       .raw_mutex
       libc.src.__support.common
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 4b2a08726d598..b00f88e1994da 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -19,14 +19,10 @@
 #include "src/__support/threads/raw_mutex.h"   // RawMutex
 #include "src/__support/threads/sleep.h"
 
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#ifdef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
 #include "src/__support/time/monotonicity.h"
 #endif
 
-#ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
-#define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
-#endif
-
 namespace LIBC_NAMESPACE_DECL {
 
 enum class CndVarResult {
@@ -247,7 +243,7 @@ class CndVar {
   // TODO: register callback for pthread cancellation
   LIBC_INLINE CndVarResult wait(Mutex *mutex,
                                 cpp::optional<Timeout> timeout = cpp::nullopt) {
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#ifdef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
     if (timeout)
       ensure_monotonicity(*timeout);
 #endif
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index bb19588884087..5bd67c60ad88b 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -24,13 +24,6 @@ add_header_library(
     libc.src.__support.time.abs_timeout
 )
 
-set(monotonicity_flags)
-if (LIBC_CONF_TIMEOUT_ENSURE_MONOTONICITY)
-  libc_set_definition(monotonicity_flags LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY=1)
-else()
-  libc_set_definition(monotonicity_flags LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY=0)
-endif()
-
 add_object_library(
   thread
   SRCS
diff --git a/libc/src/__support/threads/raw_mutex.h b/libc/src/__support/threads/raw_mutex.h
index 12dec56084e0e..e216e0e95b77c 100644
--- a/libc/src/__support/threads/raw_mutex.h
+++ b/libc/src/__support/threads/raw_mutex.h
@@ -21,12 +21,8 @@
 
 #include <stdio.h>
 
-#ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
-#define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
-#endif
-
 // TODO(bojle): check this for darwin impl
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#ifdef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
 #include "src/__support/time/monotonicity.h"
 #endif
 
@@ -75,7 +71,7 @@ class RawMutex {
         futex.compare_exchange_strong(state, LOCKED, cpp::MemoryOrder::ACQUIRE,
                                       cpp::MemoryOrder::RELAXED))
       return true;
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#ifdef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
     /* ADL should kick in */
     if (timeout)
       ensure_monotonicity(*timeout);
diff --git a/libc/src/__support/threads/raw_rwlock.h b/libc/src/__support/threads/raw_rwlock.h
index 70f5eef11f9ce..2c14645a2e5ce 100644
--- a/libc/src/__support/threads/raw_rwlock.h
+++ b/libc/src/__support/threads/raw_rwlock.h
@@ -24,12 +24,7 @@
 #define LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT 100
 #endif
 
-#ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
-#define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
-#warning "LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY is not defined, defaulting to 1"
-#endif
-
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#ifdef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
 #include "src/__support/time/monotonicity.h"
 #endif
 
@@ -361,7 +356,7 @@ class RawRwLock {
   LIBC_INLINE LockResult
   lock_slow(cpp::optional<Futex::Timeout> timeout = cpp::nullopt,
             unsigned spin_count = LIBC_COPT_RWLOCK_DEFAULT_SPIN_COUNT) {
-#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#ifdef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
     // Phase 2: convert the timeout if necessary.
     if (timeout)
       ensure_monotonicity(*timeout);

>From 88b5c70b9d6b66d7a07b79301625a1a343498fdb Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Tue, 21 Apr 2026 16:58:16 -0400
Subject: [PATCH 22/22] fix

---
 libc/src/__support/threads/linux/barrier.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h
index efb452c573e4a..a632aa45b2aa2 100644
--- a/libc/src/__support/threads/linux/barrier.h
+++ b/libc/src/__support/threads/linux/barrier.h
@@ -47,7 +47,7 @@ static_assert(alignof(Barrier) <= alignof(pthread_barrier_t),
 static_assert(sizeof(CndVar) <= 24,
               "CndVar size exceeds the size in __barrier_type.h");
 
-static_assert(sizeof(Mutex) <= 32,
+static_assert(sizeof(Mutex) <= 24,
               "Mutex size exceeds the size in __barrier_type.h");
 
 } // namespace LIBC_NAMESPACE_DECL



More information about the libc-commits mailing list