[libc-commits] [libc] [llvm] condvar rework (PR #192748)

Schrodinger ZHU Yifan via libc-commits libc-commits at lists.llvm.org
Sat Apr 18 11:25:08 PDT 2026


https://github.com/SchrodingerZhu updated https://github.com/llvm/llvm-project/pull/192748

>From 3687f9e71dba6a52d3dce5beea2b03b78939a451 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 17 Apr 2026 09:42:04 -0400
Subject: [PATCH 01/16] [libc] remove stale header libraries

---
 libc/src/__support/CMakeLists.txt              |  9 ---------
 libc/src/__support/OSUtil/linux/CMakeLists.txt | 13 -------------
 2 files changed, 22 deletions(-)

diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index ce69d4e6f69ac..84de7766d071b 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -105,15 +105,6 @@ add_header_library(
     libc.hdr.stdint_proxy
 )
 
-add_header_library(
-  bit
-  HDRS
-    bit.h
-  DEPENDS
-    libc.src.__support.macros.attributes
-    libc.src.__support.CPP.type_traits
-)
-
 add_header_library(
   math_extras
   HDRS
diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt
index 8a686d4bce7dc..b02fa141fa456 100644
--- a/libc/src/__support/OSUtil/linux/CMakeLists.txt
+++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt
@@ -38,19 +38,6 @@ add_header_library(
     libc.src.__support.threads.callonce
 )
 
-add_header_library(
-  getrandom
-  HDRS
-    getrandom.h
-  DEPENDS
-    libc.src.__support.OSUtil.osutil
-    libc.src.__support.common
-    libc.src.__support.error_or
-    libc.src.__support.macros.config
-    libc.hdr.types.ssize_t
-    libc.include.sys_syscall
-)
-
 add_header_library(
   vdso_sym
   HDRS

>From cb7073500dd5ac4fcbaa5f525ea9ce2d8a38325b Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <yifanzhu at rochester.edu>
Date: Fri, 17 Apr 2026 17:53:55 -0400
Subject: [PATCH 02/16] wip

---
 .codex                                        |   0
 libc/src/__support/threads/CMakeLists.txt     |  13 +-
 libc/src/__support/threads/CndVar.h           | 249 ++++++++++++++++--
 .../__support/threads/linux/CMakeLists.txt    |  16 --
 libc/src/__support/threads/linux/CndVar.cpp   | 106 --------
 libc/src/__support/threads/linux/barrier.cpp  |  15 +-
 libc/src/__support/threads/raw_mutex.h        |   1 +
 libc/src/__support/threads/unix_mutex.h       |   6 +
 libc/src/threads/linux/cnd_destroy.cpp        |   2 +-
 libc/src/threads/linux/cnd_init.cpp           |   5 +-
 libc/src/threads/linux/cnd_wait.cpp           |   3 +-
 11 files changed, 251 insertions(+), 165 deletions(-)
 create mode 100644 .codex
 delete mode 100644 libc/src/__support/threads/linux/CndVar.cpp

diff --git a/.codex b/.codex
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/libc/src/__support/threads/CMakeLists.txt b/libc/src/__support/threads/CMakeLists.txt
index 290c27fa46d4f..0623a8743ad7a 100644
--- a/libc/src/__support/threads/CMakeLists.txt
+++ b/libc/src/__support/threads/CMakeLists.txt
@@ -147,12 +147,17 @@ if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.callonce)
   )
 endif()
 
-if(TARGET libc.src.__support.threads.${LIBC_TARGET_OS}.CndVar)
-  add_object_library(
+if(TARGET libc.src.__support.threads.futex_utils)
+  add_header_library(
     CndVar
-    ALIAS
+    HDRS
+      CndVar.h
     DEPENDS
-    .${LIBC_TARGET_OS}.CndVar
+      .futex_utils
+      .mutex
+      .raw_mutex
+      libc.hdr.stdint_proxy
+      libc.src.__support.CPP.mutex
   )
 endif()
 
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index a423b65bfb0b5..8d32f77b6df47 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -6,49 +6,248 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_CNDVAR_H
-#define LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_CNDVAR_H
+#ifndef LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
+#define LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
 
 #include "hdr/stdint_proxy.h" // uint32_t
+#include "src/__support/CPP/mutex.h"
 #include "src/__support/macros/config.h"
-#include "src/__support/threads/futex_utils.h"       // Futex
-#include "src/__support/threads/mutex.h"             // Mutex
-#include "src/__support/threads/raw_mutex.h"         // RawMutex
+#include "src/__support/threads/futex_utils.h" // Futex
+#include "src/__support/threads/mutex.h"       // Mutex
+#include "src/__support/threads/raw_mutex.h"   // RawMutex
+
+#ifndef LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+#define LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY 1
+#endif
 
 namespace LIBC_NAMESPACE_DECL {
 
 class CndVar {
-  enum CndWaiterStatus : uint32_t {
-    WS_Waiting = 0xE,
-    WS_Signalled = 0x5,
+  enum WaiterState : uint8_t {
+    // Initial state after entering the wait queue.
+    Waiting = 0,
+    // A signal has been received.
+    Signalled = 1,
+    // A cancellation has been requested.
+    Cancelled = 2,
+  };
+
+  struct WaiterHeader {
+    WaiterHeader *prev;
+    WaiterHeader *next;
+
+    LIBC_INLINE void ensure_queue_initialization() {
+      if (LIBC_UNLIKELY(prev == nullptr)) {
+        prev = next = this;
+      }
+    }
+
+    LIBC_INLINE void push_back(WaiterHeader *waiter) {
+      ensure_queue_initialization();
+      waiter->next = this;
+      waiter->prev = prev;
+      waiter->next->prev = waiter;
+      waiter->prev->next = waiter;
+    }
+
+    LIBC_INLINE static void remove(WaiterHeader *waiter) {
+      waiter->next->prev = waiter->prev;
+      waiter->prev->next = waiter->next;
+      waiter->prev = waiter->next = waiter;
+    }
+
+    LIBC_INLINE WaiterHeader *pop_front() {
+      ensure_queue_initialization();
+      if (next == this)
+        return nullptr;
+      WaiterHeader *first = next;
+      remove(first);
+      return first;
+    }
   };
+  struct CndWaiter : WaiterHeader {
+    cpp::Atomic<Futex *> sender_futex;
+    RawMutex barrier;
+    cpp::Atomic<uint8_t> state;
+
+    LIBC_INLINE CndWaiter()
+        : WaiterHeader{}, sender_futex(nullptr), barrier{}, state{Waiting} {
+      // this lock should always succeed as no contention is possible
+      (void)barrier.try_lock();
+    }
 
-  struct CndWaiter {
-    Futex futex_word = WS_Waiting;
-    CndWaiter *next = nullptr;
+    LIBC_INLINE void confirm_cancellation() {
+      Futex *sender = sender_futex.load();
+      if (sender && sender->fetch_sub(1) == 1)
+        sender->notify_one();
+    }
   };
 
-  CndWaiter *waitq_front;
-  CndWaiter *waitq_back;
-  RawMutex qmtx;
+  union {
+    struct {
+      RawMutex queue_lock;
+      WaiterHeader waiter_queue;
+    };
+    struct {
+      Futex shared_futex;
+      cpp::Atomic<size_t> shared_waiters;
+    };
+  };
 
 public:
-  LIBC_INLINE static int init(CndVar *cv) {
-    cv->waitq_front = cv->waitq_back = nullptr;
-    RawMutex::init(&cv->qmtx);
-    return 0;
+  enum class CndVarResult {
+    Success,
+    MutexError,
+    Timeout,
+  };
+
+  using Timeout = internal::AbsTimeout;
+
+  LIBC_INLINE constexpr CndVar() : queue_lock{}, waiter_queue{} {}
+
+  LIBC_INLINE void reset() {
+    queue_lock.reset();
+    waiter_queue.prev = nullptr;
+    waiter_queue.next = nullptr;
+  }
+
+  // TODO: register callback for pthread cancellation
+  LIBC_INLINE CndVarResult wait(Mutex *mutex,
+                                cpp::optional<Timeout> timeout = cpp::nullopt,
+                                bool is_shared = false) {
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+    if (timeout)
+      ensure_monotonicity(*timeout);
+#endif
+
+    if (is_shared) {
+      shared_waiters.fetch_add(1);
+      FutexWordType old_val = shared_futex.load();
+      mutex->unlock();
+      ErrorOr<int> result =
+          shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
+      shared_waiters.fetch_sub(1);
+      MutexError mutex_result = mutex->lock();
+      if (!result.has_value() && result.error() == ETIMEDOUT)
+        return CndVarResult::Timeout;
+      return mutex_result == MutexError::NONE ? CndVarResult::Success
+                                              : CndVarResult::MutexError;
+    }
+
+    CndWaiter waiter{};
+    // Register the waiter to the queue.
+    {
+      cpp::lock_guard lock(queue_lock);
+      waiter_queue.push_back(&waiter);
+    }
+
+    // Unlock the mutex and wait for the signal.
+    mutex->unlock();
+    bool locked = waiter.barrier.lock(timeout, /*is_shared=*/false);
+
+    // if we wake up and find that we are still waiting, this means
+    // timeout has been reached.
+    uint8_t old_state = Waiting;
+    if (waiter.state.compare_exchange_strong(old_state, Cancelled,
+                                             cpp::MemoryOrder::ACQ_REL)) {
+      // we haven't consumed the signal before timeout reaches.
+      {
+        cpp::lock_guard lock(queue_lock);
+        WaiterHeader::remove(&waiter);
+      }
+      waiter.confirm_cancellation();
+    } else if (!locked) {
+      // Whenever a signal is already consumed, we compete for the mutex
+      // in the FIFO order of the queue.
+      waiter.barrier.lock();
+    }
+
+    MutexError mutex_result = mutex->lock();
+    if (waiter.next != &waiter) {
+      auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
+      WaiterHeader::remove(&waiter);
+      auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
+      auto &mutex_futex = mutex->get_raw_futex();
+      FutexWordType prev = next_barrier_futex.exchange(
+          RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
+      if (prev == RawMutex::IN_CONTENTION)
+        if (!mutex->can_be_requeued() ||
+            !next_barrier_futex
+                 .requeue_to(mutex_futex, cpp::nullopt, 0, 1,
+                             /*is_shared=*/false)
+                 .has_value())
+          next_waiter->barrier.wake(/*is_shared=*/false);
+    }
+    if (mutex_result != MutexError::NONE)
+      return CndVarResult::MutexError;
+    return old_state == Waiting ? CndVarResult::Timeout : CndVarResult::Success;
   }
 
-  LIBC_INLINE static void destroy(CndVar *cv) {
-    cv->waitq_front = cv->waitq_back = nullptr;
+private:
+  LIBC_INLINE void notify(bool broadcast, bool is_shared = false) {
+    if (is_shared) {
+      if (shared_waiters.load() == 0)
+        return;
+      shared_futex.fetch_add(1);
+      if (broadcast)
+        shared_futex.notify_all();
+      else
+        shared_futex.notify_one();
+      return;
+    }
+
+    Futex sender_futex{0};
+    auto wait_unregisteration_finish = [&]() {
+      constexpr size_t LIMIT = 100;
+      for (size_t i = 0; i < LIMIT; ++i) {
+        if (sender_futex.load(cpp::MemoryOrder::RELAXED) == 0)
+          break;
+        sleep_briefly();
+      }
+      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED))
+        sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
+    };
+    CndWaiter *head = nullptr;
+    CndWaiter *cursor = nullptr;
+    size_t limit = broadcast ? cpp::numeric_limits<size_t>::max() : 1;
+    {
+      cpp::lock_guard lock(queue_lock);
+      for (cursor = static_cast<CndWaiter *>(waiter_queue.next);
+           cursor != &waiter_queue;
+           cursor = static_cast<CndWaiter *>(cursor->next)) {
+        if (limit == 0)
+          break;
+        uint8_t expected = Waiting;
+        if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
+          sender_futex.fetch_add(1);
+          cursor->sender_futex.store(&sender_futex);
+          continue;
+        }
+        if (!head)
+          head = cursor;
+        limit--;
+      }
+      // remove everything before cursor
+      auto removed_head = waiter_queue.next;
+      auto removed_tail = cursor->prev;
+      waiter_queue.next = cursor;
+      cursor->prev = &waiter_queue;
+      removed_tail->next = removed_head;
+      removed_head->prev = removed_tail;
+    }
+    wait_unregisteration_finish();
+    if (head)
+      head->barrier.unlock();
   }
 
-  // Returns 0 on success, -1 on error.
-  int wait(Mutex *m);
-  void notify_one();
-  void broadcast();
+public:
+  LIBC_INLINE void notify_one(bool is_shared = false) { notify(1, is_shared); }
+
+  LIBC_INLINE void broadcast(bool is_shared = false) {
+    notify(cpp::numeric_limits<size_t>::max(), is_shared);
+  }
 };
 
 } // namespace LIBC_NAMESPACE_DECL
 
-#endif // LLVM_LIBC___SUPPORT_SRC_THREADS_LINUX_CNDVAR_H
+#endif // LLVM_LIBC_SRC___SUPPORT_THREADS_CNDVAR_H
diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt
index 5ada10ed04b88..bb19588884087 100644
--- a/libc/src/__support/threads/linux/CMakeLists.txt
+++ b/libc/src/__support/threads/linux/CMakeLists.txt
@@ -65,22 +65,6 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
-add_object_library(
-  CndVar
-  SRCS
-    CndVar.cpp
-  HDRS
-    ../CndVar.h
-  DEPENDS
-    libc.hdr.stdint_proxy
-    libc.include.sys_syscall
-    libc.src.__support.OSUtil.osutil
-    libc.src.__support.threads.linux.futex_word_type
-    libc.src.__support.threads.mutex
-    libc.src.__support.threads.raw_mutex
-    libc.src.__support.CPP.mutex
-)
-
 add_object_library(
   barrier
   HDRS
diff --git a/libc/src/__support/threads/linux/CndVar.cpp b/libc/src/__support/threads/linux/CndVar.cpp
deleted file mode 100644
index 60424673e819c..0000000000000
--- a/libc/src/__support/threads/linux/CndVar.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-//===-- Utility condition variable class ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "src/__support/threads/CndVar.h"
-#include "src/__support/CPP/mutex.h"
-#include "src/__support/OSUtil/syscall.h" // syscall_impl
-#include "src/__support/macros/config.h"
-#include "src/__support/threads/linux/futex_word.h" // FutexWordType
-#include "src/__support/threads/mutex.h"            // Mutex
-#include "src/__support/threads/raw_mutex.h"        // RawMutex
-
-#include <sys/syscall.h> // For syscall numbers.
-
-namespace LIBC_NAMESPACE_DECL {
-
-int CndVar::wait(Mutex *m) {
-  // The goal is to perform "unlock |m| and wait" in an
-  // atomic operation. However, it is not possible to do it
-  // in the true sense so we do it in spirit. Before unlocking
-  // |m|, a new waiter object is added to the waiter queue with
-  // the waiter queue locked. Iff a signalling thread signals
-  // the waiter before the waiter actually starts waiting, the
-  // wait operation will not begin at all and the waiter immediately
-  // returns.
-
-  CndWaiter waiter;
-  {
-    cpp::lock_guard ml(qmtx);
-    CndWaiter *old_back = nullptr;
-    if (waitq_front == nullptr) {
-      waitq_front = waitq_back = &waiter;
-    } else {
-      old_back = waitq_back;
-      waitq_back->next = &waiter;
-      waitq_back = &waiter;
-    }
-
-    if (m->unlock() != MutexError::NONE) {
-      // If we do not remove the queued up waiter before returning,
-      // then another thread can potentially signal a non-existing
-      // waiter. Note also that we do this with |qmtx| locked. This
-      // ensures that another thread will not signal the withdrawing
-      // waiter.
-      waitq_back = old_back;
-      if (waitq_back == nullptr)
-        waitq_front = nullptr;
-      else
-        waitq_back->next = nullptr;
-
-      return -1;
-    }
-  }
-
-  waiter.futex_word.wait(WS_Waiting, cpp::nullopt, true);
-
-  // At this point, if locking |m| fails, we can simply return as the
-  // queued up waiter would have been removed from the queue.
-  auto err = m->lock();
-  return err == MutexError::NONE ? 0 : -1;
-}
-
-void CndVar::notify_one() {
-  // We don't use an RAII locker in this method as we want to unlock
-  // |qmtx| and signal the waiter using a single FUTEX_WAKE_OP signal.
-  qmtx.lock();
-  if (waitq_front == nullptr)
-    qmtx.unlock();
-
-  CndWaiter *first = waitq_front;
-  waitq_front = waitq_front->next;
-  if (waitq_front == nullptr)
-    waitq_back = nullptr;
-
-  qmtx.reset();
-
-  // this is a special WAKE_OP, so we use syscall directly
-  LIBC_NAMESPACE::syscall_impl<long>(
-      FUTEX_SYSCALL_ID, &qmtx.get_raw_futex(), FUTEX_WAKE_OP, 1, 1,
-      &first->futex_word.val,
-      FUTEX_OP(FUTEX_OP_SET, WS_Signalled, FUTEX_OP_CMP_EQ, WS_Waiting));
-}
-
-void CndVar::broadcast() {
-  cpp::lock_guard ml(qmtx);
-  uint32_t dummy_futex_word;
-  CndWaiter *waiter = waitq_front;
-  waitq_front = waitq_back = nullptr;
-  while (waiter != nullptr) {
-    // FUTEX_WAKE_OP is used instead of just FUTEX_WAKE as it allows us to
-    // atomically update the waiter status to WS_Signalled before waking
-    // up the waiter. A dummy location is used for the other futex of
-    // FUTEX_WAKE_OP.
-    LIBC_NAMESPACE::syscall_impl<long>(
-        FUTEX_SYSCALL_ID, &dummy_futex_word, FUTEX_WAKE_OP, 1, 1,
-        &waiter->futex_word.val,
-        FUTEX_OP(FUTEX_OP_SET, WS_Signalled, FUTEX_OP_CMP_EQ, WS_Waiting));
-    waiter = waiter->next;
-  }
-}
-
-} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp
index cf7207b53094b..175652a70fac2 100644
--- a/libc/src/__support/threads/linux/barrier.cpp
+++ b/libc/src/__support/threads/linux/barrier.cpp
@@ -8,6 +8,7 @@
 
 #include "src/__support/threads/linux/barrier.h"
 #include "hdr/errno_macros.h"
+#include "src/__support/CPP/new.h"
 #include "src/__support/threads/CndVar.h"
 #include "src/__support/threads/mutex.h"
 
@@ -24,14 +25,8 @@ int Barrier::init(Barrier *b,
   b->waiting = 0;
   b->blocking = true;
 
-  int err;
-  err = CndVar::init(&b->entering);
-  if (err != 0)
-    return err;
-
-  err = CndVar::init(&b->exiting);
-  if (err != 0)
-    return err;
+  new (&b->entering) CndVar();
+  new (&b->exiting) CndVar();
 
   auto mutex_err = Mutex::init(&b->m, false, false, false, false);
   if (mutex_err != MutexError::NONE)
@@ -76,8 +71,8 @@ int Barrier::wait() {
 }
 
 int Barrier::destroy(Barrier *b) {
-  CndVar::destroy(&b->entering);
-  CndVar::destroy(&b->exiting);
+  b->entering.reset();
+  b->exiting.reset();
   Mutex::destroy(&b->m);
   return 0;
 }
diff --git a/libc/src/__support/threads/raw_mutex.h b/libc/src/__support/threads/raw_mutex.h
index 616e4129e87dd..12dec56084e0e 100644
--- a/libc/src/__support/threads/raw_mutex.h
+++ b/libc/src/__support/threads/raw_mutex.h
@@ -45,6 +45,7 @@ class RawMutex {
   LIBC_INLINE_VAR static constexpr FutexWordType UNLOCKED = 0b00;
   LIBC_INLINE_VAR static constexpr FutexWordType LOCKED = 0b01;
   LIBC_INLINE_VAR static constexpr FutexWordType IN_CONTENTION = 0b10;
+  friend class CndVar;
 
 private:
   LIBC_INLINE FutexWordType spin(unsigned spin_count) {
diff --git a/libc/src/__support/threads/unix_mutex.h b/libc/src/__support/threads/unix_mutex.h
index aaaa931efe310..ebe5de2c6e7a7 100644
--- a/libc/src/__support/threads/unix_mutex.h
+++ b/libc/src/__support/threads/unix_mutex.h
@@ -30,6 +30,12 @@ class Mutex final : private RawMutex {
   pid_t owner;
   unsigned long long lock_count;
 
+  friend class CndVar;
+
+  LIBC_INLINE bool can_be_requeued() const {
+    return !this->pshared && !this->robust;
+  }
+
 public:
   LIBC_INLINE constexpr Mutex(bool is_timed, bool is_recursive, bool is_robust,
                               bool is_pshared)
diff --git a/libc/src/threads/linux/cnd_destroy.cpp b/libc/src/threads/linux/cnd_destroy.cpp
index 963991bddfe45..e51728615da9c 100644
--- a/libc/src/threads/linux/cnd_destroy.cpp
+++ b/libc/src/threads/linux/cnd_destroy.cpp
@@ -19,7 +19,7 @@ static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(void, cnd_destroy, (cnd_t * cond)) {
   CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
-  CndVar::destroy(cndvar);
+  cndvar->reset();
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/threads/linux/cnd_init.cpp b/libc/src/threads/linux/cnd_init.cpp
index 478011a5255e8..a27c0a92a67db 100644
--- a/libc/src/threads/linux/cnd_init.cpp
+++ b/libc/src/threads/linux/cnd_init.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "src/threads/cnd_init.h"
+#include "src/__support/CPP/new.h"
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/CndVar.h"
@@ -18,8 +19,8 @@ namespace LIBC_NAMESPACE_DECL {
 static_assert(sizeof(CndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_init, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
-  return CndVar::init(cndvar) ? thrd_error : thrd_success;
+  new (cond) CndVar();
+  return thrd_success;
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/threads/linux/cnd_wait.cpp b/libc/src/threads/linux/cnd_wait.cpp
index 3633cc85277b9..da7d549af3ca2 100644
--- a/libc/src/threads/linux/cnd_wait.cpp
+++ b/libc/src/threads/linux/cnd_wait.cpp
@@ -21,7 +21,8 @@ static_assert(sizeof(CndVar) == sizeof(cnd_t));
 LLVM_LIBC_FUNCTION(int, cnd_wait, (cnd_t * cond, mtx_t *mtx)) {
   CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
   Mutex *mutex = reinterpret_cast<Mutex *>(mtx);
-  return cndvar->wait(mutex) ? thrd_error : thrd_success;
+  return cndvar->wait(mutex) == CndVar::CndVarResult::Success ? thrd_success
+                                                              : thrd_error;
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From b304d9532c4d9f8f7a91a4a76c302bd61929411b Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 20:41:23 -0400
Subject: [PATCH 03/16] comments

---
 libc/src/__support/threads/CndVar.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 8d32f77b6df47..e8dee53f80292 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -143,6 +143,8 @@ class CndVar {
 
     // Unlock the mutex and wait for the signal.
     mutex->unlock();
+    // Notice that lock is already initialized as lock. We abuse the LOCKED
+    // state to indicate that the waiter is pending.
     bool locked = waiter.barrier.lock(timeout, /*is_shared=*/false);
 
     // if we wake up and find that we are still waiting, this means
@@ -158,7 +160,9 @@ class CndVar {
       waiter.confirm_cancellation();
     } else if (!locked) {
       // Whenever a signal is already consumed, we compete for the mutex
-      // in the FIFO order of the queue.
+      // in the FIFO order of the queue. We only relock if we previously
+      // wake up due to timeout. Otherwise, it means that our turn has
+      // come, so we don't need to relock.
       waiter.barrier.lock();
     }
 
@@ -168,12 +172,15 @@ class CndVar {
       WaiterHeader::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
       auto &mutex_futex = mutex->get_raw_futex();
+      // the following is basically an inlined version of mutex::unlock
+      // but with requeue instead of wake if it is possible.
       FutexWordType prev = next_barrier_futex.exchange(
           RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
       if (prev == RawMutex::IN_CONTENTION)
         if (!mutex->can_be_requeued() ||
             !next_barrier_futex
-                 .requeue_to(mutex_futex, cpp::nullopt, 0, 1,
+                 .requeue_to(mutex_futex, cpp::nullopt, /*wake_limit=*/0,
+                             /*requeue_limit=*/1,
                              /*is_shared=*/false)
                  .has_value())
           next_waiter->barrier.wake(/*is_shared=*/false);

>From 76458c3c04665254730badb22707f223299d3810 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:04:22 -0400
Subject: [PATCH 04/16] fix

---
 libc/src/__support/threads/CndVar.h | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index e8dee53f80292..77958d8467da6 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -37,9 +37,8 @@ class CndVar {
     WaiterHeader *next;
 
     LIBC_INLINE void ensure_queue_initialization() {
-      if (LIBC_UNLIKELY(prev == nullptr)) {
+      if (LIBC_UNLIKELY(prev == nullptr))
         prev = next = this;
-      }
     }
 
     LIBC_INLINE void push_back(WaiterHeader *waiter) {
@@ -143,7 +142,7 @@ class CndVar {
 
     // Unlock the mutex and wait for the signal.
     mutex->unlock();
-    // Notice that lock is already initialized as lock. We abuse the LOCKED
+    // Notice that lock is already initialized as LOCKED. We abuse the LOCKED
     // state to indicate that the waiter is pending.
     bool locked = waiter.barrier.lock(timeout, /*is_shared=*/false);
 
@@ -176,14 +175,21 @@ class CndVar {
       // but with requeue instead of wake if it is possible.
       FutexWordType prev = next_barrier_futex.exchange(
           RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
-      if (prev == RawMutex::IN_CONTENTION)
-        if (!mutex->can_be_requeued() ||
-            !next_barrier_futex
-                 .requeue_to(mutex_futex, cpp::nullopt, /*wake_limit=*/0,
-                             /*requeue_limit=*/1,
-                             /*is_shared=*/false)
-                 .has_value())
+      if (prev == RawMutex::IN_CONTENTION) {
+        if (mutex->can_be_requeued()) {
+          ErrorOr<int> res = next_barrier_futex.requeue_to(
+              mutex_futex, cpp::nullopt, /*wake_limit=*/0,
+              /*requeue_limit=*/1,
+              /*is_shared=*/false);
+          if (!res.has_value()) // cannot requeue on this system
+            next_waiter->barrier.wake(/*is_shared=*/false);
+          else if (res.value() >
+                   0) // requeue succeeded, the lock needs to be waked up
+            mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
+        } else { // cannot requeue under special lock mode
           next_waiter->barrier.wake(/*is_shared=*/false);
+        }
+      }
     }
     if (mutex_result != MutexError::NONE)
       return CndVarResult::MutexError;

>From 7940369db49b22d358e67a21b94ac73e3cade0d3 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:10:45 -0400
Subject: [PATCH 05/16] fix

---
 libc/src/__support/threads/CndVar.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 77958d8467da6..fc16119da0ae8 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -225,6 +225,9 @@ class CndVar {
     size_t limit = broadcast ? cpp::numeric_limits<size_t>::max() : 1;
     {
       cpp::lock_guard lock(queue_lock);
+      waiter_queue.ensure_queue_initialization();
+      if (waiter_queue.next == &waiter_queue)
+        return;
       for (cursor = static_cast<CndWaiter *>(waiter_queue.next);
            cursor != &waiter_queue;
            cursor = static_cast<CndWaiter *>(cursor->next)) {
@@ -254,7 +257,9 @@ class CndVar {
   }
 
 public:
-  LIBC_INLINE void notify_one(bool is_shared = false) { notify(1, is_shared); }
+  LIBC_INLINE void notify_one(bool is_shared = false) {
+    notify(/*broadcast=*/false, is_shared);
+  }
 
   LIBC_INLINE void broadcast(bool is_shared = false) {
     notify(cpp::numeric_limits<size_t>::max(), is_shared);

>From 91ef92058e8e5850d384f5105f20b2c912e58422 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:11:06 -0400
Subject: [PATCH 06/16] fix

---
 .codex | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .codex

diff --git a/.codex b/.codex
deleted file mode 100644
index e69de29bb2d1d..0000000000000

>From 461b76e63b34699c916d1bbe8011c2b9a58ab570 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 21:15:53 -0400
Subject: [PATCH 07/16] fix

---
 libc/src/__support/threads/CndVar.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index fc16119da0ae8..fc5f2473b01ce 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -262,7 +262,7 @@ class CndVar {
   }
 
   LIBC_INLINE void broadcast(bool is_shared = false) {
-    notify(cpp::numeric_limits<size_t>::max(), is_shared);
+    notify(/*broadcast=*/true, is_shared);
   }
 };
 

>From c244018e782be7b81390c02a0fe4b9b6f8d8d351 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 23:02:05 -0400
Subject: [PATCH 08/16] adjust loop

---
 libc/src/__support/threads/CndVar.h | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index fc5f2473b01ce..96923381a68d1 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -23,6 +23,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 class CndVar {
+  LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
     Waiting = 0,
@@ -211,14 +212,16 @@ class CndVar {
 
     Futex sender_futex{0};
     auto wait_unregisteration_finish = [&]() {
-      constexpr size_t LIMIT = 100;
-      for (size_t i = 0; i < LIMIT; ++i) {
-        if (sender_futex.load(cpp::MemoryOrder::RELAXED) == 0)
-          break;
+      size_t spin = 0;
+      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
+        if (spin > SPIN_LIMIT) {
+          sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
+          spin = 0;
+          continue;
+        }
         sleep_briefly();
+        spin++;
       }
-      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED))
-        sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
     };
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;

>From bd9eceae73109e1fa43629ef43ecb8c74fa09845 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 23:10:38 -0400
Subject: [PATCH 09/16] adjust loop again

---
 libc/src/__support/threads/CndVar.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 96923381a68d1..6e238424101e5 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -24,6 +24,7 @@ namespace LIBC_NAMESPACE_DECL {
 
 class CndVar {
   LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
+  LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
     Waiting = 0,
@@ -78,8 +79,11 @@ class CndVar {
 
     LIBC_INLINE void confirm_cancellation() {
       Futex *sender = sender_futex.load();
-      if (sender && sender->fetch_sub(1) == 1)
-        sender->notify_one();
+      if (sender) {
+        FutexWordType res = sender->fetch_sub(CANCEL_STEP);
+        if (res <= CANCEL_STEP + 1 && (res & 1) != 0)
+          sender->notify_one();
+      }
     }
   };
 
@@ -215,7 +219,9 @@ class CndVar {
       size_t spin = 0;
       while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
         if (spin > SPIN_LIMIT) {
+          sender_futex.fetch_add(1);
           sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
+          sender_futex.fetch_sub(1);
           spin = 0;
           continue;
         }
@@ -238,7 +244,7 @@ class CndVar {
           break;
         uint8_t expected = Waiting;
         if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
-          sender_futex.fetch_add(1);
+          sender_futex.fetch_add(CANCEL_STEP);
           cursor->sender_futex.store(&sender_futex);
           continue;
         }

>From 16f56667954d8eac89186752e703e91b89a9a3bc Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Fri, 17 Apr 2026 23:12:50 -0400
Subject: [PATCH 10/16] adjust loop again

---
 libc/src/__support/threads/CndVar.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 6e238424101e5..9f00b380ccc2c 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -219,7 +219,7 @@ class CndVar {
       size_t spin = 0;
       while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
         if (spin > SPIN_LIMIT) {
-          sender_futex.fetch_add(1);
+          remaining = sender_futex.fetch_add(1) + 1;
           sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
           sender_futex.fetch_sub(1);
           spin = 0;

>From b9f46fdd1c918b02326980e1c3f8b86be3a9d831 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:11:31 -0400
Subject: [PATCH 11/16] change layout

---
 libc/src/__support/threads/CndVar.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 9f00b380ccc2c..1d606268b75ae 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -89,12 +89,12 @@ class CndVar {
 
   union {
     struct {
-      RawMutex queue_lock;
       WaiterHeader waiter_queue;
+      RawMutex queue_lock;
     };
     struct {
-      Futex shared_futex;
       cpp::Atomic<size_t> shared_waiters;
+      Futex shared_futex;
     };
   };
 
@@ -107,7 +107,7 @@ class CndVar {
 
   using Timeout = internal::AbsTimeout;
 
-  LIBC_INLINE constexpr CndVar() : queue_lock{}, waiter_queue{} {}
+  LIBC_INLINE constexpr CndVar() : waiter_queue{}, queue_lock{} {}
 
   LIBC_INLINE void reset() {
     queue_lock.reset();

>From 15fc483353dc3ebe499b7b79eeb12e411c7b2f60 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:37:06 -0400
Subject: [PATCH 12/16] separate private CndVar and shared version

---
 libc/include/llvm-libc-types/__barrier_type.h |   4 +-
 libc/src/__support/threads/CndVar.h           | 174 +++++++++++++-----
 libc/src/__support/threads/linux/barrier.cpp  |   7 +-
 libc/src/__support/threads/linux/barrier.h    |   4 +-
 libc/src/__support/threads/raw_mutex.h        |   2 +-
 libc/src/__support/threads/unix_mutex.h       |   2 +-
 libc/src/threads/linux/cnd_broadcast.cpp      |   4 +-
 libc/src/threads/linux/cnd_destroy.cpp        |   4 +-
 libc/src/threads/linux/cnd_init.cpp           |   4 +-
 libc/src/threads/linux/cnd_signal.cpp         |   4 +-
 libc/src/threads/linux/cnd_wait.cpp           |   8 +-
 11 files changed, 145 insertions(+), 72 deletions(-)

diff --git a/libc/include/llvm-libc-types/__barrier_type.h b/libc/include/llvm-libc-types/__barrier_type.h
index 5752f832f04b9..627f87767f20b 100644
--- a/libc/include/llvm-libc-types/__barrier_type.h
+++ b/libc/include/llvm-libc-types/__barrier_type.h
@@ -15,8 +15,8 @@ typedef struct __attribute__((aligned(8 /* alignof (Barrier) */))) {
   unsigned expected;
   unsigned waiting;
   bool blocking;
-  char entering[24 /* sizeof (CndVar) */];
-  char exiting[24 /* sizeof (CndVar) */];
+  char entering[32 /* sizeof (CndVar) */];
+  char exiting[32 /* sizeof (CndVar) */];
   char mutex[24 /* sizeof (Mutex) */];
 } __barrier_type;
 
diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 1d606268b75ae..8c3c673816b9a 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -11,6 +11,7 @@
 
 #include "hdr/stdint_proxy.h" // uint32_t
 #include "src/__support/CPP/mutex.h"
+#include "src/__support/CPP/new.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/threads/futex_utils.h" // Futex
 #include "src/__support/threads/mutex.h"       // Mutex
@@ -22,9 +23,16 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-class CndVar {
+enum class CndVarResult {
+  Success,
+  MutexError,
+  Timeout,
+};
+
+class PrivateCndVar {
   LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
   LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
+
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
     Waiting = 0,
@@ -87,27 +95,22 @@ class CndVar {
     }
   };
 
-  union {
-    struct {
-      WaiterHeader waiter_queue;
-      RawMutex queue_lock;
-    };
-    struct {
-      cpp::Atomic<size_t> shared_waiters;
-      Futex shared_futex;
-    };
-  };
+  /*
+  Layout:
 
-public:
-  enum class CndVarResult {
-    Success,
-    MutexError,
-    Timeout,
-  };
+  struct {
+    void * __wait_queue_prev;
+    void * __wait_queue_next;
+    __futex_word __futex;
+  }
+  */
+  WaiterHeader waiter_queue;
+  RawMutex queue_lock;
 
+public:
   using Timeout = internal::AbsTimeout;
 
-  LIBC_INLINE constexpr CndVar() : waiter_queue{}, queue_lock{} {}
+  LIBC_INLINE constexpr PrivateCndVar() : waiter_queue{}, queue_lock{} {}
 
   LIBC_INLINE void reset() {
     queue_lock.reset();
@@ -117,27 +120,12 @@ class CndVar {
 
   // TODO: register callback for pthread cancellation
   LIBC_INLINE CndVarResult wait(Mutex *mutex,
-                                cpp::optional<Timeout> timeout = cpp::nullopt,
-                                bool is_shared = false) {
+                                cpp::optional<Timeout> timeout = cpp::nullopt) {
 #if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
     if (timeout)
       ensure_monotonicity(*timeout);
 #endif
 
-    if (is_shared) {
-      shared_waiters.fetch_add(1);
-      FutexWordType old_val = shared_futex.load();
-      mutex->unlock();
-      ErrorOr<int> result =
-          shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
-      shared_waiters.fetch_sub(1);
-      MutexError mutex_result = mutex->lock();
-      if (!result.has_value() && result.error() == ETIMEDOUT)
-        return CndVarResult::Timeout;
-      return mutex_result == MutexError::NONE ? CndVarResult::Success
-                                              : CndVarResult::MutexError;
-    }
-
     CndWaiter waiter{};
     // Register the waiter to the queue.
     {
@@ -202,18 +190,7 @@ class CndVar {
   }
 
 private:
-  LIBC_INLINE void notify(bool broadcast, bool is_shared = false) {
-    if (is_shared) {
-      if (shared_waiters.load() == 0)
-        return;
-      shared_futex.fetch_add(1);
-      if (broadcast)
-        shared_futex.notify_all();
-      else
-        shared_futex.notify_one();
-      return;
-    }
-
+  LIBC_INLINE void notify(size_t limit) {
     Futex sender_futex{0};
     auto wait_unregisteration_finish = [&]() {
       size_t spin = 0;
@@ -231,7 +208,6 @@ class CndVar {
     };
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;
-    size_t limit = broadcast ? cpp::numeric_limits<size_t>::max() : 1;
     {
       cpp::lock_guard lock(queue_lock);
       waiter_queue.ensure_queue_initialization();
@@ -266,12 +242,108 @@ class CndVar {
   }
 
 public:
-  LIBC_INLINE void notify_one(bool is_shared = false) {
-    notify(/*broadcast=*/false, is_shared);
+  LIBC_INLINE void notify_one() { notify(1); }
+  LIBC_INLINE void broadcast() { notify(cpp::numeric_limits<size_t>::max()); }
+};
+
+class SharedCndVar {
+  /*
+  Layout:
+    struct {
+      cpp::Atomic<size_t> shared_waiters;
+      Futex shared_futex;
+    };
+  */
+  cpp::Atomic<size_t> shared_waiters;
+  Futex shared_futex;
+
+public:
+  using Timeout = internal::AbsTimeout;
+
+  LIBC_INLINE constexpr SharedCndVar() : shared_waiters(0), shared_futex{0} {}
+
+  LIBC_INLINE void reset() {
+    shared_waiters.store(0);
+    shared_futex.store(0);
+  }
+
+  // TODO: register callback for pthread cancellation
+  LIBC_INLINE CndVarResult wait(Mutex *mutex,
+                                cpp::optional<Timeout> timeout = cpp::nullopt) {
+#if LIBC_COPT_TIMEOUT_ENSURE_MONOTONICITY
+    if (timeout)
+      ensure_monotonicity(*timeout);
+#endif
+
+    shared_waiters.fetch_add(1);
+    FutexWordType old_val = shared_futex.load();
+    mutex->unlock();
+    ErrorOr<int> result =
+        shared_futex.wait(old_val, timeout, /*is_pshared=*/true);
+    shared_waiters.fetch_sub(1);
+    MutexError mutex_result = mutex->lock();
+    if (!result.has_value() && result.error() == ETIMEDOUT)
+      return CndVarResult::Timeout;
+    return mutex_result == MutexError::NONE ? CndVarResult::Success
+                                            : CndVarResult::MutexError;
   }
 
-  LIBC_INLINE void broadcast(bool is_shared = false) {
-    notify(/*broadcast=*/true, is_shared);
+private:
+  LIBC_INLINE void notify(bool broadcast) {
+    if (shared_waiters.load() == 0)
+      return;
+    shared_futex.fetch_add(1);
+    if (broadcast)
+      shared_futex.notify_all();
+    else
+      shared_futex.notify_one();
+    return;
+  }
+
+public:
+  LIBC_INLINE void notify_one() { notify(/*broadcast=*/false); }
+  LIBC_INLINE void broadcast() { notify(/*broadcast=*/true); }
+};
+
+class CndVar {
+  union {
+    PrivateCndVar private_cnd_var{};
+    SharedCndVar shared_cnd_var;
+  } storage;
+  bool is_shared;
+
+public:
+  using Timeout = internal::AbsTimeout;
+  LIBC_INLINE constexpr CndVar(bool is_shared) : is_shared(is_shared) {
+    if (is_shared)
+      new (&storage.shared_cnd_var) SharedCndVar();
+    else
+      new (&storage.private_cnd_var) PrivateCndVar();
+  }
+  LIBC_INLINE void reset() {
+    if (is_shared)
+      storage.shared_cnd_var.reset();
+    else
+      storage.private_cnd_var.reset();
+  }
+  LIBC_INLINE CndVarResult wait(Mutex *mutex,
+                                cpp::optional<Timeout> timeout = cpp::nullopt) {
+    if (is_shared)
+      return storage.shared_cnd_var.wait(mutex, timeout);
+    else
+      return storage.private_cnd_var.wait(mutex, timeout);
+  }
+  LIBC_INLINE void notify_one() {
+    if (is_shared)
+      storage.shared_cnd_var.notify_one();
+    else
+      storage.private_cnd_var.notify_one();
+  }
+  LIBC_INLINE void broadcast() {
+    if (is_shared)
+      storage.shared_cnd_var.broadcast();
+    else
+      storage.private_cnd_var.broadcast();
   }
 };
 
diff --git a/libc/src/__support/threads/linux/barrier.cpp b/libc/src/__support/threads/linux/barrier.cpp
index 175652a70fac2..80f25f8ac7a21 100644
--- a/libc/src/__support/threads/linux/barrier.cpp
+++ b/libc/src/__support/threads/linux/barrier.cpp
@@ -25,10 +25,11 @@ int Barrier::init(Barrier *b,
   b->waiting = 0;
   b->blocking = true;
 
-  new (&b->entering) CndVar();
-  new (&b->exiting) CndVar();
+  new (&b->entering) CndVar(attr ? attr->pshared : false);
+  new (&b->exiting) CndVar(attr ? attr->pshared : false);
 
-  auto mutex_err = Mutex::init(&b->m, false, false, false, false);
+  auto mutex_err = Mutex::init(&b->m, false, false, false,
+                               /*pshared=*/attr ? attr->pshared : false);
   if (mutex_err != MutexError::NONE)
     return EAGAIN;
 
diff --git a/libc/src/__support/threads/linux/barrier.h b/libc/src/__support/threads/linux/barrier.h
index a632aa45b2aa2..eb34e2e268e10 100644
--- a/libc/src/__support/threads/linux/barrier.h
+++ b/libc/src/__support/threads/linux/barrier.h
@@ -44,10 +44,10 @@ static_assert(alignof(Barrier) <= alignof(pthread_barrier_t),
               "The public pthread_barrier_t type has insufficient alignment "
               "for the internal barrier type.");
 
-static_assert(sizeof(CndVar) <= 24,
+static_assert(sizeof(CndVar) <= 32,
               "CndVar size exceeds the size in __barrier_type.h");
 
-static_assert(sizeof(Mutex) <= 24,
+static_assert(sizeof(Mutex) <= 32,
               "Mutex size exceeds the size in __barrier_type.h");
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/threads/raw_mutex.h b/libc/src/__support/threads/raw_mutex.h
index 12dec56084e0e..7495374513a43 100644
--- a/libc/src/__support/threads/raw_mutex.h
+++ b/libc/src/__support/threads/raw_mutex.h
@@ -45,7 +45,7 @@ class RawMutex {
   LIBC_INLINE_VAR static constexpr FutexWordType UNLOCKED = 0b00;
   LIBC_INLINE_VAR static constexpr FutexWordType LOCKED = 0b01;
   LIBC_INLINE_VAR static constexpr FutexWordType IN_CONTENTION = 0b10;
-  friend class CndVar;
+  friend class PrivateCndVar;
 
 private:
   LIBC_INLINE FutexWordType spin(unsigned spin_count) {
diff --git a/libc/src/__support/threads/unix_mutex.h b/libc/src/__support/threads/unix_mutex.h
index ebe5de2c6e7a7..501a330349ca5 100644
--- a/libc/src/__support/threads/unix_mutex.h
+++ b/libc/src/__support/threads/unix_mutex.h
@@ -30,7 +30,7 @@ class Mutex final : private RawMutex {
   pid_t owner;
   unsigned long long lock_count;
 
-  friend class CndVar;
+  friend class PrivateCndVar;
 
   LIBC_INLINE bool can_be_requeued() const {
     return !this->pshared && !this->robust;
diff --git a/libc/src/threads/linux/cnd_broadcast.cpp b/libc/src/threads/linux/cnd_broadcast.cpp
index 5c5187461bbed..280b666041db2 100644
--- a/libc/src/threads/linux/cnd_broadcast.cpp
+++ b/libc/src/threads/linux/cnd_broadcast.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_broadcast, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   cndvar->broadcast();
   return thrd_success;
 }
diff --git a/libc/src/threads/linux/cnd_destroy.cpp b/libc/src/threads/linux/cnd_destroy.cpp
index e51728615da9c..298250d69a235 100644
--- a/libc/src/threads/linux/cnd_destroy.cpp
+++ b/libc/src/threads/linux/cnd_destroy.cpp
@@ -15,10 +15,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(void, cnd_destroy, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   cndvar->reset();
 }
 
diff --git a/libc/src/threads/linux/cnd_init.cpp b/libc/src/threads/linux/cnd_init.cpp
index a27c0a92a67db..33fa85a00cc26 100644
--- a/libc/src/threads/linux/cnd_init.cpp
+++ b/libc/src/threads/linux/cnd_init.cpp
@@ -16,10 +16,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_init, (cnd_t * cond)) {
-  new (cond) CndVar();
+  new (cond) PrivateCndVar();
   return thrd_success;
 }
 
diff --git a/libc/src/threads/linux/cnd_signal.cpp b/libc/src/threads/linux/cnd_signal.cpp
index 0d218405d3ac5..53cd0bb445f15 100644
--- a/libc/src/threads/linux/cnd_signal.cpp
+++ b/libc/src/threads/linux/cnd_signal.cpp
@@ -15,10 +15,10 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_signal, (cnd_t * cond)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   cndvar->notify_one();
   return thrd_success;
 }
diff --git a/libc/src/threads/linux/cnd_wait.cpp b/libc/src/threads/linux/cnd_wait.cpp
index da7d549af3ca2..2c2e9ad11030c 100644
--- a/libc/src/threads/linux/cnd_wait.cpp
+++ b/libc/src/threads/linux/cnd_wait.cpp
@@ -16,13 +16,13 @@
 
 namespace LIBC_NAMESPACE_DECL {
 
-static_assert(sizeof(CndVar) == sizeof(cnd_t));
+static_assert(sizeof(PrivateCndVar) == sizeof(cnd_t));
 
 LLVM_LIBC_FUNCTION(int, cnd_wait, (cnd_t * cond, mtx_t *mtx)) {
-  CndVar *cndvar = reinterpret_cast<CndVar *>(cond);
+  PrivateCndVar *cndvar = reinterpret_cast<PrivateCndVar *>(cond);
   Mutex *mutex = reinterpret_cast<Mutex *>(mtx);
-  return cndvar->wait(mutex) == CndVar::CndVarResult::Success ? thrd_success
-                                                              : thrd_error;
+  return cndvar->wait(mutex) == CndVarResult::Success ? thrd_success
+                                                      : thrd_error;
 }
 
 } // namespace LIBC_NAMESPACE_DECL

>From c9e71ada34adaae8bf81ebf0a2e493614bba076e Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:49:49 -0400
Subject: [PATCH 13/16] separate cancellation barrier into its own class

---
 libc/src/__support/threads/CndVar.h | 72 ++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 8c3c673816b9a..675095ef7fe7a 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -30,8 +30,44 @@ enum class CndVarResult {
 };
 
 class PrivateCndVar {
-  LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
-  LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
+  class CancellationBarrier {
+    LIBC_INLINE_VAR static constexpr size_t SPIN_LIMIT = 100;
+    LIBC_INLINE_VAR static constexpr size_t CANCEL_STEP = 2;
+    LIBC_INLINE_VAR static constexpr size_t SLEEPING_BIT = 1;
+
+    // LSB indicates whether the waiter is in sleeping state.
+    Futex futex;
+
+  public:
+    LIBC_INLINE CancellationBarrier() : futex(0) {}
+    // Add one more notification request.
+    LIBC_INLINE void add_one() {
+      futex.fetch_add(CANCEL_STEP, cpp::MemoryOrder::RELAXED);
+    }
+    // Send notification to one waiter.
+    LIBC_INLINE void notify() {
+      FutexWordType res = futex.fetch_sub(CANCEL_STEP);
+      // Only issue the syscall if the waiter is asleep and we are the last one
+      if (res <= (CANCEL_STEP | SLEEPING_BIT) && (res & SLEEPING_BIT) != 0)
+        futex.notify_one();
+    }
+    LIBC_INLINE void wait() {
+      size_t spin = 0;
+      while (auto remaining = futex.load(cpp::MemoryOrder::RELAXED)) {
+        if (spin > SPIN_LIMIT) {
+          // Set LSB to 1 to indicate that the waiter is entering sleeping
+          // state.
+          remaining = futex.fetch_add(1) + 1;
+          futex.wait(remaining, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
+          futex.fetch_sub(1);
+          spin = 0;
+          continue;
+        }
+        sleep_briefly();
+        spin++;
+      }
+    }
+  };
 
   enum WaiterState : uint8_t {
     // Initial state after entering the wait queue.
@@ -75,7 +111,7 @@ class PrivateCndVar {
     }
   };
   struct CndWaiter : WaiterHeader {
-    cpp::Atomic<Futex *> sender_futex;
+    cpp::Atomic<CancellationBarrier *> sender_futex;
     RawMutex barrier;
     cpp::Atomic<uint8_t> state;
 
@@ -86,12 +122,8 @@ class PrivateCndVar {
     }
 
     LIBC_INLINE void confirm_cancellation() {
-      Futex *sender = sender_futex.load();
-      if (sender) {
-        FutexWordType res = sender->fetch_sub(CANCEL_STEP);
-        if (res <= CANCEL_STEP + 1 && (res & 1) != 0)
-          sender->notify_one();
-      }
+      if (CancellationBarrier *sender = sender_futex.load())
+        sender->notify();
     }
   };
 
@@ -191,21 +223,7 @@ class PrivateCndVar {
 
 private:
   LIBC_INLINE void notify(size_t limit) {
-    Futex sender_futex{0};
-    auto wait_unregisteration_finish = [&]() {
-      size_t spin = 0;
-      while (auto remaining = sender_futex.load(cpp::MemoryOrder::RELAXED)) {
-        if (spin > SPIN_LIMIT) {
-          remaining = sender_futex.fetch_add(1) + 1;
-          sender_futex.wait(remaining, cpp::nullopt, /*is_pshared=*/false);
-          sender_futex.fetch_sub(1);
-          spin = 0;
-          continue;
-        }
-        sleep_briefly();
-        spin++;
-      }
-    };
+    CancellationBarrier cancellation_barrier{};
     CndWaiter *head = nullptr;
     CndWaiter *cursor = nullptr;
     {
@@ -220,8 +238,8 @@ class PrivateCndVar {
           break;
         uint8_t expected = Waiting;
         if (!cursor->state.compare_exchange_strong(expected, Signalled)) {
-          sender_futex.fetch_add(CANCEL_STEP);
-          cursor->sender_futex.store(&sender_futex);
+          cancellation_barrier.add_one();
+          cursor->sender_futex.store(&cancellation_barrier);
           continue;
         }
         if (!head)
@@ -236,7 +254,7 @@ class PrivateCndVar {
       removed_tail->next = removed_head;
       removed_head->prev = removed_tail;
     }
-    wait_unregisteration_finish();
+    cancellation_barrier.wait();
     if (head)
       head->barrier.unlock();
   }

>From 87b69bb2257fbca3aa14971fe4902d6e5bade59a Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 10:59:40 -0400
Subject: [PATCH 14/16] fix cancellation wake up condition

---
 libc/src/__support/threads/CndVar.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 675095ef7fe7a..43c2d9d0dc56f 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -191,7 +191,9 @@ class PrivateCndVar {
     }
 
     MutexError mutex_result = mutex->lock();
-    if (waiter.next != &waiter) {
+    // If we did consume the signal (old_state != Waiting) and there
+    // are other in the queue after us, we need to wake the next waiter.
+    if (old_state != Waiting && waiter.next != &waiter) {
       auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
       WaiterHeader::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();

>From 81cdfe81bf28eebde1193bf18e4a1ed7d9d81f49 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 13:09:00 -0400
Subject: [PATCH 15/16] add stress test and fix bugs

---
 libc/src/__support/threads/CndVar.h           |  15 +-
 .../src/__support/threads/CMakeLists.txt      |  16 +++
 .../src/__support/threads/cndvar_test.cpp     | 129 ++++++++++++++++++
 3 files changed, 154 insertions(+), 6 deletions(-)
 create mode 100644 libc/test/integration/src/__support/threads/cndvar_test.cpp

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index 43c2d9d0dc56f..b596f77f29aa8 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -191,9 +191,13 @@ class PrivateCndVar {
     }
 
     MutexError mutex_result = mutex->lock();
-    // If we did consume the signal (old_state != Waiting) and there
-    // are other in the queue after us, we need to wake the next waiter.
-    if (old_state != Waiting && waiter.next != &waiter) {
+    // We need to establish contention after locking; otherwise a
+    // requeued thread may clear the contention bit even though
+    // there are still waiters behind it.
+    mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
+    // If others are queued after us, we need to wake the next waiter.
+    // If we cancelled, we should naturally have waiter.next == &waiter.
+    if (waiter.next != &waiter) {
       auto *next_waiter = static_cast<CndWaiter *>(waiter.next);
       WaiterHeader::remove(&waiter);
       auto &next_barrier_futex = next_waiter->barrier.get_raw_futex();
@@ -202,6 +206,8 @@ class PrivateCndVar {
       // but with requeue instead of wake if it is possible.
       FutexWordType prev = next_barrier_futex.exchange(
           RawMutex::UNLOCKED, cpp::MemoryOrder::RELEASE);
+      // If the next waiter in the queue sleeps, it will establish contention
+      // on its own barrier.
       if (prev == RawMutex::IN_CONTENTION) {
         if (mutex->can_be_requeued()) {
           ErrorOr<int> res = next_barrier_futex.requeue_to(
@@ -210,9 +216,6 @@ class PrivateCndVar {
               /*is_shared=*/false);
           if (!res.has_value()) // cannot requeue on this system
             next_waiter->barrier.wake(/*is_shared=*/false);
-          else if (res.value() >
-                   0) // requeue succeeded, the lock needs to be waked up
-            mutex->get_raw_futex().store(RawMutex::IN_CONTENTION);
         } else { // cannot requeue under special lock mode
           next_waiter->barrier.wake(/*is_shared=*/false);
         }
diff --git a/libc/test/integration/src/__support/threads/CMakeLists.txt b/libc/test/integration/src/__support/threads/CMakeLists.txt
index 3ce9742cc2712..a89a29d053b5f 100644
--- a/libc/test/integration/src/__support/threads/CMakeLists.txt
+++ b/libc/test/integration/src/__support/threads/CMakeLists.txt
@@ -59,3 +59,19 @@ add_integration_test(
     libc.src.pthread.pthread_create
     libc.src.pthread.pthread_join
 )
+
+add_integration_test(
+  cndvar_test
+  SUITE
+    libc-support-threads-integration-tests
+  SRCS
+    cndvar_test.cpp
+  DEPENDS
+    libc.hdr.time_macros
+    libc.src.__support.threads.CndVar
+    libc.src.__support.threads.mutex
+    libc.src.__support.threads.sleep
+    libc.src.__support.time.clock_gettime
+    libc.src.threads.thrd_create
+    libc.src.threads.thrd_join
+)
diff --git a/libc/test/integration/src/__support/threads/cndvar_test.cpp b/libc/test/integration/src/__support/threads/cndvar_test.cpp
new file mode 100644
index 0000000000000..a1ee8ecc1dfe7
--- /dev/null
+++ b/libc/test/integration/src/__support/threads/cndvar_test.cpp
@@ -0,0 +1,129 @@
+//===-- Integration test for PrivateCndVar with C11 threads --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/time_macros.h"
+#include "src/__support/CPP/expected.h"
+#include "src/__support/threads/CndVar.h"
+#include "src/__support/threads/mutex.h"
+#include "src/__support/threads/mutex_common.h"
+#include "src/__support/threads/sleep.h"
+#include "src/__support/time/clock_gettime.h"
+#include "src/threads/thrd_create.h"
+#include "src/threads/thrd_join.h"
+#include "test/IntegrationTest/test.h"
+
+namespace {
+
+constexpr int THREAD_COUNT = 16;
+constexpr int NUM_ITERATIONS = 250;
+
+struct QueueState {
+  LIBC_NAMESPACE::PrivateCndVar cnd{};
+  LIBC_NAMESPACE::Mutex m{false, false, false, false};
+  size_t consumed = 0;
+  size_t produced = 0;
+  LIBC_NAMESPACE::cpp::Atomic<size_t> exited_consumers = 0;
+  bool use_broadcast;
+  bool allow_timeout;
+};
+
+void stress_test(bool use_broadcast, bool allow_timeout) {
+  for (int iter = 0; iter < NUM_ITERATIONS; ++iter) {
+    QueueState state{};
+    state.use_broadcast = use_broadcast;
+    state.allow_timeout = allow_timeout;
+    constexpr size_t PRODUCER = THREAD_COUNT / 2;
+    constexpr size_t CONSUMER = THREAD_COUNT - PRODUCER;
+    constexpr size_t ITEMS_PER_PRODUCER = 200;
+    thrd_t producer_threads[PRODUCER];
+    thrd_t consumer_threads[CONSUMER];
+    using TimeoutOpt =
+        LIBC_NAMESPACE::cpp::optional<LIBC_NAMESPACE::PrivateCndVar::Timeout>;
+    for (size_t i = 0; i < CONSUMER; ++i)
+      ASSERT_EQ(
+          LIBC_NAMESPACE::thrd_create(
+              &consumer_threads[i],
+              [](void *arg) {
+                auto *state = static_cast<QueueState *>(arg);
+                state->m.lock();
+                while (state->consumed != PRODUCER * ITEMS_PER_PRODUCER) {
+                  TimeoutOpt timeout = LIBC_NAMESPACE::cpp::nullopt;
+                  if (state->allow_timeout) {
+                    timespec now{};
+                    LIBC_NAMESPACE::internal::clock_gettime(CLOCK_MONOTONIC,
+                                                            &now);
+                    size_t sleep_ns = 1000;
+                    now.tv_nsec += sleep_ns;
+                    if (now.tv_nsec >= 1'000'000'000) {
+                      now.tv_sec++;
+                      now.tv_nsec -= 1'000'000'000;
+                    }
+                    timeout = TimeoutOpt(
+                        LIBC_NAMESPACE::PrivateCndVar::Timeout::from_timespec(
+                            now,
+                            /*realtime=*/false)
+                            .value());
+                  }
+                  ASSERT_NE(state->cnd.wait(&state->m, timeout),
+                            LIBC_NAMESPACE::CndVarResult::MutexError);
+                  if (state->produced == 0)
+                    continue;
+                  state->produced--;
+                  state->consumed++;
+                }
+                state->m.unlock();
+                state->exited_consumers.fetch_add(1);
+                return 0;
+              },
+              &state),
+          int(thrd_success));
+    for (size_t i = 0; i < PRODUCER; ++i)
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_create(
+                    &producer_threads[i],
+                    [](void *arg) {
+                      auto *state = static_cast<QueueState *>(arg);
+                      for (size_t j = 0; j < ITEMS_PER_PRODUCER; ++j) {
+                        state->m.lock();
+                        state->produced++;
+                        if (state->use_broadcast)
+                          state->cnd.broadcast();
+                        else
+                          state->cnd.notify_one();
+                        state->m.unlock();
+                      }
+                      return 0;
+                    },
+                    &state),
+                int(thrd_success));
+
+    // join producers
+    for (size_t i = 0; i < PRODUCER; ++i)
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_join(producer_threads[i], nullptr),
+                int(thrd_success));
+    // keep signalling until all consumers have consumed all items
+    while (state.exited_consumers != CONSUMER) {
+      if (state.use_broadcast)
+        state.cnd.broadcast();
+      else
+        state.cnd.notify_one();
+    }
+    // join consumers
+    for (size_t i = 0; i < CONSUMER; ++i)
+      ASSERT_EQ(LIBC_NAMESPACE::thrd_join(consumer_threads[i], nullptr),
+                int(thrd_success));
+  }
+}
+} // namespace
+
+TEST_MAIN() {
+  stress_test(false, false);
+  stress_test(true, false);
+  stress_test(false, true);
+  stress_test(true, true);
+  return 0;
+}

>From 8c79275891efa2dfd19d2fa3959074443c5262f8 Mon Sep 17 00:00:00 2001
From: Schrodinger ZHU Yifan <i at zhuyi.fan>
Date: Sat, 18 Apr 2026 14:24:46 -0400
Subject: [PATCH 16/16] fix wrong barrier

---
 libc/src/__support/threads/CndVar.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/threads/CndVar.h b/libc/src/__support/threads/CndVar.h
index b596f77f29aa8..3136557165fa9 100644
--- a/libc/src/__support/threads/CndVar.h
+++ b/libc/src/__support/threads/CndVar.h
@@ -54,14 +54,14 @@ class PrivateCndVar {
     LIBC_INLINE void wait() {
       size_t spin = 0;
       while (auto remaining = futex.load(cpp::MemoryOrder::RELAXED)) {
-        if (spin > SPIN_LIMIT) {
-          // Set LSB to 1 to indicate that the waiter is entering sleeping
-          // state.
-          remaining = futex.fetch_add(1) + 1;
-          futex.wait(remaining, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
+        // Set LSB to 1 to indicate that the waiter is entering sleeping
+        // state.
+        FutexWordType new_val = remaining | SLEEPING_BIT;
+        if (spin > SPIN_LIMIT &&
+            futex.compare_exchange_strong(remaining, new_val)) {
+          futex.wait(new_val, /*timeout=*/cpp::nullopt, /*is_pshared=*/false);
           futex.fetch_sub(1);
           spin = 0;
-          continue;
         }
         sleep_briefly();
         spin++;



More information about the libc-commits mailing list