[compiler-rt] [tsan] Don't treat uncontended pthread_once as a potentially blocking region (PR #132477)

David Benjamin via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 21 14:48:22 PDT 2025


https://github.com/davidben created https://github.com/llvm/llvm-project/pull/132477

guard_acquire is a helper function used to implement TSan's __cxa_guard_acquire and pthread_once interceptors. https://reviews.llvm.org/D54664 introduced optional hooks to support cooperative multi-threading. It worked by marking the entire guard_acquire call as a potentially blocking region.

In principle, only the contended case needs to be a potentially blocking region. This didn't matter for __cxa_guard_acquire because the compiler emits an inline fast path before calling __cxa_guard_acquire. That is, once we call __cxa_guard_acquire at all, we know we're in the contended case.

https://reviews.llvm.org/D107359 then unified the __cxa_guard_acquire and pthread_once interceptors, adding the hooks to pthread_once. However, unlike __cxa_guard_acquire, pthread_once callers are not expected to have an inline fast path. The fast path is inside the function.

As a result, TSan unnecessarily calls into the cooperative multi-threading engine on every pthread_once call, despite applications generally expecting pthread_once to be fast after initialization. Fix this by deferring the hooks to the contended case inside guard_acquire.

>From b787b383149af2d33bf0519774aa93f577fb2633 Mon Sep 17 00:00:00 2001
From: David Benjamin <davidben at google.com>
Date: Fri, 21 Mar 2025 17:45:32 -0400
Subject: [PATCH] [tsan] Don't treat uncontended pthread_once as a potentially
 blocking region

guard_acquire is a helper function used to implement TSan's
__cxa_guard_acquire and pthread_once interceptors.
https://reviews.llvm.org/D54664 introduced optional hooks to support
cooperative multi-threading. It worked by marking the entire
guard_acquire call as a potentially blocking region.

In principle, only the contended case needs to be a potentially blocking
region. This didn't matter for __cxa_guard_acquire because the compiler
emits an inline fast path before calling __cxa_guard_acquire. That is,
once we call __cxa_guard_acquire at all, we know we're in the contended
case.

https://reviews.llvm.org/D107359 then unified the __cxa_guard_acquire
and pthread_once interceptors, adding the hooks to pthread_once.
However, unlike __cxa_guard_acquire, pthread_once callers are not
expected to have an inline fast path. The fast path is inside the
function.

As a result, TSan unnecessarily calls into the cooperative
multi-threading engine on every pthread_once call, despite applications
generally expecting pthread_once to be fast after initialization. Fix
this by deferring the hooks to the contended case inside guard_acquire.
---
 .../lib/tsan/rtl/tsan_interceptors_posix.cpp  | 14 ++--
 compiler-rt/test/tsan/cxa_guard_acquire.cpp   | 76 +++++++++++++++++--
 2 files changed, 79 insertions(+), 11 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index f671c8167a3c4..6d79b80593379 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -892,10 +892,9 @@ constexpr u32 kGuardWaiter = 1 << 17;
 
 static int guard_acquire(ThreadState *thr, uptr pc, atomic_uint32_t *g,
                          bool blocking_hooks = true) {
-  if (blocking_hooks)
-    OnPotentiallyBlockingRegionBegin();
-  auto on_exit = at_scope_exit([blocking_hooks] {
-    if (blocking_hooks)
+  bool in_potentially_blocking_region = false;
+  auto on_exit = at_scope_exit([&] {
+    if (in_potentially_blocking_region)
       OnPotentiallyBlockingRegionEnd();
   });
 
@@ -912,8 +911,13 @@ static int guard_acquire(ThreadState *thr, uptr pc, atomic_uint32_t *g,
     } else {
       if ((cmp & kGuardWaiter) ||
           atomic_compare_exchange_strong(g, &cmp, cmp | kGuardWaiter,
-                                         memory_order_relaxed))
+                                         memory_order_relaxed)) {
+        if (blocking_hooks && !in_potentially_blocking_region) {
+          in_potentially_blocking_region = true;
+          OnPotentiallyBlockingRegionBegin();
+        }
         FutexWait(g, cmp | kGuardWaiter);
+      }
     }
   }
 }
diff --git a/compiler-rt/test/tsan/cxa_guard_acquire.cpp b/compiler-rt/test/tsan/cxa_guard_acquire.cpp
index 100a40b281410..fc407259e8968 100644
--- a/compiler-rt/test/tsan/cxa_guard_acquire.cpp
+++ b/compiler-rt/test/tsan/cxa_guard_acquire.cpp
@@ -1,31 +1,95 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
 
+#include "test.h"
+#include <assert.h>
 #include <sanitizer/tsan_interface.h>
 #include <stdio.h>
 
+// We only enter a potentially blocking region on thread contention. To reliably
+// trigger this, we force the initialization function to block until another
+// thread has entered the potentially blocking region.
+
+static bool init_done = false;
+
 namespace __tsan {
 
 #if (__APPLE__)
 __attribute__((weak))
 #endif
 void OnPotentiallyBlockingRegionBegin() {
-  printf("Enter __cxa_guard_acquire\n");
+  assert(!init_done);
+  printf("Enter potentially blocking region\n");
+  // Signal the other thread to finish initialization.
+  barrier_wait(&barrier);
 }
 
 #if (__APPLE__)
 __attribute__((weak))
 #endif
-void OnPotentiallyBlockingRegionEnd() { printf("Exit __cxa_guard_acquire\n"); }
+void OnPotentiallyBlockingRegionEnd() {
+  printf("Exit potentially blocking region\n");
+}
 
 } // namespace __tsan
 
+struct LazyInit {
+  LazyInit() {
+    assert(!init_done);
+    printf("Enter constructor\n");
+    // Wait for the other thread to get to the blocking region.
+    barrier_wait(&barrier);
+    printf("Exit constructor\n");
+  }
+};
+
+const LazyInit &get_lazy_init() {
+  static const LazyInit lazy_init;
+  return lazy_init;
+}
+
+void *thread(void *arg) {
+  get_lazy_init();
+  return nullptr;
+}
+
+struct LazyInit2 {
+  LazyInit2() { printf("Enter constructor 2\n"); }
+};
+
+const LazyInit2 &get_lazy_init2() {
+  static const LazyInit2 lazy_init2;
+  return lazy_init2;
+}
+
 int main(int argc, char **argv) {
   // CHECK: Enter main
   printf("Enter main\n");
-  // CHECK-NEXT: Enter __cxa_guard_acquire
-  // CHECK-NEXT: Exit __cxa_guard_acquire
-  static int s = argc;
-  (void)s;
+
+  // If initialization is contended, the blocked thread should enter a
+  // potentially blocking region.
+  //
+  // CHECK-NEXT: Enter constructor
+  // CHECK-NEXT: Enter potentially blocking region
+  // CHECK-NEXT: Exit constructor
+  // CHECK-NEXT: Exit potentially blocking region
+  barrier_init(&barrier, 2);
+  pthread_t th1, th2;
+  pthread_create(&th1, nullptr, thread, nullptr);
+  pthread_create(&th2, nullptr, thread, nullptr);
+  pthread_join(th1, nullptr);
+  pthread_join(th2, nullptr);
+
+  // Now that the value has been initialized, subsequent calls should not enter
+  // a potentially blocking region.
+  init_done = true;
+  get_lazy_init();
+
+  // If uncontended, there is no potentially blocking region.
+  //
+  // CHECK-NEXT: Enter constructor 2
+  get_lazy_init2();
+  get_lazy_init2();
+
   // CHECK-NEXT: Exit main
   printf("Exit main\n");
   return 0;



More information about the llvm-commits mailing list