[compiler-rt] e1021dd - tsan: refactor fork handling

Dmitry Vyukov via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 27 13:37:41 PDT 2021


Author: Dmitry Vyukov
Date: 2021-04-27T22:37:27+02:00
New Revision: e1021dd1fdfebff77cfb205892ada6b6a900865f

URL: https://github.com/llvm/llvm-project/commit/e1021dd1fdfebff77cfb205892ada6b6a900865f
DIFF: https://github.com/llvm/llvm-project/commit/e1021dd1fdfebff77cfb205892ada6b6a900865f.diff

LOG: tsan: refactor fork handling

Commit efd254b6362 ("tsan: fix deadlock in pthread_atfork callbacks")
fixed another deadlock related to atfork handling.
But builders with DCHECKs enabled reported failures of
pthread_atfork_deadlock2.c and pthread_atfork_deadlock3.c tests
related to the fact that we hold runtime locks on interceptor exit:
https://lab.llvm.org/buildbot/#/builders/70/builds/6727
This issue is somewhat inherent to the current approach,
we indeed execute user code (atfork callbacks) with runtime lock held.

Refactor fork handling to not run user code (atfork callbacks)
with runtime locks held. This change does this by installing
own atfork callbacks during runtime initialization.
Atfork callbacks run in LIFO order, so the expectation is that
our callbacks run last, right before the actual fork.
This way we lock runtime mutexes around fork, but not around
user callbacks.

Extend tests to also install after fork callbacks just to cover
more scenarios. Some tests also started reporting real races
that we previously suppressed.

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D101385

Added: 
    compiler-rt/test/tsan/pthread_atfork_deadlock3.c

Modified: 
    compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
    compiler-rt/lib/tsan/rtl/tsan_mman.cpp
    compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
    compiler-rt/lib/tsan/rtl/tsan_rtl.h
    compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
    compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
    compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
    compiler-rt/test/tsan/pthread_atfork_deadlock.c
    compiler-rt/test/tsan/pthread_atfork_deadlock2.c

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 3db470b5b963f..ab9ee3d97e19a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -81,6 +81,8 @@ extern "C" int pthread_attr_init(void *attr);
 extern "C" int pthread_attr_destroy(void *attr);
 DECLARE_REAL(int, pthread_attr_getdetachstate, void *, void *)
 extern "C" int pthread_attr_setstacksize(void *attr, uptr stacksize);
+extern "C" int pthread_atfork(void (*prepare)(void), void (*parent)(void),
+                              void (*child)(void));
 extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
 extern "C" int pthread_setspecific(unsigned key, const void *v);
 DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
@@ -1976,7 +1978,8 @@ static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
   // because in async signal processing case (when handler is called directly
   // from rtl_generic_sighandler) we have not yet received the reraised
   // signal; and it looks too fragile to intercept all ways to reraise a signal.
-  if (flags()->report_bugs && !sync && sig != SIGTERM && errno != 99) {
+  if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM &&
+      errno != 99) {
     VarSizeStackTrace stack;
     // StackTrace::GetNestInstructionPc(pc) is used because return address is
     // expected, OutputReport() will undo this.
@@ -2146,26 +2149,32 @@ TSAN_INTERCEPTOR(int, fork, int fake) {
   if (in_symbolizer())
     return REAL(fork)(fake);
   SCOPED_INTERCEPTOR_RAW(fork, fake);
+  return REAL(fork)(fake);
+}
+
+void atfork_prepare() {
+  if (in_symbolizer())
+    return;
+  ThreadState *thr = cur_thread();
+  const uptr pc = StackTrace::GetCurrentPc();
   ForkBefore(thr, pc);
-  int pid;
-  {
-    // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
-    // we'll assert in CheckNoLocks() unless we ignore interceptors.
-    ScopedIgnoreInterceptors ignore;
-    pid = REAL(fork)(fake);
-  }
-  if (pid == 0) {
-    // child
-    ForkChildAfter(thr, pc);
-    FdOnFork(thr, pc);
-  } else if (pid > 0) {
-    // parent
-    ForkParentAfter(thr, pc);
-  } else {
-    // error
-    ForkParentAfter(thr, pc);
-  }
-  return pid;
+}
+
+void atfork_parent() {
+  if (in_symbolizer())
+    return;
+  ThreadState *thr = cur_thread();
+  const uptr pc = StackTrace::GetCurrentPc();
+  ForkParentAfter(thr, pc);
+}
+
+void atfork_child() {
+  if (in_symbolizer())
+    return;
+  ThreadState *thr = cur_thread();
+  const uptr pc = StackTrace::GetCurrentPc();
+  ForkChildAfter(thr, pc);
+  FdOnFork(thr, pc);
 }
 
 TSAN_INTERCEPTOR(int, vfork, int fake) {
@@ -2840,6 +2849,10 @@ void InitializeInterceptors() {
     Printf("ThreadSanitizer: failed to setup atexit callback\n");
     Die();
   }
+  if (pthread_atfork(atfork_prepare, atfork_parent, atfork_child)) {
+    Printf("ThreadSanitizer: failed to setup atfork callbacks\n");
+    Die();
+  }
 
 #if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD
   if (pthread_key_create(&interceptor_ctx()->finalize_key, &thread_finalize)) {

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
index 743e67bf2f7d9..45a39f0f8ec9a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp
@@ -145,7 +145,7 @@ void AllocatorPrintStats() {
 
 static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
   if (atomic_load_relaxed(&thr->in_signal_handler) == 0 ||
-      !flags()->report_signal_unsafe)
+      !ShouldReport(thr, ReportTypeSignalUnsafe))
     return;
   VarSizeStackTrace stack;
   ObtainCurrentStack(thr, pc, &stack);

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index ed6cc83450d90..f6672e9310264 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -519,23 +519,27 @@ int Finalize(ThreadState *thr) {
 void ForkBefore(ThreadState *thr, uptr pc) {
   ctx->thread_registry->Lock();
   ctx->report_mtx.Lock();
-  // Ignore memory accesses in the pthread_atfork callbacks.
-  // If any of them triggers a data race we will deadlock
-  // on the report_mtx.
-  // We could ignore interceptors and sync operations as well,
+  // Suppress all reports in the pthread_atfork callbacks.
+  // Reports will deadlock on the report_mtx.
+  // We could ignore sync operations as well,
   // but so far it's unclear if it will do more good or harm.
   // Unnecessarily ignoring things can lead to false positives later.
-  ThreadIgnoreBegin(thr, pc);
+  thr->suppress_reports++;
+  // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
+  // we'll assert in CheckNoLocks() unless we ignore interceptors.
+  thr->ignore_interceptors++;
 }
 
 void ForkParentAfter(ThreadState *thr, uptr pc) {
-  ThreadIgnoreEnd(thr, pc);  // Begin is in ForkBefore.
+  thr->suppress_reports--;  // Enabled in ForkBefore.
+  thr->ignore_interceptors--;
   ctx->report_mtx.Unlock();
   ctx->thread_registry->Unlock();
 }
 
 void ForkChildAfter(ThreadState *thr, uptr pc) {
-  ThreadIgnoreEnd(thr, pc);  // Begin is in ForkBefore.
+  thr->suppress_reports--;  // Enabled in ForkBefore.
+  thr->ignore_interceptors--;
   ctx->report_mtx.Unlock();
   ctx->thread_registry->Unlock();
 

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 04d474e044e15..fef8182f6fe87 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -624,6 +624,7 @@ class ScopedReport : public ScopedReportBase {
   ScopedErrorReportLock lock_;
 };
 
+bool ShouldReport(ThreadState *thr, ReportType typ);
 ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
 void RestoreStack(int tid, const u64 epoch, VarSizeStackTrace *stk,
                   MutexSet *mset, uptr *tag = nullptr);

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
index 27897f0592b0e..6c4bf58c07769 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -51,6 +51,8 @@ static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
   // or false positives (e.g. unlock in a 
diff erent thread).
   if (SANITIZER_GO)
     return;
+  if (!ShouldReport(thr, typ))
+    return;
   ThreadRegistryLock l(ctx->thread_registry);
   ScopedReport rep(typ);
   rep.AddMutex(mid);
@@ -107,7 +109,7 @@ void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
   if (!unlock_locked)
     s->Reset(thr->proc());  // must not reset it before the report is printed
   s->mtx.Unlock();
-  if (unlock_locked) {
+  if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) {
     ThreadRegistryLock l(ctx->thread_registry);
     ScopedReport rep(ReportTypeMutexDestroyLocked);
     rep.AddMutex(mid);
@@ -534,7 +536,7 @@ void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
 }
 
 void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
-  if (r == 0)
+  if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
     return;
   ThreadRegistryLock l(ctx->thread_registry);
   ScopedReport rep(ReportTypeDeadlock);

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index 208d0df44df7b..cedf1a3ca7c9c 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -142,6 +142,34 @@ static ReportStack *SymbolizeStack(StackTrace trace) {
   return stack;
 }
 
+bool ShouldReport(ThreadState *thr, ReportType typ) {
+  // We set thr->suppress_reports in the fork context.
+  // Taking any locking in the fork context can lead to deadlocks.
+  // If any locks are already taken, it's too late to do this check.
+  CheckNoLocks(thr);
+  // For the same reason check we didn't lock thread_registry yet.
+  if (SANITIZER_DEBUG)
+    ThreadRegistryLock l(ctx->thread_registry);
+  if (!flags()->report_bugs || thr->suppress_reports)
+    return false;
+  switch (typ) {
+    case ReportTypeSignalUnsafe:
+      return flags()->report_signal_unsafe;
+    case ReportTypeThreadLeak:
+#if !SANITIZER_GO
+      // It's impossible to join phantom threads
+      // in the child after fork.
+      if (ctx->after_multithreaded_fork)
+        return false;
+#endif
+      return flags()->report_thread_leaks;
+    case ReportTypeMutexDestroyLocked:
+      return flags()->report_destroy_locked;
+    default:
+      return true;
+  }
+}
+
 ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) {
   ctx->thread_registry->CheckLocked();
   void *mem = internal_alloc(MBlockReport, sizeof(ReportDesc));
@@ -497,8 +525,10 @@ static bool HandleRacyAddress(ThreadState *thr, uptr addr_min, uptr addr_max) {
 }
 
 bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
-  if (!flags()->report_bugs || thr->suppress_reports)
-    return false;
+  // These should have been checked in ShouldReport.
+  // It's too late to check them here, we have already taken locks.
+  CHECK(flags()->report_bugs);
+  CHECK(!thr->suppress_reports);
   atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
   const ReportDesc *rep = srep.GetReport();
   CHECK_EQ(thr->current_report, nullptr);
@@ -589,7 +619,7 @@ void ReportRace(ThreadState *thr) {
   // at best it will cause deadlocks on internal mutexes.
   ScopedIgnoreInterceptors ignore;
 
-  if (!flags()->report_bugs)
+  if (!ShouldReport(thr, ReportTypeRace))
     return;
   if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr))
     return;

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
index d80146735ea79..97ab2059988ae 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cpp
@@ -210,7 +210,7 @@ static void ThreadCheckIgnore(ThreadState *thr) {}
 void ThreadFinalize(ThreadState *thr) {
   ThreadCheckIgnore(thr);
 #if !SANITIZER_GO
-  if (!flags()->report_thread_leaks)
+  if (!ShouldReport(thr, ReportTypeThreadLeak))
     return;
   ThreadRegistryLock l(ctx->thread_registry);
   Vector<ThreadLeak> leaks;

diff  --git a/compiler-rt/test/tsan/pthread_atfork_deadlock.c b/compiler-rt/test/tsan/pthread_atfork_deadlock.c
index 01107ee6692c1..b002d7d956fc7 100644
--- a/compiler-rt/test/tsan/pthread_atfork_deadlock.c
+++ b/compiler-rt/test/tsan/pthread_atfork_deadlock.c
@@ -21,7 +21,7 @@ void atfork() {
 
 int main() {
   barrier_init(&barrier, 2);
-  pthread_atfork(atfork, NULL, NULL);
+  pthread_atfork(atfork, atfork, atfork);
   pthread_t t;
   pthread_create(&t, NULL, worker, NULL);
   glob++;

diff  --git a/compiler-rt/test/tsan/pthread_atfork_deadlock2.c b/compiler-rt/test/tsan/pthread_atfork_deadlock2.c
index 700507c1e637c..43f24f0eebac6 100644
--- a/compiler-rt/test/tsan/pthread_atfork_deadlock2.c
+++ b/compiler-rt/test/tsan/pthread_atfork_deadlock2.c
@@ -1,4 +1,4 @@
-// RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
 // Regression test for
 // https://groups.google.com/d/msg/thread-sanitizer/e_zB9gYqFHM/DmAiTsrLAwAJ
 // pthread_atfork() callback triggers a data race and we deadlocked
@@ -22,7 +22,7 @@ void atfork() {
 
 int main() {
   barrier_init(&barrier, 2);
-  pthread_atfork(atfork, NULL, NULL);
+  pthread_atfork(atfork, atfork, atfork);
   pthread_t t;
   pthread_create(&t, NULL, worker, NULL);
   barrier_wait(&barrier);
@@ -44,6 +44,10 @@ int main() {
   return 0;
 }
 
-// CHECK-NOT: ThreadSanitizer: data race
+// CHECK: ThreadSanitizer: data race
+// CHECK:   Write of size 4
+// CHECK:     #0 atfork
+// CHECK:   Previous write of size 4
+// CHECK:     #0 worker
 // CHECK: CHILD
 // CHECK: PARENT

diff  --git a/compiler-rt/test/tsan/pthread_atfork_deadlock3.c b/compiler-rt/test/tsan/pthread_atfork_deadlock3.c
new file mode 100644
index 0000000000000..2ced0067de5c7
--- /dev/null
+++ b/compiler-rt/test/tsan/pthread_atfork_deadlock3.c
@@ -0,0 +1,98 @@
+// RUN: %clang_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s
+// Regression test for
+// https://groups.google.com/g/thread-sanitizer/c/TQrr4-9PRYo/m/HFR4FMi6AQAJ
+#include "test.h"
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+#include <signal.h>
+
+long glob = 0;
+
+void *worker(void *main) {
+  glob++;
+  // synchronize with main
+  barrier_wait(&barrier);
+  // synchronize with atfork
+  barrier_wait(&barrier);
+  pthread_kill((pthread_t)main, SIGPROF);
+  barrier_wait(&barrier);
+  // synchronize with afterfork
+  barrier_wait(&barrier);
+  pthread_kill((pthread_t)main, SIGPROF);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void atfork() {
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
+  write(2, "in atfork\n", strlen("in atfork\n"));
+  static volatile long a;
+  __atomic_fetch_add(&a, 1, __ATOMIC_RELEASE);
+}
+
+void afterfork() {
+  barrier_wait(&barrier);
+  barrier_wait(&barrier);
+  write(2, "in afterfork\n", strlen("in afterfork\n"));
+  static volatile long a;
+  __atomic_fetch_add(&a, 1, __ATOMIC_RELEASE);
+}
+
+void afterfork_child() {
+  // Can't synchronize with barriers because we are
+  // in the new process, but want consistent output.
+  sleep(1);
+  write(2, "in afterfork_child\n", strlen("in afterfork_child\n"));
+  glob++;
+}
+
+void handler(int sig) {
+  write(2, "in handler\n", strlen("in handler\n"));
+  glob++;
+}
+
+int main() {
+  barrier_init(&barrier, 2);
+  struct sigaction act = {};
+  act.sa_handler = &handler;
+  if (sigaction(SIGPROF, &act, 0)) {
+    perror("sigaction");
+    exit(1);
+  }
+  pthread_atfork(atfork, afterfork, afterfork_child);
+  pthread_t t;
+  pthread_create(&t, NULL, worker, (void*)pthread_self());
+  barrier_wait(&barrier);
+  pid_t pid = fork();
+  if (pid < 0) {
+    fprintf(stderr, "fork failed: %d\n", errno);
+    return 1;
+  }
+  if (pid == 0) {
+    fprintf(stderr, "CHILD\n");
+    return 0;
+  }
+  if (pid != waitpid(pid, NULL, 0)) {
+    fprintf(stderr, "waitpid failed: %d\n", errno);
+    return 1;
+  }
+  pthread_join(t, NULL);
+  fprintf(stderr, "PARENT\n");
+  return 0;
+}
+
+// CHECK: in atfork
+// CHECK: in handler
+// CHECK: ThreadSanitizer: data race
+// CHECK:   Write of size 8
+// CHECK:     #0 handler
+// CHECK:   Previous write of size 8
+// CHECK:     #0 worker
+// CHECK: afterfork
+// CHECK: in handler
+// CHECK: afterfork_child
+// CHECK: CHILD
+// CHECK: PARENT


        


More information about the llvm-commits mailing list