[compiler-rt] r231367 - tsan: fix signal handling during stop-the-world

Dmitry Vyukov dvyukov at google.com
Thu Mar 5 06:37:29 PST 2015


Author: dvyukov
Date: Thu Mar  5 08:37:28 2015
New Revision: 231367

URL: http://llvm.org/viewvc/llvm-project?rev=231367&view=rev
Log:
tsan: fix signal handling during stop-the-world

Long story short: stop-the-world briefly resets SIGSEGV handler to SIG_DFL.
This breaks programs that handle and continue after SIGSEGV (namely JVM).
See the test and comments for details.

This is a reincarnation of the reverted r229678 (http://reviews.llvm.org/D7722).
Changed:
- execute TracerThreadDieCallback only on tracer thread
- reset global data in TracerThreadSignalHandler/TracerThreadDieCallback
- handle EINTR from waitpid

Add 3 new tests:
- SIGSEGV during leak checking
- StopTheWorld operation during signal storm from an external process
- StopTheWorld operation when the program generates and handles SIGSEGVs

http://reviews.llvm.org/D8032


Added:
    compiler-rt/trunk/test/asan/TestCases/Linux/leak_check_segv.cc
    compiler-rt/trunk/test/asan/TestCases/Linux/signal_during_stop_the_world.cc
    compiler-rt/trunk/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc
Modified:
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld.h
    compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld.h?rev=231367&r1=231366&r2=231367&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld.h (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld.h Thu Mar  5 08:37:28 2015
@@ -59,7 +59,8 @@ typedef void (*StopTheWorldCallback)(
 
 // Suspend all threads in the current process and run the callback on the list
 // of suspended threads. This function will resume the threads before returning.
-// The callback should not call any libc functions.
+// The callback should not call any libc functions. The callback must not call
+// exit or _exit; it must instead return to the caller.
 // This function should NOT be called from multiple threads simultaneously.
 void StopTheWorld(StopTheWorldCallback callback, void *argument);
 

Modified: compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc?rev=231367&r1=231366&r2=231367&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cc Thu Mar  5 08:37:28 2015
@@ -19,6 +19,7 @@
 #include "sanitizer_stoptheworld.h"
 
 #include "sanitizer_platform_limits_posix.h"
+#include "sanitizer_atomic.h"
 
 #include <errno.h>
 #include <sched.h> // for CLONE_* definitions
@@ -70,11 +71,25 @@
 COMPILER_CHECK(sizeof(SuspendedThreadID) == sizeof(pid_t));
 
 namespace __sanitizer {
+
+// Structure for passing arguments into the tracer thread.
+struct TracerThreadArgument {
+  StopTheWorldCallback callback;
+  void *callback_argument;
+  // The tracer thread waits on this mutex while the parent finishes its
+  // preparations.
+  BlockingMutex mutex;
+  // Tracer thread signals its completion by setting done.
+  atomic_uintptr_t done;
+  uptr parent_pid;
+};
+
 // This class handles thread suspending/unsuspending in the tracer thread.
 class ThreadSuspender {
  public:
-  explicit ThreadSuspender(pid_t pid)
-    : pid_(pid) {
+  explicit ThreadSuspender(pid_t pid, TracerThreadArgument *arg)
+    : arg(arg)
+    , pid_(pid) {
       CHECK_GE(pid, 0);
     }
   bool SuspendAllThreads();
@@ -83,6 +98,7 @@ class ThreadSuspender {
   SuspendedThreadsList &suspended_threads_list() {
     return suspended_threads_list_;
   }
+  TracerThreadArgument *arg;
  private:
   SuspendedThreadsList suspended_threads_list_;
   pid_t pid_;
@@ -184,33 +200,27 @@ bool ThreadSuspender::SuspendAllThreads(
 // Pointer to the ThreadSuspender instance for use in signal handler.
 static ThreadSuspender *thread_suspender_instance = NULL;
 
-// Signals that should not be blocked (this is used in the parent thread as well
-// as the tracer thread).
-static const int kUnblockedSignals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV,
-                                         SIGBUS, SIGXCPU, SIGXFSZ };
-
-// Structure for passing arguments into the tracer thread.
-struct TracerThreadArgument {
-  StopTheWorldCallback callback;
-  void *callback_argument;
-  // The tracer thread waits on this mutex while the parent finishes its
-  // preparations.
-  BlockingMutex mutex;
-  uptr parent_pid;
-};
+// Synchronous signals that should not be blocked.
+static const int kSyncSignals[] = { SIGABRT, SIGILL, SIGFPE, SIGSEGV, SIGBUS,
+                                    SIGXCPU, SIGXFSZ };
 
 static DieCallbackType old_die_callback;
 
 // Signal handler to wake up suspended threads when the tracer thread dies.
-void TracerThreadSignalHandler(int signum, void *siginfo, void *uctx) {
+static void TracerThreadSignalHandler(int signum, void *siginfo, void *uctx) {
   SignalContext ctx = SignalContext::Create(siginfo, uctx);
   VPrintf(1, "Tracer caught signal %d: addr=0x%zx pc=0x%zx sp=0x%zx\n",
       signum, ctx.addr, ctx.pc, ctx.sp);
-  if (thread_suspender_instance != NULL) {
+  ThreadSuspender *inst = thread_suspender_instance;
+  if (inst != NULL) {
     if (signum == SIGABRT)
-      thread_suspender_instance->KillAllThreads();
+      inst->KillAllThreads();
     else
-      thread_suspender_instance->ResumeAllThreads();
+      inst->ResumeAllThreads();
+    SetDieCallback(old_die_callback);
+    old_die_callback = NULL;
+    thread_suspender_instance = NULL;
+    atomic_store(&inst->arg->done, 1, memory_order_relaxed);
   }
   internal__exit((signum == SIGABRT) ? 1 : 2);
 }
@@ -222,10 +232,15 @@ static void TracerThreadDieCallback() {
   // point. So we correctly handle calls to Die() from within the callback, but
   // not those that happen before or after the callback. Hopefully there aren't
   // a lot of opportunities for that to happen...
-  if (thread_suspender_instance)
-    thread_suspender_instance->KillAllThreads();
+  ThreadSuspender *inst = thread_suspender_instance;
+  if (inst != NULL && stoptheworld_tracer_pid == internal_getpid()) {
+    inst->KillAllThreads();
+    thread_suspender_instance = NULL;
+  }
   if (old_die_callback)
     old_die_callback();
+  SetDieCallback(old_die_callback);
+  old_die_callback = NULL;
 }
 
 // Size of alternative stack for signal handlers in the tracer thread.
@@ -245,9 +260,10 @@ static int TracerThread(void* argument)
   tracer_thread_argument->mutex.Lock();
   tracer_thread_argument->mutex.Unlock();
 
+  old_die_callback = GetDieCallback();
   SetDieCallback(TracerThreadDieCallback);
 
-  ThreadSuspender thread_suspender(internal_getppid());
+  ThreadSuspender thread_suspender(internal_getppid(), tracer_thread_argument);
   // Global pointer for the signal handler.
   thread_suspender_instance = &thread_suspender;
 
@@ -259,17 +275,14 @@ static int TracerThread(void* argument)
   handler_stack.ss_size = kHandlerStackSize;
   internal_sigaltstack(&handler_stack, NULL);
 
-  // Install our handler for fatal signals. Other signals should be blocked by
-  // the mask we inherited from the caller thread.
-  for (uptr signal_index = 0; signal_index < ARRAY_SIZE(kUnblockedSignals);
-       signal_index++) {
-    __sanitizer_sigaction new_sigaction;
-    internal_memset(&new_sigaction, 0, sizeof(new_sigaction));
-    new_sigaction.sigaction = TracerThreadSignalHandler;
-    new_sigaction.sa_flags = SA_ONSTACK | SA_SIGINFO;
-    internal_sigfillset(&new_sigaction.sa_mask);
-    internal_sigaction_norestorer(kUnblockedSignals[signal_index],
-                                  &new_sigaction, NULL);
+  // Install our handler for synchronous signals. Other signals should be
+  // blocked by the mask we inherited from the parent thread.
+  for (uptr i = 0; i < ARRAY_SIZE(kSyncSignals); i++) {
+    __sanitizer_sigaction act;
+    internal_memset(&act, 0, sizeof(act));
+    act.sigaction = TracerThreadSignalHandler;
+    act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+    internal_sigaction_norestorer(kSyncSignals[i], &act, 0);
   }
 
   int exit_code = 0;
@@ -282,9 +295,9 @@ static int TracerThread(void* argument)
     thread_suspender.ResumeAllThreads();
     exit_code = 0;
   }
+  SetDieCallback(old_die_callback);
   thread_suspender_instance = NULL;
-  handler_stack.ss_flags = SS_DISABLE;
-  internal_sigaltstack(&handler_stack, NULL);
+  atomic_store(&tracer_thread_argument->done, 1, memory_order_relaxed);
   return exit_code;
 }
 
@@ -316,53 +329,21 @@ class ScopedStackSpaceWithGuard {
 // into globals.
 static __sanitizer_sigset_t blocked_sigset;
 static __sanitizer_sigset_t old_sigset;
-static __sanitizer_sigaction old_sigactions
-    [ARRAY_SIZE(kUnblockedSignals)];
 
 class StopTheWorldScope {
  public:
   StopTheWorldScope() {
-    // Block all signals that can be blocked safely, and install
-    // default handlers for the remaining signals.
-    // We cannot allow user-defined handlers to run while the ThreadSuspender
-    // thread is active, because they could conceivably call some libc functions
-    // which modify errno (which is shared between the two threads).
-    internal_sigfillset(&blocked_sigset);
-    for (uptr signal_index = 0; signal_index < ARRAY_SIZE(kUnblockedSignals);
-         signal_index++) {
-      // Remove the signal from the set of blocked signals.
-      internal_sigdelset(&blocked_sigset, kUnblockedSignals[signal_index]);
-      // Install the default handler.
-      __sanitizer_sigaction new_sigaction;
-      internal_memset(&new_sigaction, 0, sizeof(new_sigaction));
-      new_sigaction.handler = SIG_DFL;
-      internal_sigfillset(&new_sigaction.sa_mask);
-      internal_sigaction_norestorer(kUnblockedSignals[signal_index],
-          &new_sigaction, &old_sigactions[signal_index]);
-    }
-    int sigprocmask_status =
-        internal_sigprocmask(SIG_BLOCK, &blocked_sigset, &old_sigset);
-    CHECK_EQ(sigprocmask_status, 0); // sigprocmask should never fail
     // Make this process dumpable. Processes that are not dumpable cannot be
     // attached to.
     process_was_dumpable_ = internal_prctl(PR_GET_DUMPABLE, 0, 0, 0, 0);
     if (!process_was_dumpable_)
       internal_prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
-    old_die_callback = GetDieCallback();
   }
 
   ~StopTheWorldScope() {
-    SetDieCallback(old_die_callback);
     // Restore the dumpable flag.
     if (!process_was_dumpable_)
       internal_prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
-    // Restore the signal handlers.
-    for (uptr signal_index = 0; signal_index < ARRAY_SIZE(kUnblockedSignals);
-         signal_index++) {
-      internal_sigaction_norestorer(kUnblockedSignals[signal_index],
-                                    &old_sigactions[signal_index], NULL);
-    }
-    internal_sigprocmask(SIG_SETMASK, &old_sigset, &old_sigset);
   }
 
  private:
@@ -390,16 +371,42 @@ void StopTheWorld(StopTheWorldCallback c
   tracer_thread_argument.callback = callback;
   tracer_thread_argument.callback_argument = argument;
   tracer_thread_argument.parent_pid = internal_getpid();
+  atomic_store(&tracer_thread_argument.done, 0, memory_order_relaxed);
   const uptr kTracerStackSize = 2 * 1024 * 1024;
   ScopedStackSpaceWithGuard tracer_stack(kTracerStackSize);
   // Block the execution of TracerThread until after we have set ptrace
   // permissions.
   tracer_thread_argument.mutex.Lock();
+  // Signal handling story.
+  // We don't want async signals to be delivered to the tracer thread,
+  // so we block all async signals before creating the thread. An async signal
+  // handler can temporarily modify errno, which is shared with this thread.
+  // We ought to use pthread_sigmask here, because sigprocmask has undefined
+  // behavior in multithreaded programs. However, on linux sigprocmask is
+  // equivalent to pthread_sigmask with the exception that pthread_sigmask
+  // does not allow blocking some signals used internally in pthread
+  // implementation. We are fine with blocking them here, we are really not
+  // going to pthread_cancel the thread.
+  // The tracer thread should not raise any synchronous signals. But in case it
+  // does, we set up a special handler for sync signals that properly kills the
+  // parent as well. Note: we don't pass CLONE_SIGHAND to clone, so handlers
+  // in the tracer thread won't interfere with the user program. Double note:
+  // if a user does something along the lines of 'kill -11 pid', that can kill
+  // the process even if the user has set up their own handler for SEGV.
+  // Thing to watch out for: this code should not change behavior of user code
+  // in any observable way. In particular it should not override user signal
+  // handlers.
+  internal_sigfillset(&blocked_sigset);
+  for (uptr i = 0; i < ARRAY_SIZE(kSyncSignals); i++)
+    internal_sigdelset(&blocked_sigset, kSyncSignals[i]);
+  int rv = internal_sigprocmask(SIG_BLOCK, &blocked_sigset, &old_sigset);
+  CHECK_EQ(rv, 0);
   uptr tracer_pid = internal_clone(
       TracerThread, tracer_stack.Bottom(),
       CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_UNTRACED,
       &tracer_thread_argument, 0 /* parent_tidptr */, 0 /* newtls */, 0
       /* child_tidptr */);
+  internal_sigprocmask(SIG_SETMASK, &old_sigset, 0);
   int local_errno = 0;
   if (internal_iserror(tracer_pid, &local_errno)) {
     VReport(1, "Failed spawning a tracer thread (errno %d).\n", local_errno);
@@ -413,14 +420,27 @@ void StopTheWorld(StopTheWorldCallback c
 #endif
     // Allow the tracer thread to start.
     tracer_thread_argument.mutex.Unlock();
-    // Since errno is shared between this thread and the tracer thread, we
-    // must avoid using errno while the tracer thread is running.
-    // At this point, any signal will either be blocked or kill us, so waitpid
-    // should never return (and set errno) while the tracer thread is alive.
-    uptr waitpid_status = internal_waitpid(tracer_pid, NULL, __WALL);
-    if (internal_iserror(waitpid_status, &local_errno))
+    // NOTE: errno is shared between this thread and the tracer thread.
+    // internal_waitpid may call syscall() which can access/spoil errno,
+    // so we can't call it now. Instead we wait for the tracer thread to finish
+    // using the spin loop below. Man page for sched_yield says "In the Linux
+    // implementation, sched_yield() always succeeds", so let's hope it does not
+    // spoil errno. Note that this spin loop runs only for brief periods before
+    // the tracer thread has suspended us and when it starts unblocking threads.
+    while (atomic_load(&tracer_thread_argument.done, memory_order_relaxed) == 0)
+      sched_yield();
+    // Now the tracer thread is about to exit and does not touch errno,
+    // wait for it.
+    for (;;) {
+      uptr waitpid_status = internal_waitpid(tracer_pid, NULL, __WALL);
+      if (!internal_iserror(waitpid_status, &local_errno))
+        break;
+      if (local_errno == EINTR)
+        continue;
       VReport(1, "Waiting on the tracer thread failed (errno %d).\n",
               local_errno);
+      break;
+    }
   }
 }
 

Added: compiler-rt/trunk/test/asan/TestCases/Linux/leak_check_segv.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/asan/TestCases/Linux/leak_check_segv.cc?rev=231367&view=auto
==============================================================================
--- compiler-rt/trunk/test/asan/TestCases/Linux/leak_check_segv.cc (added)
+++ compiler-rt/trunk/test/asan/TestCases/Linux/leak_check_segv.cc Thu Mar  5 08:37:28 2015
@@ -0,0 +1,23 @@
+// Test that SIGSEGV during leak checking does not crash the process.
+// RUN: %clangxx_asan -O1 %s -o %t && LSAN_OPTIONS="verbosity=1" not %run %t 2>&1
+// REQUIRES: asan-64-bits
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sanitizer/lsan_interface.h>
+
+char data[10 * 1024 * 1024];
+
+int main() {
+  void *p = malloc(10 * 1024 * 1024);
+  // surprise-surprise!
+  mprotect((void*)(((unsigned long)p + 4095) & ~4095), 16 * 1024, PROT_NONE); 
+  mprotect((void*)(((unsigned long)data + 4095) & ~4095), 16 * 1024, PROT_NONE);
+  __lsan_do_leak_check();
+  fprintf(stderr, "DONE\n");
+}
+
+// CHECK: Tracer caught signal 11
+// CHECK: LeakSanitizer has encountered a fatal error
+// CHECK-NOT: DONE
+

Added: compiler-rt/trunk/test/asan/TestCases/Linux/signal_during_stop_the_world.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/asan/TestCases/Linux/signal_during_stop_the_world.cc?rev=231367&view=auto
==============================================================================
--- compiler-rt/trunk/test/asan/TestCases/Linux/signal_during_stop_the_world.cc (added)
+++ compiler-rt/trunk/test/asan/TestCases/Linux/signal_during_stop_the_world.cc Thu Mar  5 08:37:28 2015
@@ -0,0 +1,60 @@
+// Test StopTheWorld behavior during signal storm.
+// Historically StopTheWorld crashed because it did not handle EINTR properly.
+// The test is somewhat convoluted, but that's what caused crashes previously.
+
+// RUN: %clangxx_asan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <pthread.h>
+#include <sanitizer/lsan_interface.h>
+
+static void handler(int signo);
+static void *thr(void *arg);
+
+int main() {
+  struct sigaction act = {};
+  act.sa_handler = handler;
+  sigaction(SIGPROF, &act, 0);
+
+  pid_t pid = fork();
+  if (pid < 0) {
+    fprintf(stderr, "failed to fork\n");
+    exit(1);
+  }
+  if (pid == 0) {
+    // Child constantly sends signals to parent to cause spurious return from
+    // waitpid in StopTheWorld.
+    prctl(PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0);
+    pid_t parent = getppid();
+    for (;;) {
+      // There is no strong reason for these two particular signals,
+      // but at least one of them ought to unblock waitpid.
+      kill(parent, SIGCHLD);
+      kill(parent, SIGPROF);
+    }
+  }
+  usleep(10000);  // Let the child start.
+  __lsan_do_leak_check();
+  // Kill and join the child.
+  kill(pid, SIGTERM);
+  waitpid(pid, 0, 0);
+  sleep(1);  // If the tracer thread still runs, give it time to crash.
+  fprintf(stderr, "DONE\n");
+// CHECK: DONE
+}
+
+static void handler(int signo) {
+}
+
+static void *thr(void *arg) {
+  for (;;)
+    sleep(1);
+  return 0;
+}

Added: compiler-rt/trunk/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc?rev=231367&view=auto
==============================================================================
--- compiler-rt/trunk/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc (added)
+++ compiler-rt/trunk/test/sanitizer_common/TestCases/Linux/signal_segv_handler.cc Thu Mar  5 08:37:28 2015
@@ -0,0 +1,41 @@
+// RUN: %clangxx -O1 %s -o %t && TSAN_OPTIONS="flush_memory_ms=1 memory_limit_mb=1" ASAN_OPTIONS="handle_segv=0 allow_user_segv_handler=1" %run %t 2>&1 | FileCheck %s
+
+// JVM uses SEGV to preempt threads. All threads do a load from a known address
+// periodically. When runtime needs to preempt threads, it unmaps the page.
+// Threads start triggering SEGV one by one. The signal handler blocks
+// threads while runtime does its thing. Then runtime maps the page again
+// and resumes the threads.
+// Previously this pattern conflicted with stop-the-world machinery,
+// because it briefly reset SEGV handler to SIG_DFL.
+// As a consequence, the JVM just silently died.
+
+// This test sets the memory flushing rate to maximum, then does a series of
+// "benign" SEGVs that are handled by a signal handler, and ensures that
+// the process survives.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+
+void *guard;
+
+void handler(int signo, siginfo_t *info, void *uctx) {
+  mprotect(guard, 4096, PROT_READ | PROT_WRITE);
+}
+
+int main() {
+  struct sigaction a, old;
+  a.sa_sigaction = handler;
+  a.sa_flags = SA_SIGINFO;
+  sigaction(SIGSEGV, &a, &old);
+  guard = mmap(0, 4096, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
+  for (int i = 0; i < 1000000; i++) {
+    mprotect(guard, 4096, PROT_NONE);
+    *(int*)guard = 1;
+  }
+  sigaction(SIGSEGV, &old, 0);
+  fprintf(stderr, "DONE\n");
+}
+
+// CHECK: DONE





More information about the llvm-commits mailing list