[compiler-rt] 2eb3e20 - tsan: fix deadlock during race reporting

Dmitry Vyukov via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 20 09:52:51 PST 2021


Author: Dmitry Vyukov
Date: 2021-12-20T18:52:48+01:00
New Revision: 2eb3e204618209a7f3bd9fa9f6e98c38984997b2

URL: https://github.com/llvm/llvm-project/commit/2eb3e204618209a7f3bd9fa9f6e98c38984997b2
DIFF: https://github.com/llvm/llvm-project/commit/2eb3e204618209a7f3bd9fa9f6e98c38984997b2.diff

LOG: tsan: fix deadlock during race reporting

SlotPairLocker calls SlotLock under ctx->multi_slot_mtx.
SlotLock can invoke global reset DoReset if we are out of slots/epochs.
But DoReset locks ctx->multi_slot_mtx as well, which leads to deadlock.

Resolve the deadlock by removing SlotPairLocker/multi_slot_mtx
and only lock one slot for which we will do RestoreStack.
We need to lock that slot because RestoreStack accesses the slot journal.
But it's unclear why we need to lock the current slot.
Initially I did it just to be on the safer side (but at that time
we dit not lock the second slot, so it was easy just to lock the current slot).

Reviewed By: melver

Differential Revision: https://reviews.llvm.org/D116040

Added: 
    

Modified: 
    compiler-rt/lib/tsan/rtl/tsan_defs.h
    compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
    compiler-rt/lib/tsan/rtl/tsan_rtl.h
    compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
    compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
    compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
    compiler-rt/test/tsan/stress.cpp

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/tsan/rtl/tsan_defs.h b/compiler-rt/lib/tsan/rtl/tsan_defs.h
index d9f20d14a92ad..f20827caea8e1 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_defs.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_defs.h
@@ -243,7 +243,6 @@ enum {
   MutexTypeTrace,
   MutexTypeSlot,
   MutexTypeSlots,
-  MutexTypeMultiSlot,
 };
 
 }  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index fc76ec451c311..6d37363558fb1 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -210,7 +210,6 @@ static void DoResetImpl(uptr epoch) {
 // error: expecting mutex 'slot.mtx' to be held at start of each loop
 void DoReset(ThreadState* thr, uptr epoch) NO_THREAD_SAFETY_ANALYSIS {
   {
-    Lock l(&ctx->multi_slot_mtx);
     for (auto& slot : ctx->slots) {
       slot.mtx.Lock();
       if (UNLIKELY(epoch == 0))
@@ -337,6 +336,13 @@ void SlotDetach(ThreadState* thr) {
 
 void SlotLock(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS {
   DCHECK(!thr->slot_locked);
+#if SANITIZER_DEBUG
+  // Check these mutexes are not locked.
+  // We can call DoReset from SlotAttachAndLock, which will lock
+  // these mutexes, but it happens only every once in a while.
+  { ThreadRegistryLock lock(&ctx->thread_registry); }
+  { Lock lock(&ctx->slot_mtx); }
+#endif
   TidSlot* slot = thr->slot;
   slot->mtx.Lock();
   thr->slot_locked = true;
@@ -367,7 +373,6 @@ Context::Context()
       fired_suppressions_mtx(MutexTypeFired),
       clock_alloc(LINKER_INITIALIZED, "clock allocator"),
       slot_mtx(MutexTypeSlots),
-      multi_slot_mtx(MutexTypeMultiSlot),
       resetting() {
   fired_suppressions.reserve(8);
   for (uptr i = 0; i < ARRAY_SIZE(slots); i++) {
@@ -767,7 +772,6 @@ void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
   // Detaching from the slot makes OnUserFree skip writing to the shadow.
   // The slot will be locked so any attempts to use it will deadlock anyway.
   SlotDetach(thr);
-  ctx->multi_slot_mtx.Lock();
   for (auto& slot : ctx->slots) slot.mtx.Lock();
   ctx->thread_registry.Lock();
   ctx->slot_mtx.Lock();
@@ -799,7 +803,6 @@ static void ForkAfter(ThreadState* thr) NO_THREAD_SAFETY_ANALYSIS {
   ctx->slot_mtx.Unlock();
   ctx->thread_registry.Unlock();
   for (auto& slot : ctx->slots) slot.mtx.Unlock();
-  ctx->multi_slot_mtx.Unlock();
   SlotAttachAndLock(thr);
   SlotUnlock(thr);
   GlobalProcessorUnlock();
@@ -1053,9 +1056,7 @@ MutexMeta mutex_meta[] = {
     {MutexTypeAtExit, "AtExit", {}},
     {MutexTypeFired, "Fired", {MutexLeaf}},
     {MutexTypeRacy, "Racy", {MutexLeaf}},
-    {MutexTypeGlobalProc,
-     "GlobalProc",
-     {MutexTypeSlot, MutexTypeSlots, MutexTypeMultiSlot}},
+    {MutexTypeGlobalProc, "GlobalProc", {MutexTypeSlot, MutexTypeSlots}},
     {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
     {MutexTypeTrace, "Trace", {}},
     {MutexTypeSlot,
@@ -1063,7 +1064,6 @@ MutexMeta mutex_meta[] = {
      {MutexMulti, MutexTypeTrace, MutexTypeSyncVar, MutexThreadRegistry,
       MutexTypeSlots}},
     {MutexTypeSlots, "Slots", {MutexTypeTrace, MutexTypeReport}},
-    {MutexTypeMultiSlot, "MultiSlot", {MutexTypeSlot, MutexTypeSlots}},
     {},
 };
 

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 3175847a880ab..013d6910de78b 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -333,8 +333,6 @@ struct Context {
 
   // Protects global_epoch, slot_queue, trace_part_recycle.
   Mutex slot_mtx;
-  // Prevents lock order inversions when we lock more than 1 slot.
-  Mutex multi_slot_mtx;
   uptr global_epoch;  // guarded by slot_mtx and by all slot mutexes
   bool resetting;     // global reset is in progress
   IList<TidSlot, &TidSlot::node> slot_queue GUARDED_BY(slot_mtx);
@@ -609,16 +607,6 @@ enum FiberSwitchFlags {
   FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync
 };
 
-class SlotPairLocker {
- public:
-  SlotPairLocker(ThreadState *thr, Sid sid);
-  ~SlotPairLocker();
-
- private:
-  ThreadState *thr_;
-  TidSlot *slot_;
-};
-
 class SlotLocker {
  public:
   ALWAYS_INLINE

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
index 5ca2e4fca827d..5d31005c2af0a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_mutex.cpp
@@ -552,7 +552,9 @@ void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
 
 void ReportDestroyLocked(ThreadState *thr, uptr pc, uptr addr,
                          FastState last_lock, StackID creation_stack_id) {
-  SlotPairLocker locker(thr, last_lock.sid());
+  // We need to lock the slot during RestoreStack because it protects
+  // the slot journal.
+  Lock slot_lock(&ctx->slots[static_cast<uptr>(last_lock.sid())].mtx);
   ThreadRegistryLock l0(&ctx->thread_registry);
   Lock slots_lock(&ctx->slot_mtx);
   ScopedReport rep(ReportTypeMutexDestroyLocked);

diff  --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
index efe3abea63757..69a56a38efbb6 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp
@@ -729,27 +729,6 @@ static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
   return false;
 }
 
-// We need to lock the target slot during RestoreStack because it protects
-// the slot journal. However, the target slot can be the slot of the current
-// thread or a 
diff erent slot.
-SlotPairLocker::SlotPairLocker(ThreadState *thr,
-                               Sid sid) NO_THREAD_SAFETY_ANALYSIS : thr_(thr),
-                                                                    slot_() {
-  CHECK_NE(sid, kFreeSid);
-  Lock l(&ctx->multi_slot_mtx);
-  SlotLock(thr);
-  if (sid == thr->slot->sid)
-    return;
-  slot_ = &ctx->slots[static_cast<uptr>(sid)];
-  slot_->mtx.Lock();
-}
-
-SlotPairLocker::~SlotPairLocker() NO_THREAD_SAFETY_ANALYSIS {
-  SlotUnlock(thr_);
-  if (slot_)
-    slot_->mtx.Unlock();
-}
-
 void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
                 AccessType typ0) {
   CheckedMutex::CheckNoLocks();
@@ -806,7 +785,9 @@ void ReportRace(ThreadState *thr, RawShadow *shadow_mem, Shadow cur, Shadow old,
   DynamicMutexSet mset1;
   MutexSet *mset[kMop] = {&thr->mset, mset1};
 
-  SlotPairLocker locker(thr, s[1].sid());
+  // We need to lock the slot during RestoreStack because it protects
+  // the slot journal.
+  Lock slot_lock(&ctx->slots[static_cast<uptr>(s[1].sid())].mtx);
   ThreadRegistryLock l0(&ctx->thread_registry);
   Lock slots_lock(&ctx->slot_mtx);
   if (!RestoreStack(EventType::kAccessExt, s[1].sid(), s[1].epoch(), addr1,

diff  --git a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
index 13c03353e70e1..a9f552c8ba9b1 100644
--- a/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
+++ b/compiler-rt/lib/tsan/tests/unit/tsan_trace_test.cpp
@@ -88,7 +88,7 @@ TRACE_TEST(Trace, RestoreAccess) {
   // The previous one is equivalent, but RestoreStack must prefer
   // the last of the matchig accesses.
   CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
-  SlotPairLocker locker(thr, thr->fast_state.sid());
+  Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
   ThreadRegistryLock lock1(&ctx->thread_registry);
   Lock lock2(&ctx->slot_mtx);
   Tid tid = kInvalidTid;
@@ -148,7 +148,7 @@ TRACE_TEST(Trace, MemoryAccessSize) {
                                  kAccessRead);
           break;
       }
-      SlotPairLocker locker(thr, thr->fast_state.sid());
+      Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
       ThreadRegistryLock lock1(&ctx->thread_registry);
       Lock lock2(&ctx->slot_mtx);
       Tid tid = kInvalidTid;
@@ -176,7 +176,7 @@ TRACE_TEST(Trace, RestoreMutexLock) {
   TraceMutexLock(thr, EventType::kLock, 0x4000, 0x5000, 0x6000);
   TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001);
   TraceMutexLock(thr, EventType::kRLock, 0x4002, 0x5001, 0x6002);
-  SlotPairLocker locker(thr, thr->fast_state.sid());
+  Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
   ThreadRegistryLock lock1(&ctx->thread_registry);
   Lock lock2(&ctx->slot_mtx);
   Tid tid = kInvalidTid;
@@ -219,7 +219,7 @@ TRACE_TEST(Trace, MultiPart) {
   FuncEntry(thr, 0x4000);
   TraceMutexLock(thr, EventType::kRLock, 0x4001, 0x5001, 0x6001);
   CHECK(TryTraceMemoryAccess(thr, 0x2002, 0x3000, 8, kAccessRead));
-  SlotPairLocker locker(thr, thr->fast_state.sid());
+  Lock slot_lock(&ctx->slots[static_cast<uptr>(thr->fast_state.sid())].mtx);
   ThreadRegistryLock lock1(&ctx->thread_registry);
   Lock lock2(&ctx->slot_mtx);
   Tid tid = kInvalidTid;

diff  --git a/compiler-rt/test/tsan/stress.cpp b/compiler-rt/test/tsan/stress.cpp
index 3656223b17ae2..756cb6488b4c7 100644
--- a/compiler-rt/test/tsan/stress.cpp
+++ b/compiler-rt/test/tsan/stress.cpp
@@ -1,10 +1,13 @@
-// RUN: %clangxx_tsan -O1 %s -o %t && %env_tsan_opts=flush_memory_ms=1:flush_symbolizer_ms=1:memory_limit_mb=1 %run %t 2>&1 | FileCheck %s
+// This run stresses global reset happenning concurrently with everything else.
+// RUN: %clangxx_tsan -O1 %s -o %t && %env_tsan_opts=flush_memory_ms=1:flush_symbolizer_ms=1:memory_limit_mb=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NORACE
+// This run stresses race reporting happenning concurrently with everything else.
+// RUN: %clangxx_tsan -O1 %s -DRACE=1 -o %t && %env_tsan_opts=suppress_equal_stacks=0:suppress_equal_addresses=0 %deflake %run %t | FileCheck %s --check-prefix=CHECK-RACE
 #include "test.h"
 #include <fcntl.h>
 #include <string.h>
 
 volatile long stop;
-long atomic, read_only;
+long atomic, read_only, racy;
 int fds[2];
 
 __attribute__((noinline)) void *SecondaryThread(void *x) {
@@ -22,7 +25,7 @@ void *Thread(void *x) {
       // just read the stop variable
     }
     if (me == 0 || me == 2) {
-      __atomic_store_n(&atomic, 1, __ATOMIC_RELEASE);
+      __atomic_store_n(&atomic, sink, __ATOMIC_RELEASE);
     }
     if (me == 0 || me == 3) {
       sink += __atomic_fetch_add(&atomic, 1, __ATOMIC_ACQ_REL);
@@ -47,6 +50,13 @@ void *Thread(void *x) {
       memcpy(&buf, &read_only, sizeof(buf));
       sink += buf;
     }
+    if (me == 0 || me == 9) {
+#if RACE
+      sink += racy++;
+#else
+      sink += racy;
+#endif
+    }
     // If you add more actions, update kActions in main.
   }
   return NULL;
@@ -60,8 +70,12 @@ int main() {
     exit((perror("fcntl"), 1));
   if (fcntl(fds[1], F_SETFL, O_NONBLOCK))
     exit((perror("fcntl"), 1));
-  const int kActions = 9;
+  const int kActions = 10;
+#if RACE
+  const int kMultiplier = 1;
+#else
   const int kMultiplier = 4;
+#endif
   pthread_t t[kActions * kMultiplier];
   for (int i = 0; i < kActions * kMultiplier; i++)
     pthread_create(&t[i], NULL, Thread, (void *)(long)(i % kActions));
@@ -73,6 +87,8 @@ int main() {
   return 0;
 }
 
-// CHECK-NOT: ThreadSanitizer:
-// CHECK: DONE
-// CHECK-NOT: ThreadSanitizer:
+// CHECK-NORACE-NOT: ThreadSanitizer:
+// CHECK-NORACE: DONE
+// CHECK-NORACE-NOT: ThreadSanitizer:
+// CHECK-RACE: ThreadSanitizer: data race
+// CHECK-RACE: DONE


        


More information about the llvm-commits mailing list