[compiler-rt] r209897 - tsan: optimize memory access functions

Dmitry Vyukov dvyukov at google.com
Fri May 30 06:36:29 PDT 2014


Author: dvyukov
Date: Fri May 30 08:36:29 2014
New Revision: 209897

URL: http://llvm.org/viewvc/llvm-project?rev=209897&view=rev
Log:
tsan: optimize memory access functions
The optimization is two-fold:
First, the algorithm now uses SSE instructions to
handle all 4 shadow slots at once. This makes processing
faster.
Second, if the shadow already contains the same access, we do not
store the event into the trace. This increases the effective
trace size; that is, tsan can remember up to 10x more
previous memory accesses.
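
Conceptually, the filter works as follows (a simplified sketch for
illustration only; the real shadow packs all of these fields into a
single 64-bit word, and the fast path compares all 4 slots at once
with SSE -- see ContainsSameAccessSlow/Fast in the diff below):

  #include <stdint.h>

  static const int kShadowCnt = 4;  // shadow slots per 8-byte application word

  // Illustrative, unpacked view of one shadow slot; the real Shadow class
  // packs these fields into a single u64.
  struct ShadowSlot {
    uint64_t addr_size;  // packed addr0 + size_log of the access
    uint64_t tid;        // id of the accessing thread
    uint64_t epoch;      // thread epoch at the time of the access
    bool is_write;
    bool is_atomic;
  };

  // Returns true if the cell already holds an equivalent (or stronger)
  // access, in which case both the shadow store and the trace event
  // can be skipped.
  static bool ContainsSameAccess(const ShadowSlot cell[kShadowCnt],
                                 const ShadowSlot &cur,
                                 uint64_t sync_epoch) {
    for (int i = 0; i < kShadowCnt; i++) {
      const ShadowSlot &old = cell[i];
      if (old.addr_size == cur.addr_size &&
          old.tid == cur.tid &&
          old.epoch > sync_epoch &&         // newer than the last synchronization
          old.is_atomic == cur.is_atomic &&
          (old.is_write || !cur.is_write))  // an old write covers a new read or
                                            // write; an old read covers only a read
        return true;
    }
    return false;
  }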

Performance impact:
Before:
[       OK ] DISABLED_BENCH.Mop8Read (2461 ms)
[       OK ] DISABLED_BENCH.Mop8Write (1836 ms)
After:
[       OK ] DISABLED_BENCH.Mop8Read (1204 ms)
[       OK ] DISABLED_BENCH.Mop8Write (976 ms)
But this measures only the fast path.
On large real applications the speedup is ~20%.

Trace size impact:
On app1:
Memory accesses                   :       1163265870
  Including same                  :        791312905 (68%)
On app2:
Memory accesses                   :        166875345
  Including same                  :        150449689 (90%)
90% filtered events means that the trace is effectively 10x larger.
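In general, if a fraction f of accesses is filtered out, a trace of fixed
capacity N events covers about N / (1 - f) accesses: f = 0.90 gives the 10x
figure above, and app1's f = 0.68 corresponds to roughly a 3x increase.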

Modified:
    compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc
    compiler-rt/trunk/lib/tsan/check_analyze.sh
    compiler-rt/trunk/lib/tsan/rtl/Makefile.old
    compiler-rt/trunk/lib/tsan/rtl/tsan_defs.h
    compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc
    compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.h
    compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc
    compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h
    compiler-rt/trunk/lib/tsan/rtl/tsan_update_shadow_word_inl.h
    compiler-rt/trunk/lib/tsan/tests/unit/tsan_mman_test.cc

Modified: compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc (original)
+++ compiler-rt/trunk/lib/sanitizer_common/tests/sanitizer_deadlock_detector_test.cc Fri May 30 08:36:29 2014
@@ -268,9 +268,14 @@ void RunMultipleEpochsTest() {
   }
   EXPECT_EQ(d.testOnlyGetEpoch(), 4 * d.size());
 
+#if TSAN_DEBUG == 0
+  // EXPECT_DEATH clones a thread with 4K stack,
+  // which is overflown by tsan memory accesses functions in debug mode.
+
   // Can not handle the locks from the previous epoch.
   // The caller should update the lock id.
   EXPECT_DEATH(d.onLock(&dtls, l0), "CHECK failed.*current_epoch_");
+#endif
 }
 
 TEST(DeadlockDetector, MultipleEpochsTest) {

Modified: compiler-rt/trunk/lib/tsan/check_analyze.sh
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/check_analyze.sh?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/check_analyze.sh (original)
+++ compiler-rt/trunk/lib/tsan/check_analyze.sh Fri May 30 08:36:29 2014
@@ -8,11 +8,11 @@ PrintRes() {
 
 PrintRes
 
-mops="write1 \
+wmops="write1 \
       write2 \
       write4 \
-      write8 \
-      read1 \
+      write8"
+rmops="read1 \
       read2 \
       read4 \
       read8"
@@ -27,10 +27,16 @@ check() {
   fi
 }
 
-for f in $mops; do
-  check $f rsp 1   # To read caller pc.
-  check $f push 0
-  check $f pop 0
+for f in $wmops; do
+  check $f rsp 3
+  check $f push 1
+  check $f pop 5
+done
+
+for f in $rmops; do
+  check $f rsp 3
+  check $f push 1
+  check $f pop 4
 done
 
 for f in $func; do

Modified: compiler-rt/trunk/lib/tsan/rtl/Makefile.old
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/Makefile.old?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/Makefile.old (original)
+++ compiler-rt/trunk/lib/tsan/rtl/Makefile.old Fri May 30 08:36:29 2014
@@ -1,4 +1,4 @@
-CXXFLAGS = -std=c++11 -fPIE -g -Wall -Werror -fno-builtin -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
+CXXFLAGS = -std=c++11 -fPIE -g -Wall -Werror -fno-builtin -msse3 -DTSAN_DEBUG=$(DEBUG) -DSANITIZER_DEBUG=$(DEBUG)
 CLANG=clang
 ifeq ($(DEBUG), 0)
   CXXFLAGS += -O3

Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_defs.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_defs.h?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_defs.h (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_defs.h Fri May 30 08:36:29 2014
@@ -54,6 +54,7 @@ const uptr kShadowCnt = TSAN_SHADOW_COUN
 # endif
 #else
 // Count of shadow values in a shadow cell.
+#define TSAN_SHADOW_COUNT 4
 const uptr kShadowCnt = 4;
 #endif
 

Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc Fri May 30 08:36:29 2014
@@ -25,6 +25,16 @@
 #include "tsan_suppressions.h"
 #include "tsan_symbolize.h"
 
+#ifdef __SSE3__
+// <emmintrin.h> transitively includes <stdlib.h>,
+// and it's prohibited to include std headers into tsan runtime.
+// So we do this dirty trick.
+#define _MM_MALLOC_H_INCLUDED
+#define __MM_MALLOC_H
+#include <emmintrin.h>
+typedef __m128i m128;
+#endif
+
 volatile int __tsan_resumed = 0;
 
 extern "C" void __tsan_resume() {
@@ -471,7 +481,8 @@ void StoreIfNotYetStored(u64 *sp, u64 *s
   *s = 0;
 }
 
-static inline void HandleRace(ThreadState *thr, u64 *shadow_mem,
+ALWAYS_INLINE
+void HandleRace(ThreadState *thr, u64 *shadow_mem,
                               Shadow cur, Shadow old) {
   thr->racy_state[0] = cur.raw();
   thr->racy_state[1] = old.raw();
@@ -483,16 +494,12 @@ static inline void HandleRace(ThreadStat
 #endif
 }
 
-static inline bool OldIsInSameSynchEpoch(Shadow old, ThreadState *thr) {
-  return old.epoch() >= thr->fast_synch_epoch;
-}
-
 static inline bool HappensBefore(Shadow old, ThreadState *thr) {
   return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
 }
 
-ALWAYS_INLINE USED
-void MemoryAccessImpl(ThreadState *thr, uptr addr,
+ALWAYS_INLINE
+void MemoryAccessImpl1(ThreadState *thr, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
     u64 *shadow_mem, Shadow cur) {
   StatInc(thr, StatMop);
@@ -586,6 +593,90 @@ void UnalignedMemoryAccess(ThreadState *
   }
 }
 
+ALWAYS_INLINE
+bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+  Shadow cur(a);
+  for (uptr i = 0; i < kShadowCnt; i++) {
+    Shadow old(LoadShadow(&s[i]));
+    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
+        old.TidWithIgnore() == cur.TidWithIgnore() &&
+        old.epoch() > sync_epoch &&
+        old.IsAtomic() == cur.IsAtomic() &&
+        old.IsRead() <= cur.IsRead())
+      return true;
+  }
+  return false;
+}
+
+#if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
+#define SHUF(v0, v1, i0, i1, i2, i3) _mm_castps_si128(_mm_shuffle_ps( \
+    _mm_castsi128_ps(v0), _mm_castsi128_ps(v1), \
+    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+ALWAYS_INLINE
+bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+  // This is an optimized version of ContainsSameAccessSlow.
+  // load current access into access[0:63]
+  const m128 access     = _mm_cvtsi64_si128(a);
+  // duplicate high part of access in addr0:
+  // addr0[0:31]        = access[32:63]
+  // addr0[32:63]       = access[32:63]
+  // addr0[64:95]       = access[32:63]
+  // addr0[96:127]      = access[32:63]
+  const m128 addr0      = SHUF(access, access, 1, 1, 1, 1);
+  // load 4 shadow slots
+  const m128 shadow0    = _mm_load_si128((__m128i*)s);
+  const m128 shadow1    = _mm_load_si128((__m128i*)s + 1);
+  // load high parts of 4 shadow slots into addr_vect:
+  // addr_vect[0:31]    = shadow0[32:63]
+  // addr_vect[32:63]   = shadow0[96:127]
+  // addr_vect[64:95]   = shadow1[32:63]
+  // addr_vect[96:127]  = shadow1[96:127]
+  m128 addr_vect        = SHUF(shadow0, shadow1, 1, 3, 1, 3);
+  if (!is_write) {
+    // set IsRead bit in addr_vect
+    const m128 rw_mask1 = _mm_cvtsi64_si128(1<<15);
+    const m128 rw_mask  = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
+    addr_vect           = _mm_or_si128(addr_vect, rw_mask);
+  }
+  // addr0 == addr_vect?
+  const m128 addr_res   = _mm_cmpeq_epi32(addr0, addr_vect);
+  // epoch1[0:63]       = sync_epoch
+  const m128 epoch1     = _mm_cvtsi64_si128(sync_epoch);
+  // epoch[0:31]        = sync_epoch[0:31]
+  // epoch[32:63]       = sync_epoch[0:31]
+  // epoch[64:95]       = sync_epoch[0:31]
+  // epoch[96:127]      = sync_epoch[0:31]
+  const m128 epoch      = SHUF(epoch1, epoch1, 0, 0, 0, 0);
+  // load low parts of shadow cell epochs into epoch_vect:
+  // epoch_vect[0:31]   = shadow0[0:31]
+  // epoch_vect[32:63]  = shadow0[64:95]
+  // epoch_vect[64:95]  = shadow1[0:31]
+  // epoch_vect[96:127] = shadow1[64:95]
+  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
+  // epoch_vect >= sync_epoch?
+  const m128 epoch_res  = _mm_cmpgt_epi32(epoch_vect, epoch);
+  // addr_res & epoch_res
+  const m128 res        = _mm_and_si128(addr_res, epoch_res);
+  // mask[0] = res[7]
+  // mask[1] = res[15]
+  // ...
+  // mask[15] = res[127]
+  const int mask        = _mm_movemask_epi8(res);
+  return mask != 0;
+}
+#endif
+
+ALWAYS_INLINE
+bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+#if defined(__SSE3__) && TSAN_SHADOW_COUNT == 4
+  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
+  DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
+  return res;
+#else
+  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+#endif
+}
+
 ALWAYS_INLINE USED
 void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
     int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic) {
@@ -618,14 +709,12 @@ void MemoryAccess(ThreadState *thr, uptr
   }
 
   FastState fast_state = thr->fast_state;
-  if (fast_state.GetIgnoreBit())
+  if (fast_state.GetIgnoreBit()) {
+    StatInc(thr, StatMop);
+    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+    StatInc(thr, StatMopIgnored);
     return;
-  if (kCollectHistory) {
-    fast_state.IncrementEpoch();
-    thr->fast_state = fast_state;
-    // We must not store to the trace if we do not store to the shadow.
-    // That is, this call must be moved somewhere below.
-    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
   }
 
   Shadow cur(fast_state);
@@ -633,7 +722,40 @@ void MemoryAccess(ThreadState *thr, uptr
   cur.SetWrite(kAccessIsWrite);
   cur.SetAtomic(kIsAtomic);
 
-  MemoryAccessImpl(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
+      thr->fast_synch_epoch, kAccessIsWrite))) {
+    StatInc(thr, StatMop);
+    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+    StatInc(thr, StatMopSame);
+    return;
+  }
+
+  if (kCollectHistory) {
+    fast_state.IncrementEpoch();
+    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+    thr->fast_state = fast_state;
+    cur.IncrementEpoch();
+  }
+
+  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+      shadow_mem, cur);
+}
+
+// Called by MemoryAccessRange in tsan_rtl_thread.cc
+void MemoryAccessImpl(ThreadState *thr, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
+    u64 *shadow_mem, Shadow cur) {
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(),
+      thr->fast_synch_epoch, kAccessIsWrite))) {
+    StatInc(thr, StatMop);
+    StatInc(thr, kAccessIsWrite ? StatMopWrite : StatMopRead);
+    StatInc(thr, (StatType)(StatMop1 + kAccessSizeLog));
+    StatInc(thr, StatMopSame);
+    return;
+  }
+
+  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
       shadow_mem, cur);
 }
 

Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.h?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.h (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.h Fri May 30 08:36:29 2014
@@ -78,14 +78,14 @@ const u64 kShadowRodata = (u64)-1;  // .
 // FastState (from most significant bit):
 //   ignore          : 1
 //   tid             : kTidBits
-//   epoch           : kClkBits
 //   unused          : -
 //   history_size    : 3
+//   epoch           : kClkBits
 class FastState {
  public:
   FastState(u64 tid, u64 epoch) {
     x_ = tid << kTidShift;
-    x_ |= epoch << kClkShift;
+    x_ |= epoch;
     DCHECK_EQ(tid, this->tid());
     DCHECK_EQ(epoch, this->epoch());
     DCHECK_EQ(GetIgnoreBit(), false);
@@ -110,13 +110,13 @@ class FastState {
   }
 
   u64 epoch() const {
-    u64 res = (x_ << (kTidBits + 1)) >> (64 - kClkBits);
+    u64 res = x_ & ((1ull << kClkBits) - 1);
     return res;
   }
 
   void IncrementEpoch() {
     u64 old_epoch = epoch();
-    x_ += 1 << kClkShift;
+    x_ += 1;
     DCHECK_EQ(old_epoch + 1, epoch());
     (void)old_epoch;
   }
@@ -128,17 +128,19 @@ class FastState {
   void SetHistorySize(int hs) {
     CHECK_GE(hs, 0);
     CHECK_LE(hs, 7);
-    x_ = (x_ & ~7) | hs;
+    x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
   }
 
+  ALWAYS_INLINE
   int GetHistorySize() const {
-    return (int)(x_ & 7);
+    return (int)((x_ >> kHistoryShift) & kHistoryMask);
   }
 
   void ClearHistorySize() {
-    x_ &= ~7;
+    SetHistorySize(0);
   }
 
+  ALWAYS_INLINE
   u64 GetTracePos() const {
     const int hs = GetHistorySize();
     // When hs == 0, the trace consists of 2 parts.
@@ -149,20 +151,21 @@ class FastState {
  private:
   friend class Shadow;
   static const int kTidShift = 64 - kTidBits - 1;
-  static const int kClkShift = kTidShift - kClkBits;
   static const u64 kIgnoreBit = 1ull << 63;
   static const u64 kFreedBit = 1ull << 63;
+  static const u64 kHistoryShift = kClkBits;
+  static const u64 kHistoryMask = 7;
   u64 x_;
 };
 
 // Shadow (from most significant bit):
 //   freed           : 1
 //   tid             : kTidBits
-//   epoch           : kClkBits
 //   is_atomic       : 1
 //   is_read         : 1
 //   size_log        : 2
 //   addr0           : 3
+//   epoch           : kClkBits
 class Shadow : public FastState {
  public:
   explicit Shadow(u64 x)
@@ -175,10 +178,10 @@ class Shadow : public FastState {
   }
 
   void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
-    DCHECK_EQ(x_ & 31, 0);
+    DCHECK_EQ((x_ >> kClkBits) & 31, 0);
     DCHECK_LE(addr0, 7);
     DCHECK_LE(kAccessSizeLog, 3);
-    x_ |= (kAccessSizeLog << 3) | addr0;
+    x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
     DCHECK_EQ(kAccessSizeLog, size_log());
     DCHECK_EQ(addr0, this->addr0());
   }
@@ -211,47 +214,34 @@ class Shadow : public FastState {
     return shifted_xor == 0;
   }
 
-  static inline bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
-    u64 masked_xor = (s1.x_ ^ s2.x_) & 31;
+  static ALWAYS_INLINE
+  bool Addr0AndSizeAreEqual(const Shadow s1, const Shadow s2) {
+    u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
     return masked_xor == 0;
   }
 
-  static inline bool TwoRangesIntersect(Shadow s1, Shadow s2,
+  static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
       unsigned kS2AccessSize) {
     bool res = false;
     u64 diff = s1.addr0() - s2.addr0();
     if ((s64)diff < 0) {  // s1.addr0 < s2.addr0  // NOLINT
       // if (s1.addr0() + size1) > s2.addr0()) return true;
-      if (s1.size() > -diff)  res = true;
+      if (s1.size() > -diff)
+        res = true;
     } else {
       // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
-      if (kS2AccessSize > diff) res = true;
+      if (kS2AccessSize > diff)
+        res = true;
     }
-    DCHECK_EQ(res, TwoRangesIntersectSLOW(s1, s2));
-    DCHECK_EQ(res, TwoRangesIntersectSLOW(s2, s1));
+    DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
+    DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
     return res;
   }
 
-  // The idea behind the offset is as follows.
-  // Consider that we have 8 bool's contained within a single 8-byte block
-  // (mapped to a single shadow "cell"). Now consider that we write to the bools
-  // from a single thread (which we consider the common case).
-  // W/o offsetting each access will have to scan 4 shadow values at average
-  // to find the corresponding shadow value for the bool.
-  // With offsetting we start scanning shadow with the offset so that
-  // each access hits necessary shadow straight off (at least in an expected
-  // optimistic case).
-  // This logic works seamlessly for any layout of user data. For example,
-  // if user data is {int, short, char, char}, then accesses to the int are
-  // offsetted to 0, short - 4, 1st char - 6, 2nd char - 7. Hopefully, accesses
-  // from a single thread won't need to scan all 8 shadow values.
-  unsigned ComputeSearchOffset() {
-    return x_ & 7;
-  }
-  u64 addr0() const { return x_ & 7; }
-  u64 size() const { return 1ull << size_log(); }
-  bool IsWrite() const { return !IsRead(); }
-  bool IsRead() const { return x_ & kReadBit; }
+  u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
+  u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
+  bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
+  bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
 
   // The idea behind the freed bit is as follows.
   // When the memory is freed (or otherwise unaccessible) we write to the shadow
@@ -276,15 +266,14 @@ class Shadow : public FastState {
     return res;
   }
 
-  bool IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
-    // analyzes 5-th bit (is_read) and 6-th bit (is_atomic)
-    bool v = x_ & u64(((kIsWrite ^ 1) << kReadShift)
-        | (kIsAtomic << kAtomicShift));
+  bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
+    bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift)
+        | (u64(kIsAtomic) << kAtomicShift));
     DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
     return v;
   }
 
-  bool IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
+  bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
     bool v = ((x_ >> kReadShift) & 3)
         <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
     DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
@@ -292,7 +281,7 @@ class Shadow : public FastState {
     return v;
   }
 
-  bool IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
+  bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
     bool v = ((x_ >> kReadShift) & 3)
         >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
     DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
@@ -301,14 +290,14 @@ class Shadow : public FastState {
   }
 
  private:
-  static const u64 kReadShift   = 5;
+  static const u64 kReadShift   = 5 + kClkBits;
   static const u64 kReadBit     = 1ull << kReadShift;
-  static const u64 kAtomicShift = 6;
+  static const u64 kAtomicShift = 6 + kClkBits;
   static const u64 kAtomicBit   = 1ull << kAtomicShift;
 
-  u64 size_log() const { return (x_ >> 3) & 3; }
+  u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }
 
-  static bool TwoRangesIntersectSLOW(const Shadow s1, const Shadow s2) {
+  static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
     if (s1.addr0() == s2.addr0()) return true;
     if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
       return true;

Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc Fri May 30 08:36:29 2014
@@ -37,6 +37,7 @@ void StatOutput(u64 *stat) {
   name[StatMop4]                         = "            size 4                ";
   name[StatMop8]                         = "            size 8                ";
   name[StatMopSame]                      = "  Including same                  ";
+  name[StatMopIgnored]                   = "  Including ignored               ";
   name[StatMopRange]                     = "  Including range                 ";
   name[StatMopRodata]                    = "  Including .rodata               ";
   name[StatMopRangeRodata]               = "  Including .rodata range         ";

Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h Fri May 30 08:36:29 2014
@@ -26,6 +26,7 @@ enum StatType {
   StatMop4,
   StatMop8,
   StatMopSame,
+  StatMopIgnored,
   StatMopRange,
   StatMopRodata,
   StatMopRangeRodata,

Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_update_shadow_word_inl.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_update_shadow_word_inl.h?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_update_shadow_word_inl.h (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_update_shadow_word_inl.h Fri May 30 08:36:29 2014
@@ -16,8 +16,7 @@
 do {
   StatInc(thr, StatShadowProcessed);
   const unsigned kAccessSize = 1 << kAccessSizeLog;
-  unsigned off = cur.ComputeSearchOffset();
-  u64 *sp = &shadow_mem[(idx + off) % kShadowCnt];
+  u64 *sp = &shadow_mem[idx];
   old = LoadShadow(sp);
   if (old.IsZero()) {
     StatInc(thr, StatShadowZero);
@@ -33,16 +32,6 @@ do {
     // same thread?
     if (Shadow::TidsAreEqual(old, cur)) {
       StatInc(thr, StatShadowSameThread);
-      if (OldIsInSameSynchEpoch(old, thr)) {
-        if (old.IsRWNotWeaker(kAccessIsWrite, kIsAtomic)) {
-          // found a slot that holds effectively the same info
-          // (that is, same tid, same sync epoch and same size)
-          StatInc(thr, StatMopSame);
-          return;
-        }
-        StoreIfNotYetStored(sp, &store_word);
-        break;
-      }
       if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))
         StoreIfNotYetStored(sp, &store_word);
       break;

Modified: compiler-rt/trunk/lib/tsan/tests/unit/tsan_mman_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/tests/unit/tsan_mman_test.cc?rev=209897&r1=209896&r2=209897&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/tests/unit/tsan_mman_test.cc (original)
+++ compiler-rt/trunk/lib/tsan/tests/unit/tsan_mman_test.cc Fri May 30 08:36:29 2014
@@ -144,6 +144,11 @@ TEST(Mman, Stats) {
 }
 
 TEST(Mman, CallocOverflow) {
+#if TSAN_DEBUG
+  // EXPECT_DEATH clones a thread with 4K stack,
+  // which is overflown by tsan memory accesses functions in debug mode.
+  return;
+#endif
   size_t kArraySize = 4096;
   volatile size_t kMaxSizeT = std::numeric_limits<size_t>::max();
   volatile size_t kArraySize2 = kMaxSizeT / kArraySize + 10;




