[compiler-rt] r204656 - tsan: optimize vector clock operations
Dmitry Vyukov
dvyukov at google.com
Mon Mar 24 11:54:21 PDT 2014
Author: dvyukov
Date: Mon Mar 24 13:54:20 2014
New Revision: 204656
URL: http://llvm.org/viewvc/llvm-project?rev=204656&view=rev
Log:
tsan: optimize vector clock operations
Make vector clock operations O(1) for several important classes of use cases.
See comments for details.
Below are stats from a large server app; 77% of all clock operations are handled in O(1).
Clock acquire              : 25983645
  empty clock              : 6288080
  fast from release-store  : 14917504
  contains my tid          : 4515743
  repeated (fast)          : 2141428
  full (slow)              : 2636633
  acquired something       : 1426863
Clock release              : 2544216
  resize                   : 6241
  fast1                    : 197693
  fast2                    : 1016293
  fast3                    : 2007
  full (slow)              : 1797488
  was acquired             : 709227
  clear tail               : 1
  last overflow            : 0
Clock release store        : 3446946
  resize                   : 200516
  fast                     : 469265
  slow                     : 2977681
  clear tail               : 0
Clock acquire-release      : 820028
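For context, here is a rough standalone sketch of the plain O(N) semantics that acquire/release/release-store implement conceptually (the new comment block in tsan_clock.cc describes the same model). kMaxThreads, the VectorClock type and the small driver in main() are illustrative only and are not part of the tsan runtime:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative constant/type, not the tsan runtime's.
constexpr int kMaxThreads = 4;

struct VectorClock {
  uint64_t clock[kMaxThreads] = {};

  // acquire: pull other threads' times into this clock (element-wise max).
  void Acquire(const VectorClock &src) {
    for (int i = 0; i < kMaxThreads; i++)
      clock[i] = std::max(clock[i], src.clock[i]);
  }

  // release: push this clock into a sync object's clock (element-wise max).
  void Release(VectorClock *dst) const {
    for (int i = 0; i < kMaxThreads; i++)
      dst->clock[i] = std::max(dst->clock[i], clock[i]);
  }

  // release-store: overwrite the sync object's clock entirely.
  void ReleaseStore(VectorClock *dst) const {
    for (int i = 0; i < kMaxThreads; i++)
      dst->clock[i] = clock[i];
  }
};

int main() {
  VectorClock thr0, thr1, sync;
  thr0.clock[0] = 5;    // thread 0 is at time 5
  thr0.Release(&sync);  // e.g. mutex unlock by thread 0
  thr1.Acquire(sync);   // e.g. mutex lock by thread 1
  std::printf("thr1 sees thread 0 at time %llu\n",
              (unsigned long long)thr1.clock[0]);  // prints 5
}

The optimized code below preserves these semantics (verified by the fuzzer test at the end of this patch) while avoiding the O(kMaxThreads) loops on the fast paths counted above.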
Modified:
compiler-rt/trunk/lib/tsan/rtl/tsan_clock.cc
compiler-rt/trunk/lib/tsan/rtl/tsan_clock.h
compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc
compiler-rt/trunk/lib/tsan/rtl/tsan_rtl_mutex.cc
compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc
compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h
compiler-rt/trunk/lib/tsan/tests/unit/tsan_clock_test.cc
Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_clock.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_clock.cc?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_clock.cc (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_clock.cc Mon Mar 24 13:54:20 2014
@@ -13,66 +13,157 @@
#include "tsan_clock.h"
#include "tsan_rtl.h"
-// It's possible to optimize clock operations for some important cases
-// so that they are O(1). The cases include singletons, once's, local mutexes.
-// First, SyncClock must be re-implemented to allow indexing by tid.
-// It must not necessarily be a full vector clock, though. For example it may
-// be a multi-level table.
-// Then, each slot in SyncClock must contain a dirty bit (it's united with
-// the clock value, so no space increase). The acquire algorithm looks
-// as follows:
-// void acquire(thr, tid, thr_clock, sync_clock) {
-// if (!sync_clock[tid].dirty)
-// return; // No new info to acquire.
-// // This handles constant reads of singleton pointers and
-// // stop-flags.
-// acquire_impl(thr_clock, sync_clock); // As usual, O(N).
-// sync_clock[tid].dirty = false;
-// sync_clock.dirty_count--;
-// }
-// The release operation looks as follows:
-// void release(thr, tid, thr_clock, sync_clock) {
-// // thr->sync_cache is a simple fixed-size hash-based cache that holds
-// // several previous sync_clock's.
-// if (thr->sync_cache[sync_clock] >= thr->last_acquire_epoch) {
-// // The thread did no acquire operations since last release on this clock.
-// // So update only the thread's slot (other slots can't possibly change).
-// sync_clock[tid].clock = thr->epoch;
-// if (sync_clock.dirty_count == sync_clock.cnt
-// || (sync_clock.dirty_count == sync_clock.cnt - 1
-// && sync_clock[tid].dirty == false))
-// // All dirty flags are set, bail out.
-// return;
-// set all dirty bits, but preserve the thread's bit. // O(N)
-// update sync_clock.dirty_count;
-// return;
+// SyncClock and ThreadClock implement vector clocks for sync variables
+// (mutexes, atomic variables, file descriptors, etc.) and threads, respectively.
+// ThreadClock contains a fixed-size vector clock for the maximum number of
+// threads; SyncClock contains a growable vector clock sized for the number of
+// threads currently needed.
+// Together they implement a very simple model of operations, namely:
+//
+// void ThreadClock::acquire(const SyncClock *src) {
+// for (int i = 0; i < kMaxThreads; i++)
+// clock[i] = max(clock[i], src->clock[i]);
// }
-// release_impl(thr_clock, sync_clock); // As usual, O(N).
-// set all dirty bits, but preserve the thread's bit.
-// // The previous step is combined with release_impl(), so that
-// // we scan the arrays only once.
-// update sync_clock.dirty_count;
-// }
+//
+// void ThreadClock::release(SyncClock *dst) const {
+// for (int i = 0; i < kMaxThreads; i++)
+// dst->clock[i] = max(dst->clock[i], clock[i]);
+// }
+//
+// void ThreadClock::ReleaseStore(SyncClock *dst) const {
+// for (int i = 0; i < kMaxThreads; i++)
+// dst->clock[i] = clock[i];
+// }
+//
+// void ThreadClock::acq_rel(SyncClock *dst) {
+// acquire(dst);
+// release(dst);
+// }
+//
+// Conformance to this model is extensively verified in tsan_clock_test.cc.
+// However, the implementation is significantly more complex. The added
+// complexity allows important classes of use cases to be handled in O(1)
+// instead of O(N).
+//
+// The use cases are:
+// 1. Singleton/once atomic that has a single release-store operation followed
+// by zillions of acquire-loads (the acquire-load is O(1)).
+// 2. Thread-local mutex (both lock and unlock can be O(1)).
+// 3. Leaf mutex (unlock is O(1)).
+// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
+// 5. An atomic with a single writer (writes can be O(1)).
+// The implementation dynamically adapts to the workload. So if an atomic is in
+// a read-only phase, these reads will be O(1); if it later switches to a
+// read/write phase, the implementation will correctly handle that by switching
+// to O(N).
+//
+// Thread-safety note: all const operations on SyncClock's are conducted under
+// a shared lock; all non-const operations on SyncClock's are conducted under
+// an exclusive lock; ThreadClock's are private to respective threads and so
+// do not need any protection.
+//
+// Description of ThreadClock state:
+// clk_ - fixed-size vector clock.
+// nclk_ - effective size of the vector clock (the rest is zeros).
+// tid_ - index of the thread associated with the clock ("current thread").
+// last_acquire_ - the current thread's time when it last acquired something
+// from other threads.
+//
+// Description of SyncClock state:
+// clk_ - variable-size vector clock; the low kClkBits bits hold the timestamp,
+// the remaining bits hold the "last_acq" counter;
+// if last_acq == release_seq_, then the respective thread has already
+// acquired this clock (except possibly the dirty_tids_ entries).
+// dirty_tids_ - holds up to two indices in the vector clock that other threads
+// need to acquire regardless of the last_acq value;
+// release_store_tid_ - denotes that the clock state is the result of a
+// release-store operation by the thread with index release_store_tid_.
+
+// We don't have ThreadState in these methods, so this is an ugly hack that
+// works only in C++.
+#ifndef TSAN_GO
+# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
+#else
+# define CPP_STAT_INC(typ) (void)0
+#endif
namespace __tsan {
-ThreadClock::ThreadClock() {
- nclk_ = 0;
- for (uptr i = 0; i < (uptr)kMaxTidInClock; i++)
- clk_[i] = 0;
+const unsigned kInvalidTid = (unsigned)-1;
+
+ThreadClock::ThreadClock(unsigned tid)
+ : tid_(tid) {
+ DCHECK_LT(tid, kMaxTidInClock);
+ nclk_ = tid_ + 1;
+ internal_memset(clk_, 0, sizeof(clk_));
}
void ThreadClock::acquire(const SyncClock *src) {
DCHECK(nclk_ <= kMaxTid);
DCHECK(src->clk_.Size() <= kMaxTid);
+ CPP_STAT_INC(StatClockAcquire);
+ // Check if it's empty -> no need to do anything.
const uptr nclk = src->clk_.Size();
- if (nclk == 0)
+ if (nclk == 0) {
+ CPP_STAT_INC(StatClockAcquireEmpty);
+ return;
+ }
+
+ // If the clock is the result of a release-store operation, and the current
+ // thread has already acquired from that thread at or after that time,
+ // we don't need to do anything (src can't contain anything new for the
+ // current thread).
+ unsigned tid1 = src->release_store_tid_;
+ if (tid1 != kInvalidTid && (src->clk_[tid1] & kClkMask) <= clk_[tid1]) {
+ CPP_STAT_INC(StatClockAcquireFastRelease);
return;
+ }
+
+ // Check if we've already acquired src after the last release operation on src
+ bool acquired = false;
+ if (nclk > tid_) {
+ CPP_STAT_INC(StatClockAcquireLarge);
+ u64 myepoch = src->clk_[tid_];
+ u64 last_acq = myepoch >> kClkBits;
+ if (last_acq == src->release_seq_) {
+ CPP_STAT_INC(StatClockAcquireRepeat);
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ unsigned tid = src->dirty_tids_[i];
+ if (tid != kInvalidTid) {
+ u64 epoch = src->clk_[tid] & kClkMask;
+ if (clk_[tid] < epoch) {
+ clk_[tid] = epoch;
+ acquired = true;
+ }
+ }
+ }
+ if (acquired) {
+ CPP_STAT_INC(StatClockAcquiredSomething);
+ last_acquire_ = clk_[tid_];
+ }
+ return;
+ }
+ }
+
+ // O(N) acquire.
+ CPP_STAT_INC(StatClockAcquireFull);
nclk_ = max(nclk_, nclk);
for (uptr i = 0; i < nclk; i++) {
- if (clk_[i] < src->clk_[i])
- clk_[i] = src->clk_[i];
+ u64 epoch = src->clk_[i] & kClkMask;
+ if (clk_[i] < epoch) {
+ clk_[i] = epoch;
+ acquired = true;
+ }
+ }
+
+ // Remember that this thread has acquired this clock.
+ if (nclk > tid_) {
+ u64 myepoch = src->clk_[tid_];
+ src->clk_[tid_] = (myepoch & kClkMask) | (src->release_seq_ << kClkBits);
+ }
+
+ if (acquired) {
+ CPP_STAT_INC(StatClockAcquiredSomething);
+ last_acquire_ = clk_[tid_];
}
}
@@ -80,32 +171,185 @@ void ThreadClock::release(SyncClock *dst
DCHECK(nclk_ <= kMaxTid);
DCHECK(dst->clk_.Size() <= kMaxTid);
- if (dst->clk_.Size() < nclk_)
+ if (dst->clk_.Size() == 0) {
+ // ReleaseStore will correctly set release_store_tid_,
+ // which can be important for future operations.
+ ReleaseStore(dst);
+ return;
+ }
+
+ CPP_STAT_INC(StatClockRelease);
+ // Check if we need to resize dst.
+ if (dst->clk_.Size() < nclk_) {
+ CPP_STAT_INC(StatClockReleaseResize);
dst->clk_.Resize(nclk_);
- for (uptr i = 0; i < nclk_; i++) {
- if (dst->clk_[i] < clk_[i])
- dst->clk_[i] = clk_[i];
}
+
+ // Check whether we have acquired anything from other threads
+ // since the last release on dst. If not, we only need to update
+ // dst->clk_[tid_].
+ if ((dst->clk_[tid_] & kClkMask) > last_acquire_) {
+ UpdateCurrentThread(dst);
+ if (dst->release_store_tid_ != tid_)
+ dst->release_store_tid_ = kInvalidTid;
+ return;
+ }
+
+ // O(N) release.
+ CPP_STAT_INC(StatClockReleaseFull);
+ // First, remember whether we've acquired dst.
+ bool acquired = IsAlreadyAcquired(dst);
+ if (acquired)
+ CPP_STAT_INC(StatClockReleaseAcquired);
+ // Update dst->clk_.
+ for (uptr i = 0; i < nclk_; i++)
+ dst->clk_[i] = max(dst->clk_[i] & kClkMask, clk_[i]);
+ // Clear last_acq in the remaining elements.
+ if (nclk_ < dst->clk_.Size())
+ CPP_STAT_INC(StatClockReleaseClearTail);
+ for (uptr i = nclk_; i < dst->clk_.Size(); i++)
+ dst->clk_[i] = dst->clk_[i] & kClkMask;
+ // Since we've cleared all last_acq, we can reset release_seq_ as well.
+ dst->release_seq_ = 1;
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dst->dirty_tids_[i] = kInvalidTid;
+ dst->release_store_tid_ = kInvalidTid;
+ // If we've acquired dst, remember this fact,
+ // so that we don't need to do it again on the next acquire.
+ if (acquired)
+ dst->clk_[tid_] = dst->clk_[tid_] | (1ULL << kClkBits);
}
void ThreadClock::ReleaseStore(SyncClock *dst) const {
DCHECK(nclk_ <= kMaxTid);
DCHECK(dst->clk_.Size() <= kMaxTid);
+ CPP_STAT_INC(StatClockStore);
- if (dst->clk_.Size() < nclk_)
+ // Check if we need to resize dst.
+ if (dst->clk_.Size() < nclk_) {
+ CPP_STAT_INC(StatClockStoreResize);
dst->clk_.Resize(nclk_);
+ }
+
+ if (dst->release_store_tid_ == tid_ &&
+ (dst->clk_[tid_] & kClkMask) > last_acquire_) {
+ CPP_STAT_INC(StatClockStoreFast);
+ UpdateCurrentThread(dst);
+ return;
+ }
+
+ // O(N) release-store.
+ CPP_STAT_INC(StatClockStoreFull);
for (uptr i = 0; i < nclk_; i++)
dst->clk_[i] = clk_[i];
- for (uptr i = nclk_; i < dst->clk_.Size(); i++)
- dst->clk_[i] = 0;
+ // Clear the tail of dst->clk_.
+ if (nclk_ < dst->clk_.Size()) {
+ internal_memset(&dst->clk_[nclk_], 0,
+ (dst->clk_.Size() - nclk_) * sizeof(dst->clk_[0]));
+ CPP_STAT_INC(StatClockStoreTail);
+ }
+ // Since we've cleared all last_acq, we can reset release_seq_ as well.
+ dst->release_seq_ = 1;
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dst->dirty_tids_[i] = kInvalidTid;
+ dst->release_store_tid_ = tid_;
+ // Remember that we don't need to acquire it in the future.
+ dst->clk_[tid_] = clk_[tid_] | (1ULL << kClkBits);
}
void ThreadClock::acq_rel(SyncClock *dst) {
+ CPP_STAT_INC(StatClockAcquireRelease);
acquire(dst);
- release(dst);
+ ReleaseStore(dst);
+}
+
+// Updates only the single element of dst->clk_ related to the current thread.
+void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
+ // Update the thread's time, but preserve last_acq.
+ dst->clk_[tid_] = clk_[tid_] | (dst->clk_[tid_] & ~kClkMask);
+
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ if (dst->dirty_tids_[i] == tid_) {
+ CPP_STAT_INC(StatClockReleaseFast1);
+ return;
+ }
+ if (dst->dirty_tids_[i] == kInvalidTid) {
+ CPP_STAT_INC(StatClockReleaseFast2);
+ dst->dirty_tids_[i] = tid_;
+ return;
+ }
+ }
+ CPP_STAT_INC(StatClockReleaseFast3);
+ dst->release_seq_++;
+ for (unsigned i = 0; i < kDirtyTids; i++)
+ dst->dirty_tids_[i] = kInvalidTid;
+ if ((dst->release_seq_ << kClkBits) == 0) {
+ CPP_STAT_INC(StatClockReleaseLastOverflow);
+ dst->release_seq_ = 1;
+ for (uptr i = 0; i < dst->clk_.Size(); i++)
+ dst->clk_[i] = dst->clk_[i] & kClkMask;
+ }
+}
+
+// Checks whether the current thread has already acquired src.
+bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
+ u64 myepoch = src->clk_[tid_];
+ u64 last_acq = myepoch >> kClkBits;
+ if (last_acq != src->release_seq_)
+ return false;
+ for (unsigned i = 0; i < kDirtyTids; i++) {
+ unsigned tid = src->dirty_tids_[i];
+ if (tid != kInvalidTid) {
+ u64 epoch = src->clk_[tid] & kClkMask;
+ if (clk_[tid] < epoch)
+ return false;
+ }
+ }
+ return true;
+}
+
+// Sets a single element in the vector clock.
+// This function is called only from weird places like AcquireGlobal.
+void ThreadClock::set(unsigned tid, u64 v) {
+ DCHECK_LT(tid, kMaxTid);
+ DCHECK_GE(v, clk_[tid]);
+ clk_[tid] = v;
+ if (nclk_ <= tid)
+ nclk_ = tid + 1;
+ last_acquire_ = clk_[tid_];
+}
+
+void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
+ printf("clock=[");
+ for (uptr i = 0; i < nclk_; i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
+ printf("] tid=%u last_acq=%llu", tid_, last_acquire_);
}
SyncClock::SyncClock()
- : clk_(MBlockClock) {
+ : clk_(MBlockClock) {
+ for (uptr i = 0; i < kDirtyTids; i++)
+ dirty_tids_[i] = kInvalidTid;
+ release_seq_ = 0;
+ release_store_tid_ = kInvalidTid;
+}
+
+void SyncClock::Reset() {
+ clk_.Reset();
+ release_seq_ = 0;
+ release_store_tid_ = kInvalidTid;
+ for (uptr i = 0; i < kDirtyTids; i++)
+ dirty_tids_[i] = kInvalidTid;
+}
+
+void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
+ printf("clock=[");
+ for (uptr i = 0; i < clk_.Size(); i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i] & kClkMask);
+ printf("] last_acq=[");
+ for (uptr i = 0; i < clk_.Size(); i++)
+ printf("%s%llu", i == 0 ? "" : ",", clk_[i] >> kClkBits);
+ printf("] release_seq=%llu release_store_tid=%d dirty_tids=%d/%d",
+ release_seq_, release_store_tid_, dirty_tids_[0], dirty_tids_[1]);
}
} // namespace __tsan
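To make the acquire fast paths above more concrete, here is a rough sketch (not the runtime code) of the bit-packing that SyncClock uses: each slot keeps the thread's timestamp in the low kClkBits bits and a "last acquired at release_seq" counter in the high bits, so a repeated acquire can be recognized with a single comparison. The kClkBits value and the helper names here are illustrative:

#include <cstdint>
#include <cstdio>

// Illustrative value; the real constant lives in the tsan headers.
constexpr int kClkBits = 42;
constexpr uint64_t kClkMask = (1ULL << kClkBits) - 1;

// Pack a timestamp and the "acquired at release_seq" counter into one u64 slot.
uint64_t Pack(uint64_t epoch, uint64_t last_acq) {
  return (epoch & kClkMask) | (last_acq << kClkBits);
}
uint64_t Epoch(uint64_t slot)   { return slot & kClkMask; }
uint64_t LastAcq(uint64_t slot) { return slot >> kClkBits; }

int main() {
  uint64_t release_seq = 3;  // bumped when the dirty-tid slots overflow
  uint64_t slot = Pack(/*epoch=*/17, /*last_acq=*/release_seq);

  // Repeated-acquire fast path: if the acquiring thread's own slot records
  // that it already acquired at the current release_seq, only the (at most
  // two) dirty_tids_ entries need re-checking instead of the whole clock.
  if (LastAcq(slot) == release_seq)
    std::printf("fast path, epoch=%llu already acquired\n",
                (unsigned long long)Epoch(slot));
  else
    std::printf("slow path, full O(N) merge needed\n");
}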
Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_clock.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_clock.h?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_clock.h (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_clock.h Mon Mar 24 13:54:20 2014
@@ -18,6 +18,8 @@
namespace __tsan {
+const u64 kClkMask = (1ULL << kClkBits) - 1;
+
// The clock that lives in sync variables (mutexes, atomics, etc).
class SyncClock {
public:
@@ -27,38 +29,44 @@ class SyncClock {
return clk_.Size();
}
- void Reset() {
- clk_.Reset();
+ u64 get(unsigned tid) const {
+ DCHECK_LT(tid, clk_.Size());
+ return clk_[tid] & kClkMask;
}
+ void Reset();
+
+ void DebugDump(int(*printf)(const char *s, ...));
+
private:
- Vector<u64> clk_;
+ u64 release_seq_;
+ unsigned release_store_tid_;
+ static const uptr kDirtyTids = 2;
+ unsigned dirty_tids_[kDirtyTids];
+ mutable Vector<u64> clk_;
friend struct ThreadClock;
};
// The clock that lives in threads.
struct ThreadClock {
public:
- ThreadClock();
+ explicit ThreadClock(unsigned tid);
u64 get(unsigned tid) const {
DCHECK_LT(tid, kMaxTidInClock);
+ DCHECK_EQ(clk_[tid], clk_[tid] & kClkMask);
return clk_[tid];
}
- void set(unsigned tid, u64 v) {
- DCHECK_LT(tid, kMaxTid);
- DCHECK_GE(v, clk_[tid]);
- clk_[tid] = v;
- if (nclk_ <= tid)
- nclk_ = tid + 1;
+ void set(unsigned tid, u64 v);
+
+ void set(u64 v) {
+ DCHECK_GE(v, clk_[tid_]);
+ clk_[tid_] = v;
}
- void tick(unsigned tid) {
- DCHECK_LT(tid, kMaxTid);
- clk_[tid]++;
- if (nclk_ <= tid)
- nclk_ = tid + 1;
+ void tick() {
+ clk_[tid_]++;
}
uptr size() const {
@@ -70,9 +78,17 @@ struct ThreadClock {
void acq_rel(SyncClock *dst);
void ReleaseStore(SyncClock *dst) const;
+ void DebugDump(int(*printf)(const char *s, ...));
+
private:
+ static const uptr kDirtyTids = SyncClock::kDirtyTids;
+ const unsigned tid_;
+ u64 last_acquire_;
uptr nclk_;
u64 clk_[kMaxTidInClock];
+
+ bool IsAlreadyAcquired(const SyncClock *src) const;
+ void UpdateCurrentThread(SyncClock *dst) const;
};
} // namespace __tsan
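The release side has an analogous O(1) shortcut keyed off the ThreadClock::last_acquire_ field declared above: if the sync clock already holds a timestamp of this thread newer than the thread's last acquire, the thread cannot have learned anything new from other threads since it last released here, so only its own slot needs updating. A minimal sketch of that idea with illustrative names and sizes (the real code additionally maintains dirty_tids_, release_seq_ and release_store_tid_):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Illustrative fixed-size clocks; the runtime uses growable SyncClock vectors.
struct Thread {
  unsigned tid = 0;
  uint64_t clock[4] = {};     // this thread's vector clock
  uint64_t last_acquire = 0;  // this thread's time at its last acquire
};

struct Sync {
  uint64_t clock[4] = {};     // the sync object's vector clock
};

void Release(const Thread &thr, Sync *dst) {
  // Fast path: dst already saw this thread at a time later than its last
  // acquire, so no other slot of thr.clock can have changed since then.
  if (dst->clock[thr.tid] > thr.last_acquire) {
    dst->clock[thr.tid] = thr.clock[thr.tid];  // O(1) update of own slot
    return;
  }
  for (int i = 0; i < 4; i++)                  // otherwise full O(N) merge
    dst->clock[i] = std::max(dst->clock[i], thr.clock[i]);
}

int main() {
  Thread t;
  t.clock[0] = 10;
  t.last_acquire = 2;
  Sync s;
  Release(t, &s);  // slow path: s.clock[0] becomes 10
  t.clock[0] = 12;
  Release(t, &s);  // fast path: only s.clock[0] is touched, becomes 12
  std::printf("s.clock[0]=%llu\n", (unsigned long long)s.clock[0]);
}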
Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_rtl.cc Mon Mar 24 13:54:20 2014
@@ -90,6 +90,7 @@ ThreadState::ThreadState(Context *ctx, i
// they may be accessed before the ctor.
// , ignore_reads_and_writes()
// , ignore_interceptors()
+ , clock(tid)
#ifndef TSAN_GO
, jmp_bufs(MBlockJmpBuf)
#endif
@@ -98,7 +99,11 @@ ThreadState::ThreadState(Context *ctx, i
, stk_addr(stk_addr)
, stk_size(stk_size)
, tls_addr(tls_addr)
- , tls_size(tls_size) {
+ , tls_size(tls_size)
+#ifndef TSAN_GO
+ , last_sleep_clock(tid)
+#endif
+{
}
static void MemoryProfiler(Context *ctx, fd_t fd, int i) {
Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_rtl_mutex.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_rtl_mutex.cc?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_rtl_mutex.cc (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_rtl_mutex.cc Mon Mar 24 13:54:20 2014
@@ -376,7 +376,7 @@ void AfterSleep(ThreadState *thr, uptr p
void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) {
if (thr->ignore_sync)
return;
- thr->clock.set(thr->tid, thr->fast_state.epoch());
+ thr->clock.set(thr->fast_state.epoch());
thr->clock.acquire(c);
StatInc(thr, StatSyncAcquire);
}
@@ -384,7 +384,7 @@ void AcquireImpl(ThreadState *thr, uptr
void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
if (thr->ignore_sync)
return;
- thr->clock.set(thr->tid, thr->fast_state.epoch());
+ thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
thr->clock.release(c);
StatInc(thr, StatSyncRelease);
@@ -393,7 +393,7 @@ void ReleaseImpl(ThreadState *thr, uptr
void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) {
if (thr->ignore_sync)
return;
- thr->clock.set(thr->tid, thr->fast_state.epoch());
+ thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
thr->clock.ReleaseStore(c);
StatInc(thr, StatSyncRelease);
@@ -402,7 +402,7 @@ void ReleaseStoreImpl(ThreadState *thr,
void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
if (thr->ignore_sync)
return;
- thr->clock.set(thr->tid, thr->fast_state.epoch());
+ thr->clock.set(thr->fast_state.epoch());
thr->fast_synch_epoch = thr->fast_state.epoch();
thr->clock.acq_rel(c);
StatInc(thr, StatSyncAcquire);
Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_stat.cc Mon Mar 24 13:54:20 2014
@@ -74,6 +74,29 @@ void StatOutput(u64 *stat) {
name[StatSyncAcquire] = " acquired ";
name[StatSyncRelease] = " released ";
+ name[StatClockAcquire] = "Clock acquire ";
+ name[StatClockAcquireEmpty] = " empty clock ";
+ name[StatClockAcquireFastRelease] = " fast from release-store ";
+ name[StatClockAcquireLarge] = " contains my tid ";
+ name[StatClockAcquireRepeat] = " repeated (fast) ";
+ name[StatClockAcquireFull] = " full (slow) ";
+ name[StatClockAcquiredSomething] = " acquired something ";
+ name[StatClockRelease] = "Clock release ";
+ name[StatClockReleaseResize] = " resize ";
+ name[StatClockReleaseFast1] = " fast1 ";
+ name[StatClockReleaseFast2] = " fast2 ";
+ name[StatClockReleaseFast3] = " fast3 ";
+ name[StatClockReleaseFull] = " full (slow) ";
+ name[StatClockReleaseAcquired] = " was acquired ";
+ name[StatClockReleaseClearTail] = " clear tail ";
+ name[StatClockReleaseLastOverflow] = " last overflow ";
+ name[StatClockStore] = "Clock release store ";
+ name[StatClockStoreResize] = " resize ";
+ name[StatClockStoreFast] = " fast ";
+ name[StatClockStoreFull] = " slow ";
+ name[StatClockStoreTail] = " clear tail ";
+ name[StatClockAcquireRelease] = "Clock acquire-release ";
+
name[StatAtomic] = "Atomic operations ";
name[StatAtomicLoad] = " Including load ";
name[StatAtomicStore] = " store ";
@@ -150,7 +173,7 @@ void StatOutput(u64 *stat) {
Printf("Statistics:\n");
for (int i = 0; i < StatCnt; i++)
- Printf("%s: %zu\n", name[i], (uptr)stat[i]);
+ Printf("%s: %16zu\n", name[i], (uptr)stat[i]);
}
} // namespace __tsan
Modified: compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h (original)
+++ compiler-rt/trunk/lib/tsan/rtl/tsan_stat.h Mon Mar 24 13:54:20 2014
@@ -69,6 +69,33 @@ enum StatType {
StatSyncAcquire,
StatSyncRelease,
+ // Clocks - acquire.
+ StatClockAcquire,
+ StatClockAcquireEmpty,
+ StatClockAcquireFastRelease,
+ StatClockAcquireLarge,
+ StatClockAcquireRepeat,
+ StatClockAcquireFull,
+ StatClockAcquiredSomething,
+ // Clocks - release.
+ StatClockRelease,
+ StatClockReleaseResize,
+ StatClockReleaseFast1,
+ StatClockReleaseFast2,
+ StatClockReleaseFast3,
+ StatClockReleaseFull,
+ StatClockReleaseAcquired,
+ StatClockReleaseClearTail,
+ StatClockReleaseLastOverflow,
+ // Clocks - release store.
+ StatClockStore,
+ StatClockStoreResize,
+ StatClockStoreFast,
+ StatClockStoreFull,
+ StatClockStoreTail,
+ // Clocks - acquire-release.
+ StatClockAcquireRelease,
+
// Atomics.
StatAtomic,
StatAtomicLoad,
Modified: compiler-rt/trunk/lib/tsan/tests/unit/tsan_clock_test.cc
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/tsan/tests/unit/tsan_clock_test.cc?rev=204656&r1=204655&r2=204656&view=diff
==============================================================================
--- compiler-rt/trunk/lib/tsan/tests/unit/tsan_clock_test.cc (original)
+++ compiler-rt/trunk/lib/tsan/tests/unit/tsan_clock_test.cc Mon Mar 24 13:54:20 2014
@@ -17,101 +17,294 @@
namespace __tsan {
TEST(Clock, VectorBasic) {
- ThreadClock clk;
- CHECK_EQ(clk.size(), 0);
- clk.tick(0);
- CHECK_EQ(clk.size(), 1);
- CHECK_EQ(clk.get(0), 1);
- clk.tick(3);
- CHECK_EQ(clk.size(), 4);
- CHECK_EQ(clk.get(0), 1);
- CHECK_EQ(clk.get(1), 0);
- CHECK_EQ(clk.get(2), 0);
- CHECK_EQ(clk.get(3), 1);
- clk.tick(3);
- CHECK_EQ(clk.get(3), 2);
+ ThreadClock clk(0);
+ ASSERT_EQ(clk.size(), 1);
+ clk.tick();
+ ASSERT_EQ(clk.size(), 1);
+ ASSERT_EQ(clk.get(0), 1);
+ clk.set(3, clk.get(3) + 1);
+ ASSERT_EQ(clk.size(), 4);
+ ASSERT_EQ(clk.get(0), 1);
+ ASSERT_EQ(clk.get(1), 0);
+ ASSERT_EQ(clk.get(2), 0);
+ ASSERT_EQ(clk.get(3), 1);
+ clk.set(3, clk.get(3) + 1);
+ ASSERT_EQ(clk.get(3), 2);
}
TEST(Clock, ChunkedBasic) {
- ThreadClock vector;
+ ThreadClock vector(0);
SyncClock chunked;
- CHECK_EQ(vector.size(), 0);
- CHECK_EQ(chunked.size(), 0);
+ ASSERT_EQ(vector.size(), 1);
+ ASSERT_EQ(chunked.size(), 0);
vector.acquire(&chunked);
- CHECK_EQ(vector.size(), 0);
- CHECK_EQ(chunked.size(), 0);
+ ASSERT_EQ(vector.size(), 1);
+ ASSERT_EQ(chunked.size(), 0);
vector.release(&chunked);
- CHECK_EQ(vector.size(), 0);
- CHECK_EQ(chunked.size(), 0);
+ ASSERT_EQ(vector.size(), 1);
+ ASSERT_EQ(chunked.size(), 1);
vector.acq_rel(&chunked);
- CHECK_EQ(vector.size(), 0);
- CHECK_EQ(chunked.size(), 0);
+ ASSERT_EQ(vector.size(), 1);
+ ASSERT_EQ(chunked.size(), 1);
}
TEST(Clock, AcquireRelease) {
- ThreadClock vector1;
- vector1.tick(100);
+ ThreadClock vector1(100);
+ vector1.tick();
SyncClock chunked;
vector1.release(&chunked);
- CHECK_EQ(chunked.size(), 101);
- ThreadClock vector2;
+ ASSERT_EQ(chunked.size(), 101);
+ ThreadClock vector2(0);
vector2.acquire(&chunked);
- CHECK_EQ(vector2.size(), 101);
- CHECK_EQ(vector2.get(0), 0);
- CHECK_EQ(vector2.get(1), 0);
- CHECK_EQ(vector2.get(99), 0);
- CHECK_EQ(vector2.get(100), 1);
+ ASSERT_EQ(vector2.size(), 101);
+ ASSERT_EQ(vector2.get(0), 0);
+ ASSERT_EQ(vector2.get(1), 0);
+ ASSERT_EQ(vector2.get(99), 0);
+ ASSERT_EQ(vector2.get(100), 1);
}
TEST(Clock, ManyThreads) {
SyncClock chunked;
for (int i = 0; i < 100; i++) {
- ThreadClock vector;
- vector.tick(i);
+ ThreadClock vector(0);
+ vector.tick();
+ vector.set(i, 1);
vector.release(&chunked);
- CHECK_EQ(chunked.size(), i + 1);
+ ASSERT_EQ(i + 1, chunked.size());
vector.acquire(&chunked);
- CHECK_EQ(vector.size(), i + 1);
+ ASSERT_EQ(i + 1, vector.size());
}
- ThreadClock vector;
+
+ for (int i = 0; i < 100; i++)
+ ASSERT_EQ(1, chunked.get(i));
+
+ ThreadClock vector(1);
vector.acquire(&chunked);
- CHECK_EQ(vector.size(), 100);
+ ASSERT_EQ(100, vector.size());
for (int i = 0; i < 100; i++)
- CHECK_EQ(vector.get(i), 1);
+ ASSERT_EQ(1, vector.get(i));
}
TEST(Clock, DifferentSizes) {
{
- ThreadClock vector1;
- vector1.tick(10);
- ThreadClock vector2;
- vector2.tick(20);
+ ThreadClock vector1(10);
+ vector1.tick();
+ ThreadClock vector2(20);
+ vector2.tick();
{
SyncClock chunked;
vector1.release(&chunked);
- CHECK_EQ(chunked.size(), 11);
+ ASSERT_EQ(chunked.size(), 11);
vector2.release(&chunked);
- CHECK_EQ(chunked.size(), 21);
+ ASSERT_EQ(chunked.size(), 21);
}
{
SyncClock chunked;
vector2.release(&chunked);
- CHECK_EQ(chunked.size(), 21);
+ ASSERT_EQ(chunked.size(), 21);
vector1.release(&chunked);
- CHECK_EQ(chunked.size(), 21);
+ ASSERT_EQ(chunked.size(), 21);
}
{
SyncClock chunked;
vector1.release(&chunked);
vector2.acquire(&chunked);
- CHECK_EQ(vector2.size(), 21);
+ ASSERT_EQ(vector2.size(), 21);
}
{
SyncClock chunked;
vector2.release(&chunked);
vector1.acquire(&chunked);
- CHECK_EQ(vector1.size(), 21);
+ ASSERT_EQ(vector1.size(), 21);
+ }
+ }
+}
+
+const int kThreads = 4;
+const int kClocks = 4;
+
+// SimpleSyncClock and SimpleThreadClock implement the same thing as
+// SyncClock and ThreadClock, but in a very simple way.
+struct SimpleSyncClock {
+ u64 clock[kThreads];
+ uptr size;
+
+ SimpleSyncClock() {
+ size = 0;
+ for (uptr i = 0; i < kThreads; i++)
+ clock[i] = 0;
+ }
+
+ bool verify(const SyncClock *other) const {
+ for (uptr i = 0; i < min(size, other->size()); i++) {
+ if (clock[i] != other->get(i))
+ return false;
+ }
+ for (uptr i = min(size, other->size()); i < max(size, other->size()); i++) {
+ if (i < size && clock[i] != 0)
+ return false;
+ if (i < other->size() && other->get(i) != 0)
+ return false;
}
+ return true;
+ }
+};
+
+struct SimpleThreadClock {
+ u64 clock[kThreads];
+ uptr size;
+ unsigned tid;
+
+ explicit SimpleThreadClock(unsigned tid) {
+ this->tid = tid;
+ size = tid + 1;
+ for (uptr i = 0; i < kThreads; i++)
+ clock[i] = 0;
+ }
+
+ void tick() {
+ clock[tid]++;
+ }
+
+ void acquire(const SimpleSyncClock *src) {
+ if (size < src->size)
+ size = src->size;
+ for (uptr i = 0; i < kThreads; i++)
+ clock[i] = max(clock[i], src->clock[i]);
+ }
+
+ void release(SimpleSyncClock *dst) const {
+ if (dst->size < size)
+ dst->size = size;
+ for (uptr i = 0; i < kThreads; i++)
+ dst->clock[i] = max(dst->clock[i], clock[i]);
+ }
+
+ void acq_rel(SimpleSyncClock *dst) {
+ acquire(dst);
+ release(dst);
+ }
+
+ void ReleaseStore(SimpleSyncClock *dst) const {
+ if (dst->size < size)
+ dst->size = size;
+ for (uptr i = 0; i < kThreads; i++)
+ dst->clock[i] = clock[i];
+ }
+
+ bool verify(const ThreadClock *other) const {
+ for (uptr i = 0; i < min(size, other->size()); i++) {
+ if (clock[i] != other->get(i))
+ return false;
+ }
+ for (uptr i = min(size, other->size()); i < max(size, other->size()); i++) {
+ if (i < size && clock[i] != 0)
+ return false;
+ if (i < other->size() && other->get(i) != 0)
+ return false;
+ }
+ return true;
+ }
+};
+
+static bool ClockFuzzer(bool printing) {
+ // Create kThreads thread clocks.
+ SimpleThreadClock *thr0[kThreads];
+ ThreadClock *thr1[kThreads];
+ for (unsigned i = 0; i < kThreads; i++) {
+ thr0[i] = new SimpleThreadClock(i);
+ thr1[i] = new ThreadClock(i);
+ }
+
+ // Create kClocks sync clocks.
+ SimpleSyncClock *sync0[kClocks];
+ SyncClock *sync1[kClocks];
+ for (unsigned i = 0; i < kClocks; i++) {
+ sync0[i] = new SimpleSyncClock();
+ sync1[i] = new SyncClock();
+ }
+
+ // Do N random operations (acquire, release, etc) and compare results
+ // for SimpleThread/SyncClock and real Thread/SyncClock.
+ for (int i = 0; i < 1000000; i++) {
+ unsigned tid = rand() % kThreads;
+ unsigned cid = rand() % kClocks;
+ thr0[tid]->tick();
+ thr1[tid]->tick();
+
+ switch (rand() % 4) {
+ case 0:
+ if (printing)
+ printf("acquire thr%d <- clk%d\n", tid, cid);
+ thr0[tid]->acquire(sync0[cid]);
+ thr1[tid]->acquire(sync1[cid]);
+ break;
+ case 1:
+ if (printing)
+ printf("release thr%d -> clk%d\n", tid, cid);
+ thr0[tid]->release(sync0[cid]);
+ thr1[tid]->release(sync1[cid]);
+ break;
+ case 2:
+ if (printing)
+ printf("acq_rel thr%d <> clk%d\n", tid, cid);
+ thr0[tid]->acq_rel(sync0[cid]);
+ thr1[tid]->acq_rel(sync1[cid]);
+ break;
+ case 3:
+ if (printing)
+ printf("rel_str thr%d >> clk%d\n", tid, cid);
+ thr0[tid]->ReleaseStore(sync0[cid]);
+ thr1[tid]->ReleaseStore(sync1[cid]);
+ break;
+ }
+
+ if (printing) {
+ for (unsigned i = 0; i < kThreads; i++) {
+ printf("thr%d: ", i);
+ thr1[i]->DebugDump(printf);
+ printf("\n");
+ }
+ for (unsigned i = 0; i < kClocks; i++) {
+ printf("clk%d: ", i);
+ sync1[i]->DebugDump(printf);
+ printf("\n");
+ }
+
+ printf("\n");
+ }
+
+ if (!thr0[tid]->verify(thr1[tid]) || !sync0[cid]->verify(sync1[cid])) {
+ if (!printing)
+ return false;
+ printf("differs with model:\n");
+ for (unsigned i = 0; i < kThreads; i++) {
+ printf("thr%d: clock=[", i);
+ for (uptr j = 0; j < thr0[i]->size; j++)
+ printf("%s%llu", j == 0 ? "" : ",", thr0[i]->clock[j]);
+ printf("]\n");
+ }
+ for (unsigned i = 0; i < kClocks; i++) {
+ printf("clk%d: clock=[", i);
+ for (uptr j = 0; j < sync0[i]->size; j++)
+ printf("%s%llu", j == 0 ? "" : ",", sync0[i]->clock[j]);
+ printf("]\n");
+ }
+ return false;
+ }
+ }
+ return true;
+}
+
+TEST(Clock, Fuzzer) {
+ int seed = time(0);
+ printf("seed=%d\n", seed);
+ srand(seed);
+ if (!ClockFuzzer(false)) {
+ // Redo the test with the same seed, but logging operations.
+ srand(seed);
+ ClockFuzzer(true);
+ ASSERT_TRUE(false);
}
}