[compiler-rt] TSan: Support relaxed accesses and fences (PR #142579)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 03:48:43 PDT 2025
https://github.com/rymrg created https://github.com/llvm/llvm-project/pull/142579
This PR adds support for relaxed atomic accesses and fences. Since correct instrumentation increases the runtime overhead, the feature is gated behind a runtime flag.
The PR originates from the paper "Dynamic Robustness Verification against Weak Memory" (DOI: https://doi.org/10.1145/3729277).
The race detection extension is described in Appendix B of the arXiv version: https://doi.org/10.48550/arXiv.2504.15036
Related issue: https://github.com/google/sanitizers/issues/1415
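For illustration only (this example is not part of the patch): a fence-based message-passing idiom like the one below is data-race-free under the C++ memory model, but without fence modelling TSan reports a false race on the plain variable, since atomic_thread_fence previously performed no synchronization in the analysis (the FIXME removed by this patch). A minimal sketch:

#include <atomic>
#include <thread>

int data = 0;                 // plain, non-atomic shared variable
std::atomic<int> flag{0};

void producer() {
  data = 42;
  std::atomic_thread_fence(std::memory_order_release);  // release fence
  flag.store(1, std::memory_order_relaxed);             // relaxed publish
}

void consumer() {
  while (flag.load(std::memory_order_relaxed) != 1) {   // relaxed poll
  }
  std::atomic_thread_fence(std::memory_order_acquire);  // acquire fence
  int v = data;  // synchronized via the fences: no data race
  (void)v;
}

int main() {
  std::thread t1(producer), t2(consumer);
  t1.join();
  t2.join();
  return 0;
}

With this PR, running such a program with the new mode enabled would look roughly like this (flag name taken from the patch below):
clang++ -fsanitize=thread -g example.cpp && TSAN_OPTIONS=correct_race_detection=1 ./a.out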
From 30e25502d6557f75e81ba37c780975b325e2dfde Mon Sep 17 00:00:00 2001
From: rymrg <54061433+rymrg at users.noreply.github.com>
Date: Tue, 3 Jun 2025 13:33:18 +0300
Subject: [PATCH] TSan: Support relaxed accesses and fences
---
compiler-rt/lib/tsan/rtl/tsan_flags.inc | 3 +
.../lib/tsan/rtl/tsan_interface_atomic.cpp | 104 ++++++++++++++----
compiler-rt/lib/tsan/rtl/tsan_rtl.h | 2 +
3 files changed, 86 insertions(+), 23 deletions(-)
diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.inc b/compiler-rt/lib/tsan/rtl/tsan_flags.inc
index 731d776cc893e..a4f240ee8612c 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_flags.inc
+++ b/compiler-rt/lib/tsan/rtl/tsan_flags.inc
@@ -80,3 +80,6 @@ TSAN_FLAG(bool, shared_ptr_interceptor, true,
TSAN_FLAG(bool, print_full_thread_history, false,
"If set, prints thread creation stacks for the threads involved in "
"the report and their ancestors up to the main thread.")
+TSAN_FLAG(bool, correct_race_detection, false,
+          "If set, disable fast-path optimizations and perform correct race "
+          "detection with support for fences and release sequences.")
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
index 527e5a9b4a8d8..96bcd3815304a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_atomic.cpp
@@ -27,6 +27,10 @@
using namespace __tsan;
+static bool correctRaceDetection() {
+ return flags()->correct_race_detection;
+}
+
#if !SANITIZER_GO && __TSAN_HAS_INT128
// Protects emulation of 128-bit atomic operations.
static StaticSpinMutex mutex128;
@@ -227,18 +231,36 @@ namespace {
template <typename T, T (*F)(volatile T *v, T op)>
static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
- if (LIKELY(mo == mo_relaxed))
- return F(a, v);
+ if (!correctRaceDetection()){
+ if (LIKELY(mo == mo_relaxed))
+ return F(a, v);
+ }
SlotLocker locker(thr);
{
auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- RWLock lock(&s->mtx, IsReleaseOrder(mo));
- if (IsAcqRelOrder(mo))
- thr->clock.ReleaseAcquire(&s->clock);
- else if (IsReleaseOrder(mo))
- thr->clock.Release(&s->clock);
- else if (IsAcquireOrder(mo))
- thr->clock.Acquire(s->clock);
+ bool fullLock = correctRaceDetection() || IsReleaseOrder(mo);
+ RWLock lock(&s->mtx, fullLock);
+ if (!correctRaceDetection()){
+ if (IsAcqRelOrder(mo))
+ thr->clock.ReleaseAcquire(&s->clock);
+ else if (IsReleaseOrder(mo))
+ thr->clock.Release(&s->clock);
+ else if (IsAcquireOrder(mo))
+ thr->clock.Acquire(s->clock);
+ } else {
+ if (mo == mo_relaxed){
+ thr->clockA.Acquire(s->clock);
+ thr->clockR.Release(&s->clock);
+ } else if (IsAcqRelOrder(mo)) {
+ thr->clock.ReleaseAcquire(&s->clock);
+ } else if (IsReleaseOrder(mo)) {
+ thr->clockA.Acquire(s->clock);
+ thr->clock.Release(&s->clock);
+ } else if (IsAcquireOrder(mo)) {
+ thr->clock.Acquire(s->clock);
+ thr->clockR.Release(&s->clock);
+ }
+ }
v = F(a, v);
}
if (IsReleaseOrder(mo))
@@ -264,7 +286,7 @@ struct OpLoad {
DCHECK(IsLoadOrder(mo));
// This fast-path is critical for performance.
// Assume the access is atomic.
- if (!IsAcquireOrder(mo)) {
+ if (!correctRaceDetection() && !IsAcquireOrder(mo)) {
MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
kAccessRead | kAccessAtomic);
return NoTsanAtomic(mo, a);
@@ -276,7 +298,11 @@ struct OpLoad {
if (s) {
SlotLocker locker(thr);
ReadLock lock(&s->mtx);
- thr->clock.Acquire(s->clock);
+ if (IsAcquireOrder(mo)) {
+ thr->clock.Acquire(s->clock);
+ } else if (correctRaceDetection()) {
+ thr->clockA.Acquire(s->clock);
+ }
// Re-read under sync mutex because we need a consistent snapshot
// of the value and the clock we acquire.
v = NoTsanAtomic(mo, a);
@@ -309,7 +335,7 @@ struct OpStore {
// Assume the access is atomic.
// Strictly saying even relaxed store cuts off release sequence,
// so must reset the clock.
- if (!IsReleaseOrder(mo)) {
+ if (!correctRaceDetection() && !IsReleaseOrder(mo)) {
NoTsanAtomic(mo, a, v);
return;
}
@@ -317,10 +343,14 @@ struct OpStore {
{
auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
Lock lock(&s->mtx);
- thr->clock.ReleaseStore(&s->clock);
+ if (IsReleaseOrder(mo))
+ thr->clock.ReleaseStore(&s->clock);
+ else if (correctRaceDetection())
+ thr->clockR.ReleaseStore(&s->clock);
NoTsanAtomic(mo, a, v);
}
- IncrementEpoch(thr);
+ if (IsReleaseOrder(mo))
+ IncrementEpoch(thr);
}
};
@@ -441,7 +471,7 @@ struct OpCAS {
MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
kAccessWrite | kAccessAtomic);
- if (LIKELY(mo == mo_relaxed && fmo == mo_relaxed)) {
+ if (LIKELY(!correctRaceDetection() && mo == mo_relaxed && fmo == mo_relaxed)) {
T cc = *c;
T pr = func_cas(a, cc, v);
if (pr == cc)
@@ -454,7 +484,8 @@ struct OpCAS {
bool success;
{
auto s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
- RWLock lock(&s->mtx, release);
+ bool fullLock = correctRaceDetection() || release;
+ RWLock lock(&s->mtx, fullLock);
T cc = *c;
T pr = func_cas(a, cc, v);
success = pr == cc;
@@ -462,12 +493,27 @@ struct OpCAS {
*c = pr;
mo = fmo;
}
- if (success && IsAcqRelOrder(mo))
- thr->clock.ReleaseAcquire(&s->clock);
- else if (success && IsReleaseOrder(mo))
- thr->clock.Release(&s->clock);
- else if (IsAcquireOrder(mo))
- thr->clock.Acquire(s->clock);
+ if (!correctRaceDetection()){
+ if (success && IsAcqRelOrder(mo))
+ thr->clock.ReleaseAcquire(&s->clock);
+ else if (success && IsReleaseOrder(mo))
+ thr->clock.Release(&s->clock);
+ else if (IsAcquireOrder(mo))
+ thr->clock.Acquire(s->clock);
+ } else {
+ if (!IsAcquireOrder(mo)){
+ thr->clockA.Acquire(s->clock);
+ } else {
+ thr->clock.Acquire(s->clock);
+ }
+ if (success){
+ if (!IsReleaseOrder(mo)){
+ thr->clockR.Release(&s->clock);
+ } else {
+ thr->clock.Release(&s->clock);
+ }
+ }
+ }
}
if (success && release)
IncrementEpoch(thr);
@@ -487,7 +533,19 @@ struct OpFence {
static void NoTsanAtomic(morder mo) { __sync_synchronize(); }
static void Atomic(ThreadState *thr, uptr pc, morder mo) {
- // FIXME(dvyukov): not implemented.
+ if (correctRaceDetection()){
+ SlotLocker locker(thr);
+ if (IsAcquireOrder(mo))
+ thr->clock.Acquire(&thr->clockA);
+ if (mo == mo_seq_cst){
+ auto s = ctx->metamap.GetSyncOrCreate(thr, pc, 0, false);
+ thr->clock.ReleaseAcquire(&s->clock);
+ }
+ if (IsReleaseOrder(mo)){
+ thr->clockR.Acquire(&thr->clock);
+ IncrementEpoch(thr);
+ }
+ }
__sync_synchronize();
}
};
diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.h b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
index 4dc5e630c5249..6cd40c9eff07c 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.h
@@ -177,6 +177,8 @@ struct alignas(SANITIZER_CACHE_LINE_SIZE) ThreadState {
atomic_sint32_t pending_signals;
VectorClock clock;
+ VectorClock clockR;
+ VectorClock clockA;
// This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read.
// We do not distinguish beteween ignoring reads and writes